From 43dff98b022ded593e73c3784bac03bc9fc7ec55 Mon Sep 17 00:00:00 2001
From: Shaun Pereira <spereira@tusc.com.au>
Date: Fri, 28 Apr 2006 12:00:17 -0700
Subject: [X25]: fix for spinlock recurse and spinlock lockup with timer
 handler

When the sk_timer function x25_heartbeat_expiry() is called by the
kernel in a running/terminating process, spinlock-recursion and
spinlock-lockup locks up the kernel.  This has happened with testing
on some distro's and the patch below fixed it.

Signed-off-by: Shaun Pereira <spereira@tusc.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c
index 0a92e1d..71ff308 100644
--- a/net/x25/x25_timer.c
+++ b/net/x25/x25_timer.c
@@ -114,8 +114,9 @@ static void x25_heartbeat_expiry(unsigned long param)
 			if (sock_flag(sk, SOCK_DESTROY) ||
 			    (sk->sk_state == TCP_LISTEN &&
 			     sock_flag(sk, SOCK_DEAD))) {
+				bh_unlock_sock(sk);
 				x25_destroy_socket(sk);
-				goto unlock;
+				return;
 			}
 			break;
 
@@ -128,7 +129,6 @@ static void x25_heartbeat_expiry(unsigned long param)
 	}
 restart_heartbeat:
 	x25_start_heartbeat(sk);
-unlock:
 	bh_unlock_sock(sk);
 }
 
-- 
cgit v0.10.2


From 89bbb0a361cdae50eec863f10a876b58abf7d312 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Fri, 28 Apr 2006 12:11:36 -0700
Subject: [PKT_SCHED] netem: fix loss

The following one line fix is needed to make loss function of
netem work right when doing loss on the local host.
Otherwise, higher layers just recover.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 7228d30..5a4a4d0 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -167,7 +167,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (count == 0) {
 		sch->qstats.drops++;
 		kfree_skb(skb);
-		return NET_XMIT_DROP;
+		return NET_XMIT_BYPASS;
 	}
 
 	/*
-- 
cgit v0.10.2


From 09493abfdbe8144ce0805efac7a8a3c4eb869c12 Mon Sep 17 00:00:00 2001
From: Soyoung Park <speattle@yahoo.com>
Date: Fri, 28 Apr 2006 14:59:44 -0700
Subject: [NETLINK]: cleanup unused macro in net/netlink/af_netlink.c

1 line removal, of unused macro.
ran 'egrep -r' from linux-2.6.16/ for Nprintk and
didn't see it anywhere else but here, in #define...

Signed-off-by: Soyoung Park <speattle@yahoo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2a233ff..b8ea61f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -61,7 +61,6 @@
 #include <net/scm.h>
 #include <net/netlink.h>
 
-#define Nprintk(a...)
 #define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
 
 struct netlink_sock {
-- 
cgit v0.10.2


From a536e0778781c1f2ce38cf8e1d82ce258f0193c1 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 28 Apr 2006 15:19:17 -0700
Subject: [IPV4]: inet_init() -> fs_initcall

Convert inet_init to an fs_initcall to make sure its called before any
device driver's initcall.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index dc206f1..0a27745 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1257,7 +1257,7 @@ out_unregister_udp_proto:
 	goto out;
 }
 
-module_init(inet_init);
+fs_initcall(inet_init);
 
 /* ------------------------------------------------------------------------ */
 
-- 
cgit v0.10.2


From da753beaeb1446aa87bcca7e8a0026633a8914f0 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <mita@miraclelinux.com>
Date: Fri, 28 Apr 2006 15:21:23 -0700
Subject: [NET]: use hlist_unhashed()

Use hlist_unhashed() rather than accessing inside data structure.

Signed-off-by: Akinobu Mita <mita@miraclelinux.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/list.h b/include/linux/list.h
index 67258b4..76f0571 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -619,7 +619,7 @@ static inline void hlist_del_rcu(struct hlist_node *n)
 
 static inline void hlist_del_init(struct hlist_node *n)
 {
-	if (n->pprev)  {
+	if (!hlist_unhashed(n)) {
 		__hlist_del(n);
 		INIT_HLIST_NODE(n);
 	}
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 1da294c..e837f98 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -150,7 +150,7 @@ static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
 
 static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw)
 {
-	return tw->tw_death_node.pprev != NULL;
+	return !hlist_unhashed(&tw->tw_death_node);
 }
 
 static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw)
diff --git a/include/net/sock.h b/include/net/sock.h
index ff8b0da..c9fad6f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -279,7 +279,7 @@ static inline int sk_unhashed(const struct sock *sk)
 
 static inline int sk_hashed(const struct sock *sk)
 {
-	return sk->sk_node.pprev != NULL;
+	return !sk_unhashed(sk);
 }
 
 static __inline__ void sk_node_init(struct hlist_node *node)
-- 
cgit v0.10.2


From a76e07acd0de635122c5e60ccd5e55f9d5082391 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 28 Apr 2006 15:22:13 -0700
Subject: [IPSEC]: Fix IP ID selection

I was looking through the xfrm input/output code in order to abstract
out the address family specific encapsulation/decapsulation code.  During
that process I found this bug in the IP ID selection code in xfrm4_output.c.

At that point dst is still the xfrm_dst for the current SA which
represents an internal flow as far as the IPsec tunnel is concerned.
Since the IP ID is going to sit on the outside of the encapsulated
packet, we obviously want the external flow which is just dst->child.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 32ad229..4ef8efa 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -62,7 +62,7 @@ static void xfrm4_encap(struct sk_buff *skb)
 	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
 		0 : (iph->frag_off & htons(IP_DF));
 	if (!top_iph->frag_off)
-		__ip_select_ident(top_iph, dst, 0);
+		__ip_select_ident(top_iph, dst->child, 0);
 
 	top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT);
 
-- 
cgit v0.10.2


From 8dff7c29707b7514043539f5ab5e0a6eb7bd9dcd Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 28 Apr 2006 15:23:59 -0700
Subject: [XFRM]: fix softirq-unsafe xfrm typemap->lock use

xfrm typemap->lock may be used in softirq context, so all write_lock()
uses must be softirq-safe.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c3725fe..e5b0afc 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -57,12 +57,12 @@ int xfrm_register_type(struct xfrm_type *type, unsigned short family)
 		return -EAFNOSUPPORT;
 	typemap = afinfo->type_map;
 
-	write_lock(&typemap->lock);
+	write_lock_bh(&typemap->lock);
 	if (likely(typemap->map[type->proto] == NULL))
 		typemap->map[type->proto] = type;
 	else
 		err = -EEXIST;
-	write_unlock(&typemap->lock);
+	write_unlock_bh(&typemap->lock);
 	xfrm_policy_put_afinfo(afinfo);
 	return err;
 }
@@ -78,12 +78,12 @@ int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
 		return -EAFNOSUPPORT;
 	typemap = afinfo->type_map;
 
-	write_lock(&typemap->lock);
+	write_lock_bh(&typemap->lock);
 	if (unlikely(typemap->map[type->proto] != type))
 		err = -ENOENT;
 	else
 		typemap->map[type->proto] = NULL;
-	write_unlock(&typemap->lock);
+	write_unlock_bh(&typemap->lock);
 	xfrm_policy_put_afinfo(afinfo);
 	return err;
 }
-- 
cgit v0.10.2


From 83de47cd0c5738105f40e65191b0761dfa7431ac Mon Sep 17 00:00:00 2001
From: Hua Zhong <hzhong@gmail.com>
Date: Fri, 28 Apr 2006 15:26:50 -0700
Subject: [TCP]: Fix unlikely usage in tcp_transmit_skb()

The following unlikely should be replaced by likely because the
condition happens every time unless there is a hard error to transmit
a packet.

Signed-off-by: Hua Zhong <hzhong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a28ae59..743016b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -465,7 +465,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	TCP_INC_STATS(TCP_MIB_OUTSEGS);
 
 	err = icsk->icsk_af_ops->queue_xmit(skb, 0);
-	if (unlikely(err <= 0))
+	if (likely(err <= 0))
 		return err;
 
 	tcp_enter_cwr(sk);
-- 
cgit v0.10.2


From f3111502c065d32dcb021f55e30398aaebd8fb0f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 28 Apr 2006 15:30:03 -0700
Subject: [XFRM]: fix incorrect xfrm_state_afinfo_lock use

xfrm_state_afinfo_lock can be read-locked from bh context, so take it
in a bh-safe manner in xfrm_state_register_afinfo() and
xfrm_state_unregister_afinfo(). Found by the lock validator.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 3dc3e1f..93a2f36 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1061,7 +1061,7 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
 		return -EINVAL;
 	if (unlikely(afinfo->family >= NPROTO))
 		return -EAFNOSUPPORT;
-	write_lock(&xfrm_state_afinfo_lock);
+	write_lock_bh(&xfrm_state_afinfo_lock);
 	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
 		err = -ENOBUFS;
 	else {
@@ -1069,7 +1069,7 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
 		afinfo->state_byspi = xfrm_state_byspi;
 		xfrm_state_afinfo[afinfo->family] = afinfo;
 	}
-	write_unlock(&xfrm_state_afinfo_lock);
+	write_unlock_bh(&xfrm_state_afinfo_lock);
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_register_afinfo);
@@ -1081,7 +1081,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
 		return -EINVAL;
 	if (unlikely(afinfo->family >= NPROTO))
 		return -EAFNOSUPPORT;
-	write_lock(&xfrm_state_afinfo_lock);
+	write_lock_bh(&xfrm_state_afinfo_lock);
 	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
 		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
 			err = -EINVAL;
@@ -1091,7 +1091,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
 			afinfo->state_bydst = NULL;
 		}
 	}
-	write_unlock(&xfrm_state_afinfo_lock);
+	write_unlock_bh(&xfrm_state_afinfo_lock);
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
-- 
cgit v0.10.2


From e959d8121fcbfee6ec049cc617e9423d1799f2e4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 28 Apr 2006 15:32:29 -0700
Subject: [XFRM]: fix incorrect xfrm_policy_afinfo_lock use

xfrm_policy_afinfo_lock can be taken in bh context, at:

 [<c013fe1a>] lockdep_acquire_read+0x54/0x6d
 [<c0f6e024>] _read_lock+0x15/0x22
 [<c0e8fcdb>] xfrm_policy_get_afinfo+0x1a/0x3d
 [<c0e8fd10>] xfrm_decode_session+0x12/0x32
 [<c0e66094>] ip_route_me_harder+0x1c9/0x25b
 [<c0e770d3>] ip_nat_local_fn+0x94/0xad
 [<c0e2bbc8>] nf_iterate+0x2e/0x7a
 [<c0e2bc50>] nf_hook_slow+0x3c/0x9e
 [<c0e3a342>] ip_push_pending_frames+0x2de/0x3a7
 [<c0e53e19>] icmp_push_reply+0x136/0x141
 [<c0e543fb>] icmp_reply+0x118/0x1a0
 [<c0e54581>] icmp_echo+0x44/0x46
 [<c0e53fad>] icmp_rcv+0x111/0x138
 [<c0e36764>] ip_local_deliver+0x150/0x1f9
 [<c0e36be2>] ip_rcv+0x3d5/0x413
 [<c0df760f>] netif_receive_skb+0x337/0x356
 [<c0df76c3>] process_backlog+0x95/0x110
 [<c0df5fe2>] net_rx_action+0xa5/0x16d
 [<c012d8a7>] __do_softirq+0x6f/0xe6
 [<c0105ec2>] do_softirq+0x52/0xb1

this means that all write-locking of xfrm_policy_afinfo_lock must be
bh-safe. This patch fixes xfrm_policy_register_afinfo() and
xfrm_policy_unregister_afinfo().

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e5b0afc..b469c8b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1251,7 +1251,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 		return -EINVAL;
 	if (unlikely(afinfo->family >= NPROTO))
 		return -EAFNOSUPPORT;
-	write_lock(&xfrm_policy_afinfo_lock);
+	write_lock_bh(&xfrm_policy_afinfo_lock);
 	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
 		err = -ENOBUFS;
 	else {
@@ -1268,7 +1268,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 			afinfo->garbage_collect = __xfrm_garbage_collect;
 		xfrm_policy_afinfo[afinfo->family] = afinfo;
 	}
-	write_unlock(&xfrm_policy_afinfo_lock);
+	write_unlock_bh(&xfrm_policy_afinfo_lock);
 	return err;
 }
 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
@@ -1280,7 +1280,7 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
 		return -EINVAL;
 	if (unlikely(afinfo->family >= NPROTO))
 		return -EAFNOSUPPORT;
-	write_lock(&xfrm_policy_afinfo_lock);
+	write_lock_bh(&xfrm_policy_afinfo_lock);
 	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
 		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
 			err = -EINVAL;
@@ -1294,7 +1294,7 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
 			afinfo->garbage_collect = NULL;
 		}
 	}
-	write_unlock(&xfrm_policy_afinfo_lock);
+	write_unlock_bh(&xfrm_policy_afinfo_lock);
 	return err;
 }
 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
-- 
cgit v0.10.2


From c302e6d54e468ee9b1e18152e2e9da06953f3473 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 28 Apr 2006 15:59:15 -0700
Subject: [IPV6]: Fix race in route selection.

We eliminated rt6_dflt_lock (to protect default router pointer)
at 2.6.17-rc1, and introduced rt6_select() for general router selection.
The function is called in the context of rt6_lock read-lock held,
but this means, we have some race conditions when we do round-robin.

Signed-off-by; YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7907874..0190e39 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -317,7 +317,7 @@ static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
 		  __FUNCTION__, head, head ? *head : NULL, oif);
 
 	for (rt = rt0, metric = rt0->rt6i_metric;
-	     rt && rt->rt6i_metric == metric;
+	     rt && rt->rt6i_metric == metric && (!last || rt != rt0);
 	     rt = rt->u.next) {
 		int m;
 
@@ -343,9 +343,12 @@ static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
 	    (strict & RT6_SELECT_F_REACHABLE) &&
 	    last && last != rt0) {
 		/* no entries matched; do round-robin */
+		static spinlock_t lock = SPIN_LOCK_UNLOCKED;
+		spin_lock(&lock);
 		*head = rt0->u.next;
 		rt0->u.next = last->u.next;
 		last->u.next = rt0;
+		spin_unlock(&lock);
 	}
 
 	RT6_TRACE("%s() => %p, score=%d\n",
-- 
cgit v0.10.2


From c8e1e82b6a97ad44517517aa58b7b794ead0bf33 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Sat, 29 Apr 2006 18:55:17 -0700
Subject: [TG3]: Call netif_carrier_off() during phy reset

Add netif_carrier_off() call during tg3_phy_reset(). This is needed
to properly track the netif_carrier state in cases where we do a
PHY reset with interrupts disabled. The SerDes code will not run
properly if the netif_carrier state is wrong.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 73e271e..a28accb 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -974,6 +974,8 @@ static int tg3_phy_reset_5703_4_5(struct tg3 *tp)
 	return err;
 }
 
+static void tg3_link_report(struct tg3 *);
+
 /* This will reset the tigon3 PHY if there is no valid
  * link unless the FORCE argument is non-zero.
  */
@@ -987,6 +989,11 @@ static int tg3_phy_reset(struct tg3 *tp)
 	if (err != 0)
 		return -EBUSY;
 
+	if (netif_running(tp->dev) && netif_carrier_ok(tp->dev)) {
+		netif_carrier_off(tp->dev);
+		tg3_link_report(tp);
+	}
+
 	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 ||
 	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 ||
 	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) {
-- 
cgit v0.10.2


From c424cb249dae10fb7f118f89091f1329b62b92f4 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Sat, 29 Apr 2006 18:56:34 -0700
Subject: [TG3]: Add phy workaround

Add some PHY workaround code to reduce jitter on some PHYs.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index a28accb..a307340 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -1030,6 +1030,12 @@ out:
 		tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x14e2);
 		tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0400);
 	}
+	else if (tp->tg3_flags2 & TG3_FLG2_PHY_JITTER_BUG) {
+		tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0c00);
+		tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x000a);
+		tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x010b);
+		tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0400);
+	}
 	/* Set Extended packet length bit (bit 14) on all chips that */
 	/* support jumbo frames */
 	if ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5401) {
@@ -10360,10 +10366,13 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
 	if (tp->pci_chip_rev_id == CHIPREV_ID_5704_A0)
 		tp->tg3_flags2 |= TG3_FLG2_PHY_5704_A0_BUG;
 
-	if ((tp->tg3_flags2 & TG3_FLG2_5705_PLUS) &&
-	    (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5755) &&
-	    (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5787))
-		tp->tg3_flags2 |= TG3_FLG2_PHY_BER_BUG;
+	if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS) {
+		if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 ||
+		    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787)
+			tp->tg3_flags2 |= TG3_FLG2_PHY_JITTER_BUG;
+		else
+			tp->tg3_flags2 |= TG3_FLG2_PHY_BER_BUG;
+	}
 
 	tp->coalesce_mode = 0;
 	if (GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5700_AX &&
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 8c8b987..0e29b88 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2215,6 +2215,7 @@ struct tg3 {
 #define TG3_FLG2_HW_TSO_2		0x08000000
 #define TG3_FLG2_HW_TSO			(TG3_FLG2_HW_TSO_1 | TG3_FLG2_HW_TSO_2)
 #define TG3_FLG2_1SHOT_MSI		0x10000000
+#define TG3_FLG2_PHY_JITTER_BUG		0x20000000
 
 	u32				split_mode_max_reqs;
 #define SPLIT_MODE_5704_MAX_REQ		3
-- 
cgit v0.10.2


From 58712ef9f2cbaaeac5b32ac11810a4bbd0eeacc5 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Sat, 29 Apr 2006 18:58:01 -0700
Subject: [TG3]: Reset chip when changing MAC address

Do the full chip reset when changing MAC address if ASF is enabled.

ASF sometimes uses a different MAC address than the driver. Without
the reset, the ASF MAC address may be overwritten when the driver's
MAC address is changed.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index a307340..0ccfb63 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -5732,9 +5732,23 @@ static int tg3_set_mac_addr(struct net_device *dev, void *p)
 	if (!netif_running(dev))
 		return 0;
 
-	spin_lock_bh(&tp->lock);
-	__tg3_set_mac_addr(tp);
-	spin_unlock_bh(&tp->lock);
+	if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) {
+		/* Reset chip so that ASF can re-init any MAC addresses it
+		 * needs.
+		 */
+		tg3_netif_stop(tp);
+		tg3_full_lock(tp, 1);
+
+		tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
+		tg3_init_hw(tp);
+
+		tg3_netif_start(tp);
+		tg3_full_unlock(tp);
+	} else {
+		spin_lock_bh(&tp->lock);
+		__tg3_set_mac_addr(tp);
+		spin_unlock_bh(&tp->lock);
+	}
 
 	return 0;
 }
-- 
cgit v0.10.2


From 8e7a22e3eb49042c048f24bab40cf5cf8915487d Mon Sep 17 00:00:00 2001
From: Gary Zambrano <zambrano@broadcom.com>
Date: Sat, 29 Apr 2006 18:59:13 -0700
Subject: [TG3]: Add reset_phy parameter to chip reset functions

Add a reset_phy parameter to tg3_reset_hw() and tg3_init_hw(). With
the full chip reset during MAC address change, the automatic PHY reset
during chip reset will cause a link down and bonding will not work
properly as a result. With this reset_phy parameter, we can do a chip
reset without link down when changing MAC address or MTU.

Signed-off-by: Gary Zambrano <zambrano@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 0ccfb63..97e27d8 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3544,7 +3544,7 @@ static irqreturn_t tg3_test_isr(int irq, void *dev_id,
 	return IRQ_RETVAL(0);
 }
 
-static int tg3_init_hw(struct tg3 *);
+static int tg3_init_hw(struct tg3 *, int);
 static int tg3_halt(struct tg3 *, int, int);
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -3580,7 +3580,7 @@ static void tg3_reset_task(void *_data)
 	tp->tg3_flags2 &= ~TG3_FLG2_RESTART_TIMER;
 
 	tg3_halt(tp, RESET_KIND_SHUTDOWN, 0);
-	tg3_init_hw(tp);
+	tg3_init_hw(tp, 1);
 
 	tg3_netif_start(tp);
 
@@ -4055,7 +4055,7 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
 
 	tg3_set_mtu(dev, tp, new_mtu);
 
-	tg3_init_hw(tp);
+	tg3_init_hw(tp, 0);
 
 	tg3_netif_start(tp);
 
@@ -5740,7 +5740,7 @@ static int tg3_set_mac_addr(struct net_device *dev, void *p)
 		tg3_full_lock(tp, 1);
 
 		tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
-		tg3_init_hw(tp);
+		tg3_init_hw(tp, 0);
 
 		tg3_netif_start(tp);
 		tg3_full_unlock(tp);
@@ -5798,7 +5798,7 @@ static void __tg3_set_coalesce(struct tg3 *tp, struct ethtool_coalesce *ec)
 }
 
 /* tp->lock is held. */
-static int tg3_reset_hw(struct tg3 *tp)
+static int tg3_reset_hw(struct tg3 *tp, int reset_phy)
 {
 	u32 val, rdmac_mode;
 	int i, err, limit;
@@ -5813,7 +5813,7 @@ static int tg3_reset_hw(struct tg3 *tp)
 		tg3_abort_hw(tp, 1);
 	}
 
-	if (tp->tg3_flags2 & TG3_FLG2_MII_SERDES)
+	if ((tp->tg3_flags2 & TG3_FLG2_MII_SERDES) && reset_phy)
 		tg3_phy_reset(tp);
 
 	err = tg3_chip_reset(tp);
@@ -6354,7 +6354,7 @@ static int tg3_reset_hw(struct tg3 *tp)
 		tw32(GRC_LOCAL_CTRL, tp->grc_local_ctrl);
 	}
 
-	err = tg3_setup_phy(tp, 1);
+	err = tg3_setup_phy(tp, reset_phy);
 	if (err)
 		return err;
 
@@ -6427,7 +6427,7 @@ static int tg3_reset_hw(struct tg3 *tp)
 /* Called at device open time to get the chip ready for
  * packet processing.  Invoked with tp->lock held.
  */
-static int tg3_init_hw(struct tg3 *tp)
+static int tg3_init_hw(struct tg3 *tp, int reset_phy)
 {
 	int err;
 
@@ -6440,7 +6440,7 @@ static int tg3_init_hw(struct tg3 *tp)
 
 	tw32(TG3PCI_MEM_WIN_BASE_ADDR, 0);
 
-	err = tg3_reset_hw(tp);
+	err = tg3_reset_hw(tp, reset_phy);
 
 out:
 	return err;
@@ -6710,7 +6710,7 @@ static int tg3_test_msi(struct tg3 *tp)
 	tg3_full_lock(tp, 1);
 
 	tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
-	err = tg3_init_hw(tp);
+	err = tg3_init_hw(tp, 1);
 
 	tg3_full_unlock(tp);
 
@@ -6775,7 +6775,7 @@ static int tg3_open(struct net_device *dev)
 
 	tg3_full_lock(tp, 0);
 
-	err = tg3_init_hw(tp);
+	err = tg3_init_hw(tp, 1);
 	if (err) {
 		tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
 		tg3_free_rings(tp);
@@ -7866,7 +7866,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e
 
 	if (netif_running(dev)) {
 		tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
-		tg3_init_hw(tp);
+		tg3_init_hw(tp, 1);
 		tg3_netif_start(tp);
 	}
 
@@ -7911,7 +7911,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam
 
 	if (netif_running(dev)) {
 		tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
-		tg3_init_hw(tp);
+		tg3_init_hw(tp, 1);
 		tg3_netif_start(tp);
 	}
 
@@ -8549,7 +8549,7 @@ static int tg3_test_loopback(struct tg3 *tp)
 	if (!netif_running(tp->dev))
 		return TG3_LOOPBACK_FAILED;
 
-	tg3_reset_hw(tp);
+	tg3_reset_hw(tp, 1);
 
 	if (tg3_run_loopback(tp, TG3_MAC_LOOPBACK))
 		err |= TG3_MAC_LOOPBACK_FAILED;
@@ -8623,7 +8623,7 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest,
 		tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
 		if (netif_running(dev)) {
 			tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE;
-			tg3_init_hw(tp);
+			tg3_init_hw(tp, 1);
 			tg3_netif_start(tp);
 		}
 
@@ -11599,7 +11599,7 @@ static int tg3_suspend(struct pci_dev *pdev, pm_message_t state)
 		tg3_full_lock(tp, 0);
 
 		tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE;
-		tg3_init_hw(tp);
+		tg3_init_hw(tp, 1);
 
 		tp->timer.expires = jiffies + tp->timer_offset;
 		add_timer(&tp->timer);
@@ -11633,7 +11633,7 @@ static int tg3_resume(struct pci_dev *pdev)
 	tg3_full_lock(tp, 0);
 
 	tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE;
-	tg3_init_hw(tp);
+	tg3_init_hw(tp, 1);
 
 	tp->timer.expires = jiffies + tp->timer_offset;
 	add_timer(&tp->timer);
-- 
cgit v0.10.2


From f6d9a2565bc754043f43b8f51b19f77ee0269411 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Sat, 29 Apr 2006 19:00:24 -0700
Subject: [TG3]: Fix bug in nvram write

Fix bug in nvram write function. If the starting nvram address offset
happens to be the last dword of the page, the NVRAM_CMD_LAST bit will
not get set in the existing code. This patch fixes the bug by changing
the "else if" to "if" so that the last dword condition always gets
checked.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 97e27d8..0779544 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -9404,7 +9404,7 @@ static int tg3_nvram_write_block_buffered(struct tg3 *tp, u32 offset, u32 len,
 
 	        if ((page_off == 0) || (i == 0))
 			nvram_cmd |= NVRAM_CMD_FIRST;
-		else if (page_off == (tp->nvram_pagesize - 4))
+		if (page_off == (tp->nvram_pagesize - 4))
 			nvram_cmd |= NVRAM_CMD_LAST;
 
 		if (i == (len - 4))
-- 
cgit v0.10.2


From b276764091cf241cf0b31e8cb76c67dcf9a9c1d8 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Sat, 29 Apr 2006 19:01:06 -0700
Subject: [TG3]: Update version and reldate

Update version to 3.57.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 0779544..beeb612 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -69,8 +69,8 @@
 
 #define DRV_MODULE_NAME		"tg3"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"3.56"
-#define DRV_MODULE_RELDATE	"Apr 1, 2006"
+#define DRV_MODULE_VERSION	"3.57"
+#define DRV_MODULE_RELDATE	"Apr 28, 2006"
 
 #define TG3_DEF_MAC_MODE	0
 #define TG3_DEF_RX_MODE		0
-- 
cgit v0.10.2


From cd95842ca0ffb0e3df3b459832a60f9f4544ed9e Mon Sep 17 00:00:00 2001
From: Markus Gutschke <markus@google.com>
Date: Sun, 30 Apr 2006 15:34:29 +0100
Subject: [ARM] 3486/1: Mark memory as clobbered by the ARM _syscallX() macros

Patch from Markus Gutschke

In order to prevent gcc from making incorrect optimizations, all asm()
statements that define system calls should report memory as
clobbered. Recent versions of the headers for i386 have been changed
accordingly, but the ARM headers are still defective.

This patch fixes the bug tracked at
http://bugzilla.kernel.org/show_bug.cgi?id=6205

Signed-off-by: Markus Gutschke <markus@google.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/input/touchscreen/corgi_ts.c b/drivers/input/touchscreen/corgi_ts.c
index 1042987..5013703 100644
--- a/drivers/input/touchscreen/corgi_ts.c
+++ b/drivers/input/touchscreen/corgi_ts.c
@@ -17,7 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <asm/irq.h>
+//#include <asm/irq.h>
 
 #include <asm/arch/sharpsl.h>
 #include <asm/arch/hardware.h>
diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h
index ee8dfea..26f2f48 100644
--- a/include/asm-arm/unistd.h
+++ b/include/asm-arm/unistd.h
@@ -410,7 +410,8 @@ type name(void) {							\
   __asm__ __volatile__ (						\
   __syscall(name)							\
 	: "=r" (__res_r0)						\
-	: __SYS_REG_LIST() );						\
+	: __SYS_REG_LIST()						\
+	: "memory" );							\
   __res = __res_r0;							\
   __syscall_return(type,__res);						\
 }
@@ -424,7 +425,8 @@ type name(type1 arg1) { 						\
   __asm__ __volatile__ (						\
   __syscall(name)							\
 	: "=r" (__res_r0)						\
-	: __SYS_REG_LIST( "0" (__r0) ) );				\
+	: __SYS_REG_LIST( "0" (__r0) )					\
+	: "memory" );							\
   __res = __res_r0;							\
   __syscall_return(type,__res);						\
 }
@@ -439,7 +441,8 @@ type name(type1 arg1,type2 arg2) {					\
   __asm__ __volatile__ (						\
   __syscall(name)							\
 	: "=r" (__res_r0)						\
-	: __SYS_REG_LIST( "0" (__r0), "r" (__r1) ) );			\
+	: __SYS_REG_LIST( "0" (__r0), "r" (__r1) )			\
+	: "memory" );							\
   __res = __res_r0;							\
   __syscall_return(type,__res);						\
 }
@@ -456,7 +459,8 @@ type name(type1 arg1,type2 arg2,type3 arg3) {				\
   __asm__ __volatile__ (						\
   __syscall(name)							\
 	: "=r" (__res_r0)						\
-	: __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2) ) );	\
+	: __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2) )		\
+	: "memory" );							\
   __res = __res_r0;							\
   __syscall_return(type,__res);						\
 }
@@ -474,7 +478,8 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {		\
   __asm__ __volatile__ (						\
   __syscall(name)							\
 	: "=r" (__res_r0)						\
-	: __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2), "r" (__r3) ) ); \
+	: __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2), "r" (__r3) ) \
+	: "memory" );							\
   __res = __res_r0;							\
   __syscall_return(type,__res);						\
 }
@@ -494,7 +499,8 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) {	\
   __syscall(name)							\
 	: "=r" (__res_r0)						\
 	: __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2),		\
-			  "r" (__r3), "r" (__r4) ) );			\
+			  "r" (__r3), "r" (__r4) )			\
+	: "memory" );							\
   __res = __res_r0;							\
   __syscall_return(type,__res);						\
 }
@@ -514,7 +520,8 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6
   __syscall(name)							\
 	: "=r" (__res_r0)						\
 	: __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2),		\
-			  "r" (__r3), "r" (__r4), "r" (__r5) ) );	\
+			  "r" (__r3), "r" (__r4), "r" (__r5) )		\
+	: "memory" );							\
   __res = __res_r0;							\
   __syscall_return(type,__res);						\
 }
-- 
cgit v0.10.2


From 76bbb00288e569e7bd9ec18f45e4f814352260dd Mon Sep 17 00:00:00 2001
From: Deepak Saxena <dsaxena@plexity.net>
Date: Sun, 30 Apr 2006 15:34:29 +0100
Subject: [ARM] 3487/1: IXP4xx: Support non-PCI systems

Patch from Deepak Saxena

This patch allows for the addition of IXP4xx systems that do not make
use of the PCI interface by moving the CONFIG_PCI symbol selection to
be platform-specific instead of for all of IXP4xx. If at least one machine
with PCI support is built, the PCI code will be compiled in, but when
building !PCI, this will drastically shrink the kernel size.

Signed-off-by: Deepak Saxena <dsaxena@plexity.net>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 1dbf6dd..08b7cc9 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -150,8 +150,6 @@ config ARCH_IOP3XX
 
 config ARCH_IXP4XX
 	bool "IXP4xx-based"
-	select DMABOUNCE
-	select PCI
 	help
 	  Support for Intel's IXP4XX (XScale) family of processors.
 
diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig
index 5bf50a2..2a39f9e 100644
--- a/arch/arm/mach-ixp4xx/Kconfig
+++ b/arch/arm/mach-ixp4xx/Kconfig
@@ -11,6 +11,7 @@ comment "IXP4xx Platforms"
 config MACH_NSLU2
 	bool
 	prompt "Linksys NSLU2"
+	select PCI
 	help
 	  Say 'Y' here if you want your kernel to support Linksys's
 	  NSLU2 NAS device. For more information on this platform,
@@ -18,6 +19,7 @@ config MACH_NSLU2
 
 config ARCH_AVILA
 	bool "Avila"
+	select PCI
 	help
 	  Say 'Y' here if you want your kernel to support the Gateworks
 	  Avila Network Platform. For more information on this platform,
@@ -25,6 +27,7 @@ config ARCH_AVILA
 
 config ARCH_ADI_COYOTE
 	bool "Coyote"
+	select PCI
 	help
 	  Say 'Y' here if you want your kernel to support the ADI 
 	  Engineering Coyote Gateway Reference Platform. For more
@@ -32,6 +35,7 @@ config ARCH_ADI_COYOTE
 
 config ARCH_IXDP425
 	bool "IXDP425"
+	select PCI
 	help
 	  Say 'Y' here if you want your kernel to support Intel's 
 	  IXDP425 Development Platform (Also known as Richfield).  
@@ -39,6 +43,7 @@ config ARCH_IXDP425
 
 config MACH_IXDPG425
 	bool "IXDPG425"
+	select PCI
 	help
 	  Say 'Y' here if you want your kernel to support Intel's
 	  IXDPG425 Development Platform (Also known as Montajade).
@@ -46,6 +51,7 @@ config MACH_IXDPG425
 
 config MACH_IXDP465
 	bool "IXDP465"
+	select PCI
 	help
 	  Say 'Y' here if you want your kernel to support Intel's
 	  IXDP465 Development Platform (Also known as BMP).
@@ -72,6 +78,7 @@ config ARCH_PRPMC1100
 config MACH_NAS100D
 	bool
 	prompt "NAS100D"
+	select PCI
 	help
 	  Say 'Y' here if you want your kernel to support Iomega's
 	  NAS 100d device. For more information on this platform,
@@ -96,6 +103,7 @@ config CPU_IXP46X
 config MACH_GTWX5715
 	bool "Gemtek WX5715 (Linksys WRV54G)"
 	depends on ARCH_IXP4XX
+	select PCI
 	help
 		This board is currently inside the Linksys WRV54G Gateways.
 
@@ -110,11 +118,16 @@ config MACH_GTWX5715
 		"High Speed" UART is n/c (as far as I can tell)
 		20 Pin ARM/Xscale JTAG interface on J2
 
-
 comment "IXP4xx Options"
 
+config DMABOUNCE
+	bool
+	default y
+	depends on PCI
+
 config IXP4XX_INDIRECT_PCI
 	bool "Use indirect PCI memory access"
+	depends on PCI
 	help
           IXP4xx provides two methods of accessing PCI memory space:
 
diff --git a/arch/arm/mach-ixp4xx/Makefile b/arch/arm/mach-ixp4xx/Makefile
index 0471044..5a4aaa0 100644
--- a/arch/arm/mach-ixp4xx/Makefile
+++ b/arch/arm/mach-ixp4xx/Makefile
@@ -2,8 +2,9 @@
 # Makefile for the linux kernel.
 #
 
-obj-y	+= common.o common-pci.o 
+obj-y	+= common.o
 
+obj-$(CONFIG_PCI)		+= common-pci.o
 obj-$(CONFIG_ARCH_IXDP4XX)	+= ixdp425-pci.o ixdp425-setup.o
 obj-$(CONFIG_MACH_IXDPG425)	+= ixdpg425-pci.o coyote-setup.o
 obj-$(CONFIG_ARCH_ADI_COYOTE)	+= coyote-pci.o coyote-setup.o
diff --git a/include/asm-arm/arch-ixp4xx/io.h b/include/asm-arm/arch-ixp4xx/io.h
index 942b622..b59520e 100644
--- a/include/asm-arm/arch-ixp4xx/io.h
+++ b/include/asm-arm/arch-ixp4xx/io.h
@@ -260,6 +260,12 @@ out:
 
 #endif
 
+#ifndef CONFIG_PCI
+
+#define	__io(v)		v
+
+#else
+
 /*
  * IXP4xx does not have a transparent cpu -> PCI I/O translation
  * window.  Instead, it has a set of registers that must be tweaked
@@ -578,6 +584,7 @@ __ixp4xx_iowrite32_rep(void __iomem *addr, const void *vaddr, u32 count)
 
 #define	ioport_map(port, nr)		((void __iomem*)(port + PIO_OFFSET))
 #define	ioport_unmap(addr)
+#endif	// !CONFIG_PCI
 
 #endif	//  __ASM_ARM_ARCH_IO_H
 
diff --git a/include/asm-arm/arch-ixp4xx/memory.h b/include/asm-arm/arch-ixp4xx/memory.h
index ee211d2..af9667b 100644
--- a/include/asm-arm/arch-ixp4xx/memory.h
+++ b/include/asm-arm/arch-ixp4xx/memory.h
@@ -14,7 +14,7 @@
  */
 #define PHYS_OFFSET	UL(0x00000000)
 
-#ifndef __ASSEMBLY__
+#if !defined(__ASSEMBLY__) && defined(CONFIG_PCI)
 
 void ixp4xx_adjust_zones(int node, unsigned long *size, unsigned long *holes);
 
-- 
cgit v0.10.2


From 81d38428df26377c91e7e193aa4d2fdfdcda300a Mon Sep 17 00:00:00 2001
From: Pavel Pisa <ppisa@pikron.com>
Date: Sun, 30 Apr 2006 15:35:54 +0100
Subject: [ARM] 3485/1: i.MX: MX1 SD/MMC fix of unintentional double start
 possibility

Patch from Pavel Pisa

The clock starting imxmci_start_clock() function contains hardware
issue workaround, which repeats start attempt, if SDHC does not react on
the first trial. But the second start attempt can be taken even, if the
first succeed and test code misses time limited clock running phase
due to delay caused by schedule to other task or some another device
interrupt. This change enables to detect such situation.
The performance is not issue, because usually at full clock rate
only about six loops in delay cycle are needed.

Signed-off-by: Pavel Pisa <pisa@cmp.felk.cvut.cz>
Acked-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/mmc/imxmmc.c b/drivers/mmc/imxmmc.c
index ffb7f55..07f36c4 100644
--- a/drivers/mmc/imxmmc.c
+++ b/drivers/mmc/imxmmc.c
@@ -102,6 +102,7 @@ struct imxmci_host {
 #define IMXMCI_PEND_CPU_DATA_b	5
 #define IMXMCI_PEND_CARD_XCHG_b	6
 #define IMXMCI_PEND_SET_INIT_b	7
+#define IMXMCI_PEND_STARTED_b	8
 
 #define IMXMCI_PEND_IRQ_m	(1 << IMXMCI_PEND_IRQ_b)
 #define IMXMCI_PEND_DMA_END_m	(1 << IMXMCI_PEND_DMA_END_b)
@@ -111,6 +112,7 @@ struct imxmci_host {
 #define IMXMCI_PEND_CPU_DATA_m	(1 << IMXMCI_PEND_CPU_DATA_b)
 #define IMXMCI_PEND_CARD_XCHG_m	(1 << IMXMCI_PEND_CARD_XCHG_b)
 #define IMXMCI_PEND_SET_INIT_m	(1 << IMXMCI_PEND_SET_INIT_b)
+#define IMXMCI_PEND_STARTED_m	(1 << IMXMCI_PEND_STARTED_b)
 
 static void imxmci_stop_clock(struct imxmci_host *host)
 {
@@ -131,23 +133,52 @@ static void imxmci_stop_clock(struct imxmci_host *host)
 	dev_dbg(mmc_dev(host->mmc), "imxmci_stop_clock blocked, no luck\n");
 }
 
-static void imxmci_start_clock(struct imxmci_host *host)
+static int imxmci_start_clock(struct imxmci_host *host)
 {
-	int i = 0;
+	unsigned int trials = 0;
+	unsigned int delay_limit = 128;
+	unsigned long flags;
+
 	MMC_STR_STP_CLK &= ~STR_STP_CLK_STOP_CLK;
-	while(i < 0x1000) {
-	        if(!(i & 0x7f))
-			MMC_STR_STP_CLK |= STR_STP_CLK_START_CLK;
 
-		if(MMC_STATUS & STATUS_CARD_BUS_CLK_RUN) {
-			/* Check twice before cut */
+	clear_bit(IMXMCI_PEND_STARTED_b, &host->pending_events);
+
+	/*
+	 * Command start of the clock, this usually succeeds in less
+	 * then 6 delay loops, but during card detection (low clockrate)
+	 * it takes up to 5000 delay loops and sometimes fails for the first time
+	 */
+	MMC_STR_STP_CLK |= STR_STP_CLK_START_CLK;
+
+	do {
+		unsigned int delay = delay_limit;
+
+		while(delay--){
 			if(MMC_STATUS & STATUS_CARD_BUS_CLK_RUN)
-				return;
+				/* Check twice before cut */
+				if(MMC_STATUS & STATUS_CARD_BUS_CLK_RUN)
+					return 0;
+
+			if(test_bit(IMXMCI_PEND_STARTED_b, &host->pending_events))
+				return 0;
 		}
 
-		i++;
-	}
-	dev_dbg(mmc_dev(host->mmc), "imxmci_start_clock blocked, no luck\n");
+		local_irq_save(flags);
+		/*
+		 * Ensure, that request is not doubled under all possible circumstances.
+		 * It is possible, that cock running state is missed, because some other
+		 * IRQ or schedule delays this function execution and the clocks has
+		 * been already stopped by other means (response processing, SDHC HW)
+		 */
+		if(!test_bit(IMXMCI_PEND_STARTED_b, &host->pending_events))
+			MMC_STR_STP_CLK |= STR_STP_CLK_START_CLK;
+		local_irq_restore(flags);
+
+	} while(++trials<256);
+
+	dev_err(mmc_dev(host->mmc), "imxmci_start_clock blocked, no luck\n");
+
+	return -1;
 }
 
 static void imxmci_softreset(void)
@@ -622,6 +653,7 @@ static irqreturn_t imxmci_irq(int irq, void *devid, struct pt_regs *regs)
 	atomic_set(&host->stuck_timeout, 0);
 	host->status_reg = stat;
 	set_bit(IMXMCI_PEND_IRQ_b, &host->pending_events);
+	set_bit(IMXMCI_PEND_STARTED_b, &host->pending_events);
 	tasklet_schedule(&host->tasklet);
 
 	return IRQ_RETVAL(handled);;
-- 
cgit v0.10.2


From 46e678c96bbd775abd05d3ddbe2fd334794f9157 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Sun, 30 Apr 2006 16:36:32 +0200
Subject: [PATCH] splice: fix bugs with stealing regular pipe pages

- Check that page has suitable count for stealing in the regular pipes.
- pipe_to_file() assumes that the page is locked on succesful steal, so
  do that in the pipe steal hook
- Missing unlock_page() in add_to_page_cache() failure.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/pipe.c b/fs/pipe.c
index 7fefb10..5a36927 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -127,8 +127,15 @@ static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe,
 static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
 			       struct pipe_buffer *buf)
 {
-	buf->flags |= PIPE_BUF_FLAG_STOLEN;
-	return 0;
+	struct page *page = buf->page;
+
+	if (page_count(page) == 1) {
+		buf->flags |= PIPE_BUF_FLAG_STOLEN;
+		lock_page(page);
+		return 0;
+	}
+
+	return 1;
 }
 
 static void anon_pipe_buf_get(struct pipe_inode_info *info,
diff --git a/fs/splice.c b/fs/splice.c
index a46ddd2..9df28d3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -599,8 +599,10 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 			goto find_page;
 
 		page = buf->page;
-		if (add_to_page_cache(page, mapping, index, gfp_mask))
+		if (add_to_page_cache(page, mapping, index, gfp_mask)) {
+			unlock_page(page);
 			goto find_page;
+		}
 
 		if (!(buf->flags & PIPE_BUF_FLAG_LRU))
 			lru_cache_add(page);
-- 
cgit v0.10.2


From 1241140f5183db38393556832198a3b109bf9085 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Sun, 30 Apr 2006 21:40:13 -0700
Subject: [SPARC64]: Kill __flush_tlb_page() prototype.

This function no longer exists.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/asm-sparc64/tlbflush.h b/include/asm-sparc64/tlbflush.h
index 9ad5d9c..e3a7c45 100644
--- a/include/asm-sparc64/tlbflush.h
+++ b/include/asm-sparc64/tlbflush.h
@@ -22,8 +22,6 @@ extern void flush_tlb_pending(void);
 /* Local cpu only.  */
 extern void __flush_tlb_all(void);
 
-extern void __flush_tlb_page(unsigned long context, unsigned long page, unsigned long r);
-
 extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
 #ifndef CONFIG_SMP
-- 
cgit v0.10.2


From c9f2946fbec88d4baa3a6d47eb3a8e6b08b05cd9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Sun, 30 Apr 2006 22:54:27 -0700
Subject: [SPARC64]: Disable preemption during flush_tlb_pending().

A context switch will force a call to flush_tlb_pending() (via
switch_to()), so if we test tlb_nr to be non-zero, then sleep, it
would become zero and later back at the original context we'll pass
zero down into the TLB flushing code which should never see a nr
argument of zero.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
index a079cf4..3f10fc9 100644
--- a/arch/sparc64/mm/tlb.c
+++ b/arch/sparc64/mm/tlb.c
@@ -8,6 +8,7 @@
 #include <linux/percpu.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
+#include <linux/preempt.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -24,6 +25,8 @@ void flush_tlb_pending(void)
 {
 	struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
 
+	preempt_disable();
+
 	if (mp->tlb_nr) {
 		flush_tsb_user(mp);
 
@@ -38,6 +41,8 @@ void flush_tlb_pending(void)
 		}
 		mp->tlb_nr = 0;
 	}
+
+	preempt_enable();
 }
 
 void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t orig)
-- 
cgit v0.10.2


From 45d9bb0e37668b7c64d1e49e98fbc4733c23b334 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 29 Mar 2006 20:02:55 -0500
Subject: [PATCH] deal with deadlocks in audit_free()

Don't assume that audit_log_exit() et.al. are called for the context of
current; pass task explictly.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7f160df..4052f0a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -536,13 +536,13 @@ error_path:
 	return;
 }
 
-static void audit_log_task_info(struct audit_buffer *ab, gfp_t gfp_mask)
+static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk, gfp_t gfp_mask)
 {
-	char name[sizeof(current->comm)];
-	struct mm_struct *mm = current->mm;
+	char name[sizeof(tsk->comm)];
+	struct mm_struct *mm = tsk->mm;
 	struct vm_area_struct *vma;
 
-	get_task_comm(name, current);
+	get_task_comm(name, tsk);
 	audit_log_format(ab, " comm=");
 	audit_log_untrustedstring(ab, name);
 
@@ -551,7 +551,7 @@ static void audit_log_task_info(struct audit_buffer *ab, gfp_t gfp_mask)
 
 	/*
 	 * this is brittle; all callers that pass GFP_ATOMIC will have
-	 * NULL current->mm and we won't get here.
+	 * NULL tsk->mm and we won't get here.
 	 */
 	down_read(&mm->mmap_sem);
 	vma = mm->mmap;
@@ -569,7 +569,7 @@ static void audit_log_task_info(struct audit_buffer *ab, gfp_t gfp_mask)
 	audit_log_task_context(ab, gfp_mask);
 }
 
-static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
+static void audit_log_exit(struct audit_context *context, struct task_struct *tsk, gfp_t gfp_mask)
 {
 	int i;
 	struct audit_buffer *ab;
@@ -587,8 +587,8 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
 		audit_log_format(ab, " success=%s exit=%ld", 
 				 (context->return_valid==AUDITSC_SUCCESS)?"yes":"no",
 				 context->return_code);
-	if (current->signal->tty && current->signal->tty->name)
-		tty = current->signal->tty->name;
+	if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
+		tty = tsk->signal->tty->name;
 	else
 		tty = "(none)";
 	audit_log_format(ab,
@@ -720,7 +720,7 @@ void audit_free(struct task_struct *tsk)
 	 * We use GFP_ATOMIC here because we might be doing this 
 	 * in the context of the idle thread */
 	if (context->in_syscall && context->auditable)
-		audit_log_exit(context, GFP_ATOMIC);
+		audit_log_exit(context, tsk, GFP_ATOMIC);
 
 	audit_free_context(context);
 }
@@ -839,7 +839,7 @@ void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code)
 		goto out;
 
 	if (context->in_syscall && context->auditable)
-		audit_log_exit(context, GFP_KERNEL);
+		audit_log_exit(context, tsk, GFP_KERNEL);
 
 	context->in_syscall = 0;
 	context->auditable  = 0;
-- 
cgit v0.10.2


From d6fe3945b42d09a1eca7ad180a1646e585b8594f Mon Sep 17 00:00:00 2001
From: Steve Grubb <sgrubb@redhat.com>
Date: Thu, 30 Mar 2006 12:20:22 -0500
Subject: [PATCH] sockaddr patch

On Thursday 23 March 2006 09:08, John D. Ramsdell wrote:
>  I noticed that a socketcall(bind) and socketcall(connect) event contain a
>  record of type=SOCKADDR, but I cannot see one for a system call event
>  associated with socketcall(accept).  Recording the sockaddr of an accepted
>  socket is important for cross platform information flow analys

Thanks for pointing this out. The following patch should address this.

Signed-off-by: Steve Grubb <sgrubb@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/net/socket.c b/net/socket.c
index 0ce12df..02948b6 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -267,6 +267,8 @@ int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ule
 		return -EINVAL;
 	if(len)
 	{
+		if (audit_sockaddr(klen, kaddr))
+			return -ENOMEM;
 		if(copy_to_user(uaddr,kaddr,len))
 			return -EFAULT;
 	}
-- 
cgit v0.10.2


From fa84cb935d4ec601528f5e2f0d5d31e7876a5044 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 29 Mar 2006 20:30:19 -0500
Subject: [PATCH] move call of audit_free() into do_exit()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4052f0a..8ec52ff 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -698,19 +698,12 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
  * audit_free - free a per-task audit context
  * @tsk: task whose audit context block to free
  *
- * Called from copy_process and __put_task_struct.
+ * Called from copy_process and do_exit
  */
 void audit_free(struct task_struct *tsk)
 {
 	struct audit_context *context;
 
-	/*
-	 * No need to lock the task - when we execute audit_free()
-	 * then the task has no external references anymore, and
-	 * we are tearing it down. (The locking also confuses
-	 * DEBUG_LOCKDEP - this freeing may occur in softirq
-	 * contexts as well, via RCU.)
-	 */
 	context = audit_get_context(tsk, 0, 0);
 	if (likely(!context))
 		return;
diff --git a/kernel/exit.c b/kernel/exit.c
index f86434d..e95b932 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -35,6 +35,7 @@
 #include <linux/futex.h>
 #include <linux/compat.h>
 #include <linux/pipe_fs_i.h>
+#include <linux/audit.h> /* for audit_free() */
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -910,6 +911,8 @@ fastcall NORET_TYPE void do_exit(long code)
 	if (unlikely(tsk->compat_robust_list))
 		compat_exit_robust_list(tsk);
 #endif
+	if (unlikely(tsk->audit_context))
+		audit_free(tsk);
 	exit_mm(tsk);
 
 	exit_sem(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index d2fa57d..ac8100e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -114,8 +114,6 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
-	if (unlikely(tsk->audit_context))
-		audit_free(tsk);
 	security_task_free(tsk);
 	free_uid(tsk->user);
 	put_group_info(tsk->group_info);
-- 
cgit v0.10.2


From e495149b173d8e133e1f6f2eb86fd97be7e92010 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 29 Mar 2006 20:17:10 -0500
Subject: [PATCH] drop gfp_mask in audit_log_exit()

now we can do that - all callers are process-synchronous and do not hold
any locks.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 8ec52ff..ba0ec1b 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -506,7 +506,7 @@ static inline void audit_free_context(struct audit_context *context)
 		printk(KERN_ERR "audit: freed %d contexts\n", count);
 }
 
-static void audit_log_task_context(struct audit_buffer *ab, gfp_t gfp_mask)
+static void audit_log_task_context(struct audit_buffer *ab)
 {
 	char *ctx = NULL;
 	ssize_t len = 0;
@@ -518,7 +518,7 @@ static void audit_log_task_context(struct audit_buffer *ab, gfp_t gfp_mask)
 		return;
 	}
 
-	ctx = kmalloc(len, gfp_mask);
+	ctx = kmalloc(len, GFP_KERNEL);
 	if (!ctx)
 		goto error_path;
 
@@ -536,47 +536,46 @@ error_path:
 	return;
 }
 
-static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk, gfp_t gfp_mask)
+static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
 {
 	char name[sizeof(tsk->comm)];
 	struct mm_struct *mm = tsk->mm;
 	struct vm_area_struct *vma;
 
+	/* tsk == current */
+
 	get_task_comm(name, tsk);
 	audit_log_format(ab, " comm=");
 	audit_log_untrustedstring(ab, name);
 
-	if (!mm)
-		return;
-
-	/*
-	 * this is brittle; all callers that pass GFP_ATOMIC will have
-	 * NULL tsk->mm and we won't get here.
-	 */
-	down_read(&mm->mmap_sem);
-	vma = mm->mmap;
-	while (vma) {
-		if ((vma->vm_flags & VM_EXECUTABLE) &&
-		    vma->vm_file) {
-			audit_log_d_path(ab, "exe=",
-					 vma->vm_file->f_dentry,
-					 vma->vm_file->f_vfsmnt);
-			break;
+	if (mm) {
+		down_read(&mm->mmap_sem);
+		vma = mm->mmap;
+		while (vma) {
+			if ((vma->vm_flags & VM_EXECUTABLE) &&
+			    vma->vm_file) {
+				audit_log_d_path(ab, "exe=",
+						 vma->vm_file->f_dentry,
+						 vma->vm_file->f_vfsmnt);
+				break;
+			}
+			vma = vma->vm_next;
 		}
-		vma = vma->vm_next;
+		up_read(&mm->mmap_sem);
 	}
-	up_read(&mm->mmap_sem);
-	audit_log_task_context(ab, gfp_mask);
+	audit_log_task_context(ab);
 }
 
-static void audit_log_exit(struct audit_context *context, struct task_struct *tsk, gfp_t gfp_mask)
+static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 {
 	int i;
 	struct audit_buffer *ab;
 	struct audit_aux_data *aux;
 	const char *tty;
 
-	ab = audit_log_start(context, gfp_mask, AUDIT_SYSCALL);
+	/* tsk == current */
+
+	ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL);
 	if (!ab)
 		return;		/* audit_panic has been called */
 	audit_log_format(ab, "arch=%x syscall=%d",
@@ -607,12 +606,12 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		  context->gid,
 		  context->euid, context->suid, context->fsuid,
 		  context->egid, context->sgid, context->fsgid, tty);
-	audit_log_task_info(ab, gfp_mask);
+	audit_log_task_info(ab, tsk);
 	audit_log_end(ab);
 
 	for (aux = context->aux; aux; aux = aux->next) {
 
-		ab = audit_log_start(context, gfp_mask, aux->type);
+		ab = audit_log_start(context, GFP_KERNEL, aux->type);
 		if (!ab)
 			continue; /* audit_panic has been called */
 
@@ -649,7 +648,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 	}
 
 	if (context->pwd && context->pwdmnt) {
-		ab = audit_log_start(context, gfp_mask, AUDIT_CWD);
+		ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD);
 		if (ab) {
 			audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt);
 			audit_log_end(ab);
@@ -659,7 +658,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		unsigned long ino  = context->names[i].ino;
 		unsigned long pino = context->names[i].pino;
 
-		ab = audit_log_start(context, gfp_mask, AUDIT_PATH);
+		ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
 		if (!ab)
 			continue; /* audit_panic has been called */
 
@@ -712,8 +711,9 @@ void audit_free(struct task_struct *tsk)
 	 * function (e.g., exit_group), then free context block. 
 	 * We use GFP_ATOMIC here because we might be doing this 
 	 * in the context of the idle thread */
+	/* that can happen only if we are called from do_exit() */
 	if (context->in_syscall && context->auditable)
-		audit_log_exit(context, tsk, GFP_ATOMIC);
+		audit_log_exit(context, tsk);
 
 	audit_free_context(context);
 }
@@ -821,6 +821,8 @@ void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code)
 {
 	struct audit_context *context;
 
+	/* tsk == current */
+
 	get_task_struct(tsk);
 	task_lock(tsk);
 	context = audit_get_context(tsk, valid, return_code);
@@ -832,7 +834,7 @@ void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code)
 		goto out;
 
 	if (context->in_syscall && context->auditable)
-		audit_log_exit(context, tsk, GFP_KERNEL);
+		audit_log_exit(context, tsk);
 
 	context->in_syscall = 0;
 	context->auditable  = 0;
-- 
cgit v0.10.2


From 5411be59db80333039386f3b1ccfe5eb9023a916 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 29 Mar 2006 20:23:36 -0500
Subject: [PATCH] drop task argument of audit_syscall_{entry,exit}

... it's always current, and that's a good thing - allows simpler locking.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index 506462e..fd7eaf7 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -671,7 +671,7 @@ int do_syscall_trace(struct pt_regs *regs, int entryexit)
 
 	if (unlikely(current->audit_context)) {
 		if (entryexit)
-			audit_syscall_exit(current, AUDITSC_RESULT(regs->eax),
+			audit_syscall_exit(AUDITSC_RESULT(regs->eax),
 						regs->eax);
 		/* Debug traps, when using PTRACE_SINGLESTEP, must be sent only
 		 * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is
@@ -720,14 +720,13 @@ int do_syscall_trace(struct pt_regs *regs, int entryexit)
 	ret = is_sysemu;
 out:
 	if (unlikely(current->audit_context) && !entryexit)
-		audit_syscall_entry(current, AUDIT_ARCH_I386, regs->orig_eax,
+		audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_eax,
 				    regs->ebx, regs->ecx, regs->edx, regs->esi);
 	if (ret == 0)
 		return 0;
 
 	regs->orig_eax = -1; /* force skip of syscall restarting */
 	if (unlikely(current->audit_context))
-		audit_syscall_exit(current, AUDITSC_RESULT(regs->eax),
-				regs->eax);
+		audit_syscall_exit(AUDITSC_RESULT(regs->eax), regs->eax);
 	return 1;
 }
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
index aee14fa..00e0118 100644
--- a/arch/i386/kernel/vm86.c
+++ b/arch/i386/kernel/vm86.c
@@ -312,7 +312,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 
 	/*call audit_syscall_exit since we do not exit via the normal paths */
 	if (unlikely(current->audit_context))
-		audit_syscall_exit(current, AUDITSC_RESULT(eax), eax);
+		audit_syscall_exit(AUDITSC_RESULT(eax), eax);
 
 	__asm__ __volatile__(
 		"movl %0,%%esp\n\t"
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 9887c87..e61e15e 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -1644,7 +1644,7 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
 			arch = AUDIT_ARCH_IA64;
 		}
 
-		audit_syscall_entry(current, arch, syscall, arg0, arg1, arg2, arg3);
+		audit_syscall_entry(arch, syscall, arg0, arg1, arg2, arg3);
 	}
 
 }
@@ -1662,7 +1662,7 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
 
 		if (success != AUDITSC_SUCCESS)
 			result = -result;
-		audit_syscall_exit(current, success, result);
+		audit_syscall_exit(success, result);
 	}
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE)
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index f3106d0..9b4733c 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -483,7 +483,7 @@ static inline int audit_arch(void)
 asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
 {
 	if (unlikely(current->audit_context) && entryexit)
-		audit_syscall_exit(current, AUDITSC_RESULT(regs->regs[2]),
+		audit_syscall_exit(AUDITSC_RESULT(regs->regs[2]),
 		                   regs->regs[2]);
 
 	if (!(current->ptrace & PT_PTRACED))
@@ -507,7 +507,7 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
 	}
  out:
 	if (unlikely(current->audit_context) && !entryexit)
-		audit_syscall_entry(current, audit_arch(), regs->regs[2],
+		audit_syscall_entry(audit_arch(), regs->regs[2],
 				    regs->regs[4], regs->regs[5],
 				    regs->regs[6], regs->regs[7]);
 }
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index bcb8357..4a677d1 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -538,7 +538,7 @@ void do_syscall_trace_enter(struct pt_regs *regs)
 		do_syscall_trace();
 
 	if (unlikely(current->audit_context))
-		audit_syscall_entry(current,
+		audit_syscall_entry(
 #ifdef CONFIG_PPC32
 				    AUDIT_ARCH_PPC,
 #else
@@ -556,8 +556,7 @@ void do_syscall_trace_leave(struct pt_regs *regs)
 #endif
 
 	if (unlikely(current->audit_context))
-		audit_syscall_exit(current,
-				   (regs->ccr&0x1000)?AUDITSC_FAILURE:AUDITSC_SUCCESS,
+		audit_syscall_exit((regs->ccr&0x1000)?AUDITSC_FAILURE:AUDITSC_SUCCESS,
 				   regs->result);
 
 	if ((test_thread_flag(TIF_SYSCALL_TRACE)
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 37dfe33..8f36504 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -734,7 +734,7 @@ asmlinkage void
 syscall_trace(struct pt_regs *regs, int entryexit)
 {
 	if (unlikely(current->audit_context) && entryexit)
-		audit_syscall_exit(current, AUDITSC_RESULT(regs->gprs[2]), regs->gprs[2]);
+		audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]), regs->gprs[2]);
 
 	if (!test_thread_flag(TIF_SYSCALL_TRACE))
 		goto out;
@@ -761,8 +761,7 @@ syscall_trace(struct pt_regs *regs, int entryexit)
 	}
  out:
 	if (unlikely(current->audit_context) && !entryexit)
-		audit_syscall_entry(current, 
-				    test_thread_flag(TIF_31BIT)?AUDIT_ARCH_S390:AUDIT_ARCH_S390X,
+		audit_syscall_entry(test_thread_flag(TIF_31BIT)?AUDIT_ARCH_S390:AUDIT_ARCH_S390X,
 				    regs->gprs[2], regs->orig_gpr2, regs->gprs[3],
 				    regs->gprs[4], regs->gprs[5]);
 }
diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c
index 49e6ded..d31975e 100644
--- a/arch/sparc64/kernel/ptrace.c
+++ b/arch/sparc64/kernel/ptrace.c
@@ -653,7 +653,7 @@ asmlinkage void syscall_trace(struct pt_regs *regs, int syscall_exit_p)
 		if (unlikely(tstate & (TSTATE_XCARRY | TSTATE_ICARRY)))
 			result = AUDITSC_FAILURE;
 
-		audit_syscall_exit(current, result, regs->u_regs[UREG_I0]);
+		audit_syscall_exit(result, regs->u_regs[UREG_I0]);
 	}
 
 	if (!(current->ptrace & PT_PTRACED))
@@ -677,8 +677,7 @@ asmlinkage void syscall_trace(struct pt_regs *regs, int syscall_exit_p)
 
 out:
 	if (unlikely(current->audit_context) && !syscall_exit_p)
-		audit_syscall_entry(current,
-				    (test_thread_flag(TIF_32BIT) ?
+		audit_syscall_entry((test_thread_flag(TIF_32BIT) ?
 				     AUDIT_ARCH_SPARC :
 				     AUDIT_ARCH_SPARC64),
 				    regs->u_regs[UREG_G1],
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 60d2eda..9a77fb3 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -275,15 +275,13 @@ void syscall_trace(union uml_pt_regs *regs, int entryexit)
 
 	if (unlikely(current->audit_context)) {
 		if (!entryexit)
-			audit_syscall_entry(current,
-                                            HOST_AUDIT_ARCH,
+			audit_syscall_entry(HOST_AUDIT_ARCH,
 					    UPT_SYSCALL_NR(regs),
 					    UPT_SYSCALL_ARG1(regs),
 					    UPT_SYSCALL_ARG2(regs),
 					    UPT_SYSCALL_ARG3(regs),
 					    UPT_SYSCALL_ARG4(regs));
-		else audit_syscall_exit(current,
-                                        AUDITSC_RESULT(UPT_SYSCALL_RET(regs)),
+		else audit_syscall_exit(AUDITSC_RESULT(UPT_SYSCALL_RET(regs)),
                                         UPT_SYSCALL_RET(regs));
 	}
 
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c
index da8e790..2d50024 100644
--- a/arch/x86_64/kernel/ptrace.c
+++ b/arch/x86_64/kernel/ptrace.c
@@ -600,12 +600,12 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs)
 
 	if (unlikely(current->audit_context)) {
 		if (test_thread_flag(TIF_IA32)) {
-			audit_syscall_entry(current, AUDIT_ARCH_I386,
+			audit_syscall_entry(AUDIT_ARCH_I386,
 					    regs->orig_rax,
 					    regs->rbx, regs->rcx,
 					    regs->rdx, regs->rsi);
 		} else {
-			audit_syscall_entry(current, AUDIT_ARCH_X86_64,
+			audit_syscall_entry(AUDIT_ARCH_X86_64,
 					    regs->orig_rax,
 					    regs->rdi, regs->rsi,
 					    regs->rdx, regs->r10);
@@ -616,7 +616,7 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs)
 asmlinkage void syscall_trace_leave(struct pt_regs *regs)
 {
 	if (unlikely(current->audit_context))
-		audit_syscall_exit(current, AUDITSC_RESULT(regs->rax), regs->rax);
+		audit_syscall_exit(AUDITSC_RESULT(regs->rax), regs->rax);
 
 	if ((test_thread_flag(TIF_SYSCALL_TRACE)
 	     || test_thread_flag(TIF_SINGLESTEP))
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 1c47c59..39fef6e 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -287,10 +287,10 @@ struct netlink_skb_parms;
 				/* Public API */
 extern int  audit_alloc(struct task_struct *task);
 extern void audit_free(struct task_struct *task);
-extern void audit_syscall_entry(struct task_struct *task, int arch,
+extern void audit_syscall_entry(int arch,
 				int major, unsigned long a0, unsigned long a1,
 				unsigned long a2, unsigned long a3);
-extern void audit_syscall_exit(struct task_struct *task, int failed, long return_code);
+extern void audit_syscall_exit(int failed, long return_code);
 extern void audit_getname(const char *name);
 extern void audit_putname(const char *name);
 extern void __audit_inode(const char *name, const struct inode *inode, unsigned flags);
@@ -323,8 +323,8 @@ extern int audit_set_macxattr(const char *name);
 #else
 #define audit_alloc(t) ({ 0; })
 #define audit_free(t) do { ; } while (0)
-#define audit_syscall_entry(t,ta,a,b,c,d,e) do { ; } while (0)
-#define audit_syscall_exit(t,f,r) do { ; } while (0)
+#define audit_syscall_entry(ta,a,b,c,d,e) do { ; } while (0)
+#define audit_syscall_exit(f,r) do { ; } while (0)
 #define audit_getname(n) do { ; } while (0)
 #define audit_putname(n) do { ; } while (0)
 #define __audit_inode(n,i,f) do { ; } while (0)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index ba0ec1b..7ed82b0 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -736,10 +736,11 @@ void audit_free(struct task_struct *tsk)
  * will only be written if another part of the kernel requests that it
  * be written).
  */
-void audit_syscall_entry(struct task_struct *tsk, int arch, int major,
+void audit_syscall_entry(int arch, int major,
 			 unsigned long a1, unsigned long a2,
 			 unsigned long a3, unsigned long a4)
 {
+	struct task_struct *tsk = current;
 	struct audit_context *context = tsk->audit_context;
 	enum audit_state     state;
 
@@ -817,12 +818,11 @@ void audit_syscall_entry(struct task_struct *tsk, int arch, int major,
  * message), then write out the syscall information.  In call cases,
  * free the names stored from getname().
  */
-void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code)
+void audit_syscall_exit(int valid, long return_code)
 {
+	struct task_struct *tsk = current;
 	struct audit_context *context;
 
-	/* tsk == current */
-
 	get_task_struct(tsk);
 	task_lock(tsk);
 	context = audit_get_context(tsk, valid, return_code);
-- 
cgit v0.10.2


From 97e94c453073a2aba4bb5e0825ddc5e923debf11 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 29 Mar 2006 20:26:24 -0500
Subject: [PATCH] no need to wank with task_lock() and pinning task down in
 audit_syscall_exit()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7ed82b0..8aca4ab 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -329,7 +329,6 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
 	return AUDIT_BUILD_CONTEXT;
 }
 
-/* This should be called with task_lock() held. */
 static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 						      int return_valid,
 						      int return_code)
@@ -823,15 +822,10 @@ void audit_syscall_exit(int valid, long return_code)
 	struct task_struct *tsk = current;
 	struct audit_context *context;
 
-	get_task_struct(tsk);
-	task_lock(tsk);
 	context = audit_get_context(tsk, valid, return_code);
-	task_unlock(tsk);
 
-	/* Not having a context here is ok, since the parent may have
-	 * called __put_task_struct. */
 	if (likely(!context))
-		goto out;
+		return;
 
 	if (context->in_syscall && context->auditable)
 		audit_log_exit(context, tsk);
@@ -849,8 +843,6 @@ void audit_syscall_exit(int valid, long return_code)
 		audit_free_aux(context);
 		tsk->audit_context = context;
 	}
- out:
-	put_task_struct(tsk);
 }
 
 /**
-- 
cgit v0.10.2


From 376bd9cb357ec945ac893feaeb63af7370a6e70b Mon Sep 17 00:00:00 2001
From: Darrel Goeddel <dgoeddel@trustedcs.com>
Date: Fri, 24 Feb 2006 15:44:05 -0600
Subject: [PATCH] support for context based audit filtering

The following patch provides selinux interfaces that will allow the audit
system to perform filtering based on the process context (user, role, type,
sensitivity, and clearance).  These interfaces will allow the selinux
module to perform efficient matches based on lower level selinux constructs,
rather than relying on context retrievals and string comparisons within
the audit module.  It also allows for dominance checks on the mls portion
of the contexts that are impossible with only string comparisons.

Signed-off-by: Darrel Goeddel <dgoeddel@trustedcs.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 39fef6e..740f950 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -145,6 +145,11 @@
 #define AUDIT_PERS	10
 #define AUDIT_ARCH	11
 #define AUDIT_MSGTYPE	12
+#define AUDIT_SE_USER	13	/* security label user */
+#define AUDIT_SE_ROLE	14	/* security label role */
+#define AUDIT_SE_TYPE	15	/* security label type */
+#define AUDIT_SE_SEN	16	/* security label sensitivity label */
+#define AUDIT_SE_CLR	17	/* security label clearance label */
 
 				/* These are ONLY useful when checking
 				 * at syscall exit time (AUDIT_AT_EXIT). */
diff --git a/include/linux/selinux.h b/include/linux/selinux.h
new file mode 100644
index 0000000..9d684b1
--- /dev/null
+++ b/include/linux/selinux.h
@@ -0,0 +1,112 @@
+/*
+ * SELinux services exported to the rest of the kernel.
+ *
+ * Author: James Morris <jmorris@redhat.com>
+ *
+ * Copyright (C) 2005 Red Hat, Inc., James Morris <jmorris@redhat.com>
+ * Copyright (C) 2006 Trusted Computer Solutions, Inc. <dgoeddel@trustedcs.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ */
+#ifndef _LINUX_SELINUX_H
+#define _LINUX_SELINUX_H
+
+struct selinux_audit_rule;
+struct audit_context;
+
+#ifdef CONFIG_SECURITY_SELINUX
+
+/**
+ *	selinux_audit_rule_init - alloc/init an selinux audit rule structure.
+ *	@field: the field this rule refers to
+ *	@op: the operater the rule uses
+ *	@rulestr: the text "target" of the rule
+ *	@rule: pointer to the new rule structure returned via this
+ *
+ *	Returns 0 if successful, -errno if not.  On success, the rule structure
+ *	will be allocated internally.  The caller must free this structure with
+ *	selinux_audit_rule_free() after use.
+ */
+int selinux_audit_rule_init(u32 field, u32 op, char *rulestr,
+                            struct selinux_audit_rule **rule);
+
+/**
+ *	selinux_audit_rule_free - free an selinux audit rule structure.
+ *	@rule: pointer to the audit rule to be freed
+ *
+ *	This will free all memory associated with the given rule.
+ *	If @rule is NULL, no operation is performed.
+ */
+void selinux_audit_rule_free(struct selinux_audit_rule *rule);
+
+/**
+ *	selinux_audit_rule_match - determine if a context ID matches a rule.
+ *	@ctxid: the context ID to check
+ *	@field: the field this rule refers to
+ *	@op: the operater the rule uses
+ *	@rule: pointer to the audit rule to check against
+ *	@actx: the audit context (can be NULL) associated with the check
+ *
+ *	Returns 1 if the context id matches the rule, 0 if it does not, and
+ *	-errno on failure.
+ */
+int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+                             struct selinux_audit_rule *rule,
+                             struct audit_context *actx);
+
+/**
+ *	selinux_audit_set_callback - set the callback for policy reloads.
+ *	@callback: the function to call when the policy is reloaded
+ *
+ *	This sets the function callback function that will update the rules
+ *	upon policy reloads.  This callback should rebuild all existing rules
+ *	using selinux_audit_rule_init().
+ */
+void selinux_audit_set_callback(int (*callback)(void));
+
+/**
+ *	selinux_task_ctxid - determine a context ID for a process.
+ *	@tsk: the task object
+ *	@ctxid: ID value returned via this
+ *
+ *	On return, ctxid will contain an ID for the context.  This value
+ *	should only be used opaquely.
+ */
+void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid);
+
+#else
+
+static inline int selinux_audit_rule_init(u32 field, u32 op,
+                                          char *rulestr,
+                                          struct selinux_audit_rule **rule)
+{
+	return -ENOTSUPP;
+}
+
+static inline void selinux_audit_rule_free(struct selinux_audit_rule *rule)
+{
+	return;
+}
+
+static inline int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+                                           struct selinux_audit_rule *rule,
+                                           struct audit_context *actx)
+{
+	return 0;
+}
+
+static inline void selinux_audit_set_callback(int (*callback)(void))
+{
+	return;
+}
+
+static inline void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid)
+{
+	*ctxid = 0;
+}
+
+#endif	/* CONFIG_SECURITY_SELINUX */
+
+#endif /* _LINUX_SELINUX_H */
diff --git a/security/selinux/Makefile b/security/selinux/Makefile
index 688c0a2..faf2e02 100644
--- a/security/selinux/Makefile
+++ b/security/selinux/Makefile
@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_SECURITY_SELINUX) := selinux.o ss/
 
-selinux-y := avc.o hooks.o selinuxfs.o netlink.o nlmsgtab.o netif.o
+selinux-y := avc.o hooks.o selinuxfs.o netlink.o nlmsgtab.o netif.o exports.o
 
 selinux-$(CONFIG_SECURITY_NETWORK_XFRM) += xfrm.o
 
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index ac5d69b..a300702 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -800,7 +800,7 @@ out:
 int avc_ss_reset(u32 seqno)
 {
 	struct avc_callback_node *c;
-	int i, rc = 0;
+	int i, rc = 0, tmprc;
 	unsigned long flag;
 	struct avc_node *node;
 
@@ -813,15 +813,16 @@ int avc_ss_reset(u32 seqno)
 
 	for (c = avc_callbacks; c; c = c->next) {
 		if (c->events & AVC_CALLBACK_RESET) {
-			rc = c->callback(AVC_CALLBACK_RESET,
-					 0, 0, 0, 0, NULL);
-			if (rc)
-				goto out;
+			tmprc = c->callback(AVC_CALLBACK_RESET,
+			                    0, 0, 0, 0, NULL);
+			/* save the first error encountered for the return
+			   value and continue processing the callbacks */
+			if (!rc)
+				rc = tmprc;
 		}
 	}
 
 	avc_latest_notif_update(seqno, 0);
-out:
 	return rc;
 }
 
diff --git a/security/selinux/exports.c b/security/selinux/exports.c
new file mode 100644
index 0000000..333c4c7
--- /dev/null
+++ b/security/selinux/exports.c
@@ -0,0 +1,28 @@
+/*
+ * SELinux services exported to the rest of the kernel.
+ *
+ * Author: James Morris <jmorris@redhat.com>
+ *
+ * Copyright (C) 2005 Red Hat, Inc., James Morris <jmorris@redhat.com>
+ * Copyright (C) 2006 Trusted Computer Solutions, Inc. <dgoeddel@trustedcs.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/selinux.h>
+
+#include "security.h"
+#include "objsec.h"
+
+void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid)
+{
+	struct task_security_struct *tsec = tsk->security;
+	if (selinux_enabled)
+		*ctxid = tsec->sid;
+	else
+		*ctxid = 0;
+}
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index 84047f6..7bc5b64 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -8,7 +8,7 @@
  *
  *	Support for enhanced MLS infrastructure.
  *
- * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
+ * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
  */
 
 #include <linux/kernel.h>
@@ -385,6 +385,34 @@ out:
 }
 
 /*
+ * Set the MLS fields in the security context structure
+ * `context' based on the string representation in
+ * the string `str'.  This function will allocate temporary memory with the
+ * given constraints of gfp_mask.
+ */
+int mls_from_string(char *str, struct context *context, gfp_t gfp_mask)
+{
+	char *tmpstr, *freestr;
+	int rc;
+
+	if (!selinux_mls_enabled)
+		return -EINVAL;
+
+	/* we need freestr because mls_context_to_sid will change
+	   the value of tmpstr */
+	tmpstr = freestr = kstrdup(str, gfp_mask);
+	if (!tmpstr) {
+		rc = -ENOMEM;
+	} else {
+		rc = mls_context_to_sid(':', &tmpstr, context,
+		                        NULL, SECSID_NULL);
+		kfree(freestr);
+	}
+
+	return rc;
+}
+
+/*
  * Copies the effective MLS range from `src' into `dst'.
  */
 static inline int mls_scopy_context(struct context *dst,
diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h
index 03de697..fbb42f0 100644
--- a/security/selinux/ss/mls.h
+++ b/security/selinux/ss/mls.h
@@ -8,7 +8,7 @@
  *
  *	Support for enhanced MLS infrastructure.
  *
- * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
+ * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
  */
 
 #ifndef _SS_MLS_H_
@@ -27,6 +27,8 @@ int mls_context_to_sid(char oldc,
 		       struct sidtab *s,
 		       u32 def_sid);
 
+int mls_from_string(char *str, struct context *context, gfp_t gfp_mask);
+
 int mls_convert_context(struct policydb *oldp,
 			struct policydb *newp,
 			struct context *context);
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 6149248..7177e98 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -7,12 +7,13 @@
  * Updated: Trusted Computer Solutions, Inc. <dgoeddel@trustedcs.com>
  *
  *	Support for enhanced MLS infrastructure.
+ *	Support for context based audit filters.
  *
  * Updated: Frank Mayer <mayerf@tresys.com> and Karl MacMillan <kmacmillan@tresys.com>
  *
  * 	Added conditional policy language extensions
  *
- * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc.
+ * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc.
  * Copyright (C) 2003 - 2004 Tresys Technology, LLC
  * Copyright (C) 2003 Red Hat, Inc., James Morris <jmorris@redhat.com>
  *	This program is free software; you can redistribute it and/or modify
@@ -1811,3 +1812,235 @@ out:
 	POLICY_RDUNLOCK;
 	return rc;
 }
+
+struct selinux_audit_rule {
+	u32 au_seqno;
+	struct context au_ctxt;
+};
+
+void selinux_audit_rule_free(struct selinux_audit_rule *rule)
+{
+	if (rule) {
+		context_destroy(&rule->au_ctxt);
+		kfree(rule);
+	}
+}
+
+int selinux_audit_rule_init(u32 field, u32 op, char *rulestr,
+                            struct selinux_audit_rule **rule)
+{
+	struct selinux_audit_rule *tmprule;
+	struct role_datum *roledatum;
+	struct type_datum *typedatum;
+	struct user_datum *userdatum;
+	int rc = 0;
+
+	*rule = NULL;
+
+	if (!ss_initialized)
+		return -ENOTSUPP;
+
+	switch (field) {
+	case AUDIT_SE_USER:
+	case AUDIT_SE_ROLE:
+	case AUDIT_SE_TYPE:
+		/* only 'equals' and 'not equals' fit user, role, and type */
+		if (op != AUDIT_EQUAL && op != AUDIT_NOT_EQUAL)
+			return -EINVAL;
+		break;
+	case AUDIT_SE_SEN:
+	case AUDIT_SE_CLR:
+		/* we do not allow a range, indicated by the presense of '-' */
+		if (strchr(rulestr, '-'))
+			return -EINVAL;
+		break;
+	default:
+		/* only the above fields are valid */
+		return -EINVAL;
+	}
+
+	tmprule = kzalloc(sizeof(struct selinux_audit_rule), GFP_KERNEL);
+	if (!tmprule)
+		return -ENOMEM;
+
+	context_init(&tmprule->au_ctxt);
+
+	POLICY_RDLOCK;
+
+	tmprule->au_seqno = latest_granting;
+
+	switch (field) {
+	case AUDIT_SE_USER:
+		userdatum = hashtab_search(policydb.p_users.table, rulestr);
+		if (!userdatum)
+			rc = -EINVAL;
+		else
+			tmprule->au_ctxt.user = userdatum->value;
+		break;
+	case AUDIT_SE_ROLE:
+		roledatum = hashtab_search(policydb.p_roles.table, rulestr);
+		if (!roledatum)
+			rc = -EINVAL;
+		else
+			tmprule->au_ctxt.role = roledatum->value;
+		break;
+	case AUDIT_SE_TYPE:
+		typedatum = hashtab_search(policydb.p_types.table, rulestr);
+		if (!typedatum)
+			rc = -EINVAL;
+		else
+			tmprule->au_ctxt.type = typedatum->value;
+		break;
+	case AUDIT_SE_SEN:
+	case AUDIT_SE_CLR:
+		rc = mls_from_string(rulestr, &tmprule->au_ctxt, GFP_ATOMIC);
+		break;
+	}
+
+	POLICY_RDUNLOCK;
+
+	if (rc) {
+		selinux_audit_rule_free(tmprule);
+		tmprule = NULL;
+	}
+
+	*rule = tmprule;
+
+	return rc;
+}
+
+int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+                             struct selinux_audit_rule *rule,
+                             struct audit_context *actx)
+{
+	struct context *ctxt;
+	struct mls_level *level;
+	int match = 0;
+
+	if (!rule) {
+		audit_log(actx, GFP_ATOMIC, AUDIT_SELINUX_ERR,
+		          "selinux_audit_rule_match: missing rule\n");
+		return -ENOENT;
+	}
+
+	POLICY_RDLOCK;
+
+	if (rule->au_seqno < latest_granting) {
+		audit_log(actx, GFP_ATOMIC, AUDIT_SELINUX_ERR,
+		          "selinux_audit_rule_match: stale rule\n");
+		match = -ESTALE;
+		goto out;
+	}
+
+	ctxt = sidtab_search(&sidtab, ctxid);
+	if (!ctxt) {
+		audit_log(actx, GFP_ATOMIC, AUDIT_SELINUX_ERR,
+		          "selinux_audit_rule_match: unrecognized SID %d\n",
+		          ctxid);
+		match = -ENOENT;
+		goto out;
+	}
+
+	/* a field/op pair that is not caught here will simply fall through
+	   without a match */
+	switch (field) {
+	case AUDIT_SE_USER:
+		switch (op) {
+		case AUDIT_EQUAL:
+			match = (ctxt->user == rule->au_ctxt.user);
+			break;
+		case AUDIT_NOT_EQUAL:
+			match = (ctxt->user != rule->au_ctxt.user);
+			break;
+		}
+		break;
+	case AUDIT_SE_ROLE:
+		switch (op) {
+		case AUDIT_EQUAL:
+			match = (ctxt->role == rule->au_ctxt.role);
+			break;
+		case AUDIT_NOT_EQUAL:
+			match = (ctxt->role != rule->au_ctxt.role);
+			break;
+		}
+		break;
+	case AUDIT_SE_TYPE:
+		switch (op) {
+		case AUDIT_EQUAL:
+			match = (ctxt->type == rule->au_ctxt.type);
+			break;
+		case AUDIT_NOT_EQUAL:
+			match = (ctxt->type != rule->au_ctxt.type);
+			break;
+		}
+		break;
+	case AUDIT_SE_SEN:
+	case AUDIT_SE_CLR:
+		level = (op == AUDIT_SE_SEN ?
+		         &ctxt->range.level[0] : &ctxt->range.level[1]);
+		switch (op) {
+		case AUDIT_EQUAL:
+			match = mls_level_eq(&rule->au_ctxt.range.level[0],
+			                     level);
+			break;
+		case AUDIT_NOT_EQUAL:
+			match = !mls_level_eq(&rule->au_ctxt.range.level[0],
+			                      level);
+			break;
+		case AUDIT_LESS_THAN:
+			match = (mls_level_dom(&rule->au_ctxt.range.level[0],
+			                       level) &&
+			         !mls_level_eq(&rule->au_ctxt.range.level[0],
+			                       level));
+			break;
+		case AUDIT_LESS_THAN_OR_EQUAL:
+			match = mls_level_dom(&rule->au_ctxt.range.level[0],
+			                      level);
+			break;
+		case AUDIT_GREATER_THAN:
+			match = (mls_level_dom(level,
+			                      &rule->au_ctxt.range.level[0]) &&
+			         !mls_level_eq(level,
+			                       &rule->au_ctxt.range.level[0]));
+			break;
+		case AUDIT_GREATER_THAN_OR_EQUAL:
+			match = mls_level_dom(level,
+			                      &rule->au_ctxt.range.level[0]);
+			break;
+		}
+	}
+
+out:
+	POLICY_RDUNLOCK;
+	return match;
+}
+
+static int (*aurule_callback)(void) = NULL;
+
+static int aurule_avc_callback(u32 event, u32 ssid, u32 tsid,
+                               u16 class, u32 perms, u32 *retained)
+{
+	int err = 0;
+
+	if (event == AVC_CALLBACK_RESET && aurule_callback)
+		err = aurule_callback();
+	return err;
+}
+
+static int __init aurule_init(void)
+{
+	int err;
+
+	err = avc_add_callback(aurule_avc_callback, AVC_CALLBACK_RESET,
+	                       SECSID_NULL, SECSID_NULL, SECCLASS_NULL, 0);
+	if (err)
+		panic("avc_add_callback() failed, error %d\n", err);
+
+	return err;
+}
+__initcall(aurule_init);
+
+void selinux_audit_set_callback(int (*callback)(void))
+{
+	aurule_callback = callback;
+}
-- 
cgit v0.10.2


From 3dc7e3153eddfcf7ba8b50628775ba516e5f759f Mon Sep 17 00:00:00 2001
From: Darrel Goeddel <dgoeddel@trustedcs.com>
Date: Fri, 10 Mar 2006 18:14:06 -0600
Subject: [PATCH] support for context based audit filtering, part 2

This patch provides the ability to filter audit messages based on the
elements of the process' SELinux context (user, role, type, mls sensitivity,
and mls clearance).  It uses the new interfaces from selinux to opaquely
store information related to the selinux context and to filter based on that
information.  It also uses the callback mechanism provided by selinux to
refresh the information when a new policy is loaded.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/kernel/audit.c b/kernel/audit.c
index c8ccbd0..9060be7 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -55,6 +55,9 @@
 #include <net/netlink.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
+#include <linux/selinux.h>
+
+#include "audit.h"
 
 /* No auditing will take place until audit_initialized != 0.
  * (Initialization happens after skb_init is called.) */
@@ -564,6 +567,11 @@ static int __init audit_init(void)
 	skb_queue_head_init(&audit_skb_queue);
 	audit_initialized = 1;
 	audit_enabled = audit_default;
+
+	/* Register the callback with selinux.  This callback will be invoked
+	 * when a new policy is loaded. */
+	selinux_audit_set_callback(&selinux_audit_rule_update);
+
 	audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
 	return 0;
 }
diff --git a/kernel/audit.h b/kernel/audit.h
index bc53920..6f73392 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -54,9 +54,11 @@ enum audit_state {
 
 /* Rule lists */
 struct audit_field {
-	u32			type;
-	u32			val;
-	u32			op;
+	u32				type;
+	u32				val;
+	u32				op;
+	char				*se_str;
+	struct selinux_audit_rule	*se_rule;
 };
 
 struct audit_krule {
@@ -86,3 +88,5 @@ extern void		    audit_send_reply(int pid, int seq, int type,
 extern void		    audit_log_lost(const char *message);
 extern void		    audit_panic(const char *message);
 extern struct mutex audit_netlink_mutex;
+
+extern int selinux_audit_rule_update(void);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index d3a8539..85a7862 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -23,6 +23,7 @@
 #include <linux/audit.h>
 #include <linux/kthread.h>
 #include <linux/netlink.h>
+#include <linux/selinux.h>
 #include "audit.h"
 
 /* There are three lists of rules -- one to search at task creation
@@ -42,6 +43,13 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
 
 static inline void audit_free_rule(struct audit_entry *e)
 {
+	int i;
+	if (e->rule.fields)
+		for (i = 0; i < e->rule.field_count; i++) {
+			struct audit_field *f = &e->rule.fields[i];
+			kfree(f->se_str);
+			selinux_audit_rule_free(f->se_rule);
+		}
 	kfree(e->rule.fields);
 	kfree(e);
 }
@@ -52,9 +60,29 @@ static inline void audit_free_rule_rcu(struct rcu_head *head)
 	audit_free_rule(e);
 }
 
+/* Initialize an audit filterlist entry. */
+static inline struct audit_entry *audit_init_entry(u32 field_count)
+{
+	struct audit_entry *entry;
+	struct audit_field *fields;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (unlikely(!entry))
+		return NULL;
+
+	fields = kzalloc(sizeof(*fields) * field_count, GFP_KERNEL);
+	if (unlikely(!fields)) {
+		kfree(entry);
+		return NULL;
+	}
+	entry->rule.fields = fields;
+
+	return entry;
+}
+
 /* Unpack a filter field's string representation from user-space
  * buffer. */
-static __attribute__((unused)) char *audit_unpack_string(void **bufp, size_t *remain, size_t len)
+static char *audit_unpack_string(void **bufp, size_t *remain, size_t len)
 {
 	char *str;
 
@@ -84,7 +112,6 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
 {
 	unsigned listnr;
 	struct audit_entry *entry;
-	struct audit_field *fields;
 	int i, err;
 
 	err = -EINVAL;
@@ -108,23 +135,14 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
 		goto exit_err;
 
 	err = -ENOMEM;
-	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-	if (unlikely(!entry))
-		goto exit_err;
-	fields = kmalloc(sizeof(*fields) * rule->field_count, GFP_KERNEL);
-	if (unlikely(!fields)) {
-		kfree(entry);
+	entry = audit_init_entry(rule->field_count);
+	if (!entry)
 		goto exit_err;
-	}
-
-	memset(&entry->rule, 0, sizeof(struct audit_krule));
-	memset(fields, 0, sizeof(struct audit_field));
 
 	entry->rule.flags = rule->flags & AUDIT_FILTER_PREPEND;
 	entry->rule.listnr = listnr;
 	entry->rule.action = rule->action;
 	entry->rule.field_count = rule->field_count;
-	entry->rule.fields = fields;
 
 	for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
 		entry->rule.mask[i] = rule->mask[i];
@@ -150,15 +168,20 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 	for (i = 0; i < rule->field_count; i++) {
 		struct audit_field *f = &entry->rule.fields[i];
 
-		if (rule->fields[i] & AUDIT_UNUSED_BITS) {
-			err = -EINVAL;
-			goto exit_free;
-		}
-
 		f->op = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS);
 		f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS);
 		f->val = rule->values[i];
 
+		if (f->type & AUDIT_UNUSED_BITS ||
+		    f->type == AUDIT_SE_USER ||
+		    f->type == AUDIT_SE_ROLE ||
+		    f->type == AUDIT_SE_TYPE ||
+		    f->type == AUDIT_SE_SEN ||
+		    f->type == AUDIT_SE_CLR) {
+			err = -EINVAL;
+			goto exit_free;
+		}
+
 		entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1;
 
 		/* Support for legacy operators where
@@ -188,8 +211,9 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 	int err = 0;
 	struct audit_entry *entry;
 	void *bufp;
-	/* size_t remain = datasz - sizeof(struct audit_rule_data); */
+	size_t remain = datasz - sizeof(struct audit_rule_data);
 	int i;
+	char *str;
 
 	entry = audit_to_entry_common((struct audit_rule *)data);
 	if (IS_ERR(entry))
@@ -207,10 +231,35 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 
 		f->op = data->fieldflags[i] & AUDIT_OPERATORS;
 		f->type = data->fields[i];
+		f->val = data->values[i];
+		f->se_str = NULL;
+		f->se_rule = NULL;
 		switch(f->type) {
-		/* call type-specific conversion routines here */
-		default:
-			f->val = data->values[i];
+		case AUDIT_SE_USER:
+		case AUDIT_SE_ROLE:
+		case AUDIT_SE_TYPE:
+		case AUDIT_SE_SEN:
+		case AUDIT_SE_CLR:
+			str = audit_unpack_string(&bufp, &remain, f->val);
+			if (IS_ERR(str))
+				goto exit_free;
+			entry->rule.buflen += f->val;
+
+			err = selinux_audit_rule_init(f->type, f->op, str,
+						      &f->se_rule);
+			/* Keep currently invalid fields around in case they
+			 * become valid after a policy reload. */
+			if (err == -EINVAL) {
+				printk(KERN_WARNING "audit rule for selinux "
+				       "\'%s\' is invalid\n",  str);
+				err = 0;
+			}
+			if (err) {
+				kfree(str);
+				goto exit_free;
+			} else
+				f->se_str = str;
+			break;
 		}
 	}
 
@@ -286,7 +335,14 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
 		data->fields[i] = f->type;
 		data->fieldflags[i] = f->op;
 		switch(f->type) {
-		/* call type-specific conversion routines here */
+		case AUDIT_SE_USER:
+		case AUDIT_SE_ROLE:
+		case AUDIT_SE_TYPE:
+		case AUDIT_SE_SEN:
+		case AUDIT_SE_CLR:
+			data->buflen += data->values[i] =
+				audit_pack_string(&bufp, f->se_str);
+			break;
 		default:
 			data->values[i] = f->val;
 		}
@@ -314,7 +370,14 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
 			return 1;
 
 		switch(a->fields[i].type) {
-		/* call type-specific comparison routines here */
+		case AUDIT_SE_USER:
+		case AUDIT_SE_ROLE:
+		case AUDIT_SE_TYPE:
+		case AUDIT_SE_SEN:
+		case AUDIT_SE_CLR:
+			if (strcmp(a->fields[i].se_str, b->fields[i].se_str))
+				return 1;
+			break;
 		default:
 			if (a->fields[i].val != b->fields[i].val)
 				return 1;
@@ -328,6 +391,81 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
 	return 0;
 }
 
+/* Duplicate selinux field information.  The se_rule is opaque, so must be
+ * re-initialized. */
+static inline int audit_dupe_selinux_field(struct audit_field *df,
+					   struct audit_field *sf)
+{
+	int ret = 0;
+	char *se_str;
+
+	/* our own copy of se_str */
+	se_str = kstrdup(sf->se_str, GFP_KERNEL);
+	if (unlikely(IS_ERR(se_str)))
+	    return -ENOMEM;
+	df->se_str = se_str;
+
+	/* our own (refreshed) copy of se_rule */
+	ret = selinux_audit_rule_init(df->type, df->op, df->se_str,
+				      &df->se_rule);
+	/* Keep currently invalid fields around in case they
+	 * become valid after a policy reload. */
+	if (ret == -EINVAL) {
+		printk(KERN_WARNING "audit rule for selinux \'%s\' is "
+		       "invalid\n", df->se_str);
+		ret = 0;
+	}
+
+	return ret;
+}
+
+/* Duplicate an audit rule.  This will be a deep copy with the exception
+ * of the watch - that pointer is carried over.  The selinux specific fields
+ * will be updated in the copy.  The point is to be able to replace the old
+ * rule with the new rule in the filterlist, then free the old rule. */
+static struct audit_entry *audit_dupe_rule(struct audit_krule *old)
+{
+	u32 fcount = old->field_count;
+	struct audit_entry *entry;
+	struct audit_krule *new;
+	int i, err = 0;
+
+	entry = audit_init_entry(fcount);
+	if (unlikely(!entry))
+		return ERR_PTR(-ENOMEM);
+
+	new = &entry->rule;
+	new->vers_ops = old->vers_ops;
+	new->flags = old->flags;
+	new->listnr = old->listnr;
+	new->action = old->action;
+	for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
+		new->mask[i] = old->mask[i];
+	new->buflen = old->buflen;
+	new->field_count = old->field_count;
+	memcpy(new->fields, old->fields, sizeof(struct audit_field) * fcount);
+
+	/* deep copy this information, updating the se_rule fields, because
+	 * the originals will all be freed when the old rule is freed. */
+	for (i = 0; i < fcount; i++) {
+		switch (new->fields[i].type) {
+		case AUDIT_SE_USER:
+		case AUDIT_SE_ROLE:
+		case AUDIT_SE_TYPE:
+		case AUDIT_SE_SEN:
+		case AUDIT_SE_CLR:
+			err = audit_dupe_selinux_field(&new->fields[i],
+						       &old->fields[i]);
+		}
+		if (err) {
+			audit_free_rule(entry);
+			return ERR_PTR(err);
+		}
+	}
+
+	return entry;
+}
+
 /* Add rule to given filterlist if not a duplicate.  Protected by
  * audit_netlink_mutex. */
 static inline int audit_add_rule(struct audit_entry *entry,
@@ -628,3 +766,62 @@ unlock_and_return:
 	rcu_read_unlock();
 	return result;
 }
+
+/* Check to see if the rule contains any selinux fields.  Returns 1 if there
+   are selinux fields specified in the rule, 0 otherwise. */
+static inline int audit_rule_has_selinux(struct audit_krule *rule)
+{
+	int i;
+
+	for (i = 0; i < rule->field_count; i++) {
+		struct audit_field *f = &rule->fields[i];
+		switch (f->type) {
+		case AUDIT_SE_USER:
+		case AUDIT_SE_ROLE:
+		case AUDIT_SE_TYPE:
+		case AUDIT_SE_SEN:
+		case AUDIT_SE_CLR:
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/* This function will re-initialize the se_rule field of all applicable rules.
+ * It will traverse the filter lists serarching for rules that contain selinux
+ * specific filter fields.  When such a rule is found, it is copied, the
+ * selinux field is re-initialized, and the old rule is replaced with the
+ * updated rule. */
+int selinux_audit_rule_update(void)
+{
+	struct audit_entry *entry, *n, *nentry;
+	int i, err = 0;
+
+	/* audit_netlink_mutex synchronizes the writers */
+	mutex_lock(&audit_netlink_mutex);
+
+	for (i = 0; i < AUDIT_NR_FILTERS; i++) {
+		list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) {
+			if (!audit_rule_has_selinux(&entry->rule))
+				continue;
+
+			nentry = audit_dupe_rule(&entry->rule);
+			if (unlikely(IS_ERR(nentry))) {
+				/* save the first error encountered for the
+				 * return value */
+				if (!err)
+					err = PTR_ERR(nentry);
+				audit_panic("error updating selinux filters");
+				list_del_rcu(&entry->list);
+			} else {
+				list_replace_rcu(&entry->list, &nentry->list);
+			}
+			call_rcu(&entry->rcu, audit_free_rule_rcu);
+		}
+	}
+
+	mutex_unlock(&audit_netlink_mutex);
+
+	return err;
+}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 8aca4ab..d3d97d2 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -58,6 +58,7 @@
 #include <linux/security.h>
 #include <linux/list.h>
 #include <linux/tty.h>
+#include <linux/selinux.h>
 
 #include "audit.h"
 
@@ -168,6 +169,9 @@ static int audit_filter_rules(struct task_struct *tsk,
 			      enum audit_state *state)
 {
 	int i, j;
+	u32 sid;
+
+	selinux_task_ctxid(tsk, &sid);
 
 	for (i = 0; i < rule->field_count; i++) {
 		struct audit_field *f = &rule->fields[i];
@@ -257,6 +261,22 @@ static int audit_filter_rules(struct task_struct *tsk,
 			if (ctx)
 				result = audit_comparator(ctx->loginuid, f->op, f->val);
 			break;
+		case AUDIT_SE_USER:
+		case AUDIT_SE_ROLE:
+		case AUDIT_SE_TYPE:
+		case AUDIT_SE_SEN:
+		case AUDIT_SE_CLR:
+			/* NOTE: this may return negative values indicating
+			   a temporary error.  We simply treat this as a
+			   match for now to avoid losing information that
+			   may be wanted.   An error message will also be
+			   logged upon error */
+			if (f->se_rule)
+				result = selinux_audit_rule_match(sid, f->type,
+				                                  f->op,
+				                                  f->se_rule,
+				                                  ctx);
+			break;
 		case AUDIT_ARG0:
 		case AUDIT_ARG1:
 		case AUDIT_ARG2:
-- 
cgit v0.10.2


From 1b50eed9cac0e8e5e4d3a522d8aa267f7f8f8acb Mon Sep 17 00:00:00 2001
From: Steve Grubb <sgrubb@redhat.com>
Date: Mon, 3 Apr 2006 14:06:13 -0400
Subject: [PATCH] audit inode patch

Previously, we were gathering the context instead of the sid. Now in this patch,
we gather just the sid and convert to context only if an audit event is being
output.

This patch brings the performance hit from 146% down to 23%

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/selinux.h b/include/linux/selinux.h
index 9d684b1..84a6c74 100644
--- a/include/linux/selinux.h
+++ b/include/linux/selinux.h
@@ -15,6 +15,7 @@
 
 struct selinux_audit_rule;
 struct audit_context;
+struct inode;
 
 #ifdef CONFIG_SECURITY_SELINUX
 
@@ -76,6 +77,27 @@ void selinux_audit_set_callback(int (*callback)(void));
  */
 void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid);
 
+/**
+ *     selinux_ctxid_to_string - map a security context ID to a string
+ *     @ctxid: security context ID to be converted.
+ *     @ctx: address of context string to be returned
+ *     @ctxlen: length of returned context string.
+ *
+ *     Returns 0 if successful, -errno if not.  On success, the context
+ *     string will be allocated internally, and the caller must call
+ *     kfree() on it after use.
+ */
+int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen);
+
+/**
+ *     selinux_get_inode_sid - get the inode's security context ID
+ *     @inode: inode structure to get the sid from.
+ *     @sid: pointer to security context ID to be filled in.
+ *
+ *     Returns nothing
+ */
+void selinux_get_inode_sid(const struct inode *inode, u32 *sid);
+
 #else
 
 static inline int selinux_audit_rule_init(u32 field, u32 op,
@@ -107,6 +129,18 @@ static inline void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid)
 	*ctxid = 0;
 }
 
+static inline int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen)
+{
+       *ctx = NULL;
+       *ctxlen = 0;
+       return 0;
+}
+
+static inline void selinux_get_inode_sid(const struct inode *inode, u32 *sid)
+{
+	*sid = 0;
+}
+
 #endif	/* CONFIG_SECURITY_SELINUX */
 
 #endif /* _LINUX_SELINUX_H */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index d3d97d2..2e123a8 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -90,7 +90,7 @@ struct audit_names {
 	uid_t		uid;
 	gid_t		gid;
 	dev_t		rdev;
-	char		*ctx;
+	u32		osid;
 };
 
 struct audit_aux_data {
@@ -410,9 +410,6 @@ static inline void audit_free_names(struct audit_context *context)
 #endif
 
 	for (i = 0; i < context->name_count; i++) {
-		char *p = context->names[i].ctx;
-		context->names[i].ctx = NULL;
-		kfree(p);
 		if (context->names[i].name)
 			__putname(context->names[i].name);
 	}
@@ -674,6 +671,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		}
 	}
 	for (i = 0; i < context->name_count; i++) {
+		int call_panic = 0;
 		unsigned long ino  = context->names[i].ino;
 		unsigned long pino = context->names[i].pino;
 
@@ -703,12 +701,22 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 					 context->names[i].gid, 
 					 MAJOR(context->names[i].rdev), 
 					 MINOR(context->names[i].rdev));
-		if (context->names[i].ctx) {
-			audit_log_format(ab, " obj=%s",
-					context->names[i].ctx);
+		if (context->names[i].osid != 0) {
+			char *ctx = NULL;
+			u32 len;
+			if (selinux_ctxid_to_string(
+				context->names[i].osid, &ctx, &len)) {
+				audit_log_format(ab, " obj=%u",
+						context->names[i].osid);
+				call_panic = 1;
+			} else
+				audit_log_format(ab, " obj=%s", ctx);
+			kfree(ctx);
 		}
 
 		audit_log_end(ab);
+		if (call_panic)
+			audit_panic("error converting sid to string");
 	}
 }
 
@@ -946,37 +954,8 @@ void audit_putname(const char *name)
 void audit_inode_context(int idx, const struct inode *inode)
 {
 	struct audit_context *context = current->audit_context;
-	const char *suffix = security_inode_xattr_getsuffix();
-	char *ctx = NULL;
-	int len = 0;
-
-	if (!suffix)
-		goto ret;
-
-	len = security_inode_getsecurity(inode, suffix, NULL, 0, 0);
-	if (len == -EOPNOTSUPP)
-		goto ret;
-	if (len < 0) 
-		goto error_path;
-
-	ctx = kmalloc(len, GFP_KERNEL);
-	if (!ctx) 
-		goto error_path;
-
-	len = security_inode_getsecurity(inode, suffix, ctx, len, 0);
-	if (len < 0)
-		goto error_path;
-
-	kfree(context->names[idx].ctx);
-	context->names[idx].ctx = ctx;
-	goto ret;
 
-error_path:
-	if (ctx)
-		kfree(ctx);
-	audit_panic("error in audit_inode_context");
-ret:
-	return;
+	selinux_get_inode_sid(inode, &context->names[idx].osid);
 }
 
 
diff --git a/security/selinux/exports.c b/security/selinux/exports.c
index 333c4c7..07ddce7 100644
--- a/security/selinux/exports.c
+++ b/security/selinux/exports.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/selinux.h>
+#include <linux/fs.h>
 
 #include "security.h"
 #include "objsec.h"
@@ -26,3 +27,26 @@ void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid)
 	else
 		*ctxid = 0;
 }
+
+int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen)
+{
+	if (selinux_enabled)
+		return security_sid_to_context(ctxid, ctx, ctxlen);
+	else {
+		*ctx = NULL;
+		*ctxlen = 0;
+	}
+
+	return 0;
+}
+
+void selinux_get_inode_sid(const struct inode *inode, u32 *sid)
+{
+	if (selinux_enabled) {
+		struct inode_security_struct *isec = inode->i_security;
+		*sid = isec->sid;
+		return;
+	}
+	*sid = 0;
+}
+
-- 
cgit v0.10.2


From 9c7aa6aa74fa8a5cda36e54cbbe4fffe0214497d Mon Sep 17 00:00:00 2001
From: Steve Grubb <sgrubb@redhat.com>
Date: Fri, 31 Mar 2006 15:22:49 -0500
Subject: [PATCH] change lspp ipc auditing

Hi,

The patch below converts IPC auditing to collect sid's and convert to context
string only if it needs to output an audit record. This patch depends on the
inode audit change patch already being applied.

Signed-off-by: Steve Grubb <sgrubb@redhat.com>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/security.h b/include/linux/security.h
index aaa0a5c..1bab48f 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -869,11 +869,6 @@ struct swap_info_struct;
  *	@ipcp contains the kernel IPC permission structure
  *	@flag contains the desired (requested) permission set
  *	Return 0 if permission is granted.
- * @ipc_getsecurity:
- *      Copy the security label associated with the ipc object into
- *      @buffer.  @buffer may be NULL to request the size of the buffer 
- *      required.  @size indicates the size of @buffer in bytes. Return 
- *      number of bytes used/required on success.
  *
  * Security hooks for individual messages held in System V IPC message queues
  * @msg_msg_alloc_security:
@@ -1223,7 +1218,6 @@ struct security_operations {
 	void (*task_to_inode)(struct task_struct *p, struct inode *inode);
 
 	int (*ipc_permission) (struct kern_ipc_perm * ipcp, short flag);
-	int (*ipc_getsecurity)(struct kern_ipc_perm *ipcp, void *buffer, size_t size);
 
 	int (*msg_msg_alloc_security) (struct msg_msg * msg);
 	void (*msg_msg_free_security) (struct msg_msg * msg);
@@ -1887,11 +1881,6 @@ static inline int security_ipc_permission (struct kern_ipc_perm *ipcp,
 	return security_ops->ipc_permission (ipcp, flag);
 }
 
-static inline int security_ipc_getsecurity(struct kern_ipc_perm *ipcp, void *buffer, size_t size)
-{
-	return security_ops->ipc_getsecurity(ipcp, buffer, size);
-}
-
 static inline int security_msg_msg_alloc (struct msg_msg * msg)
 {
 	return security_ops->msg_msg_alloc_security (msg);
@@ -2532,11 +2521,6 @@ static inline int security_ipc_permission (struct kern_ipc_perm *ipcp,
 	return 0;
 }
 
-static inline int security_ipc_getsecurity(struct kern_ipc_perm *ipcp, void *buffer, size_t size)
-{
-	return -EOPNOTSUPP;
-}
-
 static inline int security_msg_msg_alloc (struct msg_msg * msg)
 {
 	return 0;
diff --git a/include/linux/selinux.h b/include/linux/selinux.h
index 84a6c74..413d667 100644
--- a/include/linux/selinux.h
+++ b/include/linux/selinux.h
@@ -16,6 +16,7 @@
 struct selinux_audit_rule;
 struct audit_context;
 struct inode;
+struct kern_ipc_perm;
 
 #ifdef CONFIG_SECURITY_SELINUX
 
@@ -98,6 +99,15 @@ int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen);
  */
 void selinux_get_inode_sid(const struct inode *inode, u32 *sid);
 
+/**
+ *     selinux_get_ipc_sid - get the ipc security context ID
+ *     @ipcp: ipc structure to get the sid from.
+ *     @sid: pointer to security context ID to be filled in.
+ *
+ *     Returns nothing
+ */
+void selinux_get_ipc_sid(const struct kern_ipc_perm *ipcp, u32 *sid);
+
 #else
 
 static inline int selinux_audit_rule_init(u32 field, u32 op,
@@ -141,6 +151,11 @@ static inline void selinux_get_inode_sid(const struct inode *inode, u32 *sid)
 	*sid = 0;
 }
 
+static inline void selinux_get_ipc_sid(const struct kern_ipc_perm *ipcp, u32 *sid)
+{
+	*sid = 0;
+}
+
 #endif	/* CONFIG_SECURITY_SELINUX */
 
 #endif /* _LINUX_SELINUX_H */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2e123a8..b4f7223 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -107,7 +107,7 @@ struct audit_aux_data_ipcctl {
 	uid_t			uid;
 	gid_t			gid;
 	mode_t			mode;
-	char 			*ctx;
+	u32			osid;
 };
 
 struct audit_aux_data_socketcall {
@@ -432,11 +432,6 @@ static inline void audit_free_aux(struct audit_context *context)
 			dput(axi->dentry);
 			mntput(axi->mnt);
 		}
-		if ( aux->type == AUDIT_IPC ) {
-			struct audit_aux_data_ipcctl *axi = (void *)aux;
-			if (axi->ctx)
-				kfree(axi->ctx);
-		}
 
 		context->aux = aux->next;
 		kfree(aux);
@@ -584,7 +579,7 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk
 
 static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
 {
-	int i;
+	int i, call_panic = 0;
 	struct audit_buffer *ab;
 	struct audit_aux_data *aux;
 	const char *tty;
@@ -635,8 +630,20 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		case AUDIT_IPC: {
 			struct audit_aux_data_ipcctl *axi = (void *)aux;
 			audit_log_format(ab, 
-					 " qbytes=%lx iuid=%u igid=%u mode=%x obj=%s",
-					 axi->qbytes, axi->uid, axi->gid, axi->mode, axi->ctx);
+				 " qbytes=%lx iuid=%u igid=%u mode=%x",
+				 axi->qbytes, axi->uid, axi->gid, axi->mode);
+			if (axi->osid != 0) {
+				char *ctx = NULL;
+				u32 len;
+				if (selinux_ctxid_to_string(
+						axi->osid, &ctx, &len)) {
+					audit_log_format(ab, " obj=%u",
+							axi->osid);
+					call_panic = 1;
+				} else
+					audit_log_format(ab, " obj=%s", ctx);
+				kfree(ctx);
+			}
 			break; }
 
 		case AUDIT_SOCKETCALL: {
@@ -671,7 +678,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 		}
 	}
 	for (i = 0; i < context->name_count; i++) {
-		int call_panic = 0;
 		unsigned long ino  = context->names[i].ino;
 		unsigned long pino = context->names[i].pino;
 
@@ -708,16 +714,16 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				context->names[i].osid, &ctx, &len)) {
 				audit_log_format(ab, " obj=%u",
 						context->names[i].osid);
-				call_panic = 1;
+				call_panic = 2;
 			} else
 				audit_log_format(ab, " obj=%s", ctx);
 			kfree(ctx);
 		}
 
 		audit_log_end(ab);
-		if (call_panic)
-			audit_panic("error converting sid to string");
 	}
+	if (call_panic)
+		audit_panic("error converting sid to string");
 }
 
 /**
@@ -951,7 +957,7 @@ void audit_putname(const char *name)
 #endif
 }
 
-void audit_inode_context(int idx, const struct inode *inode)
+static void audit_inode_context(int idx, const struct inode *inode)
 {
 	struct audit_context *context = current->audit_context;
 
@@ -1141,38 +1147,6 @@ uid_t audit_get_loginuid(struct audit_context *ctx)
 	return ctx ? ctx->loginuid : -1;
 }
 
-static char *audit_ipc_context(struct kern_ipc_perm *ipcp)
-{
-	struct audit_context *context = current->audit_context;
-	char *ctx = NULL;
-	int len = 0;
-
-	if (likely(!context))
-		return NULL;
-
-	len = security_ipc_getsecurity(ipcp, NULL, 0);
-	if (len == -EOPNOTSUPP)
-		goto ret;
-	if (len < 0)
-		goto error_path;
-
-	ctx = kmalloc(len, GFP_ATOMIC);
-	if (!ctx)
-		goto error_path;
-
-	len = security_ipc_getsecurity(ipcp, ctx, len);
-	if (len < 0)
-		goto error_path;
-
-	return ctx;
-
-error_path:
-	kfree(ctx);
-	audit_panic("error in audit_ipc_context");
-ret:
-	return NULL;
-}
-
 /**
  * audit_ipc_perms - record audit data for ipc
  * @qbytes: msgq bytes
@@ -1198,7 +1172,7 @@ int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, str
 	ax->uid = uid;
 	ax->gid = gid;
 	ax->mode = mode;
-	ax->ctx = audit_ipc_context(ipcp);
+	selinux_get_ipc_sid(ipcp, &ax->osid);
 
 	ax->d.type = AUDIT_IPC;
 	ax->d.next = context->aux;
diff --git a/security/dummy.c b/security/dummy.c
index fd99429..8cccccc 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -563,11 +563,6 @@ static int dummy_ipc_permission (struct kern_ipc_perm *ipcp, short flag)
 	return 0;
 }
 
-static int dummy_ipc_getsecurity(struct kern_ipc_perm *ipcp, void *buffer, size_t size)
-{
-	return -EOPNOTSUPP;
-}
-
 static int dummy_msg_msg_alloc_security (struct msg_msg *msg)
 {
 	return 0;
@@ -976,7 +971,6 @@ void security_fixup_ops (struct security_operations *ops)
 	set_to_dummy_if_null(ops, task_reparent_to_init);
  	set_to_dummy_if_null(ops, task_to_inode);
 	set_to_dummy_if_null(ops, ipc_permission);
-	set_to_dummy_if_null(ops, ipc_getsecurity);
 	set_to_dummy_if_null(ops, msg_msg_alloc_security);
 	set_to_dummy_if_null(ops, msg_msg_free_security);
 	set_to_dummy_if_null(ops, msg_queue_alloc_security);
diff --git a/security/selinux/exports.c b/security/selinux/exports.c
index 07ddce7..7357cf2 100644
--- a/security/selinux/exports.c
+++ b/security/selinux/exports.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/selinux.h>
 #include <linux/fs.h>
+#include <linux/ipc.h>
 
 #include "security.h"
 #include "objsec.h"
@@ -50,3 +51,13 @@ void selinux_get_inode_sid(const struct inode *inode, u32 *sid)
 	*sid = 0;
 }
 
+void selinux_get_ipc_sid(const struct kern_ipc_perm *ipcp, u32 *sid)
+{
+	if (selinux_enabled) {
+		struct ipc_security_struct *isec = ipcp->security;
+		*sid = isec->sid;
+		return;
+	}
+	*sid = 0;
+}
+
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index b61b9554..3cf368a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4052,13 +4052,6 @@ static int selinux_ipc_permission(struct kern_ipc_perm *ipcp, short flag)
 	return ipc_has_perm(ipcp, av);
 }
 
-static int selinux_ipc_getsecurity(struct kern_ipc_perm *ipcp, void *buffer, size_t size)
-{
-	struct ipc_security_struct *isec = ipcp->security;
-
-	return selinux_getsecurity(isec->sid, buffer, size);
-}
-
 /* module stacking operations */
 static int selinux_register_security (const char *name, struct security_operations *ops)
 {
@@ -4321,7 +4314,6 @@ static struct security_operations selinux_ops = {
 	.task_to_inode =                selinux_task_to_inode,
 
 	.ipc_permission =		selinux_ipc_permission,
-	.ipc_getsecurity =		selinux_ipc_getsecurity,
 
 	.msg_msg_alloc_security =	selinux_msg_msg_alloc_security,
 	.msg_msg_free_security =	selinux_msg_msg_free_security,
-- 
cgit v0.10.2


From e7c3497013a7e5496ce3d5fd3c73b5cf5af7a56e Mon Sep 17 00:00:00 2001
From: Steve Grubb <sgrubb@redhat.com>
Date: Mon, 3 Apr 2006 09:08:13 -0400
Subject: [PATCH] Reworked patch for labels on user space messages

The below patch should be applied after the inode and ipc sid patches.
This patch is a reworking of Tim's patch that has been updated to match
the inode and ipc patches since its similar.

[updated:
>  Stephen Smalley also wanted to change a variable from isec to tsec in the
>  user sid patch.                                                              ]

Signed-off-by: Steve Grubb <sgrubb@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index f8f3d1c..87b8a57 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -143,6 +143,7 @@ struct netlink_skb_parms
 	__u32			dst_group;
 	kernel_cap_t		eff_cap;
 	__u32			loginuid;	/* Login (audit) uid */
+	__u32			sid;		/* SELinux security id */
 };
 
 #define NETLINK_CB(skb)		(*(struct netlink_skb_parms*)&((skb)->cb))
diff --git a/include/linux/selinux.h b/include/linux/selinux.h
index 413d667..4047bcd 100644
--- a/include/linux/selinux.h
+++ b/include/linux/selinux.h
@@ -5,6 +5,7 @@
  *
  * Copyright (C) 2005 Red Hat, Inc., James Morris <jmorris@redhat.com>
  * Copyright (C) 2006 Trusted Computer Solutions, Inc. <dgoeddel@trustedcs.com>
+ * Copyright (C) 2006 IBM Corporation, Timothy R. Chavez <tinytim@us.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2,
@@ -108,6 +109,16 @@ void selinux_get_inode_sid(const struct inode *inode, u32 *sid);
  */
 void selinux_get_ipc_sid(const struct kern_ipc_perm *ipcp, u32 *sid);
 
+/**
+ *     selinux_get_task_sid - return the SID of task
+ *     @tsk: the task whose SID will be returned
+ *     @sid: pointer to security context ID to be filled in.
+ *
+ *     Returns nothing
+ */
+void selinux_get_task_sid(struct task_struct *tsk, u32 *sid);
+
+
 #else
 
 static inline int selinux_audit_rule_init(u32 field, u32 op,
@@ -156,6 +167,11 @@ static inline void selinux_get_ipc_sid(const struct kern_ipc_perm *ipcp, u32 *si
 	*sid = 0;
 }
 
+static inline void selinux_get_task_sid(struct task_struct *tsk, u32 *sid)
+{
+	*sid = 0;
+}
+
 #endif	/* CONFIG_SECURITY_SELINUX */
 
 #endif /* _LINUX_SELINUX_H */
diff --git a/kernel/audit.c b/kernel/audit.c
index 9060be7..7ec9cca 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -390,7 +390,7 @@ static int audit_netlink_ok(kernel_cap_t eff_cap, u16 msg_type)
 
 static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	u32			uid, pid, seq;
+	u32			uid, pid, seq, sid;
 	void			*data;
 	struct audit_status	*status_get, status_set;
 	int			err;
@@ -416,6 +416,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	pid  = NETLINK_CREDS(skb)->pid;
 	uid  = NETLINK_CREDS(skb)->uid;
 	loginuid = NETLINK_CB(skb).loginuid;
+	sid  = NETLINK_CB(skb).sid;
 	seq  = nlh->nlmsg_seq;
 	data = NLMSG_DATA(nlh);
 
@@ -468,8 +469,23 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
 			if (ab) {
 				audit_log_format(ab,
-						 "user pid=%d uid=%u auid=%u msg='%.1024s'",
-						 pid, uid, loginuid, (char *)data);
+						 "user pid=%d uid=%u auid=%u",
+						 pid, uid, loginuid);
+				if (sid) {
+					char *ctx = NULL;
+					u32 len;
+					if (selinux_ctxid_to_string(
+							sid, &ctx, &len)) {
+						audit_log_format(ab, 
+							" subj=%u", sid);
+						/* Maybe call audit_panic? */
+					} else
+						audit_log_format(ab, 
+							" subj=%s", ctx);
+					kfree(ctx);
+				}
+				audit_log_format(ab, " msg='%.1024s'",
+					 (char *)data);
 				audit_set_pid(ab, pid);
 				audit_log_end(ab);
 			}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2a233ff..09fbc4b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -56,6 +56,7 @@
 #include <linux/mm.h>
 #include <linux/types.h>
 #include <linux/audit.h>
+#include <linux/selinux.h>
 
 #include <net/sock.h>
 #include <net/scm.h>
@@ -1157,6 +1158,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	NETLINK_CB(skb).dst_pid = dst_pid;
 	NETLINK_CB(skb).dst_group = dst_group;
 	NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context);
+	selinux_get_task_sid(current, &(NETLINK_CB(skb).sid));
 	memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
 
 	/* What can I do? Netlink is asynchronous, so that
diff --git a/security/selinux/exports.c b/security/selinux/exports.c
index 7357cf2..ae4c73e 100644
--- a/security/selinux/exports.c
+++ b/security/selinux/exports.c
@@ -5,6 +5,7 @@
  *
  * Copyright (C) 2005 Red Hat, Inc., James Morris <jmorris@redhat.com>
  * Copyright (C) 2006 Trusted Computer Solutions, Inc. <dgoeddel@trustedcs.com>
+ * Copyright (C) 2006 IBM Corporation, Timothy R. Chavez <tinytim@us.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2,
@@ -61,3 +62,13 @@ void selinux_get_ipc_sid(const struct kern_ipc_perm *ipcp, u32 *sid)
 	*sid = 0;
 }
 
+void selinux_get_task_sid(struct task_struct *tsk, u32 *sid)
+{
+	if (selinux_enabled) {
+		struct task_security_struct *tsec = tsk->security;
+		*sid = tsec->sid;
+		return;
+	}
+	*sid = 0;
+}
+
-- 
cgit v0.10.2


From ce29b682e228c70cdc91a1b2935c5adb2087bab8 Mon Sep 17 00:00:00 2001
From: Steve Grubb <sgrubb@redhat.com>
Date: Sat, 1 Apr 2006 18:29:34 -0500
Subject: [PATCH] More user space subject labels

Hi,

The patch below builds upon the patch sent earlier and adds subject label to
all audit events generated via the netlink interface. It also cleans up a few
other minor things.

Signed-off-by: Steve Grubb <sgrubb@redhat.com>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 740f950..d5c4082 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -371,7 +371,7 @@ extern void		    audit_log_d_path(struct audit_buffer *ab,
 extern int audit_filter_user(struct netlink_skb_parms *cb, int type);
 extern int audit_filter_type(int type);
 extern int  audit_receive_filter(int type, int pid, int uid, int seq,
-				 void *data, size_t datasz, uid_t loginuid);
+			 void *data, size_t datasz, uid_t loginuid, u32 sid);
 #else
 #define audit_log(c,g,t,f,...) do { ; } while (0)
 #define audit_log_start(c,g,t) ({ NULL; })
diff --git a/kernel/audit.c b/kernel/audit.c
index 7ec9cca..df57b49 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -230,49 +230,103 @@ void audit_log_lost(const char *message)
 	}
 }
 
-static int audit_set_rate_limit(int limit, uid_t loginuid)
+static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
 {
-	int old		 = audit_rate_limit;
-	audit_rate_limit = limit;
-	audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, 
+	int old	= audit_rate_limit;
+
+	if (sid) {
+		char *ctx = NULL;
+		u32 len;
+		int rc;
+		if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+			return rc;
+		else
+			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+				"audit_rate_limit=%d old=%d by auid=%u subj=%s",
+				limit, old, loginuid, ctx);
+		kfree(ctx);
+	} else
+		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
 			"audit_rate_limit=%d old=%d by auid=%u",
-			audit_rate_limit, old, loginuid);
+			limit, old, loginuid);
+	audit_rate_limit = limit;
 	return old;
 }
 
-static int audit_set_backlog_limit(int limit, uid_t loginuid)
+static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
 {
-	int old		 = audit_backlog_limit;
-	audit_backlog_limit = limit;
-	audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+	int old	= audit_backlog_limit;
+
+	if (sid) {
+		char *ctx = NULL;
+		u32 len;
+		int rc;
+		if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+			return rc;
+		else
+			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+			    "audit_backlog_limit=%d old=%d by auid=%u subj=%s",
+				limit, old, loginuid, ctx);
+		kfree(ctx);
+	} else
+		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
 			"audit_backlog_limit=%d old=%d by auid=%u",
-			audit_backlog_limit, old, loginuid);
+			limit, old, loginuid);
+	audit_backlog_limit = limit;
 	return old;
 }
 
-static int audit_set_enabled(int state, uid_t loginuid)
+static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
 {
-	int old		 = audit_enabled;
+	int old = audit_enabled;
+
 	if (state != 0 && state != 1)
 		return -EINVAL;
-	audit_enabled = state;
-	audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+
+	if (sid) {
+		char *ctx = NULL;
+		u32 len;
+		int rc;
+		if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+			return rc;
+		else
+			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+				"audit_enabled=%d old=%d by auid=%u subj=%s",
+				state, old, loginuid, ctx);
+		kfree(ctx);
+	} else
+		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
 			"audit_enabled=%d old=%d by auid=%u",
-			audit_enabled, old, loginuid);
+			state, old, loginuid);
+	audit_enabled = state;
 	return old;
 }
 
-static int audit_set_failure(int state, uid_t loginuid)
+static int audit_set_failure(int state, uid_t loginuid, u32 sid)
 {
-	int old		 = audit_failure;
+	int old = audit_failure;
+
 	if (state != AUDIT_FAIL_SILENT
 	    && state != AUDIT_FAIL_PRINTK
 	    && state != AUDIT_FAIL_PANIC)
 		return -EINVAL;
-	audit_failure = state;
-	audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+
+	if (sid) {
+		char *ctx = NULL;
+		u32 len;
+		int rc;
+		if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+			return rc;
+		else
+			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+				"audit_failure=%d old=%d by auid=%u subj=%s",
+				state, old, loginuid, ctx);
+		kfree(ctx);
+	} else
+		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
 			"audit_failure=%d old=%d by auid=%u",
-			audit_failure, old, loginuid);
+			state, old, loginuid);
+	audit_failure = state;
 	return old;
 }
 
@@ -437,25 +491,43 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			return -EINVAL;
 		status_get   = (struct audit_status *)data;
 		if (status_get->mask & AUDIT_STATUS_ENABLED) {
-			err = audit_set_enabled(status_get->enabled, loginuid);
+			err = audit_set_enabled(status_get->enabled,
+							loginuid, sid);
 			if (err < 0) return err;
 		}
 		if (status_get->mask & AUDIT_STATUS_FAILURE) {
-			err = audit_set_failure(status_get->failure, loginuid);
+			err = audit_set_failure(status_get->failure,
+							 loginuid, sid);
 			if (err < 0) return err;
 		}
 		if (status_get->mask & AUDIT_STATUS_PID) {
 			int old   = audit_pid;
+			if (sid) {
+				char *ctx = NULL;
+				u32 len;
+				int rc;
+				if ((rc = selinux_ctxid_to_string(
+						sid, &ctx, &len)))
+					return rc;
+				else
+					audit_log(NULL, GFP_KERNEL,
+						AUDIT_CONFIG_CHANGE,
+						"audit_pid=%d old=%d by auid=%u subj=%s",
+						status_get->pid, old,
+						loginuid, ctx);
+				kfree(ctx);
+			} else
+				audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+					"audit_pid=%d old=%d by auid=%u",
+					  status_get->pid, old, loginuid);
 			audit_pid = status_get->pid;
-			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-				"audit_pid=%d old=%d by auid=%u",
-				  audit_pid, old, loginuid);
 		}
 		if (status_get->mask & AUDIT_STATUS_RATE_LIMIT)
-			audit_set_rate_limit(status_get->rate_limit, loginuid);
+			audit_set_rate_limit(status_get->rate_limit,
+							 loginuid, sid);
 		if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
 			audit_set_backlog_limit(status_get->backlog_limit,
-							loginuid);
+							loginuid, sid);
 		break;
 	case AUDIT_USER:
 	case AUDIT_FIRST_USER_MSG...AUDIT_LAST_USER_MSG:
@@ -477,7 +549,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 					if (selinux_ctxid_to_string(
 							sid, &ctx, &len)) {
 						audit_log_format(ab, 
-							" subj=%u", sid);
+							" ssid=%u", sid);
 						/* Maybe call audit_panic? */
 					} else
 						audit_log_format(ab, 
@@ -499,7 +571,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case AUDIT_LIST:
 		err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
 					   uid, seq, data, nlmsg_len(nlh),
-					   loginuid);
+					   loginuid, sid);
 		break;
 	case AUDIT_ADD_RULE:
 	case AUDIT_DEL_RULE:
@@ -509,7 +581,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case AUDIT_LIST_RULES:
 		err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
 					   uid, seq, data, nlmsg_len(nlh),
-					   loginuid);
+					   loginuid, sid);
 		break;
 	case AUDIT_SIGNAL_INFO:
 		sig_data.uid = audit_sig_uid;
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 85a7862..7c13490 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -586,9 +586,10 @@ static int audit_list_rules(void *_dest)
  * @data: payload data
  * @datasz: size of payload data
  * @loginuid: loginuid of sender
+ * @sid: SE Linux Security ID of sender
  */
 int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
-			 size_t datasz, uid_t loginuid)
+			 size_t datasz, uid_t loginuid, u32 sid)
 {
 	struct task_struct *tsk;
 	int *dest;
@@ -631,9 +632,23 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
 
 		err = audit_add_rule(entry,
 				     &audit_filter_list[entry->rule.listnr]);
-		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-			"auid=%u add rule to list=%d res=%d\n",
-			loginuid, entry->rule.listnr, !err);
+		if (sid) {
+			char *ctx = NULL;
+			u32 len;
+			if (selinux_ctxid_to_string(sid, &ctx, &len)) {
+				/* Maybe call audit_panic? */
+				audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+				 "auid=%u ssid=%u add rule to list=%d res=%d",
+				 loginuid, sid, entry->rule.listnr, !err);
+			} else
+				audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+				 "auid=%u subj=%s add rule to list=%d res=%d",
+				 loginuid, ctx, entry->rule.listnr, !err);
+			kfree(ctx);
+		} else
+			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+				"auid=%u add rule to list=%d res=%d",
+				loginuid, entry->rule.listnr, !err);
 
 		if (err)
 			audit_free_rule(entry);
@@ -649,9 +664,24 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
 
 		err = audit_del_rule(entry,
 				     &audit_filter_list[entry->rule.listnr]);
-		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-			"auid=%u remove rule from list=%d res=%d\n",
-			loginuid, entry->rule.listnr, !err);
+
+		if (sid) {
+			char *ctx = NULL;
+			u32 len;
+			if (selinux_ctxid_to_string(sid, &ctx, &len)) {
+				/* Maybe call audit_panic? */
+				audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+					"auid=%u ssid=%u remove rule from list=%d res=%d",
+					 loginuid, sid, entry->rule.listnr, !err);
+			} else
+				audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+					"auid=%u subj=%s remove rule from list=%d res=%d",
+					 loginuid, ctx, entry->rule.listnr, !err);
+			kfree(ctx);
+		} else
+			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+				"auid=%u remove rule from list=%d res=%d",
+				loginuid, entry->rule.listnr, !err);
 
 		audit_free_rule(entry);
 		break;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b4f7223..d94e040 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -637,7 +637,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 				u32 len;
 				if (selinux_ctxid_to_string(
 						axi->osid, &ctx, &len)) {
-					audit_log_format(ab, " obj=%u",
+					audit_log_format(ab, " osid=%u",
 							axi->osid);
 					call_panic = 1;
 				} else
@@ -712,7 +712,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			u32 len;
 			if (selinux_ctxid_to_string(
 				context->names[i].osid, &ctx, &len)) {
-				audit_log_format(ab, " obj=%u",
+				audit_log_format(ab, " osid=%u",
 						context->names[i].osid);
 				call_panic = 2;
 			} else
-- 
cgit v0.10.2


From 073115d6b29c7910feaa08241c6484637f5ca958 Mon Sep 17 00:00:00 2001
From: Steve Grubb <sgrubb@redhat.com>
Date: Sun, 2 Apr 2006 17:07:33 -0400
Subject: [PATCH] Rework of IPC auditing

1) The audit_ipc_perms() function has been split into two different
functions:
        - audit_ipc_obj()
        - audit_ipc_set_perm()

There's a key shift here...  The audit_ipc_obj() collects the uid, gid,
mode, and SElinux context label of the current ipc object.  This
audit_ipc_obj() hook is now found in several places.  Most notably, it
is hooked in ipcperms(), which is called in various places around the
ipc code permforming a MAC check.  Additionally there are several places
where *checkid() is used to validate that an operation is being
performed on a valid object while not necessarily having a nearby
ipcperms() call.  In these locations, audit_ipc_obj() is called to
ensure that the information is captured by the audit system.

The audit_set_new_perm() function is called any time the permissions on
the ipc object changes.  In this case, the NEW permissions are recorded
(and note that an audit_ipc_obj() call exists just a few lines before
each instance).

2) Support for an AUDIT_IPC_SET_PERM audit message type.  This allows
for separate auxiliary audit records for normal operations on an IPC
object and permissions changes.  Note that the same struct
audit_aux_data_ipcctl is used and populated, however there are separate
audit_log_format statements based on the type of the message.  Finally,
the AUDIT_IPC block of code in audit_free_aux() was extended to handle
aux messages of this new type.  No more mem leaks I hope ;-)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/include/linux/audit.h b/include/linux/audit.h
index d5c4082..b74c148 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -83,6 +83,7 @@
 #define AUDIT_CONFIG_CHANGE	1305	/* Audit system configuration change */
 #define AUDIT_SOCKADDR		1306	/* sockaddr copied as syscall arg */
 #define AUDIT_CWD		1307	/* Current working directory */
+#define AUDIT_IPC_SET_PERM	1311	/* IPC new permissions record type */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
@@ -319,7 +320,8 @@ extern void auditsc_get_stamp(struct audit_context *ctx,
 			      struct timespec *t, unsigned int *serial);
 extern int  audit_set_loginuid(struct task_struct *task, uid_t loginuid);
 extern uid_t audit_get_loginuid(struct audit_context *ctx);
-extern int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp);
+extern int audit_ipc_obj(struct kern_ipc_perm *ipcp);
+extern int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp);
 extern int audit_socketcall(int nargs, unsigned long *args);
 extern int audit_sockaddr(int len, void *addr);
 extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt);
@@ -338,7 +340,8 @@ extern int audit_set_macxattr(const char *name);
 #define audit_inode_child(d,i,p) do { ; } while (0)
 #define auditsc_get_stamp(c,t,s) do { BUG(); } while (0)
 #define audit_get_loginuid(c) ({ -1; })
-#define audit_ipc_perms(q,u,g,m,i) ({ 0; })
+#define audit_ipc_obj(i) ({ 0; })
+#define audit_ipc_set_perm(q,u,g,m,i) ({ 0; })
 #define audit_socketcall(n,a) ({ 0; })
 #define audit_sockaddr(len, addr) ({ 0; })
 #define audit_avc_path(dentry, mnt) ({ 0; })
diff --git a/ipc/msg.c b/ipc/msg.c
index 48a7f17..7d1340c 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -13,6 +13,9 @@
  * mostly rewritten, threaded and wake-one semantics added
  * MSGMAX limit removed, sysctl's added
  * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
+ *
+ * support for audit of ipc object properties and permission changes
+ * Dustin Kirkland <dustin.kirkland@us.ibm.com>
  */
 
 #include <linux/capability.h>
@@ -447,6 +450,11 @@ asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds __user *buf)
 	if (msg_checkid(msq,msqid))
 		goto out_unlock_up;
 	ipcp = &msq->q_perm;
+
+	err = audit_ipc_obj(ipcp);
+	if (err)
+		goto out_unlock_up;
+
 	err = -EPERM;
 	if (current->euid != ipcp->cuid && 
 	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN))
@@ -460,7 +468,8 @@ asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds __user *buf)
 	switch (cmd) {
 	case IPC_SET:
 	{
-		if ((err = audit_ipc_perms(setbuf.qbytes, setbuf.uid, setbuf.gid, setbuf.mode, ipcp)))
+		err = audit_ipc_set_perm(setbuf.qbytes, setbuf.uid, setbuf.gid, setbuf.mode, ipcp);
+		if (err)
 			goto out_unlock_up;
 
 		err = -EPERM;
diff --git a/ipc/sem.c b/ipc/sem.c
index 642659c..7919f8e 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -61,6 +61,9 @@
  * (c) 2001 Red Hat Inc <alan@redhat.com>
  * Lockless wakeup
  * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
+ *
+ * support for audit of ipc object properties and permission changes
+ * Dustin Kirkland <dustin.kirkland@us.ibm.com>
  */
 
 #include <linux/config.h>
@@ -820,6 +823,11 @@ static int semctl_down(int semid, int semnum, int cmd, int version, union semun
 		goto out_unlock;
 	}	
 	ipcp = &sma->sem_perm;
+
+	err = audit_ipc_obj(ipcp);
+	if (err)
+		goto out_unlock;
+
 	if (current->euid != ipcp->cuid && 
 	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
 	    	err=-EPERM;
@@ -836,7 +844,8 @@ static int semctl_down(int semid, int semnum, int cmd, int version, union semun
 		err = 0;
 		break;
 	case IPC_SET:
-		if ((err = audit_ipc_perms(0, setbuf.uid, setbuf.gid, setbuf.mode, ipcp)))
+		err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode, ipcp);
+		if (err)
 			goto out_unlock;
 		ipcp->uid = setbuf.uid;
 		ipcp->gid = setbuf.gid;
diff --git a/ipc/shm.c b/ipc/shm.c
index 1c2faf6..8098968 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -13,6 +13,8 @@
  * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
  * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
  *
+ * support for audit of ipc object properties and permission changes
+ * Dustin Kirkland <dustin.kirkland@us.ibm.com>
  */
 
 #include <linux/config.h>
@@ -542,6 +544,10 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 		if(err)
 			goto out_unlock;
 
+		err = audit_ipc_obj(&(shp->shm_perm));
+		if (err)
+			goto out_unlock;
+
 		if (!capable(CAP_IPC_LOCK)) {
 			err = -EPERM;
 			if (current->euid != shp->shm_perm.uid &&
@@ -594,6 +600,10 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 		if(err)
 			goto out_unlock_up;
 
+		err = audit_ipc_obj(&(shp->shm_perm));
+		if (err)
+			goto out_unlock_up;
+
 		if (current->euid != shp->shm_perm.uid &&
 		    current->euid != shp->shm_perm.cuid && 
 		    !capable(CAP_SYS_ADMIN)) {
@@ -627,12 +637,15 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 		err=-EINVAL;
 		if(shp==NULL)
 			goto out_up;
-		if ((err = audit_ipc_perms(0, setbuf.uid, setbuf.gid,
-					setbuf.mode, &(shp->shm_perm))))
-			goto out_unlock_up;
 		err = shm_checkid(shp,shmid);
 		if(err)
 			goto out_unlock_up;
+		err = audit_ipc_obj(&(shp->shm_perm));
+		if (err)
+			goto out_unlock_up;
+		err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode, &(shp->shm_perm));
+		if (err)
+			goto out_unlock_up;
 		err=-EPERM;
 		if (current->euid != shp->shm_perm.uid &&
 		    current->euid != shp->shm_perm.cuid && 
diff --git a/ipc/util.c b/ipc/util.c
index b3dcfad..8193299 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -10,6 +10,8 @@
  *	      Manfred Spraul <manfred@colorfullife.com>
  * Oct 2002 - One lock per IPC id. RCU ipc_free for lock-free grow_ary().
  *            Mingming Cao <cmm@us.ibm.com>
+ * Mar 2006 - support for audit of ipc object properties
+ *            Dustin Kirkland <dustin.kirkland@us.ibm.com>
  */
 
 #include <linux/config.h>
@@ -27,6 +29,7 @@
 #include <linux/workqueue.h>
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
+#include <linux/audit.h>
 
 #include <asm/unistd.h>
 
@@ -464,8 +467,10 @@ void ipc_rcu_putref(void *ptr)
  
 int ipcperms (struct kern_ipc_perm *ipcp, short flag)
 {	/* flag will most probably be 0 or S_...UGO from <linux/stat.h> */
-	int requested_mode, granted_mode;
+	int requested_mode, granted_mode, err;
 
+	if (unlikely((err = audit_ipc_obj(ipcp))))
+		return err;
 	requested_mode = (flag >> 6) | (flag >> 3) | flag;
 	granted_mode = ipcp->mode;
 	if (current->euid == ipcp->cuid || current->euid == ipcp->uid)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index d94e040..a300736 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -646,6 +646,25 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 			}
 			break; }
 
+		case AUDIT_IPC_SET_PERM: {
+			struct audit_aux_data_ipcctl *axi = (void *)aux;
+			audit_log_format(ab,
+				" new qbytes=%lx new iuid=%u new igid=%u new mode=%x",
+				axi->qbytes, axi->uid, axi->gid, axi->mode);
+			if (axi->osid != 0) {
+				char *ctx = NULL;
+				u32 len;
+				if (selinux_ctxid_to_string(
+						axi->osid, &ctx, &len)) {
+					audit_log_format(ab, " osid=%u",
+							axi->osid);
+					call_panic = 1;
+				} else
+					audit_log_format(ab, " obj=%s", ctx);
+				kfree(ctx);
+			}
+			break; }
+
 		case AUDIT_SOCKETCALL: {
 			int i;
 			struct audit_aux_data_socketcall *axs = (void *)aux;
@@ -1148,7 +1167,36 @@ uid_t audit_get_loginuid(struct audit_context *ctx)
 }
 
 /**
- * audit_ipc_perms - record audit data for ipc
+ * audit_ipc_obj - record audit data for ipc object
+ * @ipcp: ipc permissions
+ *
+ * Returns 0 for success or NULL context or < 0 on error.
+ */
+int audit_ipc_obj(struct kern_ipc_perm *ipcp)
+{
+	struct audit_aux_data_ipcctl *ax;
+	struct audit_context *context = current->audit_context;
+
+	if (likely(!context))
+		return 0;
+
+	ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
+	if (!ax)
+		return -ENOMEM;
+
+	ax->uid = ipcp->uid;
+	ax->gid = ipcp->gid;
+	ax->mode = ipcp->mode;
+	selinux_get_ipc_sid(ipcp, &ax->osid);
+
+	ax->d.type = AUDIT_IPC;
+	ax->d.next = context->aux;
+	context->aux = (void *)ax;
+	return 0;
+}
+
+/**
+ * audit_ipc_set_perm - record audit data for new ipc permissions
  * @qbytes: msgq bytes
  * @uid: msgq user id
  * @gid: msgq group id
@@ -1156,7 +1204,7 @@ uid_t audit_get_loginuid(struct audit_context *ctx)
  *
  * Returns 0 for success or NULL context or < 0 on error.
  */
-int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp)
+int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp)
 {
 	struct audit_aux_data_ipcctl *ax;
 	struct audit_context *context = current->audit_context;
@@ -1174,7 +1222,7 @@ int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, str
 	ax->mode = mode;
 	selinux_get_ipc_sid(ipcp, &ax->osid);
 
-	ax->d.type = AUDIT_IPC;
+	ax->d.type = AUDIT_IPC_SET_PERM;
 	ax->d.next = context->aux;
 	context->aux = (void *)ax;
 	return 0;
-- 
cgit v0.10.2


From 2ad312d2093ae506ae0fa184d8d026b559083087 Mon Sep 17 00:00:00 2001
From: Steve Grubb <sgrubb@redhat.com>
Date: Tue, 11 Apr 2006 08:50:56 -0400
Subject: [PATCH] Audit Filter Performance

While testing the watch performance, I noticed that selinux_task_ctxid()
was creeping into the results more than it should. Investigation showed
that the function call was being called whether it was needed or not. The
below patch fixes this.

Signed-off-by: Steve Grubb <sgrubb@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index a300736..1c03a4e 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -168,11 +168,9 @@ static int audit_filter_rules(struct task_struct *tsk,
 			      struct audit_context *ctx,
 			      enum audit_state *state)
 {
-	int i, j;
+	int i, j, need_sid = 1;
 	u32 sid;
 
-	selinux_task_ctxid(tsk, &sid);
-
 	for (i = 0; i < rule->field_count; i++) {
 		struct audit_field *f = &rule->fields[i];
 		int result = 0;
@@ -271,11 +269,16 @@ static int audit_filter_rules(struct task_struct *tsk,
 			   match for now to avoid losing information that
 			   may be wanted.   An error message will also be
 			   logged upon error */
-			if (f->se_rule)
+			if (f->se_rule) {
+				if (need_sid) {
+					selinux_task_ctxid(tsk, &sid);
+					need_sid = 0;
+				}
 				result = selinux_audit_rule_match(sid, f->type,
 				                                  f->op,
 				                                  f->se_rule,
 				                                  ctx);
+			}
 			break;
 		case AUDIT_ARG0:
 		case AUDIT_ARG1:
-- 
cgit v0.10.2


From 254abfd33a214617deb916585b306faee834c97f Mon Sep 17 00:00:00 2001
From: Roland Dreier <rolandd@cisco.com>
Date: Mon, 1 May 2006 10:40:23 -0700
Subject: IB/mthca: Fix offset in query_gid method

GuidInfo records have 8 byte GUIDs in them, so an index should be
multiplied by 8 to get an offset.  mthca_query_gid() was incorrectly
multiplying by 16.

Noticed by Leonid Keller <leonid@mellanox.co.il>.

Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 565a24b..a2eae8a 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -306,7 +306,7 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port,
 		goto out;
 	}
 
-	memcpy(gid->raw + 8, out_mad->data + (index % 8) * 16, 8);
+	memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
 
  out:
 	kfree(in_mad);
-- 
cgit v0.10.2


From 0568b409c74f7a125d92a09a3f386785700ef688 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Mon, 1 May 2006 19:50:48 +0200
Subject: [PATCH] splice: fix bugs in pipe_to_file()

Found by Oleg Nesterov <oleg@tv-sign.ru>, fixed by me.

- Only allow full pages to go to the page cache.
- Check page != buf->page instead of using PIPE_BUF_FLAG_STOLEN.
- Remember to clear 'stolen' if add_to_page_cache() fails.

And as a cleanup on that:

- Make the bottom fall-through logic a little less convoluted. Also make
  the steal path hold an extra reference to the page, so we don't have
  to differentiate between stolen and non-stolen at the end.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/pipe.c b/fs/pipe.c
index 5a36927..888f265 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -99,8 +99,6 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 {
 	struct page *page = buf->page;
 
-	buf->flags &= ~PIPE_BUF_FLAG_STOLEN;
-
 	/*
 	 * If nobody else uses this page, and we don't already have a
 	 * temporary page, let's keep track of it as a one-deep
@@ -130,7 +128,6 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
 	struct page *page = buf->page;
 
 	if (page_count(page) == 1) {
-		buf->flags |= PIPE_BUF_FLAG_STOLEN;
 		lock_page(page);
 		return 0;
 	}
diff --git a/fs/splice.c b/fs/splice.c
index 9df28d3..1633778 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -78,7 +78,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
 		return 1;
 	}
 
-	buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU;
+	buf->flags |= PIPE_BUF_FLAG_LRU;
 	return 0;
 }
 
@@ -87,7 +87,7 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info,
 {
 	page_cache_release(buf->page);
 	buf->page = NULL;
-	buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU);
+	buf->flags &= ~PIPE_BUF_FLAG_LRU;
 }
 
 static void *page_cache_pipe_buf_map(struct file *file,
@@ -587,9 +587,10 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 		this_len = PAGE_CACHE_SIZE - offset;
 
 	/*
-	 * Reuse buf page, if SPLICE_F_MOVE is set.
+	 * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full
+	 * page.
 	 */
-	if (sd->flags & SPLICE_F_MOVE) {
+	if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) {
 		/*
 		 * If steal succeeds, buf->page is now pruned from the vm
 		 * side (LRU and page cache) and we can reuse it. The page
@@ -604,6 +605,8 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 			goto find_page;
 		}
 
+		page_cache_get(page);
+
 		if (!(buf->flags & PIPE_BUF_FLAG_LRU))
 			lru_cache_add(page);
 	} else {
@@ -662,7 +665,7 @@ find_page:
 	} else if (ret)
 		goto out;
 
-	if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) {
+	if (buf->page != page) {
 		char *dst = kmap_atomic(page, KM_USER0);
 
 		memcpy(dst + offset, src + buf->offset, this_len);
@@ -671,22 +674,20 @@ find_page:
 	}
 
 	ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
-	if (ret == AOP_TRUNCATED_PAGE) {
+	if (!ret) {
+		/*
+		 * Return the number of bytes written and mark page as
+		 * accessed, we are now done!
+		 */
+		ret = this_len;
+		mark_page_accessed(page);
+		balance_dirty_pages_ratelimited(mapping);
+	} else if (ret == AOP_TRUNCATED_PAGE) {
 		page_cache_release(page);
 		goto find_page;
-	} else if (ret)
-		goto out;
-
-	/*
-	 * Return the number of bytes written.
-	 */
-	ret = this_len;
-	mark_page_accessed(page);
-	balance_dirty_pages_ratelimited(mapping);
+	}
 out:
-	if (!(buf->flags & PIPE_BUF_FLAG_STOLEN))
-		page_cache_release(page);
-
+	page_cache_release(page);
 	unlock_page(page);
 out_nomem:
 	buf->ops->unmap(info, buf);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 0008d4b..3130977 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -5,8 +5,7 @@
 
 #define PIPE_BUFFERS (16)
 
-#define PIPE_BUF_FLAG_STOLEN	0x01
-#define PIPE_BUF_FLAG_LRU	0x02
+#define PIPE_BUF_FLAG_LRU	0x01
 
 struct pipe_buffer {
 	struct page *page;
-- 
cgit v0.10.2


From f84d751994441292593523c7069ed147176f6cab Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Mon, 1 May 2006 19:59:03 +0200
Subject: [PATCH] pipe: introduce ->pin() buffer operation

The ->map() function is really expensive on highmem machines right now,
since it has to use the slower kmap() instead of kmap_atomic(). Splice
rarely needs to access the virtual address of a page, so it's a waste
of time doing it.

Introduce ->pin() to take over the responsibility of making sure the
page data is valid. ->map() is then reduced to just kmap(). That way we
can also share a most of the pipe buffer ops between pipe.c and splice.c

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/pipe.c b/fs/pipe.c
index 888f265..d9644fd 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -110,14 +110,14 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 		page_cache_release(page);
 }
 
-static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe,
-				struct pipe_buffer *buf)
+void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
+			   struct pipe_buffer *buf)
 {
 	return kmap(buf->page);
 }
 
-static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe,
-				struct pipe_buffer *buf)
+void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
+			    struct pipe_buffer *buf)
 {
 	kunmap(buf->page);
 }
@@ -135,19 +135,24 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
 	return 1;
 }
 
-static void anon_pipe_buf_get(struct pipe_inode_info *info,
-			      struct pipe_buffer *buf)
+void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf)
 {
 	page_cache_get(buf->page);
 }
 
+int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf)
+{
+	return 0;
+}
+
 static struct pipe_buf_operations anon_pipe_buf_ops = {
 	.can_merge = 1,
-	.map = anon_pipe_buf_map,
-	.unmap = anon_pipe_buf_unmap,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.pin = generic_pipe_buf_pin,
 	.release = anon_pipe_buf_release,
 	.steal = anon_pipe_buf_steal,
-	.get = anon_pipe_buf_get,
+	.get = generic_pipe_buf_get,
 };
 
 static ssize_t
@@ -183,12 +188,14 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
 			if (chars > total_len)
 				chars = total_len;
 
-			addr = ops->map(filp, pipe, buf);
-			if (IS_ERR(addr)) {
+			error = ops->pin(pipe, buf);
+			if (error) {
 				if (!ret)
-					ret = PTR_ERR(addr);
+					error = ret;
 				break;
 			}
+
+			addr = ops->map(pipe, buf);
 			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
 			ops->unmap(pipe, buf);
 			if (unlikely(error)) {
@@ -300,11 +307,11 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
 			void *addr;
 			int error;
 
-			addr = ops->map(filp, pipe, buf);
-			if (IS_ERR(addr)) {
-				error = PTR_ERR(addr);
+			error = ops->pin(pipe, buf);
+			if (error)
 				goto out;
-			}
+
+			addr = ops->map(pipe, buf);
 			error = pipe_iov_copy_from_user(offset + addr, iov,
 							chars);
 			ops->unmap(pipe, buf);
diff --git a/fs/splice.c b/fs/splice.c
index 1633778..d7538d8 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -90,9 +90,8 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info,
 	buf->flags &= ~PIPE_BUF_FLAG_LRU;
 }
 
-static void *page_cache_pipe_buf_map(struct file *file,
-				     struct pipe_inode_info *info,
-				     struct pipe_buffer *buf)
+static int page_cache_pipe_buf_pin(struct pipe_inode_info *info,
+				   struct pipe_buffer *buf)
 {
 	struct page *page = buf->page;
 	int err;
@@ -118,49 +117,25 @@ static void *page_cache_pipe_buf_map(struct file *file,
 		}
 
 		/*
-		 * Page is ok afterall, fall through to mapping.
+		 * Page is ok afterall, we are done.
 		 */
 		unlock_page(page);
 	}
 
-	return kmap(page);
+	return 0;
 error:
 	unlock_page(page);
-	return ERR_PTR(err);
-}
-
-static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info,
-				      struct pipe_buffer *buf)
-{
-	kunmap(buf->page);
-}
-
-static void *user_page_pipe_buf_map(struct file *file,
-				    struct pipe_inode_info *pipe,
-				    struct pipe_buffer *buf)
-{
-	return kmap(buf->page);
-}
-
-static void user_page_pipe_buf_unmap(struct pipe_inode_info *pipe,
-				     struct pipe_buffer *buf)
-{
-	kunmap(buf->page);
-}
-
-static void page_cache_pipe_buf_get(struct pipe_inode_info *info,
-				    struct pipe_buffer *buf)
-{
-	page_cache_get(buf->page);
+	return err;
 }
 
 static struct pipe_buf_operations page_cache_pipe_buf_ops = {
 	.can_merge = 0,
-	.map = page_cache_pipe_buf_map,
-	.unmap = page_cache_pipe_buf_unmap,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.pin = page_cache_pipe_buf_pin,
 	.release = page_cache_pipe_buf_release,
 	.steal = page_cache_pipe_buf_steal,
-	.get = page_cache_pipe_buf_get,
+	.get = generic_pipe_buf_get,
 };
 
 static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -171,11 +146,12 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
 
 static struct pipe_buf_operations user_page_pipe_buf_ops = {
 	.can_merge = 0,
-	.map = user_page_pipe_buf_map,
-	.unmap = user_page_pipe_buf_unmap,
+	.map = generic_pipe_buf_map,
+	.unmap = generic_pipe_buf_unmap,
+	.pin = generic_pipe_buf_pin,
 	.release = page_cache_pipe_buf_release,
 	.steal = user_page_pipe_buf_steal,
-	.get = page_cache_pipe_buf_get,
+	.get = generic_pipe_buf_get,
 };
 
 /*
@@ -517,26 +493,16 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
 {
 	struct file *file = sd->file;
 	loff_t pos = sd->pos;
-	ssize_t ret;
-	void *ptr;
-	int more;
+	int ret, more;
 
-	/*
-	 * Sub-optimal, but we are limited by the pipe ->map. We don't
-	 * need a kmap'ed buffer here, we just want to make sure we
-	 * have the page pinned if the pipe page originates from the
-	 * page cache.
-	 */
-	ptr = buf->ops->map(file, info, buf);
-	if (IS_ERR(ptr))
-		return PTR_ERR(ptr);
-
-	more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
+	ret = buf->ops->pin(info, buf);
+	if (!ret) {
+		more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
 
-	ret = file->f_op->sendpage(file, buf->page, buf->offset, sd->len,
-				   &pos, more);
+		ret = file->f_op->sendpage(file, buf->page, buf->offset,
+					   sd->len, &pos, more);
+	}
 
-	buf->ops->unmap(info, buf);
 	return ret;
 }
 
@@ -569,15 +535,14 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 	unsigned int offset, this_len;
 	struct page *page;
 	pgoff_t index;
-	char *src;
 	int ret;
 
 	/*
 	 * make sure the data in this buffer is uptodate
 	 */
-	src = buf->ops->map(file, info, buf);
-	if (IS_ERR(src))
-		return PTR_ERR(src);
+	ret = buf->ops->pin(info, buf);
+	if (unlikely(ret))
+		return ret;
 
 	index = sd->pos >> PAGE_CACHE_SHIFT;
 	offset = sd->pos & ~PAGE_CACHE_MASK;
@@ -666,11 +631,16 @@ find_page:
 		goto out;
 
 	if (buf->page != page) {
-		char *dst = kmap_atomic(page, KM_USER0);
+		/*
+		 * Careful, ->map() uses KM_USER0!
+		 */
+		char *src = buf->ops->map(info, buf);
+		char *dst = kmap_atomic(page, KM_USER1);
 
 		memcpy(dst + offset, src + buf->offset, this_len);
 		flush_dcache_page(page);
-		kunmap_atomic(dst, KM_USER0);
+		kunmap_atomic(dst, KM_USER1);
+		buf->ops->unmap(info, buf);
 	}
 
 	ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
@@ -690,7 +660,6 @@ out:
 	page_cache_release(page);
 	unlock_page(page);
 out_nomem:
-	buf->ops->unmap(info, buf);
 	return ret;
 }
 
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 3130977..b8aae1f 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -14,10 +14,23 @@ struct pipe_buffer {
 	unsigned int flags;
 };
 
+/*
+ * Note on the nesting of these functions:
+ *
+ * ->pin()
+ *	->steal()
+ *	...
+ *	->map()
+ *	...
+ *	->unmap()
+ *
+ * That is, ->map() must be called on a pinned buffer, same goes for ->steal().
+ */
 struct pipe_buf_operations {
 	int can_merge;
-	void * (*map)(struct file *, struct pipe_inode_info *, struct pipe_buffer *);
+	void * (*map)(struct pipe_inode_info *, struct pipe_buffer *);
 	void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *);
+	int (*pin)(struct pipe_inode_info *, struct pipe_buffer *);
 	void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
 	int (*steal)(struct pipe_inode_info *, struct pipe_buffer *);
 	void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
@@ -50,6 +63,12 @@ struct pipe_inode_info * alloc_pipe_info(struct inode * inode);
 void free_pipe_info(struct inode * inode);
 void __free_pipe_info(struct pipe_inode_info *);
 
+/* Generic pipe buffer ops functions */
+void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *);
+void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *);
+void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
+int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *);
+
 /*
  * splice is tied to pipes as a transport (at least for now), so we'll just
  * add the splice flags here.
-- 
cgit v0.10.2


From 7f9c51f0d9783c78db5c2aa16806d0c256ac667f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Mon, 1 May 2006 19:59:32 +0200
Subject: [PATCH] Add ->splice_read/splice_write to def_blk_fops

It can use the generic handlers.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/block_dev.c b/fs/block_dev.c
index af88c43..f5958f4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1104,6 +1104,8 @@ const struct file_operations def_blk_fops = {
 	.readv		= generic_file_readv,
 	.writev		= generic_file_write_nolock,
 	.sendfile	= generic_file_sendfile,
+	.splice_read	= generic_file_splice_read,
+	.splice_write	= generic_file_splice_write,
 };
 
 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
-- 
cgit v0.10.2


From e27dedd84c119e2f7af54fcde3293be5ad812103 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Mon, 1 May 2006 19:59:54 +0200
Subject: [PATCH] splice: call handle_ra_miss() on failure to lookup page

Notify the readahead logic of the missing page. Suggested by
Oleg Nesterov.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/splice.c b/fs/splice.c
index d7538d8..0a69164 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -302,6 +302,12 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 		page = find_get_page(mapping, index);
 		if (!page) {
 			/*
+			 * Make sure the read-ahead engine is notified
+			 * about this failure.
+			 */
+			handle_ra_miss(mapping, &in->f_ra, index);
+
+			/*
 			 * page didn't exist, allocate one.
 			 */
 			page = page_cache_alloc_cold(mapping);
-- 
cgit v0.10.2


From f6762b7ad8edd6abc802542ce845d3bc8adcb92f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Mon, 1 May 2006 20:02:05 +0200
Subject: [PATCH] pipe: enable atomic copying of pipe data to/from user space

The pipe ->map() method uses kmap() to virtually map the pages, which
is both slow and has known scalability issues on SMP. This patch enables
atomic copying of pipe pages, by pre-faulting data and using kmap_atomic()
instead.

lmbench bw_pipe and lat_pipe measurements agree this is a Good Thing. Here
are results from that on a UP machine with highmem (1.5GiB of RAM), running
first a UP kernel, SMP kernel, and SMP kernel patched.

Vanilla-UP:
Pipe bandwidth: 1622.28 MB/sec
Pipe bandwidth: 1610.59 MB/sec
Pipe bandwidth: 1608.30 MB/sec
Pipe latency: 7.3275 microseconds
Pipe latency: 7.2995 microseconds
Pipe latency: 7.3097 microseconds

Vanilla-SMP:
Pipe bandwidth: 1382.19 MB/sec
Pipe bandwidth: 1317.27 MB/sec
Pipe bandwidth: 1355.61 MB/sec
Pipe latency: 9.6402 microseconds
Pipe latency: 9.6696 microseconds
Pipe latency: 9.6153 microseconds

Patched-SMP:
Pipe bandwidth: 1578.70 MB/sec
Pipe bandwidth: 1579.95 MB/sec
Pipe bandwidth: 1578.63 MB/sec
Pipe latency: 9.1654 microseconds
Pipe latency: 9.2266 microseconds
Pipe latency: 9.1527 microseconds

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/pipe.c b/fs/pipe.c
index d9644fd..3941a7f 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -55,7 +55,8 @@ void pipe_wait(struct pipe_inode_info *pipe)
 }
 
 static int
-pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
+pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
+			int atomic)
 {
 	unsigned long copy;
 
@@ -64,8 +65,13 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
 			iov++;
 		copy = min_t(unsigned long, len, iov->iov_len);
 
-		if (copy_from_user(to, iov->iov_base, copy))
-			return -EFAULT;
+		if (atomic) {
+			if (__copy_from_user_inatomic(to, iov->iov_base, copy))
+				return -EFAULT;
+		} else {
+			if (copy_from_user(to, iov->iov_base, copy))
+				return -EFAULT;
+		}
 		to += copy;
 		len -= copy;
 		iov->iov_base += copy;
@@ -75,7 +81,8 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
 }
 
 static int
-pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
+pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
+		      int atomic)
 {
 	unsigned long copy;
 
@@ -84,8 +91,13 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
 			iov++;
 		copy = min_t(unsigned long, len, iov->iov_len);
 
-		if (copy_to_user(iov->iov_base, from, copy))
-			return -EFAULT;
+		if (atomic) {
+			if (__copy_to_user_inatomic(iov->iov_base, from, copy))
+				return -EFAULT;
+		} else {
+			if (copy_to_user(iov->iov_base, from, copy))
+				return -EFAULT;
+		}
 		from += copy;
 		len -= copy;
 		iov->iov_base += copy;
@@ -94,6 +106,47 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
 	return 0;
 }
 
+/*
+ * Attempt to pre-fault in the user memory, so we can use atomic copies.
+ * Returns the number of bytes not faulted in.
+ */
+static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
+{
+	while (!iov->iov_len)
+		iov++;
+
+	while (len > 0) {
+		unsigned long this_len;
+
+		this_len = min_t(unsigned long, len, iov->iov_len);
+		if (fault_in_pages_writeable(iov->iov_base, this_len))
+			break;
+
+		len -= this_len;
+		iov++;
+	}
+
+	return len;
+}
+
+/*
+ * Pre-fault in the user memory, so we can use atomic copies.
+ */
+static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
+{
+	while (!iov->iov_len)
+		iov++;
+
+	while (len > 0) {
+		unsigned long this_len;
+
+		this_len = min_t(unsigned long, len, iov->iov_len);
+		fault_in_pages_readable(iov->iov_base, this_len);
+		len -= this_len;
+		iov++;
+	}
+}
+
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 				  struct pipe_buffer *buf)
 {
@@ -111,15 +164,24 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 }
 
 void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
-			   struct pipe_buffer *buf)
+			   struct pipe_buffer *buf, int atomic)
 {
+	if (atomic) {
+		buf->flags |= PIPE_BUF_FLAG_ATOMIC;
+		return kmap_atomic(buf->page, KM_USER0);
+	}
+
 	return kmap(buf->page);
 }
 
 void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
-			    struct pipe_buffer *buf)
+			    struct pipe_buffer *buf, void *map_data)
 {
-	kunmap(buf->page);
+	if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
+		buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
+		kunmap_atomic(map_data, KM_USER0);
+	} else
+		kunmap(buf->page);
 }
 
 static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -183,7 +245,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
 			struct pipe_buf_operations *ops = buf->ops;
 			void *addr;
 			size_t chars = buf->len;
-			int error;
+			int error, atomic;
 
 			if (chars > total_len)
 				chars = total_len;
@@ -195,12 +257,21 @@ pipe_readv(struct file *filp, const struct iovec *_iov,
 				break;
 			}
 
-			addr = ops->map(pipe, buf);
-			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
-			ops->unmap(pipe, buf);
+			atomic = !iov_fault_in_pages_write(iov, chars);
+redo:
+			addr = ops->map(pipe, buf, atomic);
+			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
+			ops->unmap(pipe, buf, addr);
 			if (unlikely(error)) {
+				/*
+				 * Just retry with the slow path if we failed.
+				 */
+				if (atomic) {
+					atomic = 0;
+					goto redo;
+				}
 				if (!ret)
-					ret = -EFAULT;
+					ret = error;
 				break;
 			}
 			ret += chars;
@@ -304,21 +375,28 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
 		int offset = buf->offset + buf->len;
 
 		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
+			int error, atomic = 1;
 			void *addr;
-			int error;
 
 			error = ops->pin(pipe, buf);
 			if (error)
 				goto out;
 
-			addr = ops->map(pipe, buf);
+			iov_fault_in_pages_read(iov, chars);
+redo1:
+			addr = ops->map(pipe, buf, atomic);
 			error = pipe_iov_copy_from_user(offset + addr, iov,
-							chars);
-			ops->unmap(pipe, buf);
+							chars, atomic);
+			ops->unmap(pipe, buf, addr);
 			ret = error;
 			do_wakeup = 1;
-			if (error)
+			if (error) {
+				if (atomic) {
+					atomic = 0;
+					goto redo1;
+				}
 				goto out;
+			}
 			buf->len += chars;
 			total_len -= chars;
 			ret = chars;
@@ -341,7 +419,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
 			int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
 			struct pipe_buffer *buf = pipe->bufs + newbuf;
 			struct page *page = pipe->tmp_page;
-			int error;
+			char *src;
+			int error, atomic = 1;
 
 			if (!page) {
 				page = alloc_page(GFP_HIGHUSER);
@@ -361,11 +440,27 @@ pipe_writev(struct file *filp, const struct iovec *_iov,
 			if (chars > total_len)
 				chars = total_len;
 
-			error = pipe_iov_copy_from_user(kmap(page), iov, chars);
-			kunmap(page);
+			iov_fault_in_pages_read(iov, chars);
+redo2:
+			if (atomic)
+				src = kmap_atomic(page, KM_USER0);
+			else
+				src = kmap(page);
+
+			error = pipe_iov_copy_from_user(src, iov, chars,
+							atomic);
+			if (atomic)
+				kunmap_atomic(src, KM_USER0);
+			else
+				kunmap(page);
+
 			if (unlikely(error)) {
+				if (atomic) {
+					atomic = 0;
+					goto redo2;
+				}
 				if (!ret)
-					ret = -EFAULT;
+					ret = error;
 				break;
 			}
 			ret += chars;
diff --git a/fs/splice.c b/fs/splice.c
index 0a69164..d4664a2 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -640,13 +640,13 @@ find_page:
 		/*
 		 * Careful, ->map() uses KM_USER0!
 		 */
-		char *src = buf->ops->map(info, buf);
+		char *src = buf->ops->map(info, buf, 1);
 		char *dst = kmap_atomic(page, KM_USER1);
 
 		memcpy(dst + offset, src + buf->offset, this_len);
 		flush_dcache_page(page);
 		kunmap_atomic(dst, KM_USER1);
-		buf->ops->unmap(info, buf);
+		buf->ops->unmap(info, buf, src);
 	}
 
 	ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index b8aae1f..4c05449 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -5,7 +5,8 @@
 
 #define PIPE_BUFFERS (16)
 
-#define PIPE_BUF_FLAG_LRU	0x01
+#define PIPE_BUF_FLAG_LRU	0x01	/* page is on the LRU */
+#define PIPE_BUF_FLAG_ATOMIC	0x02	/* was atomically mapped */
 
 struct pipe_buffer {
 	struct page *page;
@@ -28,8 +29,8 @@ struct pipe_buffer {
  */
 struct pipe_buf_operations {
 	int can_merge;
-	void * (*map)(struct pipe_inode_info *, struct pipe_buffer *);
-	void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *);
+	void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int);
+	void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *);
 	int (*pin)(struct pipe_inode_info *, struct pipe_buffer *);
 	void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
 	int (*steal)(struct pipe_inode_info *, struct pipe_buffer *);
@@ -64,8 +65,8 @@ void free_pipe_info(struct inode * inode);
 void __free_pipe_info(struct pipe_inode_info *);
 
 /* Generic pipe buffer ops functions */
-void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *);
-void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *);
+void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int);
+void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *);
 void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *);
 
-- 
cgit v0.10.2


From 7afa6fd037e51e95d322990cb127bb2b1217251a Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Mon, 1 May 2006 20:02:33 +0200
Subject: [PATCH] vmsplice: allow user to pass in gift pages

If SPLICE_F_GIFT is set, the user is basically giving this pages away to
the kernel. That means we can steal them for eg page cache uses instead
of copying it.

The data must be properly page aligned and also a multiple of the page size
in length.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/splice.c b/fs/splice.c
index d4664a2..b150493 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -141,7 +141,10 @@ static struct pipe_buf_operations page_cache_pipe_buf_ops = {
 static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
 				    struct pipe_buffer *buf)
 {
-	return 1;
+	if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
+		return 1;
+
+	return 0;
 }
 
 static struct pipe_buf_operations user_page_pipe_buf_ops = {
@@ -186,6 +189,9 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 			buf->offset = spd->partial[page_nr].offset;
 			buf->len = spd->partial[page_nr].len;
 			buf->ops = spd->ops;
+			if (spd->flags & SPLICE_F_GIFT)
+				buf->flags |= PIPE_BUF_FLAG_GIFT;
+
 			pipe->nrbufs++;
 			page_nr++;
 			ret += buf->len;
@@ -1073,7 +1079,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
  */
 static int get_iovec_page_array(const struct iovec __user *iov,
 				unsigned int nr_vecs, struct page **pages,
-				struct partial_page *partial)
+				struct partial_page *partial, int aligned)
 {
 	int buffers = 0, error = 0;
 
@@ -1113,6 +1119,15 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 		 * in the user pages.
 		 */
 		off = (unsigned long) base & ~PAGE_MASK;
+
+		/*
+		 * If asked for alignment, the offset must be zero and the
+		 * length a multiple of the PAGE_SIZE.
+		 */
+		error = -EINVAL;
+		if (aligned && (off || len & ~PAGE_MASK))
+			break;
+
 		npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 		if (npages > PIPE_BUFFERS - buffers)
 			npages = PIPE_BUFFERS - buffers;
@@ -1206,7 +1221,8 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
 	else if (unlikely(!nr_segs))
 		return 0;
 
-	spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial);
+	spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
+					    flags & SPLICE_F_GIFT);
 	if (spd.nr_pages <= 0)
 		return spd.nr_pages;
 
@@ -1314,6 +1330,12 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 				obuf = opipe->bufs + nbuf;
 				*obuf = *ibuf;
 
+				/*
+				 * Don't inherit the gift flag, we need to
+				 * prevent multiple steals of this page.
+				 */
+				obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
+
 				if (obuf->len > len)
 					obuf->len = len;
 
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 4c05449..df4d3fa 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -7,6 +7,7 @@
 
 #define PIPE_BUF_FLAG_LRU	0x01	/* page is on the LRU */
 #define PIPE_BUF_FLAG_ATOMIC	0x02	/* was atomically mapped */
+#define PIPE_BUF_FLAG_GIFT	0x04	/* page is a gift */
 
 struct pipe_buffer {
 	struct page *page;
@@ -79,6 +80,7 @@ int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *);
 				 /* we may still block on the fd we splice */
 				 /* from/to, of course */
 #define SPLICE_F_MORE	(0x04)	/* expect more data */
+#define SPLICE_F_GIFT	(0x08)	/* pages passed in are a gift */
 
 /*
  * Passed to the actors
-- 
cgit v0.10.2


From 755e4ca4a9885b79a14169ab5b615920eb38a32a Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Mon, 24 Apr 2006 14:22:57 -0700
Subject: IB/ipath: fix race with exposing reset file

We were accidentally exposing the "reset" sysfs file more than once
per device.

Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 7d3fb69..28ddceb 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -277,13 +277,14 @@ static int ipath_diag_open(struct inode *in, struct file *fp)
 
 bail:
 	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-	mutex_unlock(&ipath_mutex);
 
 	/* Only expose a way to reset the device if we
 	   make it into diag mode. */
 	if (ret == 0)
 		ipath_expose_reset(&dd->pcidev->dev);
 
+	mutex_unlock(&ipath_mutex);
+
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index 32acd80..f323791 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -711,10 +711,22 @@ static struct attribute_group dev_attr_group = {
  * enters diag mode.  A device reset is quite likely to crash the
  * machine entirely, so we don't want to normally make it
  * available.
+ *
+ * Called with ipath_mutex held.
  */
 int ipath_expose_reset(struct device *dev)
 {
-	return device_create_file(dev, &dev_attr_reset);
+	static int exposed;
+	int ret;
+
+	if (!exposed) {
+		ret = device_create_file(dev, &dev_attr_reset);
+		exposed = 1;
+	}
+	else
+		ret = 0;
+
+	return ret;
 }
 
 int ipath_driver_create_group(struct device_driver *drv)
-- 
cgit v0.10.2


From 68dd43a162b43218d2e5ac1d139c1d53da965f54 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Mon, 24 Apr 2006 14:22:58 -0700
Subject: IB/ipath: set up 32-bit DMA mask if 64-bit setup fails

Some systems do not set up 64-bit maps on systems with 2GB or less of
memory installed, so we have to fall back to trying a 32-bit setup.

Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index e7617c3..7ff95b7 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -418,9 +418,19 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 
 	ret = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
 	if (ret) {
-		dev_info(&pdev->dev, "pci_set_dma_mask unit %u "
-			 "fails: %d\n", dd->ipath_unit, ret);
-		goto bail_regions;
+		/*
+		 * if the 64 bit setup fails, try 32 bit.  Some systems
+		 * do not setup 64 bit maps on systems with 2GB or less
+		 * memory installed.
+		 */
+		ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+		if (ret) {
+			dev_info(&pdev->dev, "pci_set_dma_mask unit %u "
+				 "fails: %d\n", dd->ipath_unit, ret);
+			goto bail_regions;
+		}
+		else
+			ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
 	}
 
 	pci_set_master(pdev);
-- 
cgit v0.10.2


From 23e86a4584606a08393e0ad3a5ca27b19a3de374 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Mon, 24 Apr 2006 14:22:59 -0700
Subject: IB/ipath: iterate over correct number of ports during reset

Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 7ff95b7..398add4 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1959,7 +1959,7 @@ int ipath_reset_device(int unit)
 	}
 
 	if (dd->ipath_pd)
-		for (i = 1; i < dd->ipath_portcnt; i++) {
+		for (i = 1; i < dd->ipath_cfgports; i++) {
 			if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) {
 				ipath_dbg("unit %u port %d is in use "
 					  "(PID %u cmd %s), can't reset\n",
-- 
cgit v0.10.2


From 52e7fad825c47fb86801f23b9f793da1d2774f18 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Mon, 24 Apr 2006 14:23:00 -0700
Subject: IB/ipath: change handling of PIO buffers

Different ipath hardware types have different numbers of buffers
available, so we decide on the counts ourselves unless we are specifically
overridden with a module parameter.

Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 2823ff9..16f640e 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -53,13 +53,19 @@ MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
 
 /*
  * Number of buffers reserved for driver (layered drivers and SMA
- * send).  Reserved at end of buffer list.
+ * send).  Reserved at end of buffer list.   Initialized based on
+ * number of PIO buffers if not set via module interface.
+ * The problem with this is that it's global, but we'll use different
+ * numbers for different chip types.  So the default value is not
+ * very useful.  I've redefined it for the 1.3 release so that it's
+ * zero unless set by the user to something else, in which case we
+ * try to respect it.
  */
-static ushort ipath_kpiobufs = 32;
+static ushort ipath_kpiobufs;
 
 static int ipath_set_kpiobufs(const char *val, struct kernel_param *kp);
 
-module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_uint,
+module_param_call(kpiobufs, ipath_set_kpiobufs, param_get_ushort,
 		  &ipath_kpiobufs, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
 
@@ -531,8 +537,11 @@ static int init_housekeeping(struct ipath_devdata *dd,
 	 * Don't clear ipath_flags as 8bit mode was set before
 	 * entering this func. However, we do set the linkstate to
 	 * unknown, so we can watch for a transition.
+	 * PRESENT is set because we want register reads to work,
+	 * and the kernel infrastructure saw it in config space;
+	 * We clear it if we have failures.
 	 */
-	dd->ipath_flags |= IPATH_LINKUNK;
+	dd->ipath_flags |= IPATH_LINKUNK | IPATH_PRESENT;
 	dd->ipath_flags &= ~(IPATH_LINKACTIVE | IPATH_LINKARMED |
 			     IPATH_LINKDOWN | IPATH_LINKINIT);
 
@@ -560,6 +569,7 @@ static int init_housekeeping(struct ipath_devdata *dd,
 	    || (dd->ipath_uregbase & 0xffffffff) == 0xffffffff) {
 		ipath_dev_err(dd, "Register read failures from chip, "
 			      "giving up initialization\n");
+		dd->ipath_flags &= ~IPATH_PRESENT;
 		ret = -ENODEV;
 		goto done;
 	}
@@ -682,16 +692,14 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 	 */
 	dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2)
 		/ (sizeof(u64) * BITS_PER_BYTE / 2);
-	if (!ipath_kpiobufs)	/* have to have at least 1, for SMA */
-		kpiobufs = ipath_kpiobufs = 1;
-	else if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) <
-		 (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT)) {
-		dev_info(&dd->pcidev->dev, "Too few PIO buffers (%u) "
-			 "for %u ports to have %u each!\n",
-			 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k,
-			 dd->ipath_cfgports, IPATH_MIN_USER_PORT_BUFCNT);
-		kpiobufs = 1;	/* reserve just the minimum for SMA/ether */
-	} else
+	if (ipath_kpiobufs == 0) {
+		/* not set by user, or set explictly to default  */
+		if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128)
+			kpiobufs = 32;
+		else
+			kpiobufs = 16;
+	}
+	else
 		kpiobufs = ipath_kpiobufs;
 
 	if (kpiobufs >
-- 
cgit v0.10.2


From fccea663643cedfa310c5254da30e1e35e09199b Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Mon, 24 Apr 2006 14:23:02 -0700
Subject: IB/ipath: fix verbs registration

Remember when the verbs layer unregisters from the lower-level code.

Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c
index 69ed110..9cb5258 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.c
+++ b/drivers/infiniband/hw/ipath/ipath_layer.c
@@ -46,13 +46,15 @@
 /* Acquire before ipath_devs_lock. */
 static DEFINE_MUTEX(ipath_layer_mutex);
 
+static int ipath_verbs_registered;
+
 u16 ipath_layer_rcv_opcode;
+
 static int (*layer_intr)(void *, u32);
 static int (*layer_rcv)(void *, void *, struct sk_buff *);
 static int (*layer_rcv_lid)(void *, void *);
 static int (*verbs_piobufavail)(void *);
 static void (*verbs_rcv)(void *, void *, void *, u32);
-static int ipath_verbs_registered;
 
 static void *(*layer_add_one)(int, struct ipath_devdata *);
 static void (*layer_remove_one)(void *);
@@ -586,6 +588,8 @@ void ipath_verbs_unregister(void)
 	verbs_rcv = NULL;
 	verbs_timer_cb = NULL;
 
+	ipath_verbs_registered = 0;
+
 	mutex_unlock(&ipath_layer_mutex);
 }
 
-- 
cgit v0.10.2


From c71c30dcba142f16bc5f651812b1bc0b9f70f02d Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Mon, 24 Apr 2006 14:23:03 -0700
Subject: IB/ipath: prevent hardware from being accessed during reset

The reset code now turns off the PRESENT flag during a reset, so that
other code won't attempt to access a device that's in mid-reset.

Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 0bcb428..8e3b95b 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -719,11 +719,24 @@ static void handle_rcv(struct ipath_devdata *dd, u32 istat)
 irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
 {
 	struct ipath_devdata *dd = data;
-	u32 istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus);
+	u32 istat;
 	ipath_err_t estat = 0;
 	static unsigned unexpected = 0;
 	irqreturn_t ret;
 
+	if(!(dd->ipath_flags & IPATH_PRESENT)) {
+		/* this is mostly so we don't try to touch the chip while
+		 * it is being reset */
+		/*
+		 * This return value is perhaps odd, but we do not want the
+		 * interrupt core code to remove our interrupt handler
+		 * because we don't appear to be handling an interrupt
+		 * during a chip reset.
+		 */
+		return IRQ_HANDLED;
+	}
+
+	istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus);
 	if (unlikely(!istat)) {
 		ipath_stats.sps_nullintr++;
 		ret = IRQ_NONE; /* not our interrupt, or already handled */
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 0ce5f19..e6507f8 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -731,7 +731,7 @@ u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg,
 static inline u32 ipath_read_ureg32(const struct ipath_devdata *dd,
 				    ipath_ureg regno, int port)
 {
-	if (!dd->ipath_kregbase)
+	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
 		return 0;
 
 	return readl(regno + (u64 __iomem *)
@@ -762,7 +762,7 @@ static inline void ipath_write_ureg(const struct ipath_devdata *dd,
 static inline u32 ipath_read_kreg32(const struct ipath_devdata *dd,
 				    ipath_kreg regno)
 {
-	if (!dd->ipath_kregbase)
+	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
 		return -1;
 	return readl((u32 __iomem *) & dd->ipath_kregbase[regno]);
 }
@@ -770,7 +770,7 @@ static inline u32 ipath_read_kreg32(const struct ipath_devdata *dd,
 static inline u64 ipath_read_kreg64(const struct ipath_devdata *dd,
 				    ipath_kreg regno)
 {
-	if (!dd->ipath_kregbase)
+	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
 		return -1;
 
 	return readq(&dd->ipath_kregbase[regno]);
@@ -786,7 +786,7 @@ static inline void ipath_write_kreg(const struct ipath_devdata *dd,
 static inline u64 ipath_read_creg(const struct ipath_devdata *dd,
 				  ipath_sreg regno)
 {
-	if (!dd->ipath_kregbase)
+	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
 		return 0;
 
 	return readq(regno + (u64 __iomem *)
@@ -797,7 +797,7 @@ static inline u64 ipath_read_creg(const struct ipath_devdata *dd,
 static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
 					 ipath_sreg regno)
 {
-	if (!dd->ipath_kregbase)
+	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT))
 		return 0;
 	return readl(regno + (u64 __iomem *)
 		     (dd->ipath_cregbase +
diff --git a/drivers/infiniband/hw/ipath/ipath_pe800.c b/drivers/infiniband/hw/ipath/ipath_pe800.c
index e1dc4f7..6318067 100644
--- a/drivers/infiniband/hw/ipath/ipath_pe800.c
+++ b/drivers/infiniband/hw/ipath/ipath_pe800.c
@@ -972,6 +972,8 @@ static int ipath_setup_pe_reset(struct ipath_devdata *dd)
 	/* Use ERROR so it shows up in logs, etc. */
 	ipath_dev_err(dd, "Resetting PE-800 unit %u\n",
 		      dd->ipath_unit);
+	/* keep chip from being accessed in a few places */
+	dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT);
 	val = dd->ipath_control | INFINIPATH_C_RESET;
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_control, val);
 	mb();
@@ -997,6 +999,8 @@ static int ipath_setup_pe_reset(struct ipath_devdata *dd)
 		if ((r = pci_enable_device(dd->pcidev)))
 			ipath_dev_err(dd, "pci_enable_device failed after "
 				      "reset: %d\n", r);
+		/* whether it worked or not, mark as present, again */
+		dd->ipath_flags |= IPATH_PRESENT;
 		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
 		if (val == dd->ipath_revision) {
 			ipath_cdbg(VERBOSE, "Got matching revision "
-- 
cgit v0.10.2


From 76f0dd141b477094b026206c0d8c21beac6069f5 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Mon, 24 Apr 2006 14:23:05 -0700
Subject: IB/ipath: simplify RC send posting

Remove some unnecessarily complicated tests.

Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index f232e77..eb81424 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -531,19 +531,12 @@ int ipath_post_rc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
 	}
 	wqe->wr.num_sge = j;
 	qp->s_head = next;
-	/*
-	 * Wake up the send tasklet if the QP is not waiting
-	 * for an RNR timeout.
-	 */
-	next = qp->s_rnr_timeout;
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 
-	if (next == 0) {
-		if (qp->ibqp.qp_type == IB_QPT_UC)
-			ipath_do_uc_send((unsigned long) qp);
-		else
-			ipath_do_rc_send((unsigned long) qp);
-	}
+	if (qp->ibqp.qp_type == IB_QPT_UC)
+		ipath_do_uc_send((unsigned long) qp);
+	else
+		ipath_do_rc_send((unsigned long) qp);
 
 	ret = 0;
 
-- 
cgit v0.10.2


From 9b2017f1e1c95625b2ca2a1ec5317097117d7078 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Mon, 24 Apr 2006 14:23:06 -0700
Subject: IB/ipath: simplify IB timer usage

Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 8d2558a..cb9e387 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -449,7 +449,6 @@ static void ipath_ib_timer(void *arg)
 {
 	struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
 	struct ipath_qp *resend = NULL;
-	struct ipath_qp *rnr = NULL;
 	struct list_head *last;
 	struct ipath_qp *qp;
 	unsigned long flags;
@@ -465,32 +464,18 @@ static void ipath_ib_timer(void *arg)
 	last = &dev->pending[dev->pending_index];
 	while (!list_empty(last)) {
 		qp = list_entry(last->next, struct ipath_qp, timerwait);
-		if (last->next == LIST_POISON1 ||
-		    last->next != &qp->timerwait ||
-		    qp->timerwait.prev != last) {
-			INIT_LIST_HEAD(last);
-		} else {
-			list_del(&qp->timerwait);
-			qp->timerwait.prev = (struct list_head *) resend;
-			resend = qp;
-			atomic_inc(&qp->refcount);
-		}
+		list_del(&qp->timerwait);
+		qp->timer_next = resend;
+		resend = qp;
+		atomic_inc(&qp->refcount);
 	}
 	last = &dev->rnrwait;
 	if (!list_empty(last)) {
 		qp = list_entry(last->next, struct ipath_qp, timerwait);
 		if (--qp->s_rnr_timeout == 0) {
 			do {
-				if (last->next == LIST_POISON1 ||
-				    last->next != &qp->timerwait ||
-				    qp->timerwait.prev != last) {
-					INIT_LIST_HEAD(last);
-					break;
-				}
 				list_del(&qp->timerwait);
-				qp->timerwait.prev =
-					(struct list_head *) rnr;
-				rnr = qp;
+				tasklet_hi_schedule(&qp->s_task);
 				if (list_empty(last))
 					break;
 				qp = list_entry(last->next, struct ipath_qp,
@@ -530,8 +515,7 @@ static void ipath_ib_timer(void *arg)
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
 
 	/* XXX What if timer fires again while this is running? */
-	for (qp = resend; qp != NULL;
-	     qp = (struct ipath_qp *) qp->timerwait.prev) {
+	for (qp = resend; qp != NULL; qp = qp->timer_next) {
 		struct ib_wc wc;
 
 		spin_lock_irqsave(&qp->s_lock, flags);
@@ -545,9 +529,6 @@ static void ipath_ib_timer(void *arg)
 		if (atomic_dec_and_test(&qp->refcount))
 			wake_up(&qp->wait);
 	}
-	for (qp = rnr; qp != NULL;
-	     qp = (struct ipath_qp *) qp->timerwait.prev)
-		tasklet_hi_schedule(&qp->s_task);
 }
 
 /**
@@ -556,9 +537,9 @@ static void ipath_ib_timer(void *arg)
  *
  * This is called from ipath_intr() at interrupt level when a PIO buffer is
  * available after ipath_verbs_send() returned an error that no buffers were
- * available.  Return 0 if we consumed all the PIO buffers and we still have
+ * available.  Return 1 if we consumed all the PIO buffers and we still have
  * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
- * return one).
+ * return zero).
  */
 static int ipath_ib_piobufavail(void *arg)
 {
@@ -579,7 +560,7 @@ static int ipath_ib_piobufavail(void *arg)
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
 
 bail:
-	return 1;
+	return 0;
 }
 
 static int ipath_query_device(struct ib_device *ibdev,
@@ -1159,7 +1140,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf)
 
 	len = sprintf(buf,
 		      "RC resends  %d\n"
-		      "RC QACKs    %d\n"
+		      "RC no QACK  %d\n"
 		      "RC ACKs     %d\n"
 		      "RC SEQ NAKs %d\n"
 		      "RC RDMA seq %d\n"
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index fcafbc7..4f8d593 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -282,7 +282,8 @@ struct ipath_srq {
  */
 struct ipath_qp {
 	struct ib_qp ibqp;
-	struct ipath_qp *next;	/* link list for QPN hash table */
+	struct ipath_qp *next;		/* link list for QPN hash table */
+	struct ipath_qp *timer_next;	/* link list for ipath_ib_timer() */
 	struct list_head piowait;	/* link for wait PIO buf */
 	struct list_head timerwait;	/* link for waiting for timeouts */
 	struct ib_ah_attr remote_ah_attr;
-- 
cgit v0.10.2


From ae15f540cc52acbcbfdf4b902d214a5db5c835d2 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Mon, 24 Apr 2006 14:23:07 -0700
Subject: IB/ipath: improve sparse annotation

Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/ipath/ips_common.h b/drivers/infiniband/hw/ipath/ips_common.h
index 410a764..ab7cbbb 100644
--- a/drivers/infiniband/hw/ipath/ips_common.h
+++ b/drivers/infiniband/hw/ipath/ips_common.h
@@ -95,7 +95,7 @@ struct ether_header {
 	__u8 seq_num;
 	__le32 len;
 	/* MUST be of word size due to PIO write requirements */
-	__u32 csum;
+	__le32 csum;
 	__le16 csum_offset;
 	__le16 flags;
 	__u16 first_2_bytes;
-- 
cgit v0.10.2


From d562a5ae69bd5643d777788117d02acb22fab347 Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Mon, 24 Apr 2006 14:23:08 -0700
Subject: IB/ipath: fix label name in interrupt handler

Names that are the opposite of their intended meanings are not so helpful.

Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 8e3b95b..3e72a1f 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -665,14 +665,14 @@ static void handle_layer_pioavail(struct ipath_devdata *dd)
 
 	ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
 	if (ret > 0)
-		goto clear;
+		goto set;
 
 	ret = __ipath_verbs_piobufavail(dd);
 	if (ret > 0)
-		goto clear;
+		goto set;
 
 	return;
-clear:
+set:
 	set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
 			 dd->ipath_sendctrl);
-- 
cgit v0.10.2


From f6f0413e10b76440fb82efebc63120f3b6d42adb Mon Sep 17 00:00:00 2001
From: Bryan O'Sullivan <bos@pathscale.com>
Date: Mon, 24 Apr 2006 14:23:09 -0700
Subject: IB/ipath: tidy up white space in a few files

Signed-off-by: Bryan O'Sullivan <bos@pathscale.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index 593e289..4676238 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -60,11 +60,11 @@
 #define __IPATH_KERNEL_SEND 0x2000	/* use kernel mode send */
 #define __IPATH_EPKTDBG     0x4000	/* print ethernet packet data */
 #define __IPATH_SMADBG      0x8000	/* sma packet debug */
-#define __IPATH_IPATHDBG    0x10000	/* Ethernet (IPATH) general debug on */
-#define __IPATH_IPATHWARN   0x20000	/* Ethernet (IPATH) warnings on */
-#define __IPATH_IPATHERR    0x40000	/* Ethernet (IPATH) errors on */
-#define __IPATH_IPATHPD     0x80000	/* Ethernet (IPATH) packet dump on */
-#define __IPATH_IPATHTABLE  0x100000	/* Ethernet (IPATH) table dump on */
+#define __IPATH_IPATHDBG    0x10000	/* Ethernet (IPATH) gen debug */
+#define __IPATH_IPATHWARN   0x20000	/* Ethernet (IPATH) warnings */
+#define __IPATH_IPATHERR    0x40000	/* Ethernet (IPATH) errors */
+#define __IPATH_IPATHPD     0x80000	/* Ethernet (IPATH) packet dump */
+#define __IPATH_IPATHTABLE  0x100000	/* Ethernet (IPATH) table dump */
 
 #else				/* _IPATH_DEBUGGING */
 
@@ -79,11 +79,12 @@
 #define __IPATH_TRSAMPLE  0x0	/* generate trace buffer sample entries */
 #define __IPATH_VERBDBG   0x0	/* very verbose debug */
 #define __IPATH_PKTDBG    0x0	/* print packet data */
-#define __IPATH_PROCDBG   0x0	/* print process startup (init)/exit messages */
+#define __IPATH_PROCDBG   0x0	/* process startup (init)/exit messages */
 /* print mmap/nopage stuff, not using VDBG any more */
 #define __IPATH_MMDBG     0x0
 #define __IPATH_EPKTDBG   0x0	/* print ethernet packet data */
-#define __IPATH_SMADBG    0x0   /* print process startup (init)/exit messages */#define __IPATH_IPATHDBG  0x0	/* Ethernet (IPATH) table dump on */
+#define __IPATH_SMADBG    0x0   /* process startup (init)/exit messages */
+#define __IPATH_IPATHDBG  0x0	/* Ethernet (IPATH) table dump on */
 #define __IPATH_IPATHWARN 0x0	/* Ethernet (IPATH) warnings on   */
 #define __IPATH_IPATHERR  0x0	/* Ethernet (IPATH) errors on   */
 #define __IPATH_IPATHPD   0x0	/* Ethernet (IPATH) packet dump on   */
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index 1e59750..402126e 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -34,8 +34,9 @@
 #define _IPATH_REGISTERS_H
 
 /*
- * This file should only be included by kernel source, and by the diags.
- * It defines the registers, and their contents, for the InfiniPath HT-400 chip
+ * This file should only be included by kernel source, and by the diags.  It
+ * defines the registers, and their contents, for the InfiniPath HT-400
+ * chip.
  */
 
 /*
@@ -156,8 +157,10 @@
 #define INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT 8
 #define INFINIPATH_IBCC_LINKINITCMD_MASK 0x3ULL
 #define INFINIPATH_IBCC_LINKINITCMD_DISABLE 1
-#define INFINIPATH_IBCC_LINKINITCMD_POLL 2	/* cycle through TS1/TS2 till OK */
-#define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3	/* wait for TS1, then go on */
+/* cycle through TS1/TS2 till OK */
+#define INFINIPATH_IBCC_LINKINITCMD_POLL 2
+/* wait for TS1, then go on */
+#define INFINIPATH_IBCC_LINKINITCMD_SLEEP 3
 #define INFINIPATH_IBCC_LINKINITCMD_SHIFT 16
 #define INFINIPATH_IBCC_LINKCMD_MASK 0x3ULL
 #define INFINIPATH_IBCC_LINKCMD_INIT 1	/* move to 0x11 */
@@ -182,7 +185,8 @@
 #define INFINIPATH_IBCS_LINKSTATE_SHIFT 4
 #define INFINIPATH_IBCS_TXREADY       0x40000000
 #define INFINIPATH_IBCS_TXCREDITOK    0x80000000
-/* link training states (shift by INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) */
+/* link training states (shift by
+   INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) */
 #define INFINIPATH_IBCS_LT_STATE_DISABLED	0x00
 #define INFINIPATH_IBCS_LT_STATE_LINKUP		0x01
 #define INFINIPATH_IBCS_LT_STATE_POLLACTIVE	0x02
@@ -267,10 +271,12 @@
 /* kr_serdesconfig0 bits */
 #define INFINIPATH_SERDC0_RESET_MASK  0xfULL	/* overal reset bits */
 #define INFINIPATH_SERDC0_RESET_PLL   0x10000000ULL	/* pll reset */
-#define INFINIPATH_SERDC0_TXIDLE      0xF000ULL	/* tx idle enables (per lane) */
-#define INFINIPATH_SERDC0_RXDETECT_EN 0xF0000ULL	/* rx detect enables (per lane) */
-#define INFINIPATH_SERDC0_L1PWR_DN	 0xF0ULL	/* L1 Power down; use with RXDETECT,
-							   Otherwise not used on IB side */
+/* tx idle enables (per lane) */
+#define INFINIPATH_SERDC0_TXIDLE      0xF000ULL
+/* rx detect enables (per lane) */
+#define INFINIPATH_SERDC0_RXDETECT_EN 0xF0000ULL
+/* L1 Power down; use with RXDETECT, Otherwise not used on IB side */
+#define INFINIPATH_SERDC0_L1PWR_DN	 0xF0ULL
 
 /* kr_xgxsconfig bits */
 #define INFINIPATH_XGXS_RESET          0x7ULL
@@ -390,12 +396,13 @@ struct ipath_kregs {
 	ipath_kreg kr_txintmemsize;
 	ipath_kreg kr_xgxsconfig;
 	ipath_kreg kr_ibpllcfg;
-	/* use these two (and the following N ports) only with ipath_k*_kreg64_port();
-	 * not *kreg64() */
+	/* use these two (and the following N ports) only with
+	 * ipath_k*_kreg64_port(); not *kreg64() */
 	ipath_kreg kr_rcvhdraddr;
 	ipath_kreg kr_rcvhdrtailaddr;
 
-	/* remaining registers are not present on all types of infinipath chips  */
+	/* remaining registers are not present on all types of infinipath
+	   chips  */
 	ipath_kreg kr_rcvpktledcnt;
 	ipath_kreg kr_pcierbuftestreg0;
 	ipath_kreg kr_pcierbuftestreg1;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 01cfb30..e606daf 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -46,8 +46,10 @@
  * This is called from ipath_post_ud_send() to forward a WQE addressed
  * to the same HCA.
  */
-static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_sge_state *ss,
-			      u32 length, struct ib_send_wr *wr, struct ib_wc *wc)
+static void ipath_ud_loopback(struct ipath_qp *sqp,
+			      struct ipath_sge_state *ss,
+			      u32 length, struct ib_send_wr *wr,
+			      struct ib_wc *wc)
 {
 	struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
 	struct ipath_qp *qp;
-- 
cgit v0.10.2


From 235acec78e87a60ace01d1ecb4b87ad1d689715a Mon Sep 17 00:00:00 2001
From: Bastian Blank <bastian@waldi.eu.org>
Date: Mon, 1 May 2006 12:15:42 -0700
Subject: [PATCH] s390: make qeth buildable

Signed-off-by: Bastian Blank <bastian@waldi.eu.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Acked-by: Jeff Garzik <jeff@garzik.org>
Acked-by: Frank Pavlic <fpavlic@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index b3c6e79..cb14642 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -8014,7 +8014,6 @@ static int (*qeth_old_arp_constructor) (struct neighbour *);
 
 static struct neigh_ops arp_direct_ops_template = {
 	.family = AF_INET,
-	.destructor = NULL,
 	.solicit = NULL,
 	.error_report = NULL,
 	.output = dev_queue_xmit,
-- 
cgit v0.10.2


From df30d0f4ca3c41b60068232d6de9d58be88436f0 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 1 May 2006 12:15:44 -0700
Subject: [PATCH] md: Avoid oops when attempting to fix read errors on raid10

We should add to the counter for the rdev *after* checking if the rdev is
NULL!!!

Signed-off-by: Neil Brown <neilb@suse.de>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 617012b..ddc1dfc 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1435,9 +1435,9 @@ static void raid10d(mddev_t *mddev)
 						sl--;
 						d = r10_bio->devs[sl].devnum;
 						rdev = conf->mirrors[d].rdev;
-						atomic_add(s, &rdev->corrected_errors);
 						if (rdev &&
 						    test_bit(In_sync, &rdev->flags)) {
+							atomic_add(s, &rdev->corrected_errors);
 							if (sync_page_io(rdev->bdev,
 									 r10_bio->devs[sl].addr +
 									 sect + rdev->data_offset,
-- 
cgit v0.10.2


From e0a33270ed0e8e00cbb882a33d21e1f92aac0ceb Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 1 May 2006 12:15:45 -0700
Subject: [PATCH] md: Fixed refcounting/locking when attempting read error
 correction in raid10

We need to hold a reference to rdevs while reading and writing to attempt to
correct read errors.  This reference must be taken under an rcu lock.

Signed-off-by: Neil Brown <neilb@suse.de>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ddc1dfc..1440935 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1407,36 +1407,45 @@ static void raid10d(mddev_t *mddev)
 				if (s > (PAGE_SIZE>>9))
 					s = PAGE_SIZE >> 9;
 
+				rcu_read_lock();
 				do {
 					int d = r10_bio->devs[sl].devnum;
-					rdev = conf->mirrors[d].rdev;
+					rdev = rcu_dereference(conf->mirrors[d].rdev);
 					if (rdev &&
-					    test_bit(In_sync, &rdev->flags) &&
-					    sync_page_io(rdev->bdev,
-							 r10_bio->devs[sl].addr +
-							 sect + rdev->data_offset,
-							 s<<9,
-							 conf->tmppage, READ))
-						success = 1;
-					else {
-						sl++;
-						if (sl == conf->copies)
-							sl = 0;
+					    test_bit(In_sync, &rdev->flags)) {
+						atomic_inc(&rdev->nr_pending);
+						rcu_read_unlock();
+						success = sync_page_io(rdev->bdev,
+								       r10_bio->devs[sl].addr +
+								       sect + rdev->data_offset,
+								       s<<9,
+								       conf->tmppage, READ);
+						rdev_dec_pending(rdev, mddev);
+						rcu_read_lock();
+						if (success)
+							break;
 					}
+					sl++;
+					if (sl == conf->copies)
+						sl = 0;
 				} while (!success && sl != r10_bio->read_slot);
+				rcu_read_unlock();
 
 				if (success) {
 					int start = sl;
 					/* write it back and re-read */
+					rcu_read_lock();
 					while (sl != r10_bio->read_slot) {
 						int d;
 						if (sl==0)
 							sl = conf->copies;
 						sl--;
 						d = r10_bio->devs[sl].devnum;
-						rdev = conf->mirrors[d].rdev;
+						rdev = rcu_dereference(conf->mirrors[d].rdev);
 						if (rdev &&
 						    test_bit(In_sync, &rdev->flags)) {
+							atomic_inc(&rdev->nr_pending);
+							rcu_read_unlock();
 							atomic_add(s, &rdev->corrected_errors);
 							if (sync_page_io(rdev->bdev,
 									 r10_bio->devs[sl].addr +
@@ -1444,6 +1453,8 @@ static void raid10d(mddev_t *mddev)
 									 s<<9, conf->tmppage, WRITE) == 0)
 								/* Well, this device is dead */
 								md_error(mddev, rdev);
+							rdev_dec_pending(rdev, mddev);
+							rcu_read_lock();
 						}
 					}
 					sl = start;
@@ -1453,17 +1464,22 @@ static void raid10d(mddev_t *mddev)
 							sl = conf->copies;
 						sl--;
 						d = r10_bio->devs[sl].devnum;
-						rdev = conf->mirrors[d].rdev;
+						rdev = rcu_dereference(conf->mirrors[d].rdev);
 						if (rdev &&
 						    test_bit(In_sync, &rdev->flags)) {
+							atomic_inc(&rdev->nr_pending);
+							rcu_read_unlock();
 							if (sync_page_io(rdev->bdev,
 									 r10_bio->devs[sl].addr +
 									 sect + rdev->data_offset,
 									 s<<9, conf->tmppage, READ) == 0)
 								/* Well, this device is dead */
 								md_error(mddev, rdev);
+							rdev_dec_pending(rdev, mddev);
+							rcu_read_lock();
 						}
 					}
+					rcu_read_unlock();
 				} else {
 					/* Cannot read from anywhere -- bye bye array */
 					md_error(mddev, conf->mirrors[r10_bio->devs[r10_bio->read_slot].devnum].rdev);
-- 
cgit v0.10.2


From bea2771871ed1084c9a85ed0c86340f188505702 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 1 May 2006 12:15:46 -0700
Subject: [PATCH] md: Change ENOTSUPP to EOPNOTSUPP

Because that is what you get if a BIO_RW_BARRIER isn't supported!

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 6081941..532d040 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -315,7 +315,7 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 		if (r1_bio->bios[mirror] == bio)
 			break;
 
-	if (error == -ENOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) {
+	if (error == -EOPNOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) {
 		set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags);
 		set_bit(R1BIO_BarrierRetry, &r1_bio->state);
 		r1_bio->mddev->barriers_work = 0;
@@ -1404,7 +1404,7 @@ static void raid1d(mddev_t *mddev)
 			unplug = 1;
 		} else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
 			/* some requests in the r1bio were BIO_RW_BARRIER
-			 * requests which failed with -ENOTSUPP.  Hohumm..
+			 * requests which failed with -EOPNOTSUPP.  Hohumm..
 			 * Better resubmit without the barrier.
 			 * We know which devices to resubmit for, because
 			 * all others have had their bios[] entry cleared.
-- 
cgit v0.10.2


From 62de608da0b0ab17d81a233b50d1e952b9816f69 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 1 May 2006 12:15:47 -0700
Subject: [PATCH] md: Improve detection of lack of barrier support in raid1

Move the test for 'do barrier work' down a bit so that if the first write to a
raid1 is a BIO_RW_BARRIER write, the checking done by superblock writes will
cause the right thing to happen.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 532d040..b8c13c8 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -753,18 +753,24 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	const int rw = bio_data_dir(bio);
 	int do_barriers;
 
-	if (unlikely(!mddev->barriers_work && bio_barrier(bio))) {
-		bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
-		return 0;
-	}
-
 	/*
 	 * Register the new request and wait if the reconstruction
 	 * thread has put up a bar for new requests.
 	 * Continue immediately if no resync is active currently.
+	 * We test barriers_work *after* md_write_start as md_write_start
+	 * may cause the first superblock write, and that will check out
+	 * if barriers work.
 	 */
+
 	md_write_start(mddev, bio); /* wait on superblock update early */
 
+	if (unlikely(!mddev->barriers_work && bio_barrier(bio))) {
+		if (rw == WRITE)
+			md_write_end(mddev);
+		bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
+		return 0;
+	}
+
 	wait_barrier(conf);
 
 	disk_stat_inc(mddev->gendisk, ios[rw]);
-- 
cgit v0.10.2


From 5e7dd2ab6b9bdfa60e19b8739e6b2a204fd4f477 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 1 May 2006 12:15:47 -0700
Subject: [PATCH] md: Fix 'rdev->nr_pending' count when retrying barrier
 requests

When retrying a failed BIO_RW_BARRIER request, we need to keep the reference
in ->nr_pending over the whole retry.  Currently, we only hold the reference
if the failed request is the *last* one to finish - which is silly, because it
would normally be the first to finish.

So move the rdev_dec_pending call up into the didn't-fail branch.  As the rdev
isn't used in the later code, calling rdev_dec_pending earlier doesn't hurt.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b8c13c8..4070eff 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -319,6 +319,7 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 		set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags);
 		set_bit(R1BIO_BarrierRetry, &r1_bio->state);
 		r1_bio->mddev->barriers_work = 0;
+		/* Don't rdev_dec_pending in this branch - keep it for the retry */
 	} else {
 		/*
 		 * this branch is our 'one mirror IO has finished' event handler:
@@ -365,6 +366,7 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 				}
 			}
 		}
+		rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
 	}
 	/*
 	 *
@@ -374,11 +376,9 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
 		if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
 			reschedule_retry(r1_bio);
-			/* Don't dec_pending yet, we want to hold
-			 * the reference over the retry
-			 */
 			goto out;
 		}
+		/* it really is the end of this request */
 		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
 			/* free extra copy of the data pages */
 			int i = bio->bi_vcnt;
@@ -393,8 +393,6 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 		md_write_end(r1_bio->mddev);
 		raid_end_bio_io(r1_bio);
 	}
-
-	rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
  out:
 	if (to_put)
 		bio_put(to_put);
@@ -1414,6 +1412,7 @@ static void raid1d(mddev_t *mddev)
 			 * Better resubmit without the barrier.
 			 * We know which devices to resubmit for, because
 			 * all others have had their bios[] entry cleared.
+			 * We already have a nr_pending reference on these rdevs.
 			 */
 			int i;
 			clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
-- 
cgit v0.10.2


From d2610202290b4924b71747314a0f88f28807702e Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 1 May 2006 12:15:48 -0700
Subject: [PATCH] x86_64: Add compat_sys_vmsplice and use it in x86-64

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index 57fc37e..5a92fed 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -695,4 +695,5 @@ ia32_sys_call_table:
 	.quad sys_splice
 	.quad sys_sync_file_range
 	.quad sys_tee
+	.quad compat_sys_vmsplice
 ia32_syscall_end:		
diff --git a/fs/compat.c b/fs/compat.c
index 2e32bd3..3f3e8f4 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1317,6 +1317,26 @@ out:
 	return ret;
 }
 
+asmlinkage long
+compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
+		    unsigned int nr_segs, unsigned int flags)
+{
+	unsigned i;
+	struct iovec *iov;
+	if (nr_segs >= UIO_MAXIOV)
+		return -EINVAL;
+	iov = compat_alloc_user_space(nr_segs * sizeof(struct iovec));
+	for (i = 0; i < nr_segs; i++) {
+		struct compat_iovec v;
+		if (get_user(v.iov_base, &iov32[i].iov_base) ||
+		    get_user(v.iov_len, &iov32[i].iov_len) ||
+		    put_user(compat_ptr(v.iov_base), &iov[i].iov_base) ||
+		    put_user(v.iov_len, &iov[i].iov_len))
+			return -EFAULT;
+	}
+	return sys_vmsplice(fd, iov, nr_segs, flags);
+}
+
 /*
  * Exactly like fs/open.c:sys_open(), except that it doesn't set the
  * O_LARGEFILE flag.
-- 
cgit v0.10.2


From 32828546b32a96d550b85eab25609bc4ba3942ab Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 1 May 2006 12:15:49 -0700
Subject: [PATCH] i386/x86-64: Fix ACPI disabled LAPIC handling mismerge

The patch I submitted earlier to fix disabled LAPIC handling in ACPI was
mismerged for some reason I still don't quite understand.  Parts of it was
applied to the wrong function.

This patch fixes it up.

Cc: <len.brown@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index 049a255..4c785a6 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -215,7 +215,7 @@ static int __init acpi_parse_madt(unsigned long phys_addr, unsigned long size)
 {
 	struct acpi_table_madt *madt = NULL;
 
-	if (!phys_addr || !size || !cpu_has_apic)
+	if (!phys_addr || !size)
 		return -EINVAL;
 
 	madt = (struct acpi_table_madt *)__acpi_map_table(phys_addr, size);
@@ -1151,6 +1151,9 @@ int __init acpi_boot_init(void)
 
 	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
 
+	if (!cpu_has_apic)
+		return -ENODEV;
+
 	/*
 	 * set sci_int and PM timer address
 	 */
-- 
cgit v0.10.2


From 5871aa6d5a98f315016d1deee07425c269c37f29 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 1 May 2006 12:15:50 -0700
Subject: [PATCH] i386: Fix overflow in e820_all_mapped

The 32bit version of e820_all_mapped() needs to use u64 to avoid overflows on
PAE systems.  Pointed out by Jan Beulich

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 80cb3b2..d77e89a 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -970,8 +970,10 @@ efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
   * not-overlapping, which is the case
   */
 int __init
-e820_all_mapped(unsigned long start, unsigned long end, unsigned type)
+e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
 {
+	u64 start = s;
+	u64 end = e;
 	int i;
 	for (i = 0; i < e820.nr_map; i++) {
 		struct e820entry *ei = &e820.map[i];
-- 
cgit v0.10.2


From 42e4c8585f8cbbfac3b70aa2d0a4f869509a0354 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 1 May 2006 12:15:51 -0700
Subject: [PATCH] i386: Remove apic= warning

The apic= option can be used to set the APIC driver too.  When that is done
this code would always produce bogus warnings.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 254cee9..013b85d 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -757,10 +757,6 @@ static int __init apic_set_verbosity(char *str)
 		apic_verbosity = APIC_DEBUG;
 	else if (strcmp("verbose", str) == 0)
 		apic_verbosity = APIC_VERBOSE;
-	else
-		printk(KERN_WARNING "APIC Verbosity level %s not recognised"
-				" use apic=verbose or apic=debug\n", str);
-
 	return 1;
 }
 
-- 
cgit v0.10.2


From f3a19cb45f4730c4ce09ca9bccf197efd010dc3b Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 1 May 2006 12:15:52 -0700
Subject: [PATCH] silence initcall warnings

Suppress the initcall-return-value warnings unless initcall_debug was
specified.

They do find bugs, but they're extremely small ones and as Andi points out,
people get distressed.

Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/init/main.c b/init/main.c
index 4a2f089..f715b9b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -582,7 +582,7 @@ static void __init do_initcalls(void)
 
 		result = (*call)();
 
-		if (result && (result != -ENODEV || initcall_debug)) {
+		if (result && result != -ENODEV && initcall_debug) {
 			sprintf(msgbuf, "error code %d", result);
 			msg = msgbuf;
 		}
-- 
cgit v0.10.2


From c39e50b4bada27102306d7fa68ea35dc4e61bd68 Mon Sep 17 00:00:00 2001
From: "Victor V. Vengerov" <Victor.Vengerov@oktetlabs.ru>
Date: Mon, 1 May 2006 12:15:53 -0700
Subject: [PATCH] uml: fix iomem list traversal

We need to walk the region list properly.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index 0500800..fc0f0b0 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -407,6 +407,8 @@ unsigned long find_iomem(char *driver, unsigned long *len_out)
 			*len_out = region->size;
 			return(region->virt);
 		}
+
+		region = region->next;
 	}
 
 	return(0);
-- 
cgit v0.10.2


From 893bb96a29bee4a5cf2486720ac79412aaee5066 Mon Sep 17 00:00:00 2001
From: Joris van Rantwijk <jvrantwijk@xs4all.nl>
Date: Mon, 1 May 2006 12:15:56 -0700
Subject: [PATCH] uml: skas0 support for 2G/2G hosts

A quick hack to allow skas0 mode to run on 2G/2G hosts.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 05fbb20..76e85bb 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -57,20 +57,6 @@ config STATIC_LINK
 	chroot, and you disable CONFIG_MODE_TT, you probably want to say Y
 	here.
 
-config HOST_2G_2G
-	bool "2G/2G host address space split"
-	default n
-	depends on MODE_TT
-	help
-	This is needed when the host on which you run has a 2G/2G memory
-	split, instead of the customary 3G/1G.
-
-	Note that to enable such a host
-	configuration, which makes sense only in some cases, you need special
-	host patches.
-
-	So, if you do not know what to do here, say 'N'.
-
 config KERNEL_HALF_GIGS
 	int "Kernel address space size (in .5G units)"
 	default "1"
diff --git a/arch/um/Kconfig.i386 b/arch/um/Kconfig.i386
index 85e6a55..f6eb72d 100644
--- a/arch/um/Kconfig.i386
+++ b/arch/um/Kconfig.i386
@@ -16,6 +16,19 @@ config SEMAPHORE_SLEEPERS
 	bool
 	default y
 
+config HOST_2G_2G
+	bool "2G/2G host address space split"
+	default n
+	help
+	This is needed when the host on which you run has a 2G/2G memory
+	split, instead of the customary 3G/1G.
+
+	Note that to enable such a host
+	configuration, which makes sense only in some cases, you need special
+	host patches.
+
+	So, if you do not know what to do here, say 'N'.
+
 config TOP_ADDR
  	hex
  	default 0xc0000000 if !HOST_2G_2G
@@ -35,11 +48,13 @@ config 3_LEVEL_PGTABLES
 
 config STUB_CODE
 	hex
-	default 0xbfffe000
+	default 0xbfffe000 if !HOST_2G_2G
+	default 0x7fffe000 if HOST_2G_2G
 
 config STUB_DATA
 	hex
-	default 0xbffff000
+	default 0xbffff000 if !HOST_2G_2G
+	default 0x7ffff000 if HOST_2G_2G
 
 config STUB_START
 	hex
-- 
cgit v0.10.2


From 191ef966ac164a1ce3e06290ca52296744a4aee2 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Mon, 1 May 2006 12:15:57 -0700
Subject: [PATCH] uml: remove NULL checks and add some CodingStyle

Remove redundant NULL checks before [kv]free + small CodingStyle cleanup for
arch/

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index c39ea3a..2ffda01 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -89,16 +89,18 @@ void sigio_handler(int sig, union uml_pt_regs *regs)
 	struct irq_fd *irq_fd;
 	int n;
 
-	if(smp_sigio_handler()) return;
-	while(1){
+	if (smp_sigio_handler())
+		return;
+
+	while (1) {
 		n = os_waiting_for_events(active_fds);
 		if (n <= 0) {
 			if(n == -EINTR) continue;
 			else break;
 		}
 
-		for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){
-			if(irq_fd->current_events != 0){
+		for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
+			if (irq_fd->current_events != 0) {
 				irq_fd->current_events = 0;
 				do_IRQ(irq_fd->irq, regs);
 			}
@@ -110,19 +112,17 @@ void sigio_handler(int sig, union uml_pt_regs *regs)
 
 static void maybe_sigio_broken(int fd, int type)
 {
-	if(os_isatty(fd)){
-		if((type == IRQ_WRITE) && !pty_output_sigio){
+	if (os_isatty(fd)) {
+		if ((type == IRQ_WRITE) && !pty_output_sigio) {
 			write_sigio_workaround();
 			add_sigio_fd(fd, 0);
-		}
-		else if((type == IRQ_READ) && !pty_close_sigio){
+		} else if ((type == IRQ_READ) && !pty_close_sigio) {
 			write_sigio_workaround();
 			add_sigio_fd(fd, 1);
 		}
 	}
 }
 
-
 int activate_fd(int irq, int fd, int type, void *dev_id)
 {
 	struct pollfd *tmp_pfd;
@@ -132,16 +132,18 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
 
 	pid = os_getpid();
 	err = os_set_fd_async(fd, pid);
-	if(err < 0)
+	if (err < 0)
 		goto out;
 
 	new_fd = um_kmalloc(sizeof(*new_fd));
 	err = -ENOMEM;
-	if(new_fd == NULL)
+	if (new_fd == NULL)
 		goto out;
 
-	if(type == IRQ_READ) events = UM_POLLIN | UM_POLLPRI;
-	else events = UM_POLLOUT;
+	if (type == IRQ_READ)
+		events = UM_POLLIN | UM_POLLPRI;
+	else
+		events = UM_POLLOUT;
 	*new_fd = ((struct irq_fd) { .next  		= NULL,
 				     .id 		= dev_id,
 				     .fd 		= fd,
@@ -165,8 +167,8 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
 	 * a semaphore.
 	 */
 	flags = irq_lock();
-	for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){
-		if((irq_fd->fd == fd) && (irq_fd->type == type)){
+	for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
+		if ((irq_fd->fd == fd) && (irq_fd->type == type)) {
 			printk("Registering fd %d twice\n", fd);
 			printk("Irqs : %d, %d\n", irq_fd->irq, irq);
 			printk("Ids : 0x%p, 0x%p\n", irq_fd->id, dev_id);
@@ -175,13 +177,13 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
 	}
 
 	/*-------------*/
-	if(type == IRQ_WRITE)
+	if (type == IRQ_WRITE)
 		fd = -1;
 
 	tmp_pfd = NULL;
 	n = 0;
 
-	while(1){
+	while (1) {
 		n = os_create_pollfd(fd, events, tmp_pfd, n);
 		if (n == 0)
 			break;
@@ -198,10 +200,8 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
 		 * then we free the buffer tmp_fds and try again.
 		 */
 		irq_unlock(flags);
-		if (tmp_pfd != NULL) {
-			kfree(tmp_pfd);
-			tmp_pfd = NULL;
-		}
+		kfree(tmp_pfd);
+		tmp_pfd = NULL;
 
 		tmp_pfd = um_kmalloc(n);
 		if (tmp_pfd == NULL)
@@ -249,7 +249,7 @@ static int same_irq_and_dev(struct irq_fd *irq, void *d)
 {
 	struct irq_and_dev *data = d;
 
-	return((irq->irq == data->irq) && (irq->id == data->dev));
+	return ((irq->irq == data->irq) && (irq->id == data->dev));
 }
 
 void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
@@ -262,7 +262,7 @@ void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
 
 static int same_fd(struct irq_fd *irq, void *fd)
 {
-	return(irq->fd == *((int *) fd));
+	return (irq->fd == *((int *)fd));
 }
 
 void free_irq_by_fd(int fd)
@@ -276,16 +276,17 @@ static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
 	int i = 0;
 	int fdi;
 
-	for(irq=active_fds; irq != NULL; irq = irq->next){
-		if((irq->fd == fd) && (irq->irq == irqnum)) break;
+	for (irq = active_fds; irq != NULL; irq = irq->next) {
+		if ((irq->fd == fd) && (irq->irq == irqnum))
+			break;
 		i++;
 	}
-	if(irq == NULL){
+	if (irq == NULL) {
 		printk("find_irq_by_fd doesn't have descriptor %d\n", fd);
 		goto out;
 	}
 	fdi = os_get_pollfd(i);
-	if((fdi != -1) && (fdi != fd)){
+	if ((fdi != -1) && (fdi != fd)) {
 		printk("find_irq_by_fd - mismatch between active_fds and "
 		       "pollfds, fd %d vs %d, need %d\n", irq->fd,
 		       fdi, fd);
@@ -294,7 +295,7 @@ static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
 	}
 	*index_out = i;
  out:
-	return(irq);
+	return irq;
 }
 
 void reactivate_fd(int fd, int irqnum)
@@ -305,7 +306,7 @@ void reactivate_fd(int fd, int irqnum)
 
 	flags = irq_lock();
 	irq = find_irq_by_fd(fd, irqnum, &i);
-	if(irq == NULL){
+	if (irq == NULL) {
 		irq_unlock(flags);
 		return;
 	}
@@ -326,7 +327,7 @@ void deactivate_fd(int fd, int irqnum)
 
 	flags = irq_lock();
 	irq = find_irq_by_fd(fd, irqnum, &i);
-	if(irq == NULL)
+	if (irq == NULL)
 		goto out;
 	os_set_pollfd(i, -1);
  out:
@@ -338,15 +339,15 @@ int deactivate_all_fds(void)
 	struct irq_fd *irq;
 	int err;
 
-	for(irq=active_fds;irq != NULL;irq = irq->next){
+	for (irq = active_fds; irq != NULL; irq = irq->next) {
 		err = os_clear_fd_async(irq->fd);
-		if(err)
-			return(err);
+		if (err)
+			return err;
 	}
 	/* If there is a signal already queued, after unblocking ignore it */
 	os_set_ioignore();
 
-	return(0);
+	return 0;
 }
 
 void forward_interrupts(int pid)
@@ -356,9 +357,9 @@ void forward_interrupts(int pid)
 	int err;
 
 	flags = irq_lock();
-	for(irq=active_fds;irq != NULL;irq = irq->next){
+	for (irq = active_fds; irq != NULL; irq = irq->next) {
 		err = os_set_owner(irq->fd, pid);
-		if(err < 0){
+		if (err < 0) {
 			/* XXX Just remove the irq rather than
 			 * print out an infinite stream of these
 			 */
@@ -379,7 +380,7 @@ void forward_interrupts(int pid)
 unsigned int do_IRQ(int irq, union uml_pt_regs *regs)
 {
        irq_enter();
-       __do_IRQ(irq, (struct pt_regs *) regs);
+       __do_IRQ(irq, (struct pt_regs *)regs);
        irq_exit();
        return 1;
 }
@@ -392,12 +393,12 @@ int um_request_irq(unsigned int irq, int fd, int type,
 	int err;
 
 	err = request_irq(irq, handler, irqflags, devname, dev_id);
-	if(err)
-		return(err);
+	if (err)
+		return err;
 
-	if(fd != -1)
+	if (fd != -1)
 		err = activate_fd(irq, fd, type, dev_id);
-	return(err);
+	return err;
 }
 EXPORT_SYMBOL(um_request_irq);
 EXPORT_SYMBOL(reactivate_fd);
@@ -409,7 +410,7 @@ unsigned long irq_lock(void)
 	unsigned long flags;
 
 	spin_lock_irqsave(&irq_spinlock, flags);
-	return(flags);
+	return flags;
 }
 
 void irq_unlock(unsigned long flags)
@@ -452,7 +453,7 @@ void __init init_IRQ(void)
 	irq_desc[TIMER_IRQ].depth = 1;
 	irq_desc[TIMER_IRQ].handler = &SIGVTALRM_irq_type;
 	enable_irq(TIMER_IRQ);
-	for(i=1;i<NR_IRQS;i++){
+	for (i = 1; i < NR_IRQS; i++) {
 		irq_desc[i].status = IRQ_DISABLED;
 		irq_desc[i].action = NULL;
 		irq_desc[i].depth = 1;
@@ -467,7 +468,7 @@ int init_aio_irq(int irq, char *name, irqreturn_t (*handler)(int, void *,
 	int fds[2], err;
 
 	err = os_pipe(fds, 1, 1);
-	if(err){
+	if (err) {
 		printk("init_aio_irq - os_pipe failed, err = %d\n", -err);
 		goto out;
 	}
@@ -475,7 +476,7 @@ int init_aio_irq(int irq, char *name, irqreturn_t (*handler)(int, void *,
 	err = um_request_irq(irq, fds[0], IRQ_READ, handler,
 			     SA_INTERRUPT | SA_SAMPLE_RANDOM, name,
 			     (void *) (long) fds[0]);
-	if(err){
+	if (err) {
 		printk("init_aio_irq - : um_request_irq failed, err = %d\n",
 		       err);
 		goto out_close;
@@ -488,5 +489,5 @@ int init_aio_irq(int irq, char *name, irqreturn_t (*handler)(int, void *,
 	os_close_file(fds[0]);
 	os_close_file(fds[1]);
  out:
-	return(err);
+	return err;
 }
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index e599be4..3788d45 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -29,21 +29,21 @@ int os_waiting_for_events(struct irq_fd *active_fds)
 	int i, n, err;
 
 	n = poll(pollfds, pollfds_num, 0);
-	if(n < 0){
+	if (n < 0) {
 		err = -errno;
-		if(errno != EINTR)
+		if (errno != EINTR)
 			printk("sigio_handler: os_waiting_for_events:"
 			       " poll returned %d, errno = %d\n", n, errno);
 		return err;
 	}
 
-	if(n == 0)
+	if (n == 0)
 		return 0;
 
 	irq_fd = active_fds;
 
-	for(i = 0; i < pollfds_num; i++){
-		if(pollfds[i].revents != 0){
+	for (i = 0; i < pollfds_num; i++) {
+		if (pollfds[i].revents != 0) {
 			irq_fd->current_events = pollfds[i].revents;
 			pollfds[i].fd = -1;
 		}
@@ -54,7 +54,7 @@ int os_waiting_for_events(struct irq_fd *active_fds)
 
 int os_isatty(int fd)
 {
-	return(isatty(fd));
+	return isatty(fd);
 }
 
 int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds)
@@ -65,7 +65,7 @@ int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds)
 			return((pollfds_size + 1) * sizeof(pollfds[0]));
 		}
 
-		if(pollfds != NULL){
+		if (pollfds != NULL) {
 			memcpy(tmp_pfd, pollfds,
 			       sizeof(pollfds[0]) * pollfds_size);
 			/* remove old pollfds */
@@ -73,18 +73,15 @@ int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds)
 		}
 		pollfds = tmp_pfd;
 		pollfds_size++;
-	} else {
-		/* remove not used tmp_pfd */
-		if (tmp_pfd != NULL)
-			kfree(tmp_pfd);
-	}
+	} else
+		kfree(tmp_pfd);	/* remove not used tmp_pfd */
 
-	pollfds[pollfds_num] = ((struct pollfd) { .fd 	= fd,
-						  .events 	= events,
-						  .revents 	= 0 });
+	pollfds[pollfds_num] = ((struct pollfd) { .fd		= fd,
+						  .events	= events,
+						  .revents	= 0 });
 	pollfds_num++;
 
-	return(0);
+	return 0;
 }
 
 void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
@@ -94,11 +91,11 @@ void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
 	int i = 0;
 
 	prev = &active_fds;
-	while(*prev != NULL){
-		if((*test)(*prev, arg)){
+	while (*prev != NULL) {
+		if ((*test)(*prev, arg)) {
 			struct irq_fd *old_fd = *prev;
-			if((pollfds[i].fd != -1) &&
-			   (pollfds[i].fd != (*prev)->fd)){
+			if ((pollfds[i].fd != -1) &&
+			    (pollfds[i].fd != (*prev)->fd)) {
 				printk("os_free_irq_by_cb - mismatch between "
 				       "active_fds and pollfds, fd %d vs %d\n",
 				       (*prev)->fd, pollfds[i].fd);
@@ -110,7 +107,6 @@ void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
 			/* This moves the *whole* array after pollfds[i]
 			 * (though it doesn't spot as such)!
 			 */
-
 			memmove(&pollfds[i], &pollfds[i + 1],
 			       (pollfds_num - i) * sizeof(pollfds[0]));
 			if(*last_irq_ptr2 == &old_fd->next)
@@ -129,10 +125,9 @@ void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
 	return;
 }
 
-
 int os_get_pollfd(int i)
 {
-	return(pollfds[i].fd);
+	return pollfds[i].fd;
 }
 
 void os_set_pollfd(int i, int fd)
@@ -151,8 +146,10 @@ void init_irq_signals(int on_sigstack)
 	int flags;
 
 	flags = on_sigstack ? SA_ONSTACK : 0;
-	if(timer_irq_inited) h = (__sighandler_t) alarm_handler;
-	else h = boot_timer_handler;
+	if (timer_irq_inited)
+		h = (__sighandler_t)alarm_handler;
+	else
+		h = boot_timer_handler;
 
 	set_handler(SIGVTALRM, h, flags | SA_RESTART,
 		    SIGUSR1, SIGIO, SIGWINCH, SIGALRM, -1);
-- 
cgit v0.10.2


From e3104f50d89b1fffe1fd973e32248a5c7f1bb41e Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 1 May 2006 12:15:58 -0700
Subject: [PATCH] uml: clean up after MADVISE_REMOVE

The MADVISE_REMOVE-checking code didn't clean up after itself.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 3505f44..233be2f 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -206,29 +206,36 @@ int os_drop_memory(void *addr, int length)
 int can_drop_memory(void)
 {
 	void *addr;
-	int fd;
+	int fd, ok = 0;
 
 	printk("Checking host MADV_REMOVE support...");
 	fd = create_mem_file(UM_KERN_PAGE_SIZE);
 	if(fd < 0){
 		printk("Creating test memory file failed, err = %d\n", -fd);
-		return 0;
+		goto out;
 	}
 
 	addr = mmap64(NULL, UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
 		      MAP_SHARED, fd, 0);
 	if(addr == MAP_FAILED){
 		printk("Mapping test memory file failed, err = %d\n", -errno);
-		return 0;
+		goto out_close;
 	}
 
 	if(madvise(addr, UM_KERN_PAGE_SIZE, MADV_REMOVE) != 0){
 		printk("MADV_REMOVE failed, err = %d\n", -errno);
-		return 0;
+		goto out_unmap;
 	}
 
 	printk("OK\n");
-	return 1;
+	ok = 1;
+
+out_unmap:
+	munmap(addr, UM_KERN_PAGE_SIZE);
+out_close:
+	close(fd);
+out:
+	return ok;
 }
 
 void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int))
-- 
cgit v0.10.2


From 347b3dc2faf34d63a96b41e3244b743cc72a2aa0 Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 1 May 2006 12:15:59 -0700
Subject: [PATCH] uml: update defconfig

Bring defconfig up to date.

Also disable CONFIG_BLK_DEV_UBD_SYNC by default.  By performing synchronous
I/O to the host, it slows things down, only protects against host crashes, and
can make a UML appear to hang while it waits for the host's disk.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/defconfig b/arch/um/defconfig
index 80d30d1..402a74d 100644
--- a/arch/um/defconfig
+++ b/arch/um/defconfig
@@ -1,14 +1,13 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.12-rc6-mm1
-# Tue Jun 14 18:22:21 2005
+# Linux kernel version: 2.6.17-rc3
+# Fri Apr 28 09:31:20 2006
 #
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_UML=y
 CONFIG_MMU=y
-CONFIG_UID16=y
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_IRQ_RELEASE_METHOD=y
 
 #
 # UML-specific options
@@ -16,8 +15,50 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y
 # CONFIG_MODE_TT is not set
 # CONFIG_STATIC_LINK is not set
 CONFIG_MODE_SKAS=y
+
+#
+# Host processor type and features
+#
+# CONFIG_M386 is not set
+# CONFIG_M486 is not set
+# CONFIG_M586 is not set
+# CONFIG_M586TSC is not set
+# CONFIG_M586MMX is not set
+CONFIG_M686=y
+# CONFIG_MPENTIUMII is not set
+# CONFIG_MPENTIUMIII is not set
+# CONFIG_MPENTIUMM is not set
+# CONFIG_MPENTIUM4 is not set
+# CONFIG_MK6 is not set
+# CONFIG_MK7 is not set
+# CONFIG_MK8 is not set
+# CONFIG_MCRUSOE is not set
+# CONFIG_MEFFICEON is not set
+# CONFIG_MWINCHIPC6 is not set
+# CONFIG_MWINCHIP2 is not set
+# CONFIG_MWINCHIP3D is not set
+# CONFIG_MGEODEGX1 is not set
+# CONFIG_MGEODE_LX is not set
+# CONFIG_MCYRIXIII is not set
+# CONFIG_MVIAC3_2 is not set
+# CONFIG_X86_GENERIC is not set
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_XADD=y
+CONFIG_X86_L1_CACHE_SHIFT=5
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_X86_PPRO_FENCE=y
+CONFIG_X86_WP_WORKS_OK=y
+CONFIG_X86_INVLPG=y
+CONFIG_X86_BSWAP=y
+CONFIG_X86_POPAD_OK=y
+CONFIG_X86_CMPXCHG64=y
+CONFIG_X86_GOOD_APIC=y
+CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
 CONFIG_UML_X86=y
 # CONFIG_64BIT is not set
+CONFIG_SEMAPHORE_SLEEPERS=y
+# CONFIG_HOST_2G_2G is not set
 CONFIG_TOP_ADDR=0xc0000000
 # CONFIG_3_LEVEL_PGTABLES is not set
 CONFIG_STUB_CODE=0xbfffe000
@@ -25,22 +66,24 @@ CONFIG_STUB_DATA=0xbffff000
 CONFIG_STUB_START=0xbfffe000
 CONFIG_ARCH_HAS_SC_SIGNALS=y
 CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA=y
+CONFIG_GENERIC_HWEIGHT=y
 CONFIG_SELECT_MEMORY_MODEL=y
 CONFIG_FLATMEM_MANUAL=y
 # CONFIG_DISCONTIGMEM_MANUAL is not set
 # CONFIG_SPARSEMEM_MANUAL is not set
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4
 CONFIG_LD_SCRIPT_DYN=y
 CONFIG_NET=y
 CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=m
 # CONFIG_HOSTFS is not set
+# CONFIG_HPPFS is not set
 CONFIG_MCONSOLE=y
 # CONFIG_MAGIC_SYSRQ is not set
-# CONFIG_HOST_2G_2G is not set
 CONFIG_NEST_LEVEL=0
-CONFIG_KERNEL_HALF_GIGS=1
 # CONFIG_HIGHMEM is not set
 CONFIG_KERNEL_STACK_ORDER=2
 CONFIG_UML_REAL_TIME_CLOCK=y
@@ -49,7 +92,6 @@ CONFIG_UML_REAL_TIME_CLOCK=y
 # Code maturity level options
 #
 CONFIG_EXPERIMENTAL=y
-CONFIG_CLEAN_COMPILE=y
 CONFIG_BROKEN_ON_SMP=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
 
@@ -57,6 +99,7 @@ CONFIG_INIT_ENV_ARG_LIMIT=32
 # General setup
 #
 CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
 CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -64,26 +107,28 @@ CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 CONFIG_SYSCTL=y
 # CONFIG_AUDIT is not set
-# CONFIG_HOTPLUG is not set
-CONFIG_KOBJECT_UEVENT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
+# CONFIG_RELAY is not set
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_UID16=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 # CONFIG_EMBEDDED is not set
 CONFIG_KALLSYMS=y
 # CONFIG_KALLSYMS_ALL is not set
 CONFIG_KALLSYMS_EXTRA_PASS=y
+CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
+CONFIG_ELF_CORE=y
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
 CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
+CONFIG_SLAB=y
 # CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
+# CONFIG_SLOB is not set
 
 #
 # Loadable module support
@@ -91,18 +136,43 @@ CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_FORCE_UNLOAD is not set
-CONFIG_OBSOLETE_MODPARM=y
 # CONFIG_MODVERSIONS is not set
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_KMOD=y
 
 #
-# Generic Driver Options
+# Block layer
 #
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
-# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
+#
+# Block devices
+#
+CONFIG_BLK_DEV_UBD=y
+# CONFIG_BLK_DEV_UBD_SYNC is not set
+CONFIG_BLK_DEV_COW_COMMON=y
+# CONFIG_MMAPPER is not set
+CONFIG_BLK_DEV_LOOP=m
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+CONFIG_BLK_DEV_NBD=m
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_BLK_DEV_INITRD is not set
+# CONFIG_ATA_OVER_ETH is not set
 
 #
 # Character Devices
@@ -127,50 +197,23 @@ CONFIG_UML_SOUND=m
 CONFIG_SOUND=m
 CONFIG_HOSTAUDIO=m
 CONFIG_UML_RANDOM=y
-# CONFIG_MMAPPER is not set
-
-#
-# Block devices
-#
-CONFIG_BLK_DEV_UBD=y
-CONFIG_BLK_DEV_UBD_SYNC=y
-CONFIG_BLK_DEV_COW_COMMON=y
-CONFIG_BLK_DEV_LOOP=m
-# CONFIG_BLK_DEV_CRYPTOLOOP is not set
-CONFIG_BLK_DEV_NBD=m
-# CONFIG_BLK_DEV_RAM is not set
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_INITRAMFS_SOURCE=""
-# CONFIG_LBD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-# CONFIG_ATA_OVER_ETH is not set
-CONFIG_NETDEVICES=y
 
 #
-# UML Network Devices
+# Generic Driver Options
 #
-CONFIG_UML_NET=y
-CONFIG_UML_NET_ETHERTAP=y
-CONFIG_UML_NET_TUNTAP=y
-CONFIG_UML_NET_SLIP=y
-CONFIG_UML_NET_DAEMON=y
-CONFIG_UML_NET_MCAST=y
-CONFIG_UML_NET_SLIRP=y
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
 
 #
-# Networking support
+# Networking
 #
 
 #
 # Networking options
 #
+# CONFIG_NETDEBUG is not set
 CONFIG_PACKET=y
 CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
@@ -178,6 +221,7 @@ CONFIG_UNIX=y
 CONFIG_INET=y
 # CONFIG_IP_MULTICAST is not set
 # CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
 # CONFIG_IP_PNP is not set
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
@@ -186,27 +230,31 @@ CONFIG_INET=y
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
 # CONFIG_INET_TUNNEL is not set
-CONFIG_IP_TCPDIAG=y
-# CONFIG_IP_TCPDIAG_IPV6 is not set
-
-#
-# TCP congestion control
-#
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
 CONFIG_TCP_CONG_BIC=y
-CONFIG_TCP_CONG_WESTWOOD=y
-CONFIG_TCP_CONG_HTCP=y
-# CONFIG_TCP_CONG_HSTCP is not set
-# CONFIG_TCP_CONG_HYBLA is not set
-# CONFIG_TCP_CONG_VEGAS is not set
-# CONFIG_TCP_CONG_SCALABLE is not set
 # CONFIG_IPV6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
 # CONFIG_NETFILTER is not set
 
 #
+# DCCP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_DCCP is not set
+
+#
 # SCTP Configuration (EXPERIMENTAL)
 #
 # CONFIG_IP_SCTP is not set
+
+#
+# TIPC Configuration (EXPERIMENTAL)
+#
+# CONFIG_TIPC is not set
 # CONFIG_ATM is not set
 # CONFIG_BRIDGE is not set
 # CONFIG_VLAN_8021Q is not set
@@ -224,27 +272,47 @@ CONFIG_TCP_CONG_HTCP=y
 # QoS and/or fair queueing
 #
 # CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
 
 #
 # Network testing
 #
 # CONFIG_NET_PKTGEN is not set
-# CONFIG_KGDBOE is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NETPOLL_RX is not set
-# CONFIG_NETPOLL_TRAP is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
 # CONFIG_HAMRADIO is not set
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
 # CONFIG_IEEE80211 is not set
+
+#
+# UML Network Devices
+#
+CONFIG_UML_NET=y
+CONFIG_UML_NET_ETHERTAP=y
+CONFIG_UML_NET_TUNTAP=y
+CONFIG_UML_NET_SLIP=y
+CONFIG_UML_NET_DAEMON=y
+CONFIG_UML_NET_MCAST=y
+# CONFIG_UML_NET_PCAP is not set
+CONFIG_UML_NET_SLIRP=y
+
+#
+# Network device support
+#
+CONFIG_NETDEVICES=y
 CONFIG_DUMMY=m
 # CONFIG_BONDING is not set
 # CONFIG_EQUALIZER is not set
 CONFIG_TUN=m
 
 #
+# PHY device support
+#
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
 # Wan interfaces
 #
 # CONFIG_WAN is not set
@@ -263,6 +331,13 @@ CONFIG_SLIP=m
 # CONFIG_SLIP_MODE_SLIP6 is not set
 # CONFIG_SHAPER is not set
 # CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+
+#
+# Connector - unified userspace <-> kernelspace linker
+#
+# CONFIG_CONNECTOR is not set
 
 #
 # File systems
@@ -274,17 +349,14 @@ CONFIG_EXT3_FS=y
 # CONFIG_EXT3_FS_XATTR is not set
 CONFIG_JBD=y
 # CONFIG_JBD_DEBUG is not set
-# CONFIG_REISER4_FS is not set
 CONFIG_REISERFS_FS=y
 # CONFIG_REISERFS_CHECK is not set
 # CONFIG_REISERFS_PROC_INFO is not set
 # CONFIG_REISERFS_FS_XATTR is not set
 # CONFIG_JFS_FS is not set
-
-#
-# XFS support
-#
+# CONFIG_FS_POSIX_ACL is not set
 # CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
 # CONFIG_MINIX_FS is not set
 # CONFIG_ROMFS_FS is not set
 CONFIG_INOTIFY=y
@@ -295,11 +367,6 @@ CONFIG_QUOTACTL=y
 CONFIG_DNOTIFY=y
 CONFIG_AUTOFS_FS=m
 CONFIG_AUTOFS4_FS=m
-
-#
-# Caches
-#
-# CONFIG_FSCACHE is not set
 # CONFIG_FUSE_FS is not set
 
 #
@@ -323,14 +390,10 @@ CONFIG_JOLIET=y
 CONFIG_PROC_FS=y
 CONFIG_PROC_KCORE=y
 CONFIG_SYSFS=y
-# CONFIG_DEVFS_FS is not set
-# CONFIG_DEVPTS_FS_XATTR is not set
 CONFIG_TMPFS=y
-# CONFIG_TMPFS_XATTR is not set
 # CONFIG_HUGETLB_PAGE is not set
 CONFIG_RAMFS=y
 # CONFIG_CONFIGFS_FS is not set
-# CONFIG_RELAYFS_FS is not set
 
 #
 # Miscellaneous filesystems
@@ -430,6 +493,7 @@ CONFIG_NLS_DEFAULT="iso8859-1"
 # Library routines
 #
 # CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
 CONFIG_CRC32=m
 # CONFIG_LIBCRC32C is not set
 
@@ -448,12 +512,18 @@ CONFIG_LOG_BUF_SHIFT=14
 CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_SCHEDSTATS is not set
 CONFIG_DEBUG_SLAB=y
+# CONFIG_DEBUG_SLAB_LEAK is not set
+# CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_KOBJECT is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
 CONFIG_FRAME_POINTER=y
+# CONFIG_UNWIND_INFO is not set
+CONFIG_FORCED_INLINING=y
+# CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_GPROF is not set
 # CONFIG_GCOV is not set
 # CONFIG_SYSCALL_DEBUG is not set
-- 
cgit v0.10.2


From 2ace87b9502d922397cabaf07d73e0b60c480ecf Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 1 May 2006 12:16:00 -0700
Subject: [PATCH] uml: error handling fixes

Blairsorblade noticed some confusion between our use of a system
call's return value and errno.  This patch fixes a number of related
bugs -
	using errno instead of a return value
	using a return value instead of errno
	forgetting to negate a error return to get a positive error code

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 3bd10de..0925133 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -171,7 +171,7 @@ int os_sigio_async(int master, int slave)
 
 	flags = fcntl(master, F_GETFL);
 	if(flags < 0)
-		return errno;
+		return -errno;
 
 	if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) ||
 	   (fcntl(master, F_SETOWN, os_getpid()) < 0))
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 0776bc1..bd89c6b 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -344,12 +344,12 @@ int copy_context_skas0(unsigned long new_stack, int pid)
 	err = ptrace_setregs(pid, regs);
 	if(err < 0)
 		panic("copy_context_skas0 : PTRACE_SETREGS failed, "
-		      "pid = %d, errno = %d\n", pid, errno);
+		      "pid = %d, errno = %d\n", pid, -err);
 
 	err = ptrace_setfpregs(pid, fp_regs);
 	if(err < 0)
 		panic("copy_context_skas0 : PTRACE_SETFPREGS failed, "
-		      "pid = %d, errno = %d\n", pid, errno);
+		      "pid = %d, errno = %d\n", pid, -err);
 
 	/* set a well known return code for detection of child write failure */
 	child_data->err = 12345678;
@@ -362,7 +362,7 @@ int copy_context_skas0(unsigned long new_stack, int pid)
 	pid = data->err;
 	if(pid < 0)
 		panic("copy_context_skas0 - stub-parent reports error %d\n",
-		      pid);
+		      -pid);
 
 	/* Wait, until child has finished too: read child's result from
 	 * child's stack and check it.
diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c
index 7a6f6b9..516f66d 100644
--- a/arch/um/os-Linux/sys-i386/registers.c
+++ b/arch/um/os-Linux/sys-i386/registers.c
@@ -104,7 +104,7 @@ void init_registers(int pid)
 	err = ptrace(PTRACE_GETREGS, pid, 0, exec_regs);
 	if(err)
 		panic("check_ptrace : PTRACE_GETREGS failed, errno = %d",
-		      err);
+		      errno);
 
 	errno = 0;
 	err = ptrace(PTRACE_GETFPXREGS, pid, 0, exec_fpx_regs);
@@ -119,7 +119,7 @@ void init_registers(int pid)
 	err = ptrace(PTRACE_GETFPREGS, pid, 0, exec_fp_regs);
 	if(err)
 		panic("check_ptrace : PTRACE_GETFPREGS failed, errno = %d",
-		      err);
+		      errno);
 }
 
 void get_safe_registers(unsigned long *regs, unsigned long *fp_regs)
diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c
index 001941f..becd898 100644
--- a/arch/um/os-Linux/sys-x86_64/registers.c
+++ b/arch/um/os-Linux/sys-x86_64/registers.c
@@ -62,12 +62,12 @@ void init_registers(int pid)
 	err = ptrace(PTRACE_GETREGS, pid, 0, exec_regs);
 	if(err)
 		panic("check_ptrace : PTRACE_GETREGS failed, errno = %d",
-		      err);
+		      errno);
 
 	err = ptrace(PTRACE_GETFPREGS, pid, 0, exec_fp_regs);
 	if(err)
 		panic("check_ptrace : PTRACE_GETFPREGS failed, errno = %d",
-		      err);
+		      errno);
 }
 
 void get_safe_registers(unsigned long *regs, unsigned long *fp_regs)
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index 34bfc1b..362db05 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -178,14 +178,14 @@ static void __init create_pid_file(void)
 	fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0644);
 	if(fd < 0){
 		printk("Open of machine pid file \"%s\" failed: %s\n",
-		       file, strerror(-fd));
+		       file, strerror(errno));
 		return;
 	}
 
 	snprintf(pid, sizeof(pid), "%d\n", getpid());
 	n = write(fd, pid, strlen(pid));
 	if(n != strlen(pid))
-		printk("Write of pid file failed - err = %d\n", -n);
+		printk("Write of pid file failed - err = %d\n", errno);
 
 	close(fd);
 }
-- 
cgit v0.10.2


From b15fb6b157b83ce983e42130ab7d1a866020d8f5 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Mon, 1 May 2006 12:16:01 -0700
Subject: [PATCH] uml: fix patch mismerge

I sent a patch, it was applied as cda402b283c34a24b091f78eee116963e9494762,
then it was applied again as 181ae4005d0a4010802be534d929b38c42b9ac06 by
mistake.  But while the 1st time it modified (correctly) cow_header_v3, the
2nd it modified cow_header_v3_broken.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Acked-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c
index 6ab852b..0ec4052 100644
--- a/arch/um/drivers/cow_user.c
+++ b/arch/um/drivers/cow_user.c
@@ -100,7 +100,7 @@ struct cow_header_v3_broken {
 	__u32 alignment;
 	__u32 cow_format;
 	char backing_file[PATH_LEN_V3];
-} __attribute__((packed));
+};
 
 /* COW format definitions - for now, we have only the usual COW bitmap */
 #define COW_BITMAP 0
-- 
cgit v0.10.2


From cb98cdcd0dd892dcaec7dabd57c3b00890006b61 Mon Sep 17 00:00:00 2001
From: Mattia Dongili <malattia@linux.it>
Date: Mon, 1 May 2006 12:16:01 -0700
Subject: [PATCH] uml: search from uml_net in a more reasonable PATH

Append /usr/lib/uml to the existing PATH environment variable to let execvp()
search uml_net in FHS compliant locations.

Signed-off-by: Mattia Dongili <malattia@linux.it>
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Acked-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 2878e89..3a0ac38 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -74,6 +74,34 @@ static void last_ditch_exit(int sig)
 	exit(1);
 }
 
+#define UML_LIB_PATH	":/usr/lib/uml"
+
+static void setup_env_path(void)
+{
+	char *new_path = NULL;
+	char *old_path = NULL;
+	int path_len = 0;
+
+	old_path = getenv("PATH");
+	/* if no PATH variable is set or it has an empty value
+	 * just use the default + /usr/lib/uml
+	 */
+	if (!old_path || (path_len = strlen(old_path)) == 0) {
+		putenv("PATH=:/bin:/usr/bin/" UML_LIB_PATH);
+		return;
+	}
+
+	/* append /usr/lib/uml to the existing path */
+	path_len += strlen("PATH=" UML_LIB_PATH) + 1;
+	new_path = malloc(path_len);
+	if (!new_path) {
+		perror("coudn't malloc to set a new PATH");
+		return;
+	}
+	snprintf(new_path, path_len, "PATH=%s" UML_LIB_PATH, old_path);
+	putenv(new_path);
+}
+
 extern int uml_exitcode;
 
 extern void scan_elf_aux( char **envp);
@@ -114,6 +142,8 @@ int main(int argc, char **argv, char **envp)
 
 	set_stklim();
 
+	setup_env_path();
+
 	new_argv = malloc((argc + 1) * sizeof(char *));
 	if(new_argv == NULL){
 		perror("Mallocing argv");
-- 
cgit v0.10.2


From cb8aa3d29b562e4c16fdfa4ecc8170ddc55397f4 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Mon, 1 May 2006 12:16:03 -0700
Subject: [PATCH] uml: use Kbuild tracking for all files and fix compilation
 output

Move the build of user-offsets to arch/um/sys-$(SUBARCH), where it's located.
So we can also build it via Kbuild with its dependency tracking rather than by
hand.  While hacking here, fix also a lot of little cosmetic things.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Acked-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/Makefile b/arch/um/Makefile
index a508e7a..930e006 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -96,7 +96,8 @@ PHONY += linux
 all: linux
 
 linux: vmlinux
-	ln -f $< $@
+	@echo '  SYMLINK $@'
+	$(Q)ln -f $< $@
 
 define archhelp
   echo '* linux		- Binary kernel image (./linux) - for backward'
@@ -203,8 +204,8 @@ endef
 $(ARCH_DIR)/include/uml-config.h : include/linux/autoconf.h
 	$(call filechk,umlconfig)
 
-$(ARCH_DIR)/user-offsets.s: $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.c
-	$(CC) $(USER_CFLAGS) -S -o $@ $<
+$(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s: FORCE
+	$(Q)$(MAKE) $(build)=$(ARCH_DIR)/sys-$(SUBARCH) $@
 
 define filechk_gen-asm-offsets
         (set -e; \
@@ -219,13 +220,11 @@ define filechk_gen-asm-offsets
          echo ""; )
 endef
 
-$(ARCH_DIR)/include/user_constants.h: $(ARCH_DIR)/user-offsets.s
+$(ARCH_DIR)/include/user_constants.h: $(ARCH_DIR)/sys-$(SUBARCH)/user-offsets.s
 	$(call filechk,gen-asm-offsets)
 
-CLEAN_FILES += $(ARCH_DIR)/user-offsets.s
-
 $(ARCH_DIR)/include/kern_constants.h: $(objtree)/$(ARCH_DIR)/include
 	@echo '  SYMLINK $@'
-	$(Q) ln -sf ../../../include/asm-um/asm-offsets.h $@
+	$(Q)ln -sf ../../../include/asm-um/asm-offsets.h $@
 
 export SUBARCH USER_CFLAGS OS
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 98b20b7..82121ab 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -10,9 +10,12 @@ subarch-obj-$(CONFIG_MODULES) += kernel/module.o
 
 USER_OBJS := bugs.o ptrace_user.o sigcontext.o fault.o stub_segv.o
 
-include arch/um/scripts/Makefile.rules
+USER_OBJS += user-offsets.s
+extra-y += user-offsets.s
 
 extra-$(CONFIG_MODE_TT) += unmap.o
 
+include arch/um/scripts/Makefile.rules
+
 $(obj)/stub_segv.o $(obj)/unmap.o: \
 	_c_flags = $(call unprofile,$(CFLAGS))
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
index b5fc22b..f739bea 100644
--- a/arch/um/sys-x86_64/Makefile
+++ b/arch/um/sys-x86_64/Makefile
@@ -18,9 +18,12 @@ ldt-y = ../sys-i386/ldt.o
 
 USER_OBJS := ptrace_user.o sigcontext.o stub_segv.o
 
-include arch/um/scripts/Makefile.rules
+USER_OBJS += user-offsets.s
+extra-y += user-offsets.s
 
 extra-$(CONFIG_MODE_TT) += unmap.o
 
+include arch/um/scripts/Makefile.rules
+
 $(obj)/stub_segv.o $(obj)/unmap.o: \
 	_c_flags = $(call unprofile,$(CFLAGS))
-- 
cgit v0.10.2


From 275e6e1ee2bafde77e9390b27e876fa83f24cb60 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Mon, 1 May 2006 12:16:04 -0700
Subject: [PATCH] uml: fix compilation and execution with hardened GCC

To make some half-assembly stubs compile, disable various "hardened" GCC
features:

*) we can't make it build PIC code as we need %ebx to do syscalls and GCC
   wants it free for PIC

*) we can't leave stack protection as the stub is moved (not relocated!) in
   memory so the RIP-relative access to the canary tries reading from an
   unmapped address and causes a segfault, since we move the stub of various
   megabytes (the exact amount will be decided at runtime) away from the
   link-time address.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Acked-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/Makefile b/arch/um/Makefile
index 930e006..bed604a 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -118,6 +118,10 @@ prepare: $(ARCH_DIR)/include/kern_constants.h
 LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static
 LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib
 
+CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \
+	$(call cc-option, -fno-stack-protector,) \
+	$(call cc-option, -fno-stack-protector-all,)
+
 CPP_MODE-$(CONFIG_MODE_TT) := -DMODE_TT
 CONFIG_KERNEL_STACK_ORDER ?= 2
 STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] )
@@ -227,4 +231,4 @@ $(ARCH_DIR)/include/kern_constants.h: $(objtree)/$(ARCH_DIR)/include
 	@echo '  SYMLINK $@'
 	$(Q)ln -sf ../../../include/asm-um/asm-offsets.h $@
 
-export SUBARCH USER_CFLAGS OS
+export SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS
diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile
index 57181a9..ad84296 100644
--- a/arch/um/kernel/skas/Makefile
+++ b/arch/um/kernel/skas/Makefile
@@ -11,4 +11,11 @@ USER_OBJS := clone.o
 include arch/um/scripts/Makefile.rules
 
 # clone.o is in the stub, so it can't be built with profiling
-$(obj)/clone.o : c_flags = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS))
+# GCC hardened also auto-enables -fpic, but we need %ebx so it can't work ->
+# disable it
+
+CFLAGS_clone.o := $(CFLAGS_NO_HARDENING)
+
+# since we're setting c_flags we _must_ add $(CFLAGS_$(*F).o).
+
+$(obj)/clone.o : c_flags = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) $(CFLAGS_$(*F).o)
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 82121ab..3734c3e 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -13,6 +13,8 @@ USER_OBJS := bugs.o ptrace_user.o sigcontext.o fault.o stub_segv.o
 USER_OBJS += user-offsets.s
 extra-y += user-offsets.s
 
+CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
+
 extra-$(CONFIG_MODE_TT) += unmap.o
 
 include arch/um/scripts/Makefile.rules
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
index f739bea..6d3b29c 100644
--- a/arch/um/sys-x86_64/Makefile
+++ b/arch/um/sys-x86_64/Makefile
@@ -21,6 +21,8 @@ USER_OBJS := ptrace_user.o sigcontext.o stub_segv.o
 USER_OBJS += user-offsets.s
 extra-y += user-offsets.s
 
+CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
+
 extra-$(CONFIG_MODE_TT) += unmap.o
 
 include arch/um/scripts/Makefile.rules
-- 
cgit v0.10.2


From 7b12b9137930eb821b68e1bfa11e9de692208620 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Mon, 1 May 2006 12:16:05 -0700
Subject: [PATCH] uml: cleanup unprofile expression and build infrastructure

*) Rather than duplicate in various buggy ways the application of
   CFLAGS_NO_HARDENING and UNPROFILE (which apply to the same files),
   centralize it in Makefile.rules.  UNPROFILE_OBJS mustn't be listed in
   USER_OBJS but are compiled as such.

I've also verified that unprofile didn't work in the current form, because we
set _c_flags directly (using CFLAGS and not USER_CFLAGS, which is wrong),
which is normally used by c_flags, but we also override c_flags for all
USER_OBJS, and there we don't call unprofile.

Instead it only worked for unmap.o, the only one which wasn't a USER_OBJ.

We need to set c_flags (which is not a public Kbuild API) to clear a lot of
compilation flags like -nostdinc which Kbuild forces on everything.

*) Rather than $(CFLAGS_$(notdir $@)), which expands to CFLAGS_anObj.s when
   building "anObj.s", use $(CFLAGS_$(*F).o) which always accesses
   CFLAGS_anObj.o, like done by Kbuild.

*) Make c_flags apply to all targets having the same basename, rather than
   listing .s, .i, .lst and .o, with the use (which I tested) of

	$(USER_OBJS:.o=.%): c_flags = ...

and of

 -      $(obj)/unmap.c: _c_flags = ...
 +      $(obj)/unmap.%: _c_flags = ...

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Acked-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile
index ad84296..ea3a8e4 100644
--- a/arch/um/kernel/skas/Makefile
+++ b/arch/um/kernel/skas/Makefile
@@ -6,16 +6,11 @@
 obj-y := clone.o exec_kern.o mem.o mmu.o process_kern.o \
 	syscall.o tlb.o uaccess.o
 
-USER_OBJS := clone.o
-
-include arch/um/scripts/Makefile.rules
-
 # clone.o is in the stub, so it can't be built with profiling
 # GCC hardened also auto-enables -fpic, but we need %ebx so it can't work ->
 # disable it
 
 CFLAGS_clone.o := $(CFLAGS_NO_HARDENING)
+UNPROFILE_OBJS := clone.o
 
-# since we're setting c_flags we _must_ add $(CFLAGS_$(*F).o).
-
-$(obj)/clone.o : c_flags = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) $(CFLAGS_$(*F).o)
+include arch/um/scripts/Makefile.rules
diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules
index 5e7a9c3..1347dc6 100644
--- a/arch/um/scripts/Makefile.rules
+++ b/arch/um/scripts/Makefile.rules
@@ -7,11 +7,19 @@ USER_SINGLE_OBJS := \
 USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m)  $(USER_SINGLE_OBJS))
 USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
 
-$(USER_OBJS) $(USER_OBJS:.o=.i) $(USER_OBJS:.o=.s) $(USER_OBJS:.o=.lst): \
-	c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) $(CFLAGS_$(notdir $@))
+$(USER_OBJS:.o=.%): \
+	c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) $(CFLAGS_$(*F).o)
 $(USER_OBJS) : CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ \
 	-Dunix -D__unix__ -D__$(SUBARCH)__
 
+# These are like USER_OBJS but filter USER_CFLAGS through unprofile instead of
+# using it directly.
+UNPROFILE_OBJS := $(foreach file,$(UNPROFILE_OBJS),$(obj)/$(file))
+
+$(UNPROFILE_OBJS:.o=.%): \
+	c_flags = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) $(CFLAGS_$(*F).o)
+$(UNPROFILE_OBJS) : CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ \
+	-Dunix -D__unix__ -D__$(SUBARCH)__
 
 # The stubs and unmap.o can't try to call mcount or update basic block data
 define unprofile
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 3734c3e..374d61a 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -8,16 +8,16 @@ subarch-obj-y = lib/bitops.o kernel/semaphore.o
 subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem.o
 subarch-obj-$(CONFIG_MODULES) += kernel/module.o
 
-USER_OBJS := bugs.o ptrace_user.o sigcontext.o fault.o stub_segv.o
+USER_OBJS := bugs.o ptrace_user.o sigcontext.o fault.o
 
 USER_OBJS += user-offsets.s
 extra-y += user-offsets.s
 
-CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
-
 extra-$(CONFIG_MODE_TT) += unmap.o
 
+UNPROFILE_OBJS := stub_segv.o
+CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
+
 include arch/um/scripts/Makefile.rules
 
-$(obj)/stub_segv.o $(obj)/unmap.o: \
-	_c_flags = $(call unprofile,$(CFLAGS))
+$(obj)/unmap.%: _c_flags = $(call unprofile,$(CFLAGS))
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
index 6d3b29c..c19794d 100644
--- a/arch/um/sys-x86_64/Makefile
+++ b/arch/um/sys-x86_64/Makefile
@@ -16,16 +16,16 @@ subarch-obj-$(CONFIG_MODULES) += kernel/module.o
 
 ldt-y = ../sys-i386/ldt.o
 
-USER_OBJS := ptrace_user.o sigcontext.o stub_segv.o
+USER_OBJS := ptrace_user.o sigcontext.o
 
 USER_OBJS += user-offsets.s
 extra-y += user-offsets.s
 
-CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
-
 extra-$(CONFIG_MODE_TT) += unmap.o
 
+UNPROFILE_OBJS := stub_segv.o
+CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
+
 include arch/um/scripts/Makefile.rules
 
-$(obj)/stub_segv.o $(obj)/unmap.o: \
-	_c_flags = $(call unprofile,$(CFLAGS))
+$(obj)/unmap.%: _c_flags = $(call unprofile,$(CFLAGS))
-- 
cgit v0.10.2


From cead61a6717a9873426b08d73a34a325e3546f5d Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Mon, 1 May 2006 12:16:06 -0700
Subject: [PATCH] uml: export symbols added by GCC hardened

GCC hardened introduces additional symbol refererences (for the canary and
friends), also in modules - add weak export_symbols for them.  We already
tested that the weak declaration creates no problem on both GCC's providing
the function definition and on GCC's which don't provide it.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Acked-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c
index 2598158..3f33165 100644
--- a/arch/um/os-Linux/user_syms.c
+++ b/arch/um/os-Linux/user_syms.c
@@ -96,6 +96,13 @@ EXPORT_SYMBOL_PROTO(getuid);
 EXPORT_SYMBOL_PROTO(fsync);
 EXPORT_SYMBOL_PROTO(fdatasync);
 
+/* Export symbols used by GCC for the stack protector. */
+extern void __stack_smash_handler(void *) __attribute__((weak));
+EXPORT_SYMBOL(__stack_smash_handler);
+
+extern long __guard __attribute__((weak));
+EXPORT_SYMBOL(__guard);
+
 /*
  * Overrides for Emacs so that we follow Linus's tabbing style.
  * Emacs will notice this stuff at the end of the file and automatically
-- 
cgit v0.10.2


From a4b741e380cb56045ccea36f2dbc10e42af21f24 Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 1 May 2006 12:16:06 -0700
Subject: [PATCH] uml: uml-makefile-nicer uses SYMLINK incorrectly

Blaisorblade's uml-makefile-nicer makes a V=0 build say SYMLINK where
what's happening is really a LINK.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Acked-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/Makefile b/arch/um/Makefile
index bed604a..f6ad832 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -96,7 +96,7 @@ PHONY += linux
 all: linux
 
 linux: vmlinux
-	@echo '  SYMLINK $@'
+	@echo '  LINK $@'
 	$(Q)ln -f $< $@
 
 define archhelp
-- 
cgit v0.10.2


From 4c28f81193b6778f7b49090930d88e6d12bcb928 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 1 May 2006 12:16:08 -0700
Subject: [PATCH] page migration: Fix fallback behavior for dirty pages

Currently we check PageDirty() in order to make the decision to swap out
the page.  However, the dirty information may be only be contained in the
ptes pointing to the page.  We need to first unmap the ptes before checking
for PageDirty().  If unmap is successful then the page count of the page
will also be decreased so that pageout() works properly.

This is a fix necessary for 2.6.17.  Without this fix we may migrate dirty
pages for filesystems without migration functions.  Filesystems may keep
pointers to dirty pages.  Migration of dirty pages can result in the
filesystem keeping pointers to freed pages.

Unmapping is currently not be separated out from removing all the
references to a page and moving the mapping.  Therefore try_to_unmap will
be called again in migrate_page() if the writeout is successful.  However,
it wont do anything since the ptes are already removed.

The coming updates to the page migration code will restructure the code
so that this is no longer necessary.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/mm/migrate.c b/mm/migrate.c
index d444229..1c25040 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -439,6 +439,17 @@ redo:
 			goto unlock_both;
                 }
 
+		/* Make sure the dirty bit is up to date */
+		if (try_to_unmap(page, 1) == SWAP_FAIL) {
+			rc = -EPERM;
+			goto unlock_both;
+		}
+
+		if (page_mapcount(page)) {
+			rc = -EAGAIN;
+			goto unlock_both;
+		}
+
 		/*
 		 * Default handling if a filesystem does not provide
 		 * a migration function. We can only migrate clean
-- 
cgit v0.10.2


From 2c43630fb0ff3f01c29367248ffa4a851e2c9b34 Mon Sep 17 00:00:00 2001
From: Pat Gefre <pfg@sgi.com>
Date: Mon, 1 May 2006 12:16:08 -0700
Subject: [PATCH] Altix: correct ioc3 port order

Currently loading the ioc3 as a module will cause the ports to be numbered
in reverse order.  This mod maintains the proper order of cards for port
numbering.

Signed-off-by: Patrick Gefre <pfg@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/sn/ioc3.c b/drivers/sn/ioc3.c
index 0b49ff7..501316b 100644
--- a/drivers/sn/ioc3.c
+++ b/drivers/sn/ioc3.c
@@ -678,7 +678,7 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id)
 	/* Track PCI-device specific data */
 	pci_set_drvdata(pdev, idd);
 	down_write(&ioc3_devices_rwsem);
-	list_add(&idd->list, &ioc3_devices);
+	list_add_tail(&idd->list, &ioc3_devices);
 	idd->id = ioc3_counter++;
 	up_write(&ioc3_devices_rwsem);
 
-- 
cgit v0.10.2


From 46a66eecdf7bc12562ecb492297447ed0e1ecf59 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mjkravetz@verizon.net>
Date: Mon, 1 May 2006 12:16:09 -0700
Subject: [PATCH] sparsemem interaction with memory add bug fixes

This patch fixes two bugs with the way sparsemem interacts with memory add.
They are:

- memory leak if memmap for section already exists

- calling alloc_bootmem_node() after boot

These bugs were discovered and a first cut at the fixes were provided by
Arnd Bergmann <arnd@arndb.de> and Joel Schopp <jschopp@us.ibm.com>.

Signed-off-by: Mike Kravetz <kravetz@us.ibm.com>
Signed-off-by: Joel Schopp <jschopp@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/mm/sparse.c b/mm/sparse.c
index 0a51f36..d7c32de 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -32,7 +32,10 @@ static struct mem_section *sparse_index_alloc(int nid)
 	unsigned long array_size = SECTIONS_PER_ROOT *
 				   sizeof(struct mem_section);
 
-	section = alloc_bootmem_node(NODE_DATA(nid), array_size);
+	if (system_state == SYSTEM_RUNNING)
+		section = kmalloc_node(array_size, GFP_KERNEL, nid);
+	else
+		section = alloc_bootmem_node(NODE_DATA(nid), array_size);
 
 	if (section)
 		memset(section, 0, array_size);
@@ -281,9 +284,9 @@ int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 
 	ret = sparse_init_one_section(ms, section_nr, memmap);
 
-	if (ret <= 0)
-		__kfree_section_memmap(memmap, nr_pages);
 out:
 	pgdat_resize_unlock(pgdat, &flags);
+	if (ret <= 0)
+		__kfree_section_memmap(memmap, nr_pages);
 	return ret;
 }
-- 
cgit v0.10.2


From bed120c64eb07b6838bb758109811484af8cebba Mon Sep 17 00:00:00 2001
From: Joel H Schopp <jschopp@us.ibm.com>
Date: Mon, 1 May 2006 12:16:11 -0700
Subject: [PATCH] spufs: fix for CONFIG_NUMA

Based on an older patch from  Mike Kravetz <kravetz@us.ibm.com>

We need to have a mem_map for high addresses in order to make fops->no_page
work on spufs mem and register files.  So far, we have used the
memory_present() function during early bootup, but that did not work when
CONFIG_NUMA was enabled.

We now use the __add_pages() function to add the mem_map when loading the
spufs module, which is a lot nicer.

Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index c2a3db8..6a02d51 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -12,7 +12,8 @@ config SPU_FS
 
 config SPUFS_MMAP
 	bool
-	depends on SPU_FS && SPARSEMEM && !PPC_64K_PAGES
+	depends on SPU_FS && SPARSEMEM
+	select MEMORY_HOTPLUG
 	default y
 
 endmenu
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index dac5d03..6574b22 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -29,6 +29,8 @@
 #include <linux/seq_file.h>
 #include <linux/root_dev.h>
 #include <linux/console.h>
+#include <linux/mutex.h>
+#include <linux/memory_hotplug.h>
 
 #include <asm/mmu.h>
 #include <asm/processor.h>
@@ -46,6 +48,7 @@
 #include <asm/cputable.h>
 #include <asm/ppc-pci.h>
 #include <asm/irq.h>
+#include <asm/spu.h>
 
 #include "interrupt.h"
 #include "iommu.h"
@@ -69,77 +72,6 @@ static void cell_show_cpuinfo(struct seq_file *m)
 	of_node_put(root);
 }
 
-#ifdef CONFIG_SPARSEMEM
-static int __init find_spu_node_id(struct device_node *spe)
-{
-	unsigned int *id;
-#ifdef CONFIG_NUMA
-	struct device_node *cpu;
-	cpu = spe->parent->parent;
-	id = (unsigned int *)get_property(cpu, "node-id", NULL);
-#else
-	id = NULL;
-#endif
-	return id ? *id : 0;
-}
-
-static void __init cell_spuprop_present(struct device_node *spe,
-				       const char *prop, int early)
-{
-	struct address_prop {
-		unsigned long address;
-		unsigned int len;
-	} __attribute__((packed)) *p;
-	int proplen;
-
-	unsigned long start_pfn, end_pfn, pfn;
-	int node_id;
-
-	p = (void*)get_property(spe, prop, &proplen);
-	WARN_ON(proplen != sizeof (*p));
-
-	node_id = find_spu_node_id(spe);
-
-	start_pfn = p->address >> PAGE_SHIFT;
-	end_pfn = (p->address + p->len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-
-	/* We need to call memory_present *before* the call to sparse_init,
-	   but we can initialize the page structs only *after* that call.
-	   Thus, we're being called twice. */
-	if (early)
-		memory_present(node_id, start_pfn, end_pfn);
-	else {
-		/* As the pages backing SPU LS and I/O are outside the range
-		   of regular memory, their page structs were not initialized
-		   by free_area_init. Do it here instead. */
-		for (pfn = start_pfn; pfn < end_pfn; pfn++) {
-			struct page *page = pfn_to_page(pfn);
-			set_page_links(page, ZONE_DMA, node_id, pfn);
-			init_page_count(page);
-			reset_page_mapcount(page);
-			SetPageReserved(page);
-			INIT_LIST_HEAD(&page->lru);
-		}
-	}
-}
-
-static void __init cell_spumem_init(int early)
-{
-	struct device_node *node;
-	for (node = of_find_node_by_type(NULL, "spe");
-			node; node = of_find_node_by_type(node, "spe")) {
-		cell_spuprop_present(node, "local-store", early);
-		cell_spuprop_present(node, "problem", early);
-		cell_spuprop_present(node, "priv1", early);
-		cell_spuprop_present(node, "priv2", early);
-	}
-}
-#else
-static void __init cell_spumem_init(int early)
-{
-}
-#endif
-
 static void cell_progress(char *s, unsigned short hex)
 {
 	printk("*** %04x : %s\n", hex, s ? s : "");
@@ -172,8 +104,6 @@ static void __init cell_setup_arch(void)
 #endif
 
 	mmio_nvram_init();
-
-	cell_spumem_init(0);
 }
 
 /*
@@ -189,8 +119,6 @@ static void __init cell_init_early(void)
 
 	ppc64_interrupt_controller = IC_CELL_PIC;
 
-	cell_spumem_init(1);
-
 	DBG(" <- cell_init_early()\n");
 }
 
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index ef47a62..b788e16 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -520,6 +520,56 @@ void spu_irq_setaffinity(struct spu *spu, int cpu)
 }
 EXPORT_SYMBOL_GPL(spu_irq_setaffinity);
 
+static int __init find_spu_node_id(struct device_node *spe)
+{
+	unsigned int *id;
+	struct device_node *cpu;
+	cpu = spe->parent->parent;
+	id = (unsigned int *)get_property(cpu, "node-id", NULL);
+	return id ? *id : 0;
+}
+
+static int __init cell_spuprop_present(struct device_node *spe,
+					const char *prop)
+{
+	static DEFINE_MUTEX(add_spumem_mutex);
+
+	struct address_prop {
+		unsigned long address;
+		unsigned int len;
+	} __attribute__((packed)) *p;
+	int proplen;
+
+	unsigned long start_pfn, nr_pages;
+	int node_id;
+	struct pglist_data *pgdata;
+	struct zone *zone;
+	int ret;
+
+	p = (void*)get_property(spe, prop, &proplen);
+	WARN_ON(proplen != sizeof (*p));
+
+	start_pfn = p->address >> PAGE_SHIFT;
+	nr_pages = ((unsigned long)p->len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+	/*
+	 * XXX need to get the correct NUMA node in here. This may
+	 * be different from the spe::node_id property, e.g. when
+	 * the host firmware is not NUMA aware.
+	 */
+	node_id = 0;
+
+	pgdata = NODE_DATA(node_id);
+	zone = pgdata->node_zones;
+
+	/* XXX rethink locking here */
+	mutex_lock(&add_spumem_mutex);
+	ret = __add_pages(zone, start_pfn, nr_pages);
+	mutex_unlock(&add_spumem_mutex);
+
+	return ret;
+}
+
 static void __iomem * __init map_spe_prop(struct device_node *n,
 						 const char *name)
 {
@@ -530,6 +580,8 @@ static void __iomem * __init map_spe_prop(struct device_node *n,
 
 	void *p;
 	int proplen;
+	void* ret = NULL;
+	int err = 0;
 
 	p = get_property(n, name, &proplen);
 	if (proplen != sizeof (struct address_prop))
@@ -537,7 +589,14 @@ static void __iomem * __init map_spe_prop(struct device_node *n,
 
 	prop = p;
 
-	return ioremap(prop->address, prop->len);
+	err = cell_spuprop_present(n, name);
+	if (err && (err != -EEXIST))
+		goto out;
+
+	ret = ioremap(prop->address, prop->len);
+
+ out:
+	return ret;
 }
 
 static void spu_unmap(struct spu *spu)
@@ -597,17 +656,6 @@ out:
 	return ret;
 }
 
-static int __init find_spu_node_id(struct device_node *spe)
-{
-	unsigned int *id;
-	struct device_node *cpu;
-
-	cpu = spe->parent->parent;
-	id = (unsigned int *)get_property(cpu, "node-id", NULL);
-
-	return id ? *id : 0;
-}
-
 static int __init create_spu(struct device_node *spe)
 {
 	struct spu *spu;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1fe76d9..1ae2b2c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -69,12 +69,16 @@ int __add_pages(struct zone *zone, unsigned long phys_start_pfn,
 	for (i = 0; i < nr_pages; i += PAGES_PER_SECTION) {
 		err = __add_section(zone, phys_start_pfn + i);
 
-		if (err)
+		/* We want to keep adding the rest of the
+		 * sections if the first ones already exist
+		 */
+		if (err && (err != -EEXIST))
 			break;
 	}
 
 	return err;
 }
+EXPORT_SYMBOL_GPL(__add_pages);
 
 static void grow_zone_span(struct zone *zone,
 		unsigned long start_pfn, unsigned long end_pfn)
-- 
cgit v0.10.2


From 953039c8df7beb2694814e20e2707a77d335a2e3 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Mon, 1 May 2006 12:16:12 -0700
Subject: [PATCH] powerpc: Allow devices to register with numa topology

Change of_node_to_nid() to traverse the device tree, looking for a numa id.
Cell uses this to assign ids to SPUs, which are children of the CPU node.
Existing users of of_node_to_nid() are altered to use of_node_to_nid_single(),
which doesn't do the traversal.

Export an attach_sysdev_to_node() function, allowing system devices (eg.
SPUs) to link themselves into the numa topology in sysfs.

Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index ed737ca..5bc2585 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -322,13 +322,31 @@ static void register_nodes(void)
 		}
 	}
 }
+
+int sysfs_add_device_to_node(struct sys_device *dev, int nid)
+{
+	struct node *node = &node_devices[nid];
+	return sysfs_create_link(&node->sysdev.kobj, &dev->kobj,
+			kobject_name(&dev->kobj));
+}
+
+void sysfs_remove_device_from_node(struct sys_device *dev, int nid)
+{
+	struct node *node = &node_devices[nid];
+	sysfs_remove_link(&node->sysdev.kobj, kobject_name(&dev->kobj));
+}
+
 #else
 static void register_nodes(void)
 {
 	return;
 }
+
 #endif
 
+EXPORT_SYMBOL_GPL(sysfs_add_device_to_node);
+EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node);
+
 /* Only valid if CPU is present. */
 static ssize_t show_physical_id(struct sys_device *dev, char *buf)
 {
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 0a335f3..092355f 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -194,7 +194,7 @@ static int *of_get_associativity(struct device_node *dev)
 /* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
  * info is found.
  */
-static int of_node_to_nid(struct device_node *device)
+static int of_node_to_nid_single(struct device_node *device)
 {
 	int nid = -1;
 	unsigned int *tmp;
@@ -216,6 +216,28 @@ out:
 	return nid;
 }
 
+/* Walk the device tree upwards, looking for an associativity id */
+int of_node_to_nid(struct device_node *device)
+{
+	struct device_node *tmp;
+	int nid = -1;
+
+	of_node_get(device);
+	while (device) {
+		nid = of_node_to_nid_single(device);
+		if (nid != -1)
+			break;
+
+	        tmp = device;
+		device = of_get_parent(tmp);
+		of_node_put(tmp);
+	}
+	of_node_put(device);
+
+	return nid;
+}
+EXPORT_SYMBOL_GPL(of_node_to_nid);
+
 /*
  * In theory, the "ibm,associativity" property may contain multiple
  * associativity lists because a resource may be multiply connected
@@ -300,7 +322,7 @@ static int __cpuinit numa_setup_cpu(unsigned long lcpu)
 		goto out;
 	}
 
-	nid = of_node_to_nid(cpu);
+	nid = of_node_to_nid_single(cpu);
 
 	if (nid < 0 || !node_online(nid))
 		nid = any_online_node(NODE_MASK_ALL);
@@ -393,7 +415,7 @@ static int __init parse_numa_properties(void)
 
 		cpu = find_cpu_node(i);
 		BUG_ON(!cpu);
-		nid = of_node_to_nid(cpu);
+		nid = of_node_to_nid_single(cpu);
 		of_node_put(cpu);
 
 		/*
@@ -437,7 +459,7 @@ new_range:
 		 * have associativity properties.  If none, then
 		 * everything goes to default_nid.
 		 */
-		nid = of_node_to_nid(memory);
+		nid = of_node_to_nid_single(memory);
 		if (nid < 0)
 			nid = default_nid;
 		node_set_online(nid);
@@ -776,7 +798,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
 ha_new_range:
 		start = read_n_cells(n_mem_addr_cells, &memcell_buf);
 		size = read_n_cells(n_mem_size_cells, &memcell_buf);
-		nid = of_node_to_nid(memory);
+		nid = of_node_to_nid_single(memory);
 
 		/* Domains not present at boot default to 0 */
 		if (nid < 0 || !node_online(nid))
diff --git a/include/asm-powerpc/topology.h b/include/asm-powerpc/topology.h
index 1e19cd0..87362a0 100644
--- a/include/asm-powerpc/topology.h
+++ b/include/asm-powerpc/topology.h
@@ -4,6 +4,9 @@
 
 #include <linux/config.h>
 
+struct sys_device;
+struct device_node;
+
 #ifdef CONFIG_NUMA
 
 #include <asm/mmzone.h>
@@ -27,6 +30,8 @@ static inline int node_to_first_cpu(int node)
 	return first_cpu(tmp);
 }
 
+int of_node_to_nid(struct device_node *device);
+
 #define pcibus_to_node(node)    (-1)
 #define pcibus_to_cpumask(bus)	(cpu_online_map)
 
@@ -57,10 +62,29 @@ static inline int node_to_first_cpu(int node)
 
 extern void __init dump_numa_cpu_topology(void);
 
+extern int sysfs_add_device_to_node(struct sys_device *dev, int nid);
+extern void sysfs_remove_device_from_node(struct sys_device *dev, int nid);
+
 #else
 
+static inline int of_node_to_nid(struct device_node *device)
+{
+	return 0;
+}
+
 static inline void dump_numa_cpu_topology(void) {}
 
+static inline int sysfs_add_device_to_node(struct sys_device *dev, int nid)
+{
+	return 0;
+}
+
+static inline void sysfs_remove_device_from_node(struct sys_device *dev,
+						int nid)
+{
+}
+
+
 #include <asm-generic/topology.h>
 
 #endif /* CONFIG_NUMA */
-- 
cgit v0.10.2


From 8261aa600943046a5305564a1cd7432009971799 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Mon, 1 May 2006 12:16:13 -0700
Subject: [PATCH] powerpc: cell: Add numa id to struct spu

Add an nid member to the spu structure, and store the numa id of the spu there
on creation.

Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index b788e16..ad141fe 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -529,8 +529,8 @@ static int __init find_spu_node_id(struct device_node *spe)
 	return id ? *id : 0;
 }
 
-static int __init cell_spuprop_present(struct device_node *spe,
-					const char *prop)
+static int __init cell_spuprop_present(struct spu *spu, struct device_node *spe,
+		const char *prop)
 {
 	static DEFINE_MUTEX(add_spumem_mutex);
 
@@ -541,7 +541,6 @@ static int __init cell_spuprop_present(struct device_node *spe,
 	int proplen;
 
 	unsigned long start_pfn, nr_pages;
-	int node_id;
 	struct pglist_data *pgdata;
 	struct zone *zone;
 	int ret;
@@ -552,14 +551,7 @@ static int __init cell_spuprop_present(struct device_node *spe,
 	start_pfn = p->address >> PAGE_SHIFT;
 	nr_pages = ((unsigned long)p->len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
-	/*
-	 * XXX need to get the correct NUMA node in here. This may
-	 * be different from the spe::node_id property, e.g. when
-	 * the host firmware is not NUMA aware.
-	 */
-	node_id = 0;
-
-	pgdata = NODE_DATA(node_id);
+	pgdata = NODE_DATA(spu->nid);
 	zone = pgdata->node_zones;
 
 	/* XXX rethink locking here */
@@ -570,8 +562,8 @@ static int __init cell_spuprop_present(struct device_node *spe,
 	return ret;
 }
 
-static void __iomem * __init map_spe_prop(struct device_node *n,
-						 const char *name)
+static void __iomem * __init map_spe_prop(struct spu *spu,
+		struct device_node *n, const char *name)
 {
 	struct address_prop {
 		unsigned long address;
@@ -589,7 +581,7 @@ static void __iomem * __init map_spe_prop(struct device_node *n,
 
 	prop = p;
 
-	err = cell_spuprop_present(n, name);
+	err = cell_spuprop_present(spu, n, name);
 	if (err && (err != -EEXIST))
 		goto out;
 
@@ -607,44 +599,45 @@ static void spu_unmap(struct spu *spu)
 	iounmap((u8 __iomem *)spu->local_store);
 }
 
-static int __init spu_map_device(struct spu *spu, struct device_node *spe)
+static int __init spu_map_device(struct spu *spu, struct device_node *node)
 {
 	char *prop;
 	int ret;
 
 	ret = -ENODEV;
-	prop = get_property(spe, "isrc", NULL);
+	prop = get_property(node, "isrc", NULL);
 	if (!prop)
 		goto out;
 	spu->isrc = *(unsigned int *)prop;
 
-	spu->name = get_property(spe, "name", NULL);
+	spu->name = get_property(node, "name", NULL);
 	if (!spu->name)
 		goto out;
 
-	prop = get_property(spe, "local-store", NULL);
+	prop = get_property(node, "local-store", NULL);
 	if (!prop)
 		goto out;
 	spu->local_store_phys = *(unsigned long *)prop;
 
 	/* we use local store as ram, not io memory */
-	spu->local_store = (void __force *)map_spe_prop(spe, "local-store");
+	spu->local_store = (void __force *)
+		map_spe_prop(spu, node, "local-store");
 	if (!spu->local_store)
 		goto out;
 
-	prop = get_property(spe, "problem", NULL);
+	prop = get_property(node, "problem", NULL);
 	if (!prop)
 		goto out_unmap;
 	spu->problem_phys = *(unsigned long *)prop;
 
-	spu->problem= map_spe_prop(spe, "problem");
+	spu->problem= map_spe_prop(spu, node, "problem");
 	if (!spu->problem)
 		goto out_unmap;
 
-	spu->priv1= map_spe_prop(spe, "priv1");
+	spu->priv1= map_spe_prop(spu, node, "priv1");
 	/* priv1 is not available on a hypervisor */
 
-	spu->priv2= map_spe_prop(spe, "priv2");
+	spu->priv2= map_spe_prop(spu, node, "priv2");
 	if (!spu->priv2)
 		goto out_unmap;
 	ret = 0;
@@ -672,6 +665,10 @@ static int __init create_spu(struct device_node *spe)
 		goto out_free;
 
 	spu->node = find_spu_node_id(spe);
+	spu->nid = of_node_to_nid(spe);
+	if (spu->nid == -1)
+		spu->nid = 0;
+
 	spu->stop_code = 0;
 	spu->slb_replace = 0;
 	spu->mm = NULL;
diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h
index f431d8b0..7cfcff3 100644
--- a/include/asm-powerpc/spu.h
+++ b/include/asm-powerpc/spu.h
@@ -117,6 +117,7 @@ struct spu {
 	struct list_head list;
 	struct list_head sched_list;
 	int number;
+	int nid;
 	u32 isrc;
 	u32 node;
 	u64 flags;
-- 
cgit v0.10.2


From 022e4fc0fb2e4e179aaba4ee139dcb8fded0cba2 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 1 May 2006 12:16:14 -0700
Subject: [PATCH] s390: fix ipd handling

As pointed out by Paulo Marques <pmarques@grupopie.com> MAX_IPD_TIME is by
a factor of ten too small.  Since this means that we allow ten times more
IPDs in the intended time frame this could result in a cpu check stop of a
physical cpu.

Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index 5ae1480..f99e553 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -13,6 +13,7 @@
 #include <linux/sched.h>
 #include <linux/errno.h>
 #include <linux/workqueue.h>
+#include <linux/time.h>
 
 #include <asm/lowcore.h>
 
@@ -363,7 +364,7 @@ s390_revalidate_registers(struct mci *mci)
 }
 
 #define MAX_IPD_COUNT	29
-#define MAX_IPD_TIME	(5 * 60 * 100 * 1000) /* 5 minutes */
+#define MAX_IPD_TIME	(5 * 60 * USEC_PER_SEC) /* 5 minutes */
 
 /*
  * machine check handler.
-- 
cgit v0.10.2


From b44df334a7e909d88cf5c54cc0481b4e2eaeca23 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 1 May 2006 12:16:15 -0700
Subject: [PATCH] s390: bug in setup_rt_frame

Consider return value of __put_user() when setting up a signal frame
instead of ignoring it.

Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index ae1927e..d48cfc7 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -358,8 +358,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	} else {
                 regs->gprs[14] = (unsigned long)
 			frame->retcode | PSW_ADDR_AMODE;
-		err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
-	                          (u16 __user *)(frame->retcode));
+		if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
+			       (u16 __user *)(frame->retcode)))
+			goto give_sigsegv;
 	}
 
 	/* Set up backchain. */
-- 
cgit v0.10.2


From 3418ff76119da52f808eb496191d1fd380f53f3d Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Mon, 1 May 2006 12:16:16 -0700
Subject: [PATCH] RTC: rtc-dev tweak for 64-bit kernel

Make rtc-dev work well on 64-bit platforms with 32-bit userland.  On those
platforms, users might try to read 32-bit integer value.  This patch make
rtc-dev's read() work well for both "int" and "long" size.  This tweak is came
from genrtc driver.

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index b1e3e61..6c9ad92 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -58,7 +58,7 @@ rtc_dev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	unsigned long data;
 	ssize_t ret;
 
-	if (count < sizeof(unsigned long))
+	if (count != sizeof(unsigned int) && count < sizeof(unsigned long))
 		return -EINVAL;
 
 	add_wait_queue(&rtc->irq_queue, &wait);
@@ -90,11 +90,16 @@ rtc_dev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	if (ret == 0) {
 		/* Check for any data updates */
 		if (rtc->ops->read_callback)
-			data = rtc->ops->read_callback(rtc->class_dev.dev, data);
-
-		ret = put_user(data, (unsigned long __user *)buf);
-		if (ret == 0)
-			ret = sizeof(unsigned long);
+			data = rtc->ops->read_callback(rtc->class_dev.dev,
+						       data);
+
+		if (sizeof(int) != sizeof(long) &&
+		    count == sizeof(unsigned int))
+			ret = put_user(data, (unsigned int __user *)buf) ?:
+				sizeof(unsigned int);
+		else
+			ret = put_user(data, (unsigned long __user *)buf) ?:
+				sizeof(unsigned long);
 	}
 	return ret;
 }
-- 
cgit v0.10.2


From f3537ea7b9c2f10397a8b68cd006981d7c615431 Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Mon, 1 May 2006 12:16:17 -0700
Subject: [PATCH] genrtc: fix read on 64-bit platforms

Fix genrtc's read() routine for 64-bit platforms.  Current gen_rtc_read()
stores 64bit integer and returns 8 even if an user tried to read a 32bit
integer.

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/char/genrtc.c b/drivers/char/genrtc.c
index d3a2bc3..588fca5 100644
--- a/drivers/char/genrtc.c
+++ b/drivers/char/genrtc.c
@@ -200,13 +200,13 @@ static ssize_t gen_rtc_read(struct file *file, char __user *buf,
 	/* first test allows optimizer to nuke this case for 32-bit machines */
 	if (sizeof (int) != sizeof (long) && count == sizeof (unsigned int)) {
 		unsigned int uidata = data;
-		retval = put_user(uidata, (unsigned long __user *)buf);
+		retval = put_user(uidata, (unsigned int __user *)buf) ?:
+			sizeof(unsigned int);
 	}
 	else {
-		retval = put_user(data, (unsigned long __user *)buf);
+		retval = put_user(data, (unsigned long __user *)buf) ?:
+			sizeof(unsigned long);
 	}
-	if (!retval)
-		retval = sizeof(unsigned long);
  out:
 	current->state = TASK_RUNNING;
 	remove_wait_queue(&gen_rtc_wait, &wait);
-- 
cgit v0.10.2


From 160bd18e5e545cbb4e5c26f54414485f8ac291ec Mon Sep 17 00:00:00 2001
From: Mikael Pettersson <mikpe@it.uu.se>
Date: Mon, 1 May 2006 12:16:18 -0700
Subject: [PATCH] x86_64: make PC Speaker driver work

The PC Speaker driver's ->probe() routine doesn't even get called in the
64-bit kernels.  The reason for that is that the arch code apparently has
to explictly add a "pcspkr" platform device in order for the driver core to
call the ->probe() routine.  arch/i386/kernel/setup.c unconditionally adds
a "pcspkr" device, but the x86_64 kernel has no code at all related to the
PC Speaker.

The patch below copies the relevant code from i386 to x86_64, which makes
the PC Speaker work for me on x86_64.

Cc: Dmitry Torokhov <dtor_core@ameritech.net>
Acked-by: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 759070c..ebc3c33 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -1426,3 +1426,22 @@ struct seq_operations cpuinfo_op = {
 	.show =	show_cpuinfo,
 };
 
+#ifdef CONFIG_INPUT_PCSPKR
+#include <linux/platform_device.h>
+static __init int add_pcspkr(void)
+{
+	struct platform_device *pd;
+	int ret;
+
+	pd = platform_device_alloc("pcspkr", -1);
+	if (!pd)
+		return -ENOMEM;
+
+	ret = platform_device_add(pd);
+	if (ret)
+		platform_device_put(pd);
+
+	return ret;
+}
+device_initcall(add_pcspkr);
+#endif
-- 
cgit v0.10.2


From 6ba815ded3fef03e888a9fc8eae3113938ff5349 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shaohua.li@intel.com>
Date: Mon, 1 May 2006 12:16:19 -0700
Subject: [PATCH] timer TSC check suspend notifier change

At suspend time, the TSC CPUFREQ_SUSPENDCHANGE notifier change might
wrongly enable interrupt.  cpufreq driver suspend/resume is in interrupt
disabled environment.

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
index 5e41ee2..f1187dd 100644
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -279,7 +279,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 {
 	struct cpufreq_freqs *freq = data;
 
-	if (val != CPUFREQ_RESUMECHANGE)
+	if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
 		write_seqlock_irq(&xtime_lock);
 	if (!ref_freq) {
 		if (!freq->old){
@@ -312,7 +312,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 	}
 
 end:
-	if (val != CPUFREQ_RESUMECHANGE)
+	if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
 		write_sequnlock_irq(&xtime_lock);
 
 	return 0;
-- 
cgit v0.10.2


From 46c5ea3c9ae7fbc6e52a13c92e59d4fc7f4ca80a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 2 May 2006 05:12:22 +0200
Subject: [NETFILTER] x_tables: fix compat related crash on non-x86

When iptables userspace adds an ipt_standard_target, it calculates the size
of the entire entry as:

sizeof(struct ipt_entry) + XT_ALIGN(sizeof(struct ipt_standard_target))

ipt_standard_target looks like this:

  struct xt_standard_target
  {
        struct xt_entry_target target;
        int verdict;
  };

xt_entry_target contains a pointer, so when compiled for 64 bit the
structure gets an extra 4 byte of padding at the end. On 32 bit
architectures where iptables aligns to 8 byte it will also have 4
byte padding at the end because it is only 36 bytes large.

The compat_ipt_standard_fn in the kernel adjusts the offsets by

  sizeof(struct ipt_standard_target) - sizeof(struct compat_ipt_standard_target),

which will always result in 4, even if the structure from userspace
was already padded to a multiple of 8. On x86 this works out by
accident because userspace only aligns to 4, on all other
architectures this is broken and causes incorrect adjustments to
the size and following offsets.

Thanks to Linus for lots of debugging help and testing.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 3870145..48cc32d8 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -337,6 +337,10 @@ struct compat_xt_entry_match
 			char name[XT_FUNCTION_MAXNAMELEN - 1];
 			u_int8_t revision;
 		} user;
+		struct {
+			u_int16_t match_size;
+			compat_uptr_t match;
+		} kernel;
 		u_int16_t match_size;
 	} u;
 	unsigned char data[0];
@@ -350,6 +354,10 @@ struct compat_xt_entry_target
 			char name[XT_FUNCTION_MAXNAMELEN - 1];
 			u_int8_t revision;
 		} user;
+		struct {
+			u_int16_t target_size;
+			compat_uptr_t target;
+		} kernel;
 		u_int16_t target_size;
 	} u;
 	unsigned char data[0];
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d25ac8b..6d1c115 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -956,15 +956,16 @@ struct compat_ipt_standard_target
 	compat_int_t verdict;
 };
 
-#define IPT_ST_OFFSET	(sizeof(struct ipt_standard_target) - \
-				sizeof(struct compat_ipt_standard_target))
-
 struct compat_ipt_standard
 {
 	struct compat_ipt_entry entry;
 	struct compat_ipt_standard_target target;
 };
 
+#define IPT_ST_LEN		XT_ALIGN(sizeof(struct ipt_standard_target))
+#define IPT_ST_COMPAT_LEN	COMPAT_XT_ALIGN(sizeof(struct compat_ipt_standard_target))
+#define IPT_ST_OFFSET		(IPT_ST_LEN - IPT_ST_COMPAT_LEN)
+
 static int compat_ipt_standard_fn(void *target,
 		void **dstptr, int *size, int convert)
 {
@@ -975,35 +976,29 @@ static int compat_ipt_standard_fn(void *target,
 	ret = 0;
 	switch (convert) {
 		case COMPAT_TO_USER:
-			pst = (struct ipt_standard_target *)target;
+			pst = target;
 			memcpy(&compat_st.target, &pst->target,
-					sizeof(struct ipt_entry_target));
+				sizeof(compat_st.target));
 			compat_st.verdict = pst->verdict;
 			if (compat_st.verdict > 0)
 				compat_st.verdict -=
 					compat_calc_jump(compat_st.verdict);
-			compat_st.target.u.user.target_size =
-			sizeof(struct compat_ipt_standard_target);
-			if (__copy_to_user(*dstptr, &compat_st,
-				sizeof(struct compat_ipt_standard_target)))
+			compat_st.target.u.user.target_size = IPT_ST_COMPAT_LEN;
+			if (copy_to_user(*dstptr, &compat_st, IPT_ST_COMPAT_LEN))
 				ret = -EFAULT;
 			*size -= IPT_ST_OFFSET;
-			*dstptr += sizeof(struct compat_ipt_standard_target);
+			*dstptr += IPT_ST_COMPAT_LEN;
 			break;
 		case COMPAT_FROM_USER:
-			pcompat_st =
-				(struct compat_ipt_standard_target *)target;
-			memcpy(&st.target, &pcompat_st->target,
-					sizeof(struct ipt_entry_target));
+			pcompat_st = target;
+			memcpy(&st.target, &pcompat_st->target, IPT_ST_COMPAT_LEN);
 			st.verdict = pcompat_st->verdict;
 			if (st.verdict > 0)
 				st.verdict += compat_calc_jump(st.verdict);
-			st.target.u.user.target_size =
-			sizeof(struct ipt_standard_target);
-			memcpy(*dstptr, &st,
-					sizeof(struct ipt_standard_target));
+			st.target.u.user.target_size = IPT_ST_LEN;
+			memcpy(*dstptr, &st, IPT_ST_LEN);
 			*size += IPT_ST_OFFSET;
-			*dstptr += sizeof(struct ipt_standard_target);
+			*dstptr += IPT_ST_LEN;
 			break;
 		case COMPAT_CALC_SIZE:
 			*size += IPT_ST_OFFSET;
-- 
cgit v0.10.2


From 7591489a8fbee83f19bacc75756989a6a4d0389c Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Tue, 2 May 2006 12:57:18 +0200
Subject: [PATCH] vmsplice: fix badly placed end paranthesis

We need to use the minium of {len, PAGE_SIZE-off}, not {len, PAGE_SIZE}-off.
The latter doesn't make any sense, and could cause us to attempt negative
length transfers...

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/splice.c b/fs/splice.c
index b150493..b0c157d 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1143,7 +1143,7 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 		 * Fill this contiguous range into the partial page map.
 		 */
 		for (i = 0; i < error; i++) {
-			const int plen = min_t(size_t, len, PAGE_SIZE) - off;
+			const int plen = min_t(size_t, len, PAGE_SIZE - off);
 
 			partial[buffers].offset = off;
 			partial[buffers].len = plen;
-- 
cgit v0.10.2


From a893b99be71f1d669b74f840e3a683dd077d007b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Tue, 2 May 2006 15:03:27 +0200
Subject: [PATCH] splice: fix page LRU accounting

Currently we rely on the PIPE_BUF_FLAG_LRU flag being set correctly
to know whether we need to fiddle with page LRU state after stealing it,
however for some origins we just don't know if the page is on the LRU
list or not.

So remove PIPE_BUF_FLAG_LRU and do this check/add manually in pipe_to_file()
instead.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/splice.c b/fs/splice.c
index b0c157d..3318b96 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -78,7 +78,6 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
 		return 1;
 	}
 
-	buf->flags |= PIPE_BUF_FLAG_LRU;
 	return 0;
 }
 
@@ -86,8 +85,6 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info,
 					struct pipe_buffer *buf)
 {
 	page_cache_release(buf->page);
-	buf->page = NULL;
-	buf->flags &= ~PIPE_BUF_FLAG_LRU;
 }
 
 static int page_cache_pipe_buf_pin(struct pipe_inode_info *info,
@@ -570,22 +567,36 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 	if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) {
 		/*
 		 * If steal succeeds, buf->page is now pruned from the vm
-		 * side (LRU and page cache) and we can reuse it. The page
-		 * will also be looked on successful return.
+		 * side (page cache) and we can reuse it. The page will also
+		 * be locked on successful return.
 		 */
 		if (buf->ops->steal(info, buf))
 			goto find_page;
 
 		page = buf->page;
+		page_cache_get(page);
+
+		/*
+		 * page must be on the LRU for adding to the pagecache.
+		 * Check this without grabbing the zone lock, if it isn't
+		 * the do grab the zone lock, recheck, and add if necessary.
+		 */
+		if (!PageLRU(page)) {
+			struct zone *zone = page_zone(page);
+
+			spin_lock_irq(&zone->lru_lock);
+			if (!PageLRU(page)) {
+				SetPageLRU(page);
+				add_page_to_inactive_list(zone, page);
+			}
+			spin_unlock_irq(&zone->lru_lock);
+		}
+
 		if (add_to_page_cache(page, mapping, index, gfp_mask)) {
+			page_cache_release(page);
 			unlock_page(page);
 			goto find_page;
 		}
-
-		page_cache_get(page);
-
-		if (!(buf->flags & PIPE_BUF_FLAG_LRU))
-			lru_cache_add(page);
 	} else {
 find_page:
 		page = find_lock_page(mapping, index);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index df4d3fa..070954f 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -5,9 +5,8 @@
 
 #define PIPE_BUFFERS (16)
 
-#define PIPE_BUF_FLAG_LRU	0x01	/* page is on the LRU */
-#define PIPE_BUF_FLAG_ATOMIC	0x02	/* was atomically mapped */
-#define PIPE_BUF_FLAG_GIFT	0x04	/* page is a gift */
+#define PIPE_BUF_FLAG_ATOMIC	0x01	/* was atomically mapped */
+#define PIPE_BUF_FLAG_GIFT	0x02	/* page is a gift */
 
 struct pipe_buffer {
 	struct page *page;
-- 
cgit v0.10.2


From 330ab71619bacc4d4494227a6cfc9b7f5500403d Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Tue, 2 May 2006 15:29:57 +0200
Subject: [PATCH] vmsplice: restrict stealing a little more

Apply the same rules as the anon pipe pages, only allow stealing
if no one else is using the page.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/pipe.c b/fs/pipe.c
index 3941a7f..5acd895 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -184,8 +184,8 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
 		kunmap(buf->page);
 }
 
-static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
-			       struct pipe_buffer *buf)
+int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
+			   struct pipe_buffer *buf)
 {
 	struct page *page = buf->page;
 
@@ -213,7 +213,7 @@ static struct pipe_buf_operations anon_pipe_buf_ops = {
 	.unmap = generic_pipe_buf_unmap,
 	.pin = generic_pipe_buf_pin,
 	.release = anon_pipe_buf_release,
-	.steal = anon_pipe_buf_steal,
+	.steal = generic_pipe_buf_steal,
 	.get = generic_pipe_buf_get,
 };
 
diff --git a/fs/splice.c b/fs/splice.c
index 3318b96..7fb0497 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -141,7 +141,7 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
 	if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
 		return 1;
 
-	return 0;
+	return generic_pipe_buf_steal(pipe, buf);
 }
 
 static struct pipe_buf_operations user_page_pipe_buf_ops = {
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 070954f..ba73108 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -69,6 +69,7 @@ void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int);
 void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *);
 void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *);
+int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
 
 /*
  * splice is tied to pipes as a transport (at least for now), so we'll just
-- 
cgit v0.10.2


From 37be4e7809e0581db85387e126ae4da68c3d6286 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Tue, 2 May 2006 17:24:59 +0100
Subject: [MMC] extend data timeout for writes

The CSD contains a "read2write factor" which determines the multiplier to
be applied to the read timeout to obtain the write timeout.  We were
ignoring this parameter, resulting in the possibility for writes being
timed out too early.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c
index da6ddd9..05aa4d6 100644
--- a/drivers/mmc/mmc.c
+++ b/drivers/mmc/mmc.c
@@ -549,6 +549,7 @@ static void mmc_decode_csd(struct mmc_card *card)
 		csd->read_partial = UNSTUFF_BITS(resp, 79, 1);
 		csd->write_misalign = UNSTUFF_BITS(resp, 78, 1);
 		csd->read_misalign = UNSTUFF_BITS(resp, 77, 1);
+		csd->r2w_factor = UNSTUFF_BITS(resp, 26, 3);
 		csd->write_blkbits = UNSTUFF_BITS(resp, 22, 4);
 		csd->write_partial = UNSTUFF_BITS(resp, 21, 1);
 	} else {
@@ -583,6 +584,7 @@ static void mmc_decode_csd(struct mmc_card *card)
 		csd->read_partial = UNSTUFF_BITS(resp, 79, 1);
 		csd->write_misalign = UNSTUFF_BITS(resp, 78, 1);
 		csd->read_misalign = UNSTUFF_BITS(resp, 77, 1);
+		csd->r2w_factor = UNSTUFF_BITS(resp, 26, 3);
 		csd->write_blkbits = UNSTUFF_BITS(resp, 22, 4);
 		csd->write_partial = UNSTUFF_BITS(resp, 21, 1);
 	}
diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c
index 8eb2a2e..06bd1f4 100644
--- a/drivers/mmc/mmc_block.c
+++ b/drivers/mmc/mmc_block.c
@@ -187,6 +187,12 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 			brq.cmd.opcode = MMC_WRITE_BLOCK;
 			brq.data.flags |= MMC_DATA_WRITE;
 			brq.data.blocks = 1;
+
+			/*
+			 * Scale up the timeout by the r2w factor
+			 */
+			brq.data.timeout_ns <<= card->csd.r2w_factor;
+			brq.data.timeout_clks <<= card->csd.r2w_factor;
 		}
 
 		if (brq.data.blocks > 1) {
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 30dd978..991a373 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -28,6 +28,7 @@ struct mmc_csd {
 	unsigned short		cmdclass;
 	unsigned short		tacc_clks;
 	unsigned int		tacc_ns;
+	unsigned int		r2w_factor;
 	unsigned int		max_dtr;
 	unsigned int		read_blkbits;
 	unsigned int		write_blkbits;
-- 
cgit v0.10.2


From 58741e8b3603e56c3699550e8bc89cb136329343 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Tue, 2 May 2006 20:02:39 +0100
Subject: [MMC] PXA and i.MX: don't avoid sending stop command on error

Always send a stop command at the end of a data transfer.  If we avoid
sending the stop command, some cards remain in data transfer mode, and
refuse to accept further read/write commands.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/mmc/imxmmc.c b/drivers/mmc/imxmmc.c
index 07f36c4..bc27192 100644
--- a/drivers/mmc/imxmmc.c
+++ b/drivers/mmc/imxmmc.c
@@ -529,7 +529,7 @@ static int imxmci_data_done(struct imxmci_host *host, unsigned int stat)
 
 	data_error = imxmci_finish_data(host, stat);
 
-	if (host->req->stop && (data_error == MMC_ERR_NONE)) {
+	if (host->req->stop) {
 		imxmci_stop_clock(host);
 		imxmci_start_cmd(host, host->req->stop, 0);
 	} else {
diff --git a/drivers/mmc/pxamci.c b/drivers/mmc/pxamci.c
index eb42cb3..15a5caa 100644
--- a/drivers/mmc/pxamci.c
+++ b/drivers/mmc/pxamci.c
@@ -291,7 +291,7 @@ static int pxamci_data_done(struct pxamci_host *host, unsigned int stat)
 	pxamci_disable_irq(host, DATA_TRAN_DONE);
 
 	host->data = NULL;
-	if (host->mrq->stop && data->error == MMC_ERR_NONE) {
+	if (host->mrq->stop) {
 		pxamci_stop_clock(host);
 		pxamci_start_cmd(host, host->mrq->stop, 0);
 	} else {
-- 
cgit v0.10.2


From d78e9079af7526c4661ff484322094832776ef41 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Tue, 2 May 2006 20:18:53 +0100
Subject: [MMC] PXA: reduce the number of lines PXAMCI debug uses

There's no reason for the PXAMCI debug code to print so many lines - it
causes the kernel buffer to overflow when trying to debug this driver.
Remove some debug messages which are duplicated by core code, and
combine other messages, resulting in fewer characters written to the
kernel log.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/mmc/pxamci.c b/drivers/mmc/pxamci.c
index 15a5caa..40970c4 100644
--- a/drivers/mmc/pxamci.c
+++ b/drivers/mmc/pxamci.c
@@ -198,7 +198,6 @@ static void pxamci_start_cmd(struct pxamci_host *host, struct mmc_command *cmd,
 
 static void pxamci_finish_request(struct pxamci_host *host, struct mmc_request *mrq)
 {
-	pr_debug("PXAMCI: request done\n");
 	host->mrq = NULL;
 	host->cmd = NULL;
 	host->data = NULL;
@@ -309,12 +308,10 @@ static irqreturn_t pxamci_irq(int irq, void *devid, struct pt_regs *regs)
 
 	ireg = readl(host->base + MMC_I_REG);
 
-	pr_debug("PXAMCI: irq %08x\n", ireg);
-
 	if (ireg) {
 		unsigned stat = readl(host->base + MMC_STAT);
 
-		pr_debug("PXAMCI: stat %08x\n", stat);
+		pr_debug("PXAMCI: irq %08x stat %08x\n", ireg, stat);
 
 		if (ireg & END_CMD_RES)
 			handled |= pxamci_cmd_done(host, stat);
@@ -368,7 +365,7 @@ static void pxamci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
 	struct pxamci_host *host = mmc_priv(mmc);
 
-	pr_debug("pxamci_set_ios: clock %u power %u vdd %u.%02u\n",
+	pr_debug("PXAMCI: clock %u power %u vdd %u.%02u\n",
 		 ios->clock, ios->power_mode, ios->vdd / 100,
 		 ios->vdd % 100);
 
@@ -397,7 +394,7 @@ static void pxamci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 			host->cmdat |= CMDAT_INIT;
 	}
 
-	pr_debug("pxamci_set_ios: clkrt = %x cmdat = %x\n",
+	pr_debug("PXAMCI: clkrt = %x cmdat = %x\n",
 		 host->clkrt, host->cmdat);
 }
 
-- 
cgit v0.10.2


From b0b8dab288590ede2377a671db0a31380f454541 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olh@suse.de>
Date: Thu, 27 Apr 2006 18:23:49 -0700
Subject: [PATCH] mv643xx_eth: provide sysfs class device symlink

On Sat, Mar 11, Olaf Hering wrote:
> Why is the /sys/class/net/eth0/device symlink not created for the
> mv643xx_eth driver? Does this work for other platform device drivers?
> Seems to work for the ps2 keyboard at least.

The SET_NETDEV_DEV has to be done before a call to register_netdev.  With
the new patch below, the device symlink for the platform device was
created.  Unfortunately, after the 4 ls commands, the network connection
died.  No idea if the box crashed or if something else broke, lost remote
access.

Provide sysfs 'device' in /class/net/ethN Also, set module owner field,
like pcnet32 driver does.

Signed-off-by: Olaf Hering <olh@suse.de>
Acked-by: Dale Farnsworth <dale@farnsworth.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Jeff Garzik <jeff@garzik.org>

diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index ea62a3e..411f4d8 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -1419,6 +1419,8 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
 	mv643xx_eth_update_pscr(dev, &cmd);
 	mv643xx_set_settings(dev, &cmd);
 
+	SET_MODULE_OWNER(dev);
+	SET_NETDEV_DEV(dev, &pdev->dev);
 	err = register_netdev(dev);
 	if (err)
 		goto out;
-- 
cgit v0.10.2


From 3e0d167a6b6e5722d7fadfad9b817f668ab41ec1 Mon Sep 17 00:00:00 2001
From: Craig Brind <craigbrind@gmail.com>
Date: Thu, 27 Apr 2006 02:30:46 -0700
Subject: [PATCH] via-rhine: zero pad short packets on Rhine I ethernet cards

Fixes Rhine I cards disclosing fragments of previously transmitted frames
in new transmissions.

Before transmission, any socket buffer (skb) shorter than the ethernet
minimum length of 60 bytes was zero-padded.  On Rhine I cards the data can
later be copied into an aligned transmission buffer without copying this
padding.  This resulted in the transmission of the frame with the extra
bytes beyond the provided content leaking the previous contents of this
buffer on to the network.

Now zero-padding is repeated in the local aligned buffer if one is used.

Following a suggestion from the via-rhine maintainer, no attempt is made
here to avoid the duplicated effort of padding the skb if it is known that
an aligned buffer will definitely be used.  This is to make the change
"obviously correct" and allow it to be applied to a stable kernel if
necessary.  There is no change to the flow of control and the changes are
only to the Rhine I code path.

The patch has run on an in-service Rhine-I host without incident.  Frames
shorter than 60 bytes are now correctly zero-padded when captured on a
separate host.  I see no unusual stats reported by ifconfig, and no unusual
log messages.

Signed-off-by: Craig Brind <craigbrind@gmail.com>
Signed-off-by: Roger Luethi <rl@hellgate.ch>
Cc: Jeff Garzik <jeff@garzik.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Jeff Garzik <jeff@garzik.org>

diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c
index 6a23964..a6dc53b 100644
--- a/drivers/net/via-rhine.c
+++ b/drivers/net/via-rhine.c
@@ -129,6 +129,7 @@
 	- Massive clean-up
 	- Rewrite PHY, media handling (remove options, full_duplex, backoff)
 	- Fix Tx engine race for good
+	- Craig Brind: Zero padded aligned buffers for short packets.
 
 */
 
@@ -1326,7 +1327,12 @@ static int rhine_start_tx(struct sk_buff *skb, struct net_device *dev)
 			rp->stats.tx_dropped++;
 			return 0;
 		}
+
+		/* Padding is not copied and so must be redone. */
 		skb_copy_and_csum_dev(skb, rp->tx_buf[entry]);
+		if (skb->len < ETH_ZLEN)
+			memset(rp->tx_buf[entry] + skb->len, 0,
+			       ETH_ZLEN - skb->len);
 		rp->tx_skbuff_dma[entry] = 0;
 		rp->tx_ring[entry].addr = cpu_to_le32(rp->tx_bufs_dma +
 						      (rp->tx_buf[entry] -
-- 
cgit v0.10.2


From ebf34c9b6fcd22338ef764b039b3ac55ed0e297b Mon Sep 17 00:00:00 2001
From: Ayaz Abdulla <aabdulla@nvidia.com>
Date: Tue, 2 May 2006 15:26:06 -0400
Subject: forcedeth: fix multi irq issues

This patch fixes the issues with multiple irqs.

I am resending based on feedback. I decoupled the dma mask for
consistent memory and fixed leak with multiple irq in error path.

Thanks to Manfred for catching the spin lock problem.

Signed-Off-By: Ayaz Abdulla <aabdulla@nvidia.com>

diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 9788b1e..f7235c9 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -106,6 +106,7 @@
  *	0.51: 20 Jan 2006: Add 64bit consistent memory allocation for rings.
  *	0.52: 20 Jan 2006: Add MSI/MSIX support.
  *	0.53: 19 Mar 2006: Fix init from low power mode and add hw reset.
+ *	0.54: 21 Mar 2006: Fix spin locks for multi irqs and cleanup.
  *
  * Known bugs:
  * We suspect that on some hardware no TX done interrupts are generated.
@@ -117,7 +118,7 @@
  * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few
  * superfluous timer interrupts from the nic.
  */
-#define FORCEDETH_VERSION		"0.53"
+#define FORCEDETH_VERSION		"0.54"
 #define DRV_NAME			"forcedeth"
 
 #include <linux/module.h>
@@ -710,6 +711,72 @@ static void setup_hw_rings(struct net_device *dev, int rxtx_flags)
 	}
 }
 
+static int using_multi_irqs(struct net_device *dev)
+{
+	struct fe_priv *np = get_nvpriv(dev);
+
+	if (!(np->msi_flags & NV_MSI_X_ENABLED) ||
+	    ((np->msi_flags & NV_MSI_X_ENABLED) &&
+	     ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1)))
+		return 0;
+	else
+		return 1;
+}
+
+static void nv_enable_irq(struct net_device *dev)
+{
+	struct fe_priv *np = get_nvpriv(dev);
+
+	if (!using_multi_irqs(dev)) {
+		if (np->msi_flags & NV_MSI_X_ENABLED)
+			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
+		else
+			enable_irq(dev->irq);
+	} else {
+		enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
+		enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
+		enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
+	}
+}
+
+static void nv_disable_irq(struct net_device *dev)
+{
+	struct fe_priv *np = get_nvpriv(dev);
+
+	if (!using_multi_irqs(dev)) {
+		if (np->msi_flags & NV_MSI_X_ENABLED)
+			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
+		else
+			disable_irq(dev->irq);
+	} else {
+		disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
+		disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
+		disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
+	}
+}
+
+/* In MSIX mode, a write to irqmask behaves as XOR */
+static void nv_enable_hw_interrupts(struct net_device *dev, u32 mask)
+{
+	u8 __iomem *base = get_hwbase(dev);
+
+	writel(mask, base + NvRegIrqMask);
+}
+
+static void nv_disable_hw_interrupts(struct net_device *dev, u32 mask)
+{
+	struct fe_priv *np = get_nvpriv(dev);
+	u8 __iomem *base = get_hwbase(dev);
+
+	if (np->msi_flags & NV_MSI_X_ENABLED) {
+		writel(mask, base + NvRegIrqMask);
+	} else {
+		if (np->msi_flags & NV_MSI_ENABLED)
+			writel(0, base + NvRegMSIIrqMask);
+		writel(0, base + NvRegIrqMask);
+	}
+}
+
 #define MII_READ	(-1)
 /* mii_rw: read/write a register on the PHY.
  *
@@ -1019,24 +1086,25 @@ static void nv_do_rx_refill(unsigned long data)
 	struct net_device *dev = (struct net_device *) data;
 	struct fe_priv *np = netdev_priv(dev);
 
-
-	if (!(np->msi_flags & NV_MSI_X_ENABLED) ||
-	    ((np->msi_flags & NV_MSI_X_ENABLED) && 
-	     ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) {
-		disable_irq(dev->irq);
+	if (!using_multi_irqs(dev)) {
+		if (np->msi_flags & NV_MSI_X_ENABLED)
+			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
+		else
+			disable_irq(dev->irq);
 	} else {
 		disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
 	}
 	if (nv_alloc_rx(dev)) {
-		spin_lock(&np->lock);
+		spin_lock_irq(&np->lock);
 		if (!np->in_shutdown)
 			mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
-		spin_unlock(&np->lock);
+		spin_unlock_irq(&np->lock);
 	}
-	if (!(np->msi_flags & NV_MSI_X_ENABLED) ||
-	    ((np->msi_flags & NV_MSI_X_ENABLED) && 
-	     ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) {
-		enable_irq(dev->irq);
+	if (!using_multi_irqs(dev)) {
+		if (np->msi_flags & NV_MSI_X_ENABLED)
+			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
+		else
+			enable_irq(dev->irq);
 	} else {
 		enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
 	}
@@ -1668,15 +1736,7 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
 		 * guessed, there is probably a simpler approach.
 		 * Changing the MTU is a rare event, it shouldn't matter.
 		 */
-		if (!(np->msi_flags & NV_MSI_X_ENABLED) ||
-		    ((np->msi_flags & NV_MSI_X_ENABLED) && 
-		     ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) {
-			disable_irq(dev->irq);
-		} else {
-			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
-			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
-			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
-		}
+		nv_disable_irq(dev);
 		spin_lock_bh(&dev->xmit_lock);
 		spin_lock(&np->lock);
 		/* stop engines */
@@ -1709,15 +1769,7 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
 		nv_start_tx(dev);
 		spin_unlock(&np->lock);
 		spin_unlock_bh(&dev->xmit_lock);
-		if (!(np->msi_flags & NV_MSI_X_ENABLED) ||
-		    ((np->msi_flags & NV_MSI_X_ENABLED) && 
-		     ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) {
-			enable_irq(dev->irq);
-		} else {
-			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
-			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
-			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
-		}
+		nv_enable_irq(dev);
 	}
 	return 0;
 }
@@ -2108,16 +2160,16 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data, struct pt_regs *regs)
 		if (!(events & np->irqmask))
 			break;
 
-		spin_lock(&np->lock);
+		spin_lock_irq(&np->lock);
 		nv_tx_done(dev);
-		spin_unlock(&np->lock);
+		spin_unlock_irq(&np->lock);
 		
 		if (events & (NVREG_IRQ_TX_ERR)) {
 			dprintk(KERN_DEBUG "%s: received irq with events 0x%x. Probably TX fail.\n",
 						dev->name, events);
 		}
 		if (i > max_interrupt_work) {
-			spin_lock(&np->lock);
+			spin_lock_irq(&np->lock);
 			/* disable interrupts on the nic */
 			writel(NVREG_IRQ_TX_ALL, base + NvRegIrqMask);
 			pci_push(base);
@@ -2127,7 +2179,7 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data, struct pt_regs *regs)
 				mod_timer(&np->nic_poll, jiffies + POLL_WAIT);
 			}
 			printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_tx.\n", dev->name, i);
-			spin_unlock(&np->lock);
+			spin_unlock_irq(&np->lock);
 			break;
 		}
 
@@ -2157,14 +2209,14 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data, struct pt_regs *regs)
 		
 		nv_rx_process(dev);
 		if (nv_alloc_rx(dev)) {
-			spin_lock(&np->lock);
+			spin_lock_irq(&np->lock);
 			if (!np->in_shutdown)
 				mod_timer(&np->oom_kick, jiffies + OOM_REFILL);
-			spin_unlock(&np->lock);
+			spin_unlock_irq(&np->lock);
 		}
 		
 		if (i > max_interrupt_work) {
-			spin_lock(&np->lock);
+			spin_lock_irq(&np->lock);
 			/* disable interrupts on the nic */
 			writel(NVREG_IRQ_RX_ALL, base + NvRegIrqMask);
 			pci_push(base);
@@ -2174,7 +2226,7 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data, struct pt_regs *regs)
 				mod_timer(&np->nic_poll, jiffies + POLL_WAIT);
 			}
 			printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_rx.\n", dev->name, i);
-			spin_unlock(&np->lock);
+			spin_unlock_irq(&np->lock);
 			break;
 		}
 
@@ -2203,14 +2255,14 @@ static irqreturn_t nv_nic_irq_other(int foo, void *data, struct pt_regs *regs)
 			break;
 		
 		if (events & NVREG_IRQ_LINK) {
-			spin_lock(&np->lock);
+			spin_lock_irq(&np->lock);
 			nv_link_irq(dev);
-			spin_unlock(&np->lock);
+			spin_unlock_irq(&np->lock);
 		}
 		if (np->need_linktimer && time_after(jiffies, np->link_timeout)) {
-			spin_lock(&np->lock);
+			spin_lock_irq(&np->lock);
 			nv_linkchange(dev);
-			spin_unlock(&np->lock);
+			spin_unlock_irq(&np->lock);
 			np->link_timeout = jiffies + LINK_TIMEOUT;
 		}
 		if (events & (NVREG_IRQ_UNKNOWN)) {
@@ -2218,7 +2270,7 @@ static irqreturn_t nv_nic_irq_other(int foo, void *data, struct pt_regs *regs)
 						dev->name, events);
 		}
 		if (i > max_interrupt_work) {
-			spin_lock(&np->lock);
+			spin_lock_irq(&np->lock);
 			/* disable interrupts on the nic */
 			writel(NVREG_IRQ_OTHER, base + NvRegIrqMask);
 			pci_push(base);
@@ -2228,7 +2280,7 @@ static irqreturn_t nv_nic_irq_other(int foo, void *data, struct pt_regs *regs)
 				mod_timer(&np->nic_poll, jiffies + POLL_WAIT);
 			}
 			printk(KERN_DEBUG "%s: too many iterations (%d) in nv_nic_irq_other.\n", dev->name, i);
-			spin_unlock(&np->lock);
+			spin_unlock_irq(&np->lock);
 			break;
 		}
 
@@ -2251,10 +2303,11 @@ static void nv_do_nic_poll(unsigned long data)
 	 * nv_nic_irq because that may decide to do otherwise
 	 */
 
-	if (!(np->msi_flags & NV_MSI_X_ENABLED) ||
-	    ((np->msi_flags & NV_MSI_X_ENABLED) && 
-	     ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) {
-		disable_irq(dev->irq);
+	if (!using_multi_irqs(dev)) {
+		if (np->msi_flags & NV_MSI_X_ENABLED)
+			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
+		else
+			disable_irq(dev->irq);
 		mask = np->irqmask;
 	} else {
 		if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) {
@@ -2277,11 +2330,12 @@ static void nv_do_nic_poll(unsigned long data)
 	writel(mask, base + NvRegIrqMask);
 	pci_push(base);
 
-	if (!(np->msi_flags & NV_MSI_X_ENABLED) || 
-	    ((np->msi_flags & NV_MSI_X_ENABLED) && 
-	     ((np->msi_flags & NV_MSI_X_VECTORS_MASK) == 0x1))) {
+	if (!using_multi_irqs(dev)) {
 		nv_nic_irq((int) 0, (void *) data, (struct pt_regs *) NULL);
-		enable_irq(dev->irq);
+		if (np->msi_flags & NV_MSI_X_ENABLED)
+			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
+		else
+			enable_irq(dev->irq);
 	} else {
 		if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) {
 			nv_nic_irq_rx((int) 0, (void *) data, (struct pt_regs *) NULL);
@@ -2628,6 +2682,113 @@ static void set_msix_vector_map(struct net_device *dev, u32 vector, u32 irqmask)
 	writel(readl(base + NvRegMSIXMap1) | msixmap, base + NvRegMSIXMap1);
 }
 
+static int nv_request_irq(struct net_device *dev)
+{
+	struct fe_priv *np = get_nvpriv(dev);
+	u8 __iomem *base = get_hwbase(dev);
+	int ret = 1;
+	int i;
+
+	if (np->msi_flags & NV_MSI_X_CAPABLE) {
+		for (i = 0; i < (np->msi_flags & NV_MSI_X_VECTORS_MASK); i++) {
+			np->msi_x_entry[i].entry = i;
+		}
+		if ((ret = pci_enable_msix(np->pci_dev, np->msi_x_entry, (np->msi_flags & NV_MSI_X_VECTORS_MASK))) == 0) {
+			np->msi_flags |= NV_MSI_X_ENABLED;
+			if (optimization_mode == NV_OPTIMIZATION_MODE_THROUGHPUT) {
+				/* Request irq for rx handling */
+				if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector, &nv_nic_irq_rx, SA_SHIRQ, dev->name, dev) != 0) {
+					printk(KERN_INFO "forcedeth: request_irq failed for rx %d\n", ret);
+					pci_disable_msix(np->pci_dev);
+					np->msi_flags &= ~NV_MSI_X_ENABLED;
+					goto out_err;
+				}
+				/* Request irq for tx handling */
+				if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector, &nv_nic_irq_tx, SA_SHIRQ, dev->name, dev) != 0) {
+					printk(KERN_INFO "forcedeth: request_irq failed for tx %d\n", ret);
+					pci_disable_msix(np->pci_dev);
+					np->msi_flags &= ~NV_MSI_X_ENABLED;
+					goto out_free_rx;
+				}
+				/* Request irq for link and timer handling */
+				if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector, &nv_nic_irq_other, SA_SHIRQ, dev->name, dev) != 0) {
+					printk(KERN_INFO "forcedeth: request_irq failed for link %d\n", ret);
+					pci_disable_msix(np->pci_dev);
+					np->msi_flags &= ~NV_MSI_X_ENABLED;
+					goto out_free_tx;
+				}
+				/* map interrupts to their respective vector */
+				writel(0, base + NvRegMSIXMap0);
+				writel(0, base + NvRegMSIXMap1);
+				set_msix_vector_map(dev, NV_MSI_X_VECTOR_RX, NVREG_IRQ_RX_ALL);
+				set_msix_vector_map(dev, NV_MSI_X_VECTOR_TX, NVREG_IRQ_TX_ALL);
+				set_msix_vector_map(dev, NV_MSI_X_VECTOR_OTHER, NVREG_IRQ_OTHER);
+			} else {
+				/* Request irq for all interrupts */
+				if (request_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector, &nv_nic_irq, SA_SHIRQ, dev->name, dev) != 0) {
+					printk(KERN_INFO "forcedeth: request_irq failed %d\n", ret);
+					pci_disable_msix(np->pci_dev);
+					np->msi_flags &= ~NV_MSI_X_ENABLED;
+					goto out_err;
+				}
+
+				/* map interrupts to vector 0 */
+				writel(0, base + NvRegMSIXMap0);
+				writel(0, base + NvRegMSIXMap1);
+			}
+		}
+	}
+	if (ret != 0 && np->msi_flags & NV_MSI_CAPABLE) {
+		if ((ret = pci_enable_msi(np->pci_dev)) == 0) {
+			np->msi_flags |= NV_MSI_ENABLED;
+			if (request_irq(np->pci_dev->irq, &nv_nic_irq, SA_SHIRQ, dev->name, dev) != 0) {
+				printk(KERN_INFO "forcedeth: request_irq failed %d\n", ret);
+				pci_disable_msi(np->pci_dev);
+				np->msi_flags &= ~NV_MSI_ENABLED;
+				goto out_err;
+			}
+
+			/* map interrupts to vector 0 */
+			writel(0, base + NvRegMSIMap0);
+			writel(0, base + NvRegMSIMap1);
+			/* enable msi vector 0 */
+			writel(NVREG_MSI_VECTOR_0_ENABLED, base + NvRegMSIIrqMask);
+		}
+	}
+	if (ret != 0) {
+		if (request_irq(np->pci_dev->irq, &nv_nic_irq, SA_SHIRQ, dev->name, dev) != 0)
+			goto out_err;
+	}
+
+	return 0;
+out_free_tx:
+	free_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector, dev);
+out_free_rx:
+	free_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector, dev);
+out_err:
+	return 1;
+}
+
+static void nv_free_irq(struct net_device *dev)
+{
+	struct fe_priv *np = get_nvpriv(dev);
+	int i;
+
+	if (np->msi_flags & NV_MSI_X_ENABLED) {
+		for (i = 0; i < (np->msi_flags & NV_MSI_X_VECTORS_MASK); i++) {
+			free_irq(np->msi_x_entry[i].vector, dev);
+		}
+		pci_disable_msix(np->pci_dev);
+		np->msi_flags &= ~NV_MSI_X_ENABLED;
+	} else {
+		free_irq(np->pci_dev->irq, dev);
+		if (np->msi_flags & NV_MSI_ENABLED) {
+			pci_disable_msi(np->pci_dev);
+			np->msi_flags &= ~NV_MSI_ENABLED;
+		}
+	}
+}
+
 static int nv_open(struct net_device *dev)
 {
 	struct fe_priv *np = netdev_priv(dev);
@@ -2720,12 +2881,16 @@ static int nv_open(struct net_device *dev)
 	udelay(10);
 	writel(readl(base + NvRegPowerState) | NVREG_POWERSTATE_VALID, base + NvRegPowerState);
 
-	writel(0, base + NvRegIrqMask);
+	nv_disable_hw_interrupts(dev, np->irqmask);
 	pci_push(base);
 	writel(NVREG_MIISTAT_MASK2, base + NvRegMIIStatus);
 	writel(NVREG_IRQSTAT_MASK, base + NvRegIrqStatus);
 	pci_push(base);
 
+	if (nv_request_irq(dev)) {
+		goto out_drain;
+	}
+
 	if (np->msi_flags & NV_MSI_X_CAPABLE) {
 		for (i = 0; i < (np->msi_flags & NV_MSI_X_VECTORS_MASK); i++) {
 			np->msi_x_entry[i].entry = i;
@@ -2799,7 +2964,7 @@ static int nv_open(struct net_device *dev)
 	}
 
 	/* ask for interrupts */
-	writel(np->irqmask, base + NvRegIrqMask);
+	nv_enable_hw_interrupts(dev, np->irqmask);
 
 	spin_lock_irq(&np->lock);
 	writel(NVREG_MCASTADDRA_FORCE, base + NvRegMulticastAddrA);
@@ -2843,7 +3008,6 @@ static int nv_close(struct net_device *dev)
 {
 	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base;
-	int i;
 
 	spin_lock_irq(&np->lock);
 	np->in_shutdown = 1;
@@ -2861,31 +3025,13 @@ static int nv_close(struct net_device *dev)
 
 	/* disable interrupts on the nic or we will lock up */
 	base = get_hwbase(dev);
-	if (np->msi_flags & NV_MSI_X_ENABLED) {
-		writel(np->irqmask, base + NvRegIrqMask);
-	} else {
-		if (np->msi_flags & NV_MSI_ENABLED)
-			writel(0, base + NvRegMSIIrqMask);
-		writel(0, base + NvRegIrqMask);
-	}
+	nv_disable_hw_interrupts(dev, np->irqmask);
 	pci_push(base);
 	dprintk(KERN_INFO "%s: Irqmask is zero again\n", dev->name);
 
 	spin_unlock_irq(&np->lock);
 
-	if (np->msi_flags & NV_MSI_X_ENABLED) {
-		for (i = 0; i < (np->msi_flags & NV_MSI_X_VECTORS_MASK); i++) {
-			free_irq(np->msi_x_entry[i].vector, dev);
-		}
-		pci_disable_msix(np->pci_dev);
-		np->msi_flags &= ~NV_MSI_X_ENABLED;
-	} else {
-		free_irq(np->pci_dev->irq, dev);
-		if (np->msi_flags & NV_MSI_ENABLED) {
-			pci_disable_msi(np->pci_dev);
-			np->msi_flags &= ~NV_MSI_ENABLED;
-		}
-	}
+	nv_free_irq(dev);
 
 	drain_ring(dev);
 
@@ -2974,20 +3120,18 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 	if (id->driver_data & DEV_HAS_HIGH_DMA) {
 		/* packet format 3: supports 40-bit addressing */
 		np->desc_ver = DESC_VER_3;
+		np->txrxctl_bits = NVREG_TXRXCTL_DESC_3;
 		if (pci_set_dma_mask(pci_dev, DMA_39BIT_MASK)) {
 			printk(KERN_INFO "forcedeth: 64-bit DMA failed, using 32-bit addressing for device %s.\n",
 					pci_name(pci_dev));
 		} else {
-			if (pci_set_consistent_dma_mask(pci_dev, 0x0000007fffffffffULL)) {
-				printk(KERN_INFO "forcedeth: 64-bit DMA (consistent) failed for device %s.\n",
-					pci_name(pci_dev));
-				goto out_relreg;
-			} else {
-				dev->features |= NETIF_F_HIGHDMA;
-				printk(KERN_INFO "forcedeth: using HIGHDMA\n");
-			}
+			dev->features |= NETIF_F_HIGHDMA;
+			printk(KERN_INFO "forcedeth: using HIGHDMA\n");
+		}
+		if (pci_set_consistent_dma_mask(pci_dev, 0x0000007fffffffffULL)) {
+			printk(KERN_INFO "forcedeth: 64-bit DMA (consistent) failed for device %s.\n",
+			       pci_name(pci_dev));
 		}
-		np->txrxctl_bits = NVREG_TXRXCTL_DESC_3;
 	} else if (id->driver_data & DEV_HAS_LARGEDESC) {
 		/* packet format 2: supports jumbo frames */
 		np->desc_ver = DESC_VER_2;
-- 
cgit v0.10.2


From b2556da55f78a9dbe92830b1d1c0b612edfea9fd Mon Sep 17 00:00:00 2001
From: Uwe Zeisberger <Uwe_Zeisberger@digi.com>
Date: Tue, 2 May 2006 20:40:56 +0100
Subject: [ARM] 3488/1: make icedcc_putc do the right thing

Patch from Uwe Zeisberger

 a) use coprocessor 14
 b) make reading the dcc status volatile

Signed-off-by: Uwe Zeisberger <Uwe_Zeisberger@digi.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index 0af3772..ace3fb58 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -38,10 +38,10 @@ static void icedcc_putc(int ch)
 		if (--i < 0)
 			return;
 
-		asm("mrc p14, 0, %0, c0, c0, 0" : "=r" (status));
+		asm volatile ("mrc p14, 0, %0, c0, c0, 0" : "=r" (status));
 	} while (status & 2);
 
-	asm("mcr p15, 0, %0, c1, c0, 0" : : "r" (ch));
+	asm("mcr p14, 0, %0, c1, c0, 0" : : "r" (ch));
 }
 
 #define putc(ch)	icedcc_putc(ch)
-- 
cgit v0.10.2


From e17df688f7064dae1417ce425dd1e4b71d24d63b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 2 May 2006 23:23:07 +0200
Subject: [NETFILTER] SCTP conntrack: fix infinite loop

fix infinite loop in the SCTP-netfilter code: check SCTP chunk size to
guarantee progress of for_each_sctp_chunk(). (all other uses of
for_each_sctp_chunk() are preceded by do_basic_checks(), so this fix
should be complete.)

Based on patch from Ingo Molnar <mingo@elte.hu>

CVE-2006-1527

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 5259abd..0416073 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -235,12 +235,15 @@ static int do_basic_checks(struct ip_conntrack *conntrack,
 			flag = 1;
 		}
 
-		/* Cookie Ack/Echo chunks not the first OR 
-		   Init / Init Ack / Shutdown compl chunks not the only chunks */
-		if ((sch->type == SCTP_CID_COOKIE_ACK 
+		/*
+		 * Cookie Ack/Echo chunks not the first OR
+		 * Init / Init Ack / Shutdown compl chunks not the only chunks
+		 * OR zero-length.
+		 */
+		if (((sch->type == SCTP_CID_COOKIE_ACK
 			|| sch->type == SCTP_CID_COOKIE_ECHO
 			|| flag)
-		     && count !=0 ) {
+		      && count !=0) || !sch->length) {
 			DEBUGP("Basic checks failed\n");
 			return 1;
 		}
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 9cccc32..0c6da49 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -240,12 +240,15 @@ static int do_basic_checks(struct nf_conn *conntrack,
 			flag = 1;
 		}
 
-		/* Cookie Ack/Echo chunks not the first OR 
-		   Init / Init Ack / Shutdown compl chunks not the only chunks */
-		if ((sch->type == SCTP_CID_COOKIE_ACK 
+		/*
+		 * Cookie Ack/Echo chunks not the first OR
+		 * Init / Init Ack / Shutdown compl chunks not the only chunks
+		 * OR zero-length.
+		 */
+		if (((sch->type == SCTP_CID_COOKIE_ACK
 			|| sch->type == SCTP_CID_COOKIE_ECHO
 			|| flag)
-		     && count !=0 ) {
+		      && count !=0) || !sch->length) {
 			DEBUGP("Basic checks failed\n");
 			return 1;
 		}
-- 
cgit v0.10.2


From 054d8ff37710efaebd1998ce94d366df315a354f Mon Sep 17 00:00:00 2001
From: Linas Vepstas <linas@austin.ibm.com>
Date: Thu, 27 Apr 2006 02:31:20 -0700
Subject: [PATCH] powerpc/pseries: avoid crash in PCI code if mem system not up

The powerpc code is currently performing PCI setup before memory
initialization.  PCI setup touches PCI config space registers.  If the PCI
card is bad, this will evoke an error, which currrently can't be handled,
as the PCI error recovery code expects kmalloc() to be functional.  This
patch will cause the system to punt instead of crashing with

cpu 0x0: Vector: 300 (Data Access) at [c0000000004434d0]
    pc: c0000000000c06b4: .kmem_cache_alloc+0x8c/0xf4
    lr: c00000000004ad6c: .eeh_send_failure_event+0x48/0xfc

This patch will also print name of the offending pci device.

Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
index a1bda6f..40020c6 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -118,7 +118,15 @@ int eeh_send_failure_event (struct device_node *dn,
 {
 	unsigned long flags;
 	struct eeh_event *event;
+	char *location;
 
+	if (!mem_init_done) {
+		printk(KERN_ERR "EEH: event during early boot not handled\n");
+		location = (char *) get_property(dn, "ibm,loc-code", NULL);
+		printk(KERN_ERR "EEH: device node = %s\n", dn->full_name);
+		printk(KERN_ERR "EEH: PCI location = %s\n", location);
+		return 1;
+	}
 	event = kmalloc(sizeof(*event), GFP_ATOMIC);
 	if (event == NULL) {
 		printk (KERN_ERR "EEH: out of memory, event not handled\n");
-- 
cgit v0.10.2


From 0ccde0a290b44b8296b82a7683b4c299eb51ba6b Mon Sep 17 00:00:00 2001
From: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Date: Fri, 28 Apr 2006 17:38:42 +0530
Subject: [PATCH] powerpc/kprobes: fix singlestep out-of-line

We currently single-step inline if the instruction on which a kprobe is
inserted is a trap variant.

- variants (such as tdnei, used by BUG()) typically evaluate a condition
  and cause a trap only if the condition is satisfied.
- kprobes uses the unconditional "trap" (0x7fe00008) and single-stepping
  again on this instruction, resulting in another trap without
  evaluating the condition is obviously incorrect.

Signed-off-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 856ef1a..f788663 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -90,15 +90,15 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
 
 static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
 {
-	kprobe_opcode_t insn = *p->ainsn.insn;
-
 	regs->msr |= MSR_SE;
 
-	/* single step inline if it is a trap variant */
-	if (is_trap(insn))
-		regs->nip = (unsigned long)p->addr;
-	else
-		regs->nip = (unsigned long)p->ainsn.insn;
+	/*
+	 * On powerpc we should single step on the original
+	 * instruction even if the probed insn is a trap
+	 * variant as values in regs could play a part in
+	 * if the trap is taken or not
+	 */
+	regs->nip = (unsigned long)p->ainsn.insn;
 }
 
 static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
-- 
cgit v0.10.2


From 61f5657c50341198ff05e375e6f1fc0476556562 Mon Sep 17 00:00:00 2001
From: Vitaly Bordug <vbordug@ru.mvista.com>
Date: Sat, 29 Apr 2006 22:32:44 +0400
Subject: [PATCH] ppc32 CPM_UART: Fixed break send on SCC

SCC uart sends a break sequence each time it is stopped with the
CPM_CR_STOP_TX command. That means that each time an application closes the
serial device, a break is transmitted. To fix this, graceful tx stop is
issued for SCC.

Signed-off-by: David Jander <david.jander@protonic.nl>
Signed-off-by: Vitaly Bordug <vbordug@ru.mvista.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c
index ced193b..b9d1185 100644
--- a/drivers/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/serial/cpm_uart/cpm_uart_core.c
@@ -457,7 +457,11 @@ static void cpm_uart_shutdown(struct uart_port *port)
 		}
 
 		/* Shut them really down and reinit buffer descriptors */
-		cpm_line_cr_cmd(line, CPM_CR_STOP_TX);
+		if (IS_SMC(pinfo))
+			cpm_line_cr_cmd(line, CPM_CR_STOP_TX);
+		else
+			cpm_line_cr_cmd(line, CPM_CR_GRA_STOP_TX);
+
 		cpm_uart_initbd(pinfo);
 	}
 }
diff --git a/include/asm-ppc/commproc.h b/include/asm-ppc/commproc.h
index 973e609..31f3629 100644
--- a/include/asm-ppc/commproc.h
+++ b/include/asm-ppc/commproc.h
@@ -35,6 +35,7 @@
 #define CPM_CR_INIT_TX		((ushort)0x0002)
 #define CPM_CR_HUNT_MODE	((ushort)0x0003)
 #define CPM_CR_STOP_TX		((ushort)0x0004)
+#define CPM_CR_GRA_STOP_TX	((ushort)0x0005)
 #define CPM_CR_RESTART_TX	((ushort)0x0006)
 #define CPM_CR_CLOSE_RX_BD	((ushort)0x0007)
 #define CPM_CR_SET_GADDR	((ushort)0x0008)
diff --git a/include/asm-ppc/cpm2.h b/include/asm-ppc/cpm2.h
index b638b87..c70344b 100644
--- a/include/asm-ppc/cpm2.h
+++ b/include/asm-ppc/cpm2.h
@@ -69,7 +69,7 @@
 #define CPM_CR_INIT_TX		((ushort)0x0002)
 #define CPM_CR_HUNT_MODE	((ushort)0x0003)
 #define CPM_CR_STOP_TX		((ushort)0x0004)
-#define CPM_CR_GRA_STOP_TX      ((ushort)0x0005)
+#define CPM_CR_GRA_STOP_TX	((ushort)0x0005)
 #define CPM_CR_RESTART_TX	((ushort)0x0006)
 #define CPM_CR_SET_GADDR	((ushort)0x0008)
 #define CPM_CR_START_IDMA	((ushort)0x0009)
-- 
cgit v0.10.2


From 6e1976961c9bd9a3dc368139fab1883961efc879 Mon Sep 17 00:00:00 2001
From: Vitaly Bordug <vbordug@ru.mvista.com>
Date: Sat, 29 Apr 2006 23:06:00 +0400
Subject: [PATCH] ppc32 CPM_UART: fixes and improvements

A number of small issues are fixed, and added the header file, missed from the
original series. With this, driver should be pretty stable as tested among
both platform-device-driven and "old way" boards. Also added missing GPL
statement , and updated year field on existing ones to reflect
code update.

Signed-off-by: Vitaly Bordug <vbordug@ru.mvista.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/ppc/platforms/mpc866ads_setup.c b/arch/ppc/platforms/mpc866ads_setup.c
index 6ce3b84..d919dab 100644
--- a/arch/ppc/platforms/mpc866ads_setup.c
+++ b/arch/ppc/platforms/mpc866ads_setup.c
@@ -378,7 +378,7 @@ int __init mpc866ads_init(void)
 	ppc_sys_device_setfunc(MPC8xx_CPM_SMC1, PPC_SYS_FUNC_UART);
 #endif
 
-#ifdef CONFIG_SERIAL_CPM_SMCer
+#ifdef CONFIG_SERIAL_CPM_SMC
 	ppc_sys_device_enable(MPC8xx_CPM_SMC2);
 	ppc_sys_device_setfunc(MPC8xx_CPM_SMC2, PPC_SYS_FUNC_UART);
 #endif
diff --git a/drivers/serial/cpm_uart/cpm_uart.h b/drivers/serial/cpm_uart/cpm_uart.h
index aa5eb7d..3b35cb7 100644
--- a/drivers/serial/cpm_uart/cpm_uart.h
+++ b/drivers/serial/cpm_uart/cpm_uart.h
@@ -5,6 +5,13 @@
  *
  *  Copyright (C) 2004 Freescale Semiconductor, Inc.
  *
+ *  2006 (c) MontaVista Software, Inc.
+ * 	Vitaly Bordug <vbordug@ru.mvista.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ *
  */
 #ifndef CPM_UART_H
 #define CPM_UART_H
@@ -101,12 +108,13 @@ static inline unsigned long cpu2cpm_addr(void* addr, struct uart_cpm_port *pinfo
 	int offset;
 	u32 val = (u32)addr;
 	/* sane check */
-	if ((val >= (u32)pinfo->mem_addr) &&
+	if (likely((val >= (u32)pinfo->mem_addr)) &&
 			(val<((u32)pinfo->mem_addr + pinfo->mem_size))) {
 		offset = val - (u32)pinfo->mem_addr;
 		return pinfo->dma_addr+offset;
 	}
-	printk("%s(): address %x to translate out of range!\n", __FUNCTION__, val);
+	/* something nasty happened */
+	BUG();
 	return 0;
 }
 
@@ -115,12 +123,13 @@ static inline void *cpm2cpu_addr(unsigned long addr, struct uart_cpm_port *pinfo
 	int offset;
 	u32 val = addr;
 	/* sane check */
-	if ((val >= pinfo->dma_addr) &&
-			(val<(pinfo->dma_addr + pinfo->mem_size))) {
+	if (likely((val >= pinfo->dma_addr) &&
+			(val<(pinfo->dma_addr + pinfo->mem_size)))) {
 		offset = val - (u32)pinfo->dma_addr;
 		return (void*)(pinfo->mem_addr+offset);
 	}
-	printk("%s(): address %x to translate out of range!\n", __FUNCTION__, val);
+	/* something nasty happened */
+	BUG();
 	return 0;
 }
 
diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c
index b9d1185..969f949 100644
--- a/drivers/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/serial/cpm_uart/cpm_uart_core.c
@@ -12,7 +12,8 @@
  *
  *  Copyright (C) 2004 Freescale Semiconductor, Inc.
  *            (C) 2004 Intracom, S.A.
- *            (C) 2005 MontaVista Software, Inc. by Vitaly Bordug <vbordug@ru.mvista.com>
+ *            (C) 2005-2006 MontaVista Software, Inc.
+ * 		Vitaly Bordug <vbordug@ru.mvista.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -81,7 +82,7 @@ early_uart_get_pdev(int index)
 }
 
 
-void cpm_uart_count(void)
+static void cpm_uart_count(void)
 {
 	cpm_uart_nr = 0;
 #ifdef CONFIG_SERIAL_CPM_SMC1
@@ -104,6 +105,21 @@ void cpm_uart_count(void)
 #endif
 }
 
+/* Get UART number by its id */
+static int cpm_uart_id2nr(int id)
+{
+	int i;
+	if (id < UART_NR) {
+		for (i=0; i<UART_NR; i++) {
+			if (cpm_uart_port_map[i] == id)
+				return i;
+		}
+	}
+
+	/* not found or invalid argument */
+	return -1;
+}
+
 /*
  * Check, if transmit buffers are processed
 */
@@ -1012,7 +1028,11 @@ int cpm_uart_drv_get_platform_data(struct platform_device *pdev, int is_con)
 	int line;
 	u32 mem, pram;
 
-	for (line=0; line<UART_NR && cpm_uart_port_map[line]!=pdata->fs_no; line++);
+	line = cpm_uart_id2nr(idx);
+	if(line < 0) {
+		printk(KERN_ERR"%s(): port %d is not registered", __FUNCTION__, idx);
+		return -1;
+	}
 
 	pinfo = (struct uart_cpm_port *) &cpm_uart_ports[idx];
 
@@ -1245,8 +1265,7 @@ static int cpm_uart_drv_probe(struct device *dev)
 	}
 
 	pdata = pdev->dev.platform_data;
-	pr_debug("cpm_uart_drv_probe: Adding CPM UART %d\n",
-			cpm_uart_port_map[pdata->fs_no]);
+	pr_debug("cpm_uart_drv_probe: Adding CPM UART %d\n", cpm_uart_id2nr(pdata->fs_no));
 
 	if ((ret = cpm_uart_drv_get_platform_data(pdev, 0)))
 		return ret;
@@ -1265,7 +1284,7 @@ static int cpm_uart_drv_remove(struct device *dev)
 	struct fs_uart_platform_info *pdata = pdev->dev.platform_data;
 
 	pr_debug("cpm_uart_drv_remove: Removing CPM UART %d\n",
-			cpm_uart_port_map[pdata->fs_no]);
+			cpm_uart_id2nr(pdata->fs_no));
 
         uart_remove_one_port(&cpm_reg, &cpm_uart_ports[pdata->fs_no].port);
         return 0;
diff --git a/drivers/serial/cpm_uart/cpm_uart_cpm1.c b/drivers/serial/cpm_uart/cpm_uart_cpm1.c
index a5a3062..17406a0 100644
--- a/drivers/serial/cpm_uart/cpm_uart_cpm1.c
+++ b/drivers/serial/cpm_uart/cpm_uart_cpm1.c
@@ -8,6 +8,8 @@
  *
  *  Copyright (C) 2004 Freescale Semiconductor, Inc.
  *            (C) 2004 Intracom, S.A.
+ *            (C) 2006 MontaVista Software, Inc.
+ * 		Vitaly Bordug <vbordug@ru.mvista.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/drivers/serial/cpm_uart/cpm_uart_cpm2.c b/drivers/serial/cpm_uart/cpm_uart_cpm2.c
index 7c6b07a..4b2de08 100644
--- a/drivers/serial/cpm_uart/cpm_uart_cpm2.c
+++ b/drivers/serial/cpm_uart/cpm_uart_cpm2.c
@@ -8,6 +8,8 @@
  * 
  *  Copyright (C) 2004 Freescale Semiconductor, Inc.
  *            (C) 2004 Intracom, S.A.
+ *            (C) 2006 MontaVista Software, Inc.
+ * 		Vitaly Bordug <vbordug@ru.mvista.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/include/linux/fs_uart_pd.h b/include/linux/fs_uart_pd.h
new file mode 100644
index 0000000..f597512
--- /dev/null
+++ b/include/linux/fs_uart_pd.h
@@ -0,0 +1,60 @@
+/*
+ * Platform information definitions for the CPM Uart driver.
+ *
+ * 2006 (c) MontaVista Software, Inc.
+ * Vitaly Bordug <vbordug@ru.mvista.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef FS_UART_PD_H
+#define FS_UART_PD_H
+
+#include <linux/version.h>
+#include <asm/types.h>
+
+enum fs_uart_id {
+	fsid_smc1_uart,
+	fsid_smc2_uart,
+	fsid_scc1_uart,
+	fsid_scc2_uart,
+	fsid_scc3_uart,
+	fsid_scc4_uart,
+	fs_uart_nr,
+};
+
+static inline int fs_uart_id_scc2fsid(int id)
+{
+    return fsid_scc1_uart + id - 1;
+}
+
+static inline int fs_uart_id_fsid2scc(int id)
+{
+    return id - fsid_scc1_uart + 1;
+}
+
+static inline int fs_uart_id_smc2fsid(int id)
+{
+    return fsid_smc1_uart + id - 1;
+}
+
+static inline int fs_uart_id_fsid2smc(int id)
+{
+    return id - fsid_smc1_uart + 1;
+}
+
+struct fs_uart_platform_info {
+        void(*init_ioports)(void);
+	/* device specific information */
+	int fs_no;		/* controller index */
+	u32 uart_clk;
+	u8 tx_num_fifo;
+	u8 tx_buf_size;
+	u8 rx_num_fifo;
+	u8 rx_buf_size;
+	u8 brg;
+};
+
+#endif
-- 
cgit v0.10.2


From 6bfd93c32a5065d0e858780b3beb0b667081601c Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 3 May 2006 23:02:04 +1000
Subject: powerpc: Fix incorrect might_sleep in __get_user/__put_user on kernel
 addresses

We have a case where __get_user and __put_user can validly be used
on kernel addresses in interrupt context - namely, the alignment
exception handler, as our get/put_unaligned just do a single access
and rely on the alignment exception handler to fix things up in the
rare cases where the cpu can't handle it in hardware.  Thus we can
get alignment exceptions in the network stack at interrupt level.
The alignment exception handler does a __get_user to read the
instruction and blows up in might_sleep().

Since a __get_user on a kernel address won't actually ever sleep,
this makes the might_sleep conditional on the address being less
than PAGE_OFFSET.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/include/asm-powerpc/uaccess.h b/include/asm-powerpc/uaccess.h
index 3872e92..d83fc29 100644
--- a/include/asm-powerpc/uaccess.h
+++ b/include/asm-powerpc/uaccess.h
@@ -7,6 +7,7 @@
 #include <linux/sched.h>
 #include <linux/errno.h>
 #include <asm/processor.h>
+#include <asm/page.h>
 
 #define VERIFY_READ	0
 #define VERIFY_WRITE	1
@@ -179,9 +180,11 @@ do {								\
 #define __put_user_nocheck(x, ptr, size)			\
 ({								\
 	long __pu_err;						\
-	might_sleep();						\
+	__typeof__(*(ptr)) __user *__pu_addr = (ptr);		\
+	if (!is_kernel_addr((unsigned long)__pu_addr))		\
+		might_sleep();					\
 	__chk_user_ptr(ptr);					\
-	__put_user_size((x), (ptr), (size), __pu_err);		\
+	__put_user_size((x), __pu_addr, (size), __pu_err);	\
 	__pu_err;						\
 })
 
@@ -258,9 +261,11 @@ do {								\
 ({								\
 	long __gu_err;						\
 	unsigned long __gu_val;					\
+	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
 	__chk_user_ptr(ptr);					\
-	might_sleep();						\
-	__get_user_size(__gu_val, (ptr), (size), __gu_err);	\
+	if (!is_kernel_addr((unsigned long)__gu_addr))		\
+		might_sleep();					\
+	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
 	(x) = (__typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
 })
@@ -270,9 +275,11 @@ do {								\
 ({								\
 	long __gu_err;						\
 	long long __gu_val;					\
+	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
 	__chk_user_ptr(ptr);					\
-	might_sleep();						\
-	__get_user_size(__gu_val, (ptr), (size), __gu_err);	\
+	if (!is_kernel_addr((unsigned long)__gu_addr))		\
+		might_sleep();					\
+	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
 	(x) = (__typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
 })
-- 
cgit v0.10.2


From d205819e2346d20fee41297ea6cf789c591abccf Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 3 May 2006 23:04:37 +1000
Subject: [PATCH] powerpc: Use the ibm,pa-features property if available

Forthcoming IBM machines will have a "ibm,pa-features" property on CPU
nodes, that contains bits indicating which optional architecture
features are implemented by the CPU.  This adds code to use the
property, if present, to update our CPU feature bitmaps.  Note that
this means we can both set and clear feature bits based on what
the firmware tells us.

This is based on a patch by Will Schmidt <willschm@us.ibm.com>.

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 1cb69e8..9a07f97 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -885,6 +885,74 @@ void __init unflatten_device_tree(void)
 	DBG(" <- unflatten_device_tree()\n");
 }
 
+/*
+ * ibm,pa-features is a per-cpu property that contains a string of
+ * attribute descriptors, each of which has a 2 byte header plus up
+ * to 254 bytes worth of processor attribute bits.  First header
+ * byte specifies the number of bytes following the header.
+ * Second header byte is an "attribute-specifier" type, of which
+ * zero is the only currently-defined value.
+ * Implementation:  Pass in the byte and bit offset for the feature
+ * that we are interested in.  The function will return -1 if the
+ * pa-features property is missing, or a 1/0 to indicate if the feature
+ * is supported/not supported.  Note that the bit numbers are
+ * big-endian to match the definition in PAPR.
+ */
+static struct ibm_pa_feature {
+	unsigned long	cpu_features;	/* CPU_FTR_xxx bit */
+	unsigned int	cpu_user_ftrs;	/* PPC_FEATURE_xxx bit */
+	unsigned char	pabyte;		/* byte number in ibm,pa-features */
+	unsigned char	pabit;		/* bit number (big-endian) */
+	unsigned char	invert;		/* if 1, pa bit set => clear feature */
+} ibm_pa_features[] __initdata = {
+	{0, PPC_FEATURE_HAS_MMU,	0, 0, 0},
+	{0, PPC_FEATURE_HAS_FPU,	0, 1, 0},
+	{CPU_FTR_SLB, 0,		0, 2, 0},
+	{CPU_FTR_CTRL, 0,		0, 3, 0},
+	{CPU_FTR_NOEXECUTE, 0,		0, 6, 0},
+	{CPU_FTR_NODSISRALIGN, 0,	1, 1, 1},
+	{CPU_FTR_CI_LARGE_PAGE, 0,	1, 2, 0},
+};
+
+static void __init check_cpu_pa_features(unsigned long node)
+{
+	unsigned char *pa_ftrs;
+	unsigned long len, tablelen, i, bit;
+
+	pa_ftrs = of_get_flat_dt_prop(node, "ibm,pa-features", &tablelen);
+	if (pa_ftrs == NULL)
+		return;
+
+	/* find descriptor with type == 0 */
+	for (;;) {
+		if (tablelen < 3)
+			return;
+		len = 2 + pa_ftrs[0];
+		if (tablelen < len)
+			return;		/* descriptor 0 not found */
+		if (pa_ftrs[1] == 0)
+			break;
+		tablelen -= len;
+		pa_ftrs += len;
+	}
+
+	/* loop over bits we know about */
+	for (i = 0; i < ARRAY_SIZE(ibm_pa_features); ++i) {
+		struct ibm_pa_feature *fp = &ibm_pa_features[i];
+
+		if (fp->pabyte >= pa_ftrs[0])
+			continue;
+		bit = (pa_ftrs[2 + fp->pabyte] >> (7 - fp->pabit)) & 1;
+		if (bit ^ fp->invert) {
+			cur_cpu_spec->cpu_features |= fp->cpu_features;
+			cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs;
+		} else {
+			cur_cpu_spec->cpu_features &= ~fp->cpu_features;
+			cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs;
+		}
+	}
+}
+
 static int __init early_init_dt_scan_cpus(unsigned long node,
 					  const char *uname, int depth,
 					  void *data)
@@ -969,6 +1037,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 	}
 #endif /* CONFIG_ALTIVEC */
 
+	check_cpu_pa_features(node);
+
 #ifdef CONFIG_PPC_PSERIES
 	if (nthreads > 1)
 		cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
-- 
cgit v0.10.2


From 30d55280b867aa0cae99f836ad0181bb0bf8f9cb Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Wed, 3 May 2006 10:52:36 -0400
Subject: [PATCH] selinux: Clear selinux_enabled flag upon runtime disable.

Clear selinux_enabled flag upon runtime disable of SELinux by userspace,
and make sure it is defined even if selinux= boot parameter support is
not enabled in configuration.

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: James Morris <jmorris@namei.org>
Tested-by: Jon Smirl <jonsmirl@gmail.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 3cf368a..d987048 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -101,6 +101,8 @@ static int __init selinux_enabled_setup(char *str)
 	return 1;
 }
 __setup("selinux=", selinux_enabled_setup);
+#else
+int selinux_enabled = 1;
 #endif
 
 /* Original (dummy) security module. */
@@ -4535,6 +4537,7 @@ int selinux_disable(void)
 	printk(KERN_INFO "SELinux:  Disabled at runtime.\n");
 
 	selinux_disabled = 1;
+	selinux_enabled = 0;
 
 	/* Reset security_ops to the secondary module, dummy or capability. */
 	security_ops = secondary_ops;
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 5f016c9..063af47 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -29,12 +29,7 @@
 #define POLICYDB_VERSION_MIN   POLICYDB_VERSION_BASE
 #define POLICYDB_VERSION_MAX   POLICYDB_VERSION_AVTAB
 
-#ifdef CONFIG_SECURITY_SELINUX_BOOTPARAM
 extern int selinux_enabled;
-#else
-#define selinux_enabled 1
-#endif
-
 extern int selinux_mls_enabled;
 
 int security_load_policy(void * data, size_t len);
-- 
cgit v0.10.2


From 8c45112b823972e9ff7bbca77dc592ca2224951b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Wed, 3 May 2006 13:55:46 -0700
Subject: [SPARC]: Hook up vmsplice into syscall tables.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S
index db8faa7..6e1135c 100644
--- a/arch/sparc/kernel/systbls.S
+++ b/arch/sparc/kernel/systbls.S
@@ -23,7 +23,7 @@ sys_call_table:
 /*10*/  .long sys_unlink, sunos_execv, sys_chdir, sys_chown16, sys_mknod
 /*15*/	.long sys_chmod, sys_lchown16, sparc_brk, sys_nis_syscall, sys_lseek
 /*20*/	.long sys_getpid, sys_capget, sys_capset, sys_setuid16, sys_getuid16
-/*25*/	.long sys_time, sys_ptrace, sys_alarm, sys_sigaltstack, sys_pause
+/*25*/	.long sys_vmsplice, sys_ptrace, sys_alarm, sys_sigaltstack, sys_pause
 /*30*/	.long sys_utime, sys_lchown, sys_fchown, sys_access, sys_nice
 /*35*/	.long sys_chown, sys_sync, sys_kill, sys_newstat, sys_sendfile
 /*40*/	.long sys_newlstat, sys_dup, sys_pipe, sys_times, sys_getuid
diff --git a/arch/sparc64/kernel/sys32.S b/arch/sparc64/kernel/sys32.S
index f9b7576..bdf1f4d 100644
--- a/arch/sparc64/kernel/sys32.S
+++ b/arch/sparc64/kernel/sys32.S
@@ -139,6 +139,7 @@ SIGN3(sys32_ioprio_set, sys_ioprio_set, %o0, %o1, %o2)
 SIGN2(sys32_splice, sys_splice, %o0, %o1)
 SIGN2(sys32_sync_file_range, compat_sync_file_range, %o0, %o5)
 SIGN2(sys32_tee, sys_tee, %o0, %o1)
+SIGN1(sys32_vmsplice, compat_sys_vmsplice, %o0)
 
 	.globl		sys32_mmap2
 sys32_mmap2:
diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S
index 62672cd..d4b39cd 100644
--- a/arch/sparc64/kernel/systbls.S
+++ b/arch/sparc64/kernel/systbls.S
@@ -25,7 +25,7 @@ sys_call_table32:
 /*10*/  .word sys_unlink, sunos_execv, sys_chdir, sys32_chown16, sys32_mknod
 /*15*/	.word sys_chmod, sys32_lchown16, sparc_brk, sys32_perfctr, sys32_lseek
 /*20*/	.word sys_getpid, sys_capget, sys_capset, sys32_setuid16, sys32_getuid16
-/*25*/	.word compat_sys_time, sys_ptrace, sys_alarm, sys32_sigaltstack, sys32_pause
+/*25*/	.word sys32_vmsplice, sys_ptrace, sys_alarm, sys32_sigaltstack, sys32_pause
 /*30*/	.word compat_sys_utime, sys_lchown, sys_fchown, sys32_access, sys32_nice
 	.word sys_chown, sys_sync, sys32_kill, compat_sys_newstat, sys32_sendfile
 /*40*/	.word compat_sys_newlstat, sys_dup, sys_pipe, compat_sys_times, sys_getuid
@@ -94,7 +94,7 @@ sys_call_table:
 /*10*/  .word sys_unlink, sys_nis_syscall, sys_chdir, sys_chown, sys_mknod
 /*15*/	.word sys_chmod, sys_lchown, sparc_brk, sys_perfctr, sys_lseek
 /*20*/	.word sys_getpid, sys_capget, sys_capset, sys_setuid, sys_getuid
-/*25*/	.word sys_nis_syscall, sys_ptrace, sys_alarm, sys_sigaltstack, sys_nis_syscall
+/*25*/	.word sys_vmsplice, sys_ptrace, sys_alarm, sys_sigaltstack, sys_nis_syscall
 /*30*/	.word sys_utime, sys_nis_syscall, sys_nis_syscall, sys_access, sys_nice
 	.word sys_nis_syscall, sys_sync, sys_kill, sys_newstat, sys_sendfile64
 /*40*/	.word sys_newlstat, sys_dup, sys_pipe, sys_times, sys_nis_syscall
diff --git a/include/asm-sparc/unistd.h b/include/asm-sparc/unistd.h
index 32a48f6..f5611a7 100644
--- a/include/asm-sparc/unistd.h
+++ b/include/asm-sparc/unistd.h
@@ -41,7 +41,7 @@
 #define __NR_capset		 22 /* Linux Specific				   */
 #define __NR_setuid              23 /* Implemented via setreuid in SunOS           */
 #define __NR_getuid              24 /* Common                                      */
-/* #define __NR_time alias	 25    ENOSYS under SunOS			   */
+#define __NR_vmsplice	         25 /* ENOSYS under SunOS			   */
 #define __NR_ptrace              26 /* Common                                      */
 #define __NR_alarm               27 /* Implemented via setitimer in SunOS          */
 #define __NR_sigaltstack	 28 /* Common					   */
diff --git a/include/asm-sparc64/unistd.h b/include/asm-sparc64/unistd.h
index ca80e8a..6870574 100644
--- a/include/asm-sparc64/unistd.h
+++ b/include/asm-sparc64/unistd.h
@@ -41,7 +41,7 @@
 #define __NR_capset		 22 /* Linux Specific				   */
 #define __NR_setuid              23 /* Implemented via setreuid in SunOS           */
 #define __NR_getuid              24 /* Common                                      */
-/* #define __NR_time alias	 25    ENOSYS under SunOS			   */
+#define __NR_vmsplice	         25 /* ENOSYS under SunOS			   */
 #define __NR_ptrace              26 /* Common                                      */
 #define __NR_alarm               27 /* Implemented via setitimer in SunOS          */
 #define __NR_sigaltstack	 28 /* Common					   */
-- 
cgit v0.10.2


From f0ec5e39765cd254d436a6d86e211d81795952a4 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Wed, 3 May 2006 19:54:57 -0700
Subject: [PATCH] Remove wrong cpu_has_apic checks that came from mismerging

We only need to check cpu_has_apic in the IO-APIC/L-APIC parsing, not for
all of ACPI.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index 4c785a6..40e5aba 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -1102,9 +1102,6 @@ int __init acpi_boot_table_init(void)
 	dmi_check_system(acpi_dmi_table);
 #endif
 
-	if (!cpu_has_apic)
-		return -ENODEV;
-
 	/*
 	 * If acpi_disabled, bail out
 	 * One exception: acpi=ht continues far enough to enumerate LAPICs
@@ -1151,9 +1148,6 @@ int __init acpi_boot_init(void)
 
 	acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
 
-	if (!cpu_has_apic)
-		return -ENODEV;
-
 	/*
 	 * set sci_int and PM timer address
 	 */
-- 
cgit v0.10.2


From 6760da0197a6ee327a09dafc070b26e2f02651fe Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Wed, 3 May 2006 19:55:03 -0700
Subject: [PATCH] uml: change timer initialization

inet_init, which schedules, is called before the UML timer_init, which sets
up the timer.  The result is the interval timers being manipulated before
the appropriate signal handlers are established, causing unhandled timers.

This is fixed by making timer_init be called earlier.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/kernel/time_kern.c b/arch/um/kernel/time_kern.c
index 3c7626c..528cf62 100644
--- a/arch/um/kernel/time_kern.c
+++ b/arch/um/kernel/time_kern.c
@@ -209,4 +209,4 @@ int __init timer_init(void)
 	return(0);
 }
 
-__initcall(timer_init);
+arch_initcall(timer_init);
-- 
cgit v0.10.2


From 96941026a51e9cb8c2d2be7499301b7fcd9ee225 Mon Sep 17 00:00:00 2001
From: mark gross <mgross@linux.intel.com>
Date: Wed, 3 May 2006 19:55:07 -0700
Subject: [PATCH] EDAC Coexistence with BIOS

Address the issue of EDAC/BIOS coexistence for the e752x chip-sets.

We have found a problem where the BIOS will start the system with the error
registers (dev0:fun1) hidden and assuming it has exclusive access to them.
The edac driver violates this assumption.

The workaround this patch offers is to honor the hidden-ness as an
indication that it is not safe to use those registers.

Signed-off-by: Mark Gross <mark.gross@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c
index 66572c5..fce3193 100644
--- a/drivers/edac/e752x_edac.c
+++ b/drivers/edac/e752x_edac.c
@@ -25,6 +25,8 @@
 #include <linux/slab.h>
 #include "edac_mc.h"
 
+static int force_function_unhide;
+
 #define e752x_printk(level, fmt, arg...) \
 	edac_printk(level, "e752x", fmt, ##arg)
 
@@ -782,8 +784,16 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx)
 	debugf0("%s(): mci\n", __func__);
 	debugf0("Starting Probe1\n");
 
-	/* enable device 0 function 1 */
+	/* check to see if device 0 function 1 is enabled; if it isn't, we
+	 * assume the BIOS has reserved it for a reason and is expecting
+	 * exclusive access, we take care not to violate that assumption and
+	 * fail the probe. */
 	pci_read_config_byte(pdev, E752X_DEVPRES1, &stat8);
+	if (!force_function_unhide && !(stat8 & (1 << 5))) {
+		printk(KERN_INFO "Contact your BIOS vendor to see if the "
+			"E752x error registers can be safely un-hidden\n");
+		goto fail;
+	}
 	stat8 |= (1 << 5);
 	pci_write_config_byte(pdev, E752X_DEVPRES1, stat8);
 
@@ -1063,3 +1073,8 @@ module_exit(e752x_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Linux Networx (http://lnxi.com) Tom Zimmerman\n");
 MODULE_DESCRIPTION("MC support for Intel e752x memory controllers");
+
+module_param(force_function_unhide, int, 0444);
+MODULE_PARM_DESC(force_function_unhide, "if BIOS sets Dev0:Fun1 up as hidden:"
+" 1=force unhide and hope BIOS doesn't fight driver for Dev0:Fun1 access");
+
-- 
cgit v0.10.2


From 8683dc9990158c221e05959935e7dd50a956c574 Mon Sep 17 00:00:00 2001
From: Brent Casavant <bcasavan@sgi.com>
Date: Wed, 3 May 2006 19:55:10 -0700
Subject: [PATCH] Altix: correct ioc4 port order

Currently loading the ioc3 as a module will cause the ports to be numbered
in reverse order.  This mod maintains the proper order of cards for port
numbering.

Signed-off-by: Brent Casavant <bcasavan@sgi.com>
Cc: Pat Gefre <pfg@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/sn/ioc4.c b/drivers/sn/ioc4.c
index 67140a5..cdeff90 100644
--- a/drivers/sn/ioc4.c
+++ b/drivers/sn/ioc4.c
@@ -310,7 +310,7 @@ ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id)
 	pci_set_drvdata(idd->idd_pdev, idd);
 
 	mutex_lock(&ioc4_mutex);
-	list_add(&idd->idd_list, &ioc4_devices);
+	list_add_tail(&idd->idd_list, &ioc4_devices);
 
 	/* Add this IOC4 to all submodules */
 	list_for_each_entry(is, &ioc4_submodules, is_list) {
-- 
cgit v0.10.2


From 5dea5176e5c32ef9f0d1a41d28427b3bf6881b3a Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Wed, 3 May 2006 19:55:12 -0700
Subject: [PATCH] ext3: multile block allocate little endian fixes

Some places in ext3 multiple block allocation code (in 2.6.17-rc3) don't
handle the little endian well.  This was resulting in *wrong* block numbers
being assigned to in-memory block variables and then stored on disk
eventually.  The following patch has been verified to fix an ext3
filesystem failure when run ltp test on a 64 bit machine.

Signed-off-by; Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 48ae033..2edd7ee 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -711,7 +711,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
 	 * direct blocks blocks
 	 */
 	if (num == 0 && blks > 1) {
-		current_block = le32_to_cpu(where->key + 1);
+		current_block = le32_to_cpu(where->key) + 1;
 		for (i = 1; i < blks; i++)
 			*(where->p + i ) = cpu_to_le32(current_block++);
 	}
@@ -724,7 +724,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
 	if (block_i) {
 		block_i->last_alloc_logical_block = block + blks - 1;
 		block_i->last_alloc_physical_block =
-				le32_to_cpu(where[num].key + blks - 1);
+				le32_to_cpu(where[num].key) + blks - 1;
 	}
 
 	/* We are done with atomic stuff, now do the rest of housekeeping */
@@ -814,11 +814,13 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
 
 	/* Simplest case - block found, no allocation needed */
 	if (!partial) {
-		first_block = chain[depth - 1].key;
+		first_block = le32_to_cpu(chain[depth - 1].key);
 		clear_buffer_new(bh_result);
 		count++;
 		/*map more blocks*/
 		while (count < maxblocks && count <= blocks_to_boundary) {
+			unsigned long blk;
+
 			if (!verify_chain(chain, partial)) {
 				/*
 				 * Indirect block might be removed by
@@ -831,8 +833,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
 				count = 0;
 				break;
 			}
-			if (le32_to_cpu(*(chain[depth-1].p+count) ==
-					(first_block + count)))
+			blk = le32_to_cpu(*(chain[depth-1].p + count));
+
+			if (blk == first_block + count)
 				count++;
 			else
 				break;
-- 
cgit v0.10.2


From bfc4ee39fdbb2deb8864785d5e5bc5cdd3b31a69 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Wed, 3 May 2006 10:35:10 +0200
Subject: [PATCH] splice: fix unlocking of page on error ->prepare_write()

Looking at generic_file_buffered_write(), we need to unlock_page() if
prepare write fails and it isn't due to racing with truncate().

Also trim the size if ->prepare_write() fails, if we have to.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/splice.c b/fs/splice.c
index 7fb0497..27f5e37 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -647,11 +647,24 @@ find_page:
 	}
 
 	ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
-	if (ret == AOP_TRUNCATED_PAGE) {
+	if (unlikely(ret)) {
+		loff_t isize = i_size_read(mapping->host);
+
+		if (ret != AOP_TRUNCATED_PAGE)
+			unlock_page(page);
 		page_cache_release(page);
-		goto find_page;
-	} else if (ret)
+		if (ret == AOP_TRUNCATED_PAGE)
+			goto find_page;
+
+		/*
+		 * prepare_write() may have instantiated a few blocks
+		 * outside i_size.  Trim these off again.
+		 */
+		if (sd->pos + this_len > isize)
+			vmtruncate(mapping->host, isize);
+
 		goto out;
+	}
 
 	if (buf->page != page) {
 		/*
-- 
cgit v0.10.2


From 1432873af7ae29d4bb3c56114c05b539d078ca62 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Wed, 3 May 2006 10:35:26 +0200
Subject: [PATCH] splice: LRU fixups

Nick says that the current construct isn't safe. This goes back to the
original, but sets PIPE_BUF_FLAG_LRU on user pages as well as they all
seem to be on the LRU in the first place.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/splice.c b/fs/splice.c
index 27f5e37..0b20242 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -78,6 +78,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
 		return 1;
 	}
 
+	buf->flags |= PIPE_BUF_FLAG_LRU;
 	return 0;
 }
 
@@ -85,6 +86,7 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info,
 					struct pipe_buffer *buf)
 {
 	page_cache_release(buf->page);
+	buf->flags &= ~PIPE_BUF_FLAG_LRU;
 }
 
 static int page_cache_pipe_buf_pin(struct pipe_inode_info *info,
@@ -141,6 +143,7 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
 	if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
 		return 1;
 
+	buf->flags |= PIPE_BUF_FLAG_LRU;
 	return generic_pipe_buf_steal(pipe, buf);
 }
 
@@ -566,37 +569,23 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 	 */
 	if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) {
 		/*
-		 * If steal succeeds, buf->page is now pruned from the vm
-		 * side (page cache) and we can reuse it. The page will also
-		 * be locked on successful return.
+		 * If steal succeeds, buf->page is now pruned from the
+		 * pagecache and we can reuse it. The page will also be
+		 * locked on successful return.
 		 */
 		if (buf->ops->steal(info, buf))
 			goto find_page;
 
 		page = buf->page;
-		page_cache_get(page);
-
-		/*
-		 * page must be on the LRU for adding to the pagecache.
-		 * Check this without grabbing the zone lock, if it isn't
-		 * the do grab the zone lock, recheck, and add if necessary.
-		 */
-		if (!PageLRU(page)) {
-			struct zone *zone = page_zone(page);
-
-			spin_lock_irq(&zone->lru_lock);
-			if (!PageLRU(page)) {
-				SetPageLRU(page);
-				add_page_to_inactive_list(zone, page);
-			}
-			spin_unlock_irq(&zone->lru_lock);
-		}
-
 		if (add_to_page_cache(page, mapping, index, gfp_mask)) {
-			page_cache_release(page);
 			unlock_page(page);
 			goto find_page;
 		}
+
+		page_cache_get(page);
+
+		if (!(buf->flags & PIPE_BUF_FLAG_LRU))
+			lru_cache_add(page);
 	} else {
 find_page:
 		page = find_lock_page(mapping, index);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index ba73108..ea4f7cd 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -5,8 +5,9 @@
 
 #define PIPE_BUFFERS (16)
 
-#define PIPE_BUF_FLAG_ATOMIC	0x01	/* was atomically mapped */
-#define PIPE_BUF_FLAG_GIFT	0x02	/* page is a gift */
+#define PIPE_BUF_FLAG_LRU	0x01	/* page is on the LRU */
+#define PIPE_BUF_FLAG_ATOMIC	0x02	/* was atomically mapped */
+#define PIPE_BUF_FLAG_GIFT	0x04	/* page is a gift */
 
 struct pipe_buffer {
 	struct page *page;
-- 
cgit v0.10.2


From 76ad4d11105ccd40a536db1057083f28326019fd Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Wed, 3 May 2006 10:41:33 +0200
Subject: [PATCH] splice: rename remaining info variables to pipe

Same thing was done in fs/pipe.c and most of fs/splice.c, but we had
a few missing still.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/splice.c b/fs/splice.c
index 0b20242..8fa9217 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -51,7 +51,7 @@ struct splice_pipe_desc {
  * addition of remove_mapping(). If success is returned, the caller may
  * attempt to reuse this page for another destination.
  */
-static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
+static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
 				     struct pipe_buffer *buf)
 {
 	struct page *page = buf->page;
@@ -82,14 +82,14 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
 	return 0;
 }
 
-static void page_cache_pipe_buf_release(struct pipe_inode_info *info,
+static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
 					struct pipe_buffer *buf)
 {
 	page_cache_release(buf->page);
 	buf->flags &= ~PIPE_BUF_FLAG_LRU;
 }
 
-static int page_cache_pipe_buf_pin(struct pipe_inode_info *info,
+static int page_cache_pipe_buf_pin(struct pipe_inode_info *pipe,
 				   struct pipe_buffer *buf)
 {
 	struct page *page = buf->page;
@@ -500,14 +500,14 @@ EXPORT_SYMBOL(generic_file_splice_read);
  * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
  * using sendpage(). Return the number of bytes sent.
  */
-static int pipe_to_sendpage(struct pipe_inode_info *info,
+static int pipe_to_sendpage(struct pipe_inode_info *pipe,
 			    struct pipe_buffer *buf, struct splice_desc *sd)
 {
 	struct file *file = sd->file;
 	loff_t pos = sd->pos;
 	int ret, more;
 
-	ret = buf->ops->pin(info, buf);
+	ret = buf->ops->pin(pipe, buf);
 	if (!ret) {
 		more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
 
@@ -538,7 +538,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
  * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
  * a new page in the output file page cache and fill/dirty that.
  */
-static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
+static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 			struct splice_desc *sd)
 {
 	struct file *file = sd->file;
@@ -552,7 +552,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 	/*
 	 * make sure the data in this buffer is uptodate
 	 */
-	ret = buf->ops->pin(info, buf);
+	ret = buf->ops->pin(pipe, buf);
 	if (unlikely(ret))
 		return ret;
 
@@ -573,7 +573,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 		 * pagecache and we can reuse it. The page will also be
 		 * locked on successful return.
 		 */
-		if (buf->ops->steal(info, buf))
+		if (buf->ops->steal(pipe, buf))
 			goto find_page;
 
 		page = buf->page;
@@ -659,13 +659,13 @@ find_page:
 		/*
 		 * Careful, ->map() uses KM_USER0!
 		 */
-		char *src = buf->ops->map(info, buf, 1);
+		char *src = buf->ops->map(pipe, buf, 1);
 		char *dst = kmap_atomic(page, KM_USER1);
 
 		memcpy(dst + offset, src + buf->offset, this_len);
 		flush_dcache_page(page);
 		kunmap_atomic(dst, KM_USER1);
-		buf->ops->unmap(info, buf, src);
+		buf->ops->unmap(pipe, buf, src);
 	}
 
 	ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
-- 
cgit v0.10.2


From a0548871ed267ae12eb1c860c5aaebd9e466b34e Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Wed, 3 May 2006 10:58:22 +0200
Subject: [PATCH] splice: redo page lookup if add_to_page_cache() returns
 -EEXIST

This can happen quite easily, if several processes are trying to splice
the same file at the same time. It's not a failure, it just means someone
raced with us in allocating this file page. So just dump the allocated
page and relookup the original.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/splice.c b/fs/splice.c
index 8fa9217..a285fd7 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -324,6 +324,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 					      mapping_gfp_mask(mapping));
 			if (unlikely(error)) {
 				page_cache_release(page);
+				if (error == -EEXIST)
+					continue;
 				break;
 			}
 			/*
-- 
cgit v0.10.2


From 6fd737031eb6869430d0f3cf6bf1440adf7aedf5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 3 May 2006 23:16:29 -0700
Subject: [NETFILTER]: H.323 helper: fix endless loop caused by invalid TPKT
 len

When the TPKT len included in the packet is below the lowest valid value
of 4 an underflow occurs which results in an endless loop.

Found by testcase 0000058 from the PROTOS c07-h2250v4 testsuite.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
index 2c2fb70..518f581 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
@@ -162,6 +162,8 @@ static int get_tpkt_data(struct sk_buff **pskb, struct ip_conntrack *ct,
 
 	/* Validate TPKT length */
 	tpktlen = tpkt[2] * 256 + tpkt[3];
+	if (tpktlen < 4)
+		goto clear_out;
 	if (tpktlen > tcpdatalen) {
 		if (tcpdatalen == 4) {	/* Separate TPKT header */
 			/* Netmeeting sends TPKT header and data separately */
-- 
cgit v0.10.2


From 4228e2a9890cd01b0c8cc58af6fd9e08a4b5e8a7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 3 May 2006 23:17:11 -0700
Subject: [NETFILTER]: H.323 helper: fix use of uninitialized data

When a Choice element contains an unsupported choice no error is returned
and parsing continues normally, but the choice value is not set and
contains data from the last parsed message. This may in turn lead to
parsing of more stale data and following crashes.

Fixes a crash triggered by testcase 0003243 from the PROTOS c07-h2250v4
testsuite following random other testcases:

CPU:    0
EIP:    0060:[<c01a9554>]    Not tainted VLI
EFLAGS: 00210646   (2.6.17-rc2 #3)
EIP is at memmove+0x19/0x22
eax: d7be0307   ebx: d7be0307   ecx: e841fcf9   edx: d7be0307
esi: bfffffff   edi: bfffffff   ebp: da5eb980   esp: c0347e2c
ds: 007b   es: 007b   ss: 0068
Process events/0 (pid: 4, threadinfo=c0347000 task=dff86a90)
Stack: <0>00000006 c0347ea6 d7be0301 e09a6b2c 00000006 da5eb980 d7be003e d7be0052
       c0347f6c e09a6d9c 00000006 c0347ea6 00000006 00000000 d7b9a548 00000000
       c0347f6c d7b9a548 00000004 e0a1a119 0000028f 00000006 c0347ea6 00000006
Call Trace:
 [<e09a6b2c>] mangle_contents+0x40/0xd8 [ip_nat]
 [<e09a6d9c>] ip_nat_mangle_tcp_packet+0xa1/0x191 [ip_nat]
 [<e0a1a119>] set_addr+0x60/0x14d [ip_nat_h323]
 [<e0ab6e66>] q931_help+0x2da/0x71a [ip_conntrack_h323]
 [<e0ab6e98>] q931_help+0x30c/0x71a [ip_conntrack_h323]
 [<e09af242>] ip_conntrack_help+0x22/0x2f [ip_conntrack]
 [<c022934a>] nf_iterate+0x2e/0x5f
 [<c025d357>] xfrm4_output_finish+0x0/0x39f
 [<c02294ce>] nf_hook_slow+0x42/0xb0
 [<c025d357>] xfrm4_output_finish+0x0/0x39f
 [<c025d732>] xfrm4_output+0x3c/0x4e
 [<c025d357>] xfrm4_output_finish+0x0/0x39f
 [<c0230370>] ip_forward+0x1c2/0x1fa
 [<c022f417>] ip_rcv+0x388/0x3b5
 [<c02188f9>] netif_receive_skb+0x2bc/0x2ec
 [<c0218994>] process_backlog+0x6b/0xd0
 [<c021675a>] net_rx_action+0x4b/0xb7
 [<c0115606>] __do_softirq+0x35/0x7d
 [<c0104294>] do_softirq+0x38/0x3f

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c b/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c
index 4807800..f52d2c4 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c
@@ -703,6 +703,10 @@ int decode_choice(bitstr_t * bs, field_t * f, char *base, int level)
 		type = get_bits(bs, f->sz);
 	}
 
+	/* Write Type */
+	if (base)
+		*(unsigned *) base = type;
+
 	/* Check Range */
 	if (type >= f->ub) {	/* Newer version? */
 		BYTE_ALIGN(bs);
@@ -712,10 +716,6 @@ int decode_choice(bitstr_t * bs, field_t * f, char *base, int level)
 		return H323_ERROR_NONE;
 	}
 
-	/* Write Type */
-	if (base)
-		*(unsigned *) base = type;
-
 	/* Transfer to son level */
 	son = &f->fields[type];
 	if (son->attr & STOP) {
-- 
cgit v0.10.2


From 2354feaeb2acb78f6aabdf8410d55b44492a7949 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 3 May 2006 23:19:26 -0700
Subject: [NETFILTER]: NAT: silence unused variable warnings with CONFIG_XFRM=n

net/ipv4/netfilter/ip_nat_standalone.c: In function 'ip_nat_out':
net/ipv4/netfilter/ip_nat_standalone.c:223: warning: unused variable 'ctinfo'
net/ipv4/netfilter/ip_nat_standalone.c:222: warning: unused variable 'ct'

Surprisingly no complaints so far ..

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 8f760b2..67e6767 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -219,8 +219,10 @@ ip_nat_out(unsigned int hooknum,
 	   const struct net_device *out,
 	   int (*okfn)(struct sk_buff *))
 {
+#ifdef CONFIG_XFRM
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
+#endif
 	unsigned int ret;
 
 	/* root is playing with raw sockets. */
-- 
cgit v0.10.2


From 7582e9d17edbabab6cbe59467c5d1b5e8c04fca8 Mon Sep 17 00:00:00 2001
From: Jing Min Zhao <zhaojingmin@users.sourceforge.net>
Date: Wed, 3 May 2006 23:19:59 -0700
Subject: [NETFILTER]: H.323 helper: Change author's email address

Signed-off-by: Jing Min Zhao <zhaojingmin@users.sourceforge.net>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper_h323_asn1.h b/include/linux/netfilter_ipv4/ip_conntrack_helper_h323_asn1.h
index 0bd8280..c6e9a0b 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_helper_h323_asn1.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_helper_h323_asn1.h
@@ -2,7 +2,7 @@
  * ip_conntrack_helper_h323_asn1.h - BER and PER decoding library for H.323
  * 			      	     conntrack/NAT module.
  *
- * Copyright (c) 2006 by Jing Min Zhao <zhaojingmin@hotmail.com>
+ * Copyright (c) 2006 by Jing Min Zhao <zhaojingmin@users.sourceforge.net>
  *
  * This source code is licensed under General Public License version 2.
  *
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c b/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c
index f52d2c4..355a53a 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_h323_asn1.c
@@ -2,7 +2,7 @@
  * ip_conntrack_helper_h323_asn1.c - BER and PER decoding library for H.323
  * 			      	     conntrack/NAT module.
  *
- * Copyright (c) 2006 by Jing Min Zhao <zhaojingmin@hotmail.com>
+ * Copyright (c) 2006 by Jing Min Zhao <zhaojingmin@users.sourceforge.net>
  *
  * This source code is licensed under General Public License version 2.
  *
-- 
cgit v0.10.2


From 7800007c1e2d42cd4120b87b0ba3f3480f17f30a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 3 May 2006 23:20:27 -0700
Subject: [NETFILTER]: x_tables: don't use __copy_{from,to}_user on unchecked
 memory in compat layer

Noticed by Linus Torvalds <torvalds@osdl.org>

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 6d1c115..cee3397 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1441,7 +1441,7 @@ static int compat_copy_entry_to_user(struct ipt_entry *e,
 	ret = -EFAULT;
 	origsize = *size;
 	ce = (struct compat_ipt_entry __user *)*dstptr;
-	if (__copy_to_user(ce, e, sizeof(struct ipt_entry)))
+	if (copy_to_user(ce, e, sizeof(struct ipt_entry)))
 		goto out;
 
 	*dstptr += sizeof(struct compat_ipt_entry);
@@ -1459,9 +1459,9 @@ static int compat_copy_entry_to_user(struct ipt_entry *e,
 		goto out;
 	ret = -EFAULT;
 	next_offset = e->next_offset - (origsize - *size);
-	if (__put_user(target_offset, &ce->target_offset))
+	if (put_user(target_offset, &ce->target_offset))
 		goto out;
-	if (__put_user(next_offset, &ce->next_offset))
+	if (put_user(next_offset, &ce->next_offset))
 		goto out;
 	return 0;
 out:
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 17abf60..99293c6 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -289,7 +289,7 @@ int xt_compat_match(void *match, void **dstptr, int *size, int convert)
 		case COMPAT_TO_USER:
 			pm = (struct xt_entry_match *)match;
 			msize = pm->u.user.match_size;
-			if (__copy_to_user(*dstptr, pm, msize)) {
+			if (copy_to_user(*dstptr, pm, msize)) {
 				ret = -EFAULT;
 				break;
 			}
@@ -366,7 +366,7 @@ int xt_compat_target(void *target, void **dstptr, int *size, int convert)
 		case COMPAT_TO_USER:
 			pt = (struct xt_entry_target *)target;
 			tsize = pt->u.user.target_size;
-			if (__copy_to_user(*dstptr, pt, tsize)) {
+			if (copy_to_user(*dstptr, pt, tsize)) {
 				ret = -EFAULT;
 				break;
 			}
-- 
cgit v0.10.2


From 0cc5ae24af08abe8e2a467f45b54c48a0f52670f Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 3 May 2006 23:22:01 -0700
Subject: [ROSE]: Remove useless prototype for rose_remove_neigh().

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 8631b65..4cb6bfa 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -48,8 +48,6 @@ static DEFINE_SPINLOCK(rose_route_list_lock);
 
 struct rose_neigh *rose_loopback_neigh;
 
-static void rose_remove_neigh(struct rose_neigh *);
-
 /*
  *	Add a new route to a node, and in the process add the node and the
  *	neighbour if it is new.
-- 
cgit v0.10.2


From 3f072310d0ca85891323e9d325c37c76de389387 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 3 May 2006 23:22:36 -0700
Subject: [AX.25]: Spelling fix

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index f04f863..5ac9825 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -360,7 +360,7 @@ struct file_operations ax25_route_fops = {
 /*
  *	Find AX.25 route
  *
- *	Only routes with a refernce rout of zero can be destroyed.
+ *	Only routes with a reference count of zero can be destroyed.
  */
 static ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev)
 {
-- 
cgit v0.10.2


From 86cfcb95ec60e910d7efcb35ae89bf3403befaad Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 3 May 2006 23:23:48 -0700
Subject: [AX25, ROSE]: Remove useless SET_MODULE_OWNER calls.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 509afdd..621e558 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -185,7 +185,6 @@ static struct net_device_stats *nr_get_stats(struct net_device *dev)
 
 void nr_setup(struct net_device *dev)
 {
-	SET_MODULE_OWNER(dev);
 	dev->mtu		= NR_MAX_PACKET_SIZE;
 	dev->hard_start_xmit	= nr_xmit;
 	dev->open		= nr_open;
diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c
index d297af7..2a1bf8e 100644
--- a/net/rose/rose_dev.c
+++ b/net/rose/rose_dev.c
@@ -135,7 +135,6 @@ static struct net_device_stats *rose_get_stats(struct net_device *dev)
 
 void rose_setup(struct net_device *dev)
 {
-	SET_MODULE_OWNER(dev);
 	dev->mtu		= ROSE_MAX_PACKET_SIZE - 2;
 	dev->hard_start_xmit	= rose_xmit;
 	dev->open		= rose_open;
-- 
cgit v0.10.2


From 3ab33dcc82e014c69ebad3b524d0053378ef76c3 Mon Sep 17 00:00:00 2001
From: Ralf Baechle DL5RB <ralf@linux-mips.org>
Date: Wed, 3 May 2006 23:24:35 -0700
Subject: [HAMRADIO]: Remove remaining SET_MODULE_OWNER calls from hamradio
 drivers.

Signed-off-by: Ralf Baechle DL5RB <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c
index 79a8fbc..0d5fccc 100644
--- a/drivers/net/hamradio/dmascc.c
+++ b/drivers/net/hamradio/dmascc.c
@@ -582,7 +582,6 @@ static int __init setup_adapter(int card_base, int type, int n)
 		INIT_WORK(&priv->rx_work, rx_bh, priv);
 		dev->priv = priv;
 		sprintf(dev->name, "dmascc%i", 2 * n + i);
-		SET_MODULE_OWNER(dev);
 		dev->base_addr = card_base;
 		dev->irq = irq;
 		dev->open = scc_open;
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index 6ace0e9..5927784 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -1550,7 +1550,6 @@ static unsigned char ax25_nocall[AX25_ADDR_LEN] =
 
 static void scc_net_setup(struct net_device *dev)
 {
-	SET_MODULE_OWNER(dev);
 	dev->tx_queue_len    = 16;	/* should be enough... */
 
 	dev->open            = scc_net_open;
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index fe22479..b498840 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -1098,7 +1098,6 @@ static void yam_setup(struct net_device *dev)
 
 	dev->base_addr = yp->iobase;
 	dev->irq = yp->irq;
-	SET_MODULE_OWNER(dev);
 
 	dev->open = yam_open;
 	dev->stop = yam_close;
-- 
cgit v0.10.2


From 70868eace5031298c6f6e991a40a2106957f582c Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 3 May 2006 23:25:17 -0700
Subject: [AX.25]: Move AX.25 symbol exports

Move AX.25 symbol exports to next to their definitions where they're
supposed to be these days.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index dbf9b47..a9f13df 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -228,6 +228,8 @@ ax25_cb *ax25_find_cb(ax25_address *src_addr, ax25_address *dest_addr,
 	return NULL;
 }
 
+EXPORT_SYMBOL(ax25_find_cb);
+
 void ax25_send_to_raw(ax25_address *addr, struct sk_buff *skb, int proto)
 {
 	ax25_cb *s;
@@ -1979,24 +1981,6 @@ static struct notifier_block ax25_dev_notifier = {
 	.notifier_call =ax25_device_event,
 };
 
-EXPORT_SYMBOL(ax25_hard_header);
-EXPORT_SYMBOL(ax25_rebuild_header);
-EXPORT_SYMBOL(ax25_findbyuid);
-EXPORT_SYMBOL(ax25_find_cb);
-EXPORT_SYMBOL(ax25_linkfail_register);
-EXPORT_SYMBOL(ax25_linkfail_release);
-EXPORT_SYMBOL(ax25_listen_register);
-EXPORT_SYMBOL(ax25_listen_release);
-EXPORT_SYMBOL(ax25_protocol_register);
-EXPORT_SYMBOL(ax25_protocol_release);
-EXPORT_SYMBOL(ax25_send_frame);
-EXPORT_SYMBOL(ax25_uid_policy);
-EXPORT_SYMBOL(ax25cmp);
-EXPORT_SYMBOL(ax2asc);
-EXPORT_SYMBOL(asc2ax);
-EXPORT_SYMBOL(null_ax25_address);
-EXPORT_SYMBOL(ax25_display_timer);
-
 static int __init ax25_init(void)
 {
 	int rc = proto_register(&ax25_proto, 0);
diff --git a/net/ax25/ax25_addr.c b/net/ax25/ax25_addr.c
index 0164a15..5f0896a 100644
--- a/net/ax25/ax25_addr.c
+++ b/net/ax25/ax25_addr.c
@@ -11,6 +11,7 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/timer.h>
 #include <linux/string.h>
@@ -33,6 +34,8 @@
  */
 ax25_address null_ax25_address = {{0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00}};
 
+EXPORT_SYMBOL(null_ax25_address);
+
 /*
  *	ax25 -> ascii conversion
  */
@@ -64,6 +67,8 @@ char *ax2asc(char *buf, ax25_address *a)
 
 }
 
+EXPORT_SYMBOL(ax2asc);
+
 /*
  *	ascii -> ax25 conversion
  */
@@ -97,6 +102,8 @@ void asc2ax(ax25_address *addr, char *callsign)
 	addr->ax25_call[6] &= 0x1E;
 }
 
+EXPORT_SYMBOL(asc2ax);
+
 /*
  *	Compare two ax.25 addresses
  */
@@ -116,6 +123,8 @@ int ax25cmp(ax25_address *a, ax25_address *b)
  	return 2;			/* Partial match */
 }
 
+EXPORT_SYMBOL(ax25cmp);
+
 /*
  *	Compare two AX.25 digipeater paths.
  */
diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c
index d68aff1..3bb1527 100644
--- a/net/ax25/ax25_iface.c
+++ b/net/ax25/ax25_iface.c
@@ -12,6 +12,7 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/timer.h>
@@ -74,6 +75,8 @@ int ax25_protocol_register(unsigned int pid,
 	return 1;
 }
 
+EXPORT_SYMBOL(ax25_protocol_register);
+
 void ax25_protocol_release(unsigned int pid)
 {
 	struct protocol_struct *s, *protocol;
@@ -106,6 +109,8 @@ void ax25_protocol_release(unsigned int pid)
 	write_unlock(&protocol_list_lock);
 }
 
+EXPORT_SYMBOL(ax25_protocol_release);
+
 int ax25_linkfail_register(void (*func)(ax25_cb *, int))
 {
 	struct linkfail_struct *linkfail;
@@ -123,6 +128,8 @@ int ax25_linkfail_register(void (*func)(ax25_cb *, int))
 	return 1;
 }
 
+EXPORT_SYMBOL(ax25_linkfail_register);
+
 void ax25_linkfail_release(void (*func)(ax25_cb *, int))
 {
 	struct linkfail_struct *s, *linkfail;
@@ -155,6 +162,8 @@ void ax25_linkfail_release(void (*func)(ax25_cb *, int))
 	spin_unlock_bh(&linkfail_lock);
 }
 
+EXPORT_SYMBOL(ax25_linkfail_release);
+
 int ax25_listen_register(ax25_address *callsign, struct net_device *dev)
 {
 	struct listen_struct *listen;
@@ -176,6 +185,8 @@ int ax25_listen_register(ax25_address *callsign, struct net_device *dev)
 	return 1;
 }
 
+EXPORT_SYMBOL(ax25_listen_register);
+
 void ax25_listen_release(ax25_address *callsign, struct net_device *dev)
 {
 	struct listen_struct *s, *listen;
@@ -208,6 +219,8 @@ void ax25_listen_release(ax25_address *callsign, struct net_device *dev)
 	spin_unlock_bh(&listen_lock);
 }
 
+EXPORT_SYMBOL(ax25_listen_release);
+
 int (*ax25_protocol_function(unsigned int pid))(struct sk_buff *, ax25_cb *)
 {
 	int (*res)(struct sk_buff *, ax25_cb *) = NULL;
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index d643dac..a0b534f 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -12,6 +12,7 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/timer.h>
 #include <linux/string.h>
@@ -221,3 +222,5 @@ int ax25_rebuild_header(struct sk_buff *skb)
 
 #endif
 
+EXPORT_SYMBOL(ax25_hard_header);
+EXPORT_SYMBOL(ax25_rebuild_header);
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 5fc048d..5d99852 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -14,6 +14,7 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/timer.h>
 #include <linux/string.h>
@@ -104,6 +105,8 @@ ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, ax25_address *src, ax2
 	return ax25;			/* We had to create it */
 }
 
+EXPORT_SYMBOL(ax25_send_frame);
+
 /*
  *	All outgoing AX.25 I frames pass via this routine. Therefore this is
  *	where the fragmentation of frames takes place. If fragment is set to
diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c
index 7a6b50a..ec25405 100644
--- a/net/ax25/ax25_timer.c
+++ b/net/ax25/ax25_timer.c
@@ -18,6 +18,7 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/jiffies.h>
 #include <linux/timer.h>
 #include <linux/string.h>
@@ -137,6 +138,8 @@ unsigned long ax25_display_timer(struct timer_list *timer)
 	return timer->expires - jiffies;
 }
 
+EXPORT_SYMBOL(ax25_display_timer);
+
 static void ax25_heartbeat_expiry(unsigned long param)
 {
 	int proto = AX25_PROTO_STD_SIMPLEX;
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index b8b5854..5e9a81e 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -49,6 +49,8 @@ static DEFINE_RWLOCK(ax25_uid_lock);
 
 int ax25_uid_policy = 0;
 
+EXPORT_SYMBOL(ax25_uid_policy);
+
 ax25_uid_assoc *ax25_findbyuid(uid_t uid)
 {
 	ax25_uid_assoc *ax25_uid, *res = NULL;
@@ -67,6 +69,8 @@ ax25_uid_assoc *ax25_findbyuid(uid_t uid)
 	return res;
 }
 
+EXPORT_SYMBOL(ax25_findbyuid);
+
 int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
 {
 	ax25_uid_assoc *ax25_uid;
-- 
cgit v0.10.2


From 4cc7c2734e2b4032103e47d8f3e8b6fa3360d3f1 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 3 May 2006 23:26:20 -0700
Subject: [ROSE]: Fix routing table locking in rose_remove_neigh.

The locking rule for rose_remove_neigh() are that the caller needs to
hold rose_neigh_list_lock, so we better don't take it yet again in
rose_neigh_list_lock.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 4cb6bfa..a22542f 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -233,11 +233,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh)
 
 	skb_queue_purge(&rose_neigh->queue);
 
-	spin_lock_bh(&rose_neigh_list_lock);
-
 	if ((s = rose_neigh_list) == rose_neigh) {
 		rose_neigh_list = rose_neigh->next;
-		spin_unlock_bh(&rose_neigh_list_lock);
 		kfree(rose_neigh->digipeat);
 		kfree(rose_neigh);
 		return;
@@ -246,7 +243,6 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh)
 	while (s != NULL && s->next != NULL) {
 		if (s->next == rose_neigh) {
 			s->next = rose_neigh->next;
-			spin_unlock_bh(&rose_neigh_list_lock);
 			kfree(rose_neigh->digipeat);
 			kfree(rose_neigh);
 			return;
@@ -254,7 +250,6 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh)
 
 		s = s->next;
 	}
-	spin_unlock_bh(&rose_neigh_list_lock);
 }
 
 /*
-- 
cgit v0.10.2


From e1fdb5b39656ea2be8cadde565e543649a988af9 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 3 May 2006 23:27:16 -0700
Subject: [AX.25]: Eleminate HZ from AX.25 kernel interfaces

Convert all AX.25 sysctl time values from jiffies to ms as units.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/ax25.h b/include/net/ax25.h
index d052b22..5bd9974 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -145,14 +145,14 @@ enum {
 #define	AX25_DEF_CONMODE	2			/* Connected mode allowed */
 #define	AX25_DEF_WINDOW		2			/* Window=2 */
 #define	AX25_DEF_EWINDOW	32			/* Module-128 Window=32 */
-#define	AX25_DEF_T1		(10 * HZ)		/* T1=10s */
-#define	AX25_DEF_T2		(3 * HZ)		/* T2=3s  */
-#define	AX25_DEF_T3		(300 * HZ)		/* T3=300s */
+#define	AX25_DEF_T1		10000			/* T1=10s */
+#define	AX25_DEF_T2		3000			/* T2=3s  */
+#define	AX25_DEF_T3		300000			/* T3=300s */
 #define	AX25_DEF_N2		10			/* N2=10 */
-#define AX25_DEF_IDLE		(0 * 60 * HZ)		/* Idle=None */
+#define AX25_DEF_IDLE		0			/* Idle=None */
 #define AX25_DEF_PACLEN		256			/* Paclen=256 */
 #define	AX25_DEF_PROTOCOL	AX25_PROTO_STD_SIMPLEX	/* Standard AX.25 */
-#define AX25_DEF_DS_TIMEOUT	(3 * 60 * HZ)		/* DAMA timeout 3 minutes */
+#define AX25_DEF_DS_TIMEOUT	180000			/* DAMA timeout 3 minutes */
 
 typedef struct ax25_uid_assoc {
 	struct hlist_node	uid_node;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index a9f13df..a2e0dd0 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -426,6 +426,26 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
 	return 0;
 }
 
+static void ax25_fillin_cb_from_dev(ax25_cb *ax25, ax25_dev *ax25_dev)
+{
+	ax25->rtt     = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T1]) / 2;
+	ax25->t1      = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T1]);
+	ax25->t2      = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T2]);
+	ax25->t3      = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T3]);
+	ax25->n2      = ax25_dev->values[AX25_VALUES_N2];
+	ax25->paclen  = ax25_dev->values[AX25_VALUES_PACLEN];
+	ax25->idle    = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_IDLE]);
+	ax25->backoff = ax25_dev->values[AX25_VALUES_BACKOFF];
+
+	if (ax25_dev->values[AX25_VALUES_AXDEFMODE]) {
+		ax25->modulus = AX25_EMODULUS;
+		ax25->window  = ax25_dev->values[AX25_VALUES_EWINDOW];
+	} else {
+		ax25->modulus = AX25_MODULUS;
+		ax25->window  = ax25_dev->values[AX25_VALUES_WINDOW];
+	}
+}
+
 /*
  *	Fill in a created AX.25 created control block with the default
  *	values for a particular device.
@@ -435,39 +455,28 @@ void ax25_fillin_cb(ax25_cb *ax25, ax25_dev *ax25_dev)
 	ax25->ax25_dev = ax25_dev;
 
 	if (ax25->ax25_dev != NULL) {
-		ax25->rtt     = ax25_dev->values[AX25_VALUES_T1] / 2;
-		ax25->t1      = ax25_dev->values[AX25_VALUES_T1];
-		ax25->t2      = ax25_dev->values[AX25_VALUES_T2];
-		ax25->t3      = ax25_dev->values[AX25_VALUES_T3];
-		ax25->n2      = ax25_dev->values[AX25_VALUES_N2];
-		ax25->paclen  = ax25_dev->values[AX25_VALUES_PACLEN];
-		ax25->idle    = ax25_dev->values[AX25_VALUES_IDLE];
-		ax25->backoff = ax25_dev->values[AX25_VALUES_BACKOFF];
-
-		if (ax25_dev->values[AX25_VALUES_AXDEFMODE]) {
-			ax25->modulus = AX25_EMODULUS;
-			ax25->window  = ax25_dev->values[AX25_VALUES_EWINDOW];
-		} else {
-			ax25->modulus = AX25_MODULUS;
-			ax25->window  = ax25_dev->values[AX25_VALUES_WINDOW];
-		}
+		ax25_fillin_cb_from_dev(ax25, ax25_dev);
+		return;
+	}
+
+	/*
+	 * No device, use kernel / AX.25 spec default values
+	 */
+	ax25->rtt     = msecs_to_jiffies(AX25_DEF_T1) / 2;
+	ax25->t1      = msecs_to_jiffies(AX25_DEF_T1);
+	ax25->t2      = msecs_to_jiffies(AX25_DEF_T2);
+	ax25->t3      = msecs_to_jiffies(AX25_DEF_T3);
+	ax25->n2      = AX25_DEF_N2;
+	ax25->paclen  = AX25_DEF_PACLEN;
+	ax25->idle    = msecs_to_jiffies(AX25_DEF_IDLE);
+	ax25->backoff = AX25_DEF_BACKOFF;
+
+	if (AX25_DEF_AXDEFMODE) {
+		ax25->modulus = AX25_EMODULUS;
+		ax25->window  = AX25_DEF_EWINDOW;
 	} else {
-		ax25->rtt     = AX25_DEF_T1 / 2;
-		ax25->t1      = AX25_DEF_T1;
-		ax25->t2      = AX25_DEF_T2;
-		ax25->t3      = AX25_DEF_T3;
-		ax25->n2      = AX25_DEF_N2;
-		ax25->paclen  = AX25_DEF_PACLEN;
-		ax25->idle    = AX25_DEF_IDLE;
-		ax25->backoff = AX25_DEF_BACKOFF;
-
-		if (AX25_DEF_AXDEFMODE) {
-			ax25->modulus = AX25_EMODULUS;
-			ax25->window  = AX25_DEF_EWINDOW;
-		} else {
-			ax25->modulus = AX25_MODULUS;
-			ax25->window  = AX25_DEF_WINDOW;
-		}
+		ax25->modulus = AX25_MODULUS;
+		ax25->window  = AX25_DEF_WINDOW;
 	}
 }
 
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 061083e..5961459 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -61,7 +61,8 @@ void ax25_ds_set_timer(ax25_dev *ax25_dev)
 		return;
 
 	del_timer(&ax25_dev->dama.slave_timer);
-	ax25_dev->dama.slave_timeout = ax25_dev->values[AX25_VALUES_DS_TIMEOUT] / 10;
+	ax25_dev->dama.slave_timeout =
+		msecs_to_jiffies(ax25_dev->values[AX25_VALUES_DS_TIMEOUT]) / 10;
 	ax25_ds_add_timer(ax25_dev);
 }
 
diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c
index 894a225..bdb64c3 100644
--- a/net/ax25/sysctl_net_ax25.c
+++ b/net/ax25/sysctl_net_ax25.c
@@ -18,14 +18,14 @@ static int min_backoff[1],		max_backoff[] = {2};
 static int min_conmode[1],		max_conmode[] = {2};
 static int min_window[] = {1},		max_window[] = {7};
 static int min_ewindow[] = {1},		max_ewindow[] = {63};
-static int min_t1[] = {1},		max_t1[] = {30 * HZ};
-static int min_t2[] = {1},		max_t2[] = {20 * HZ};
-static int min_t3[1],   		max_t3[] = {3600 * HZ};
-static int min_idle[1],  		max_idle[] = {65535 * HZ};
+static int min_t1[] = {1},		max_t1[] = {30000};
+static int min_t2[] = {1},		max_t2[] = {20000};
+static int min_t3[1],			max_t3[] = {3600000};
+static int min_idle[1],			max_idle[] = {65535000};
 static int min_n2[] = {1},		max_n2[] = {31};
 static int min_paclen[] = {1},		max_paclen[] = {512};
 static int min_proto[1],		max_proto[] = { AX25_PROTO_MAX };
-static int min_ds_timeout[1],   	max_ds_timeout[] = {65535 * HZ};
+static int min_ds_timeout[1],		max_ds_timeout[] = {65535000};
 
 static struct ctl_table_header *ax25_table_header;
 
-- 
cgit v0.10.2


From 4d8937d0b113e8ec39f7d18cf13804f3b5fb8fd4 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 3 May 2006 23:27:47 -0700
Subject: [NETROM]: Eleminate HZ from NET/ROM kernel interfaces

Convert all NET/ROM sysctl time values from jiffies to ms as units.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/netrom.h b/include/net/netrom.h
index a5ee53b..e0ca112 100644
--- a/include/net/netrom.h
+++ b/include/net/netrom.h
@@ -42,11 +42,11 @@ enum {
 #define	NR_COND_PEER_RX_BUSY		0x04
 #define	NR_COND_OWN_RX_BUSY		0x08
 
-#define NR_DEFAULT_T1			(120 * HZ)	/* Outstanding frames - 120 seconds */
-#define NR_DEFAULT_T2			(5   * HZ)	/* Response delay     - 5 seconds */
+#define NR_DEFAULT_T1			120000		/* Outstanding frames - 120 seconds */
+#define NR_DEFAULT_T2			5000		/* Response delay     - 5 seconds */
 #define NR_DEFAULT_N2			3		/* Number of Retries - 3 */
-#define	NR_DEFAULT_T4			(180 * HZ)	/* Busy Delay - 180 seconds */
-#define	NR_DEFAULT_IDLE			(0 * 60 * HZ)	/* No Activity Timeout - none */
+#define	NR_DEFAULT_T4			180000		/* Busy Delay - 180 seconds */
+#define	NR_DEFAULT_IDLE			0		/* No Activity Timeout - none */
 #define	NR_DEFAULT_WINDOW		4		/* Default Window Size - 4 */
 #define	NR_DEFAULT_OBS			6		/* Default Obsolescence Count - 6 */
 #define	NR_DEFAULT_QUAL			10		/* Default Neighbour Quality - 10 */
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index d44981f..ecd288b 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -425,11 +425,16 @@ static int nr_create(struct socket *sock, int protocol)
 
 	nr_init_timers(sk);
 
-	nr->t1     = sysctl_netrom_transport_timeout;
-	nr->t2     = sysctl_netrom_transport_acknowledge_delay;
-	nr->n2     = sysctl_netrom_transport_maximum_tries;
-	nr->t4     = sysctl_netrom_transport_busy_delay;
-	nr->idle   = sysctl_netrom_transport_no_activity_timeout;
+	nr->t1     =
+		msecs_to_jiffies(sysctl_netrom_transport_timeout);
+	nr->t2     =
+		msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay);
+	nr->n2     =
+		msecs_to_jiffies(sysctl_netrom_transport_maximum_tries);
+	nr->t4     =
+		msecs_to_jiffies(sysctl_netrom_transport_busy_delay);
+	nr->idle   =
+		msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout);
 	nr->window = sysctl_netrom_transport_requested_window_size;
 
 	nr->bpqext = 1;
-- 
cgit v0.10.2


From 82e84249f0ee098e004c8bd6d90a1640bd56cfbb Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Wed, 3 May 2006 23:28:20 -0700
Subject: [ROSE]: Eleminate HZ from ROSE kernel interfaces

Convert all ROSE sysctl time values from jiffies to ms as units.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/rose.h b/include/net/rose.h
index 3249b97..012b09e 100644
--- a/include/net/rose.h
+++ b/include/net/rose.h
@@ -49,14 +49,14 @@ enum {
 	ROSE_STATE_5			/* Deferred Call Acceptance */
 };
 
-#define ROSE_DEFAULT_T0			(180 * HZ)	/* Default T10 T20 value */
-#define ROSE_DEFAULT_T1			(200 * HZ)	/* Default T11 T21 value */
-#define ROSE_DEFAULT_T2			(180 * HZ)	/* Default T12 T22 value */
-#define	ROSE_DEFAULT_T3			(180 * HZ)	/* Default T13 T23 value */
-#define	ROSE_DEFAULT_HB			(5 * HZ)	/* Default Holdback value */
-#define	ROSE_DEFAULT_IDLE		(0 * 60 * HZ)	/* No Activity Timeout - none */
+#define ROSE_DEFAULT_T0			180000		/* Default T10 T20 value */
+#define ROSE_DEFAULT_T1			200000		/* Default T11 T21 value */
+#define ROSE_DEFAULT_T2			180000		/* Default T12 T22 value */
+#define	ROSE_DEFAULT_T3			180000		/* Default T13 T23 value */
+#define	ROSE_DEFAULT_HB			5000		/* Default Holdback value */
+#define	ROSE_DEFAULT_IDLE		0		/* No Activity Timeout - none */
 #define	ROSE_DEFAULT_ROUTING		1		/* Default routing flag */
-#define	ROSE_DEFAULT_FAIL_TIMEOUT	(120 * HZ)	/* Time until link considered usable */
+#define	ROSE_DEFAULT_FAIL_TIMEOUT	120000		/* Time until link considered usable */
 #define	ROSE_DEFAULT_MAXVC		50		/* Maximum number of VCs per neighbour */
 #define	ROSE_DEFAULT_WINDOW_SIZE	7		/* Default window size */
 
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index ea65396..ef4538a 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -518,11 +518,11 @@ static int rose_create(struct socket *sock, int protocol)
 	init_timer(&rose->timer);
 	init_timer(&rose->idletimer);
 
-	rose->t1   = sysctl_rose_call_request_timeout;
-	rose->t2   = sysctl_rose_reset_request_timeout;
-	rose->t3   = sysctl_rose_clear_request_timeout;
-	rose->hb   = sysctl_rose_ack_hold_back_timeout;
-	rose->idle = sysctl_rose_no_activity_timeout;
+	rose->t1   = msecs_to_jiffies(sysctl_rose_call_request_timeout);
+	rose->t2   = msecs_to_jiffies(sysctl_rose_reset_request_timeout);
+	rose->t3   = msecs_to_jiffies(sysctl_rose_clear_request_timeout);
+	rose->hb   = msecs_to_jiffies(sysctl_rose_ack_hold_back_timeout);
+	rose->idle = msecs_to_jiffies(sysctl_rose_no_activity_timeout);
 
 	rose->state = ROSE_STATE_0;
 
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index 09e9e9d..bd86a63 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -40,7 +40,8 @@ void rose_start_ftimer(struct rose_neigh *neigh)
 
 	neigh->ftimer.data     = (unsigned long)neigh;
 	neigh->ftimer.function = &rose_ftimer_expiry;
-	neigh->ftimer.expires  = jiffies + sysctl_rose_link_fail_timeout;
+	neigh->ftimer.expires  =
+		jiffies + msecs_to_jiffies(sysctl_rose_link_fail_timeout);
 
 	add_timer(&neigh->ftimer);
 }
@@ -51,7 +52,8 @@ static void rose_start_t0timer(struct rose_neigh *neigh)
 
 	neigh->t0timer.data     = (unsigned long)neigh;
 	neigh->t0timer.function = &rose_t0timer_expiry;
-	neigh->t0timer.expires  = jiffies + sysctl_rose_restart_request_timeout;
+	neigh->t0timer.expires  =
+		jiffies + msecs_to_jiffies(sysctl_rose_restart_request_timeout);
 
 	add_timer(&neigh->t0timer);
 }
-- 
cgit v0.10.2


From 75c2d9077c63ac21488129cc23561d4f4fd0f5e5 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 3 May 2006 23:31:35 -0700
Subject: [TCP]: Fix sock_orphan dead lock

Calling sock_orphan inside bh_lock_sock in tcp_close can lead to dead
locks.  For example, the inet_diag code holds sk_callback_lock without
disabling BH.  If an inbound packet arrives during that admittedly tiny
window, it will cause a dead lock on bh_lock_sock.  Another possible
path would be through sock_wfree if the network device driver frees the
tx skb in process context with BH enabled.

We can fix this by moving sock_orphan out of bh_lock_sock.

The tricky bit is to work out when we need to destroy the socket
ourselves and when it has already been destroyed by someone else.

By moving sock_orphan before the release_sock we can solve this
problem.  This is because as long as we own the socket lock its
state cannot change.

So we simply record the socket state before the release_sock
and then check the state again after we regain the socket lock.
If the socket state has transitioned to TCP_CLOSE in the time being,
we know that the socket has been destroyed.  Otherwise the socket is
still ours to keep.

Note that I've also moved the increment on the orphan count forward.
This may look like a problem as we're increasing it even if the socket
is just about to be destroyed where it'll be decreased again.  However,
this simply enlarges a window that already exists.  This also changes
the orphan count test by one.

Considering what the orphan count is meant to do this is no big deal.

This problem was discoverd by Ingo Molnar using his lock validator.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 87f68e7..e2b7b80 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1468,6 +1468,7 @@ void tcp_close(struct sock *sk, long timeout)
 {
 	struct sk_buff *skb;
 	int data_was_unread = 0;
+	int state;
 
 	lock_sock(sk);
 	sk->sk_shutdown = SHUTDOWN_MASK;
@@ -1544,6 +1545,11 @@ void tcp_close(struct sock *sk, long timeout)
 	sk_stream_wait_close(sk, timeout);
 
 adjudge_to_death:
+	state = sk->sk_state;
+	sock_hold(sk);
+	sock_orphan(sk);
+	atomic_inc(sk->sk_prot->orphan_count);
+
 	/* It is the last release_sock in its life. It will remove backlog. */
 	release_sock(sk);
 
@@ -1555,8 +1561,9 @@ adjudge_to_death:
 	bh_lock_sock(sk);
 	BUG_TRAP(!sock_owned_by_user(sk));
 
-	sock_hold(sk);
-	sock_orphan(sk);
+	/* Have we already been destroyed by a softirq or backlog? */
+	if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
+		goto out;
 
 	/*	This is a (useful) BSD violating of the RFC. There is a
 	 *	problem with TCP as specified in that the other end could
@@ -1584,7 +1591,6 @@ adjudge_to_death:
 			if (tmo > TCP_TIMEWAIT_LEN) {
 				inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk));
 			} else {
-				atomic_inc(sk->sk_prot->orphan_count);
 				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
 				goto out;
 			}
@@ -1603,7 +1609,6 @@ adjudge_to_death:
 			NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
 		}
 	}
-	atomic_inc(sk->sk_prot->orphan_count);
 
 	if (sk->sk_state == TCP_CLOSE)
 		inet_csk_destroy_sock(sk);
-- 
cgit v0.10.2


From d1a649838802edd94b6335834919463c6ae61f40 Mon Sep 17 00:00:00 2001
From: Patrick Caulfield <patrick@tykepenguin.com>
Date: Wed, 3 May 2006 23:36:23 -0700
Subject: [DECNET]: Fix level1 router hello

This patch fixes hello messages sent when a node is a level 1
router. Slightly contrary to the spec (maybe) VMS ignores hello
messages that do not name level2 routers that it also knows about.

So, here we simply name all the routers that the node knows about
rather just other level1 routers.  (I hope the patch is clearer than
the description. sorry).

Signed-off-by: Patrick Caulfield <patrick@tykepenguin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 7c8692c..66e230c 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -493,7 +493,6 @@ struct elist_cb_state {
 static void neigh_elist_cb(struct neighbour *neigh, void *_info)
 {
 	struct elist_cb_state *s = _info;
-	struct dn_dev *dn_db;
 	struct dn_neigh *dn;
 
 	if (neigh->dev != s->dev)
@@ -503,10 +502,6 @@ static void neigh_elist_cb(struct neighbour *neigh, void *_info)
 	if (!(dn->flags & (DN_NDFLAG_R1|DN_NDFLAG_R2)))
 		return;
 
-	dn_db = (struct dn_dev *) s->dev->dn_ptr;
-	if (dn_db->parms.forwarding == 1 && (dn->flags & DN_NDFLAG_R2))
-		return;
-
 	if (s->t == s->n)
 		s->rs = dn_find_slot(s->ptr, s->n, dn->priority);
 	else
-- 
cgit v0.10.2


From 98232d504db0a1f91ecaa93686ed3bf61963103b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Thu, 4 May 2006 09:13:49 +0200
Subject: [PATCH] compat_sys_vmsplice: one-off in UIO_MAXIOV check

nr_segs may not be > UIO_MAXIOV, however it may be equal to. This makes
the behaviour identical to the real sys_vmsplice(). The other foov
syscalls also agree that this is the way to go.

Signed-off-by: Jens Axboe <axboe@suse.de>

diff --git a/fs/compat.c b/fs/compat.c
index 3f3e8f4..970888a 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1323,7 +1323,7 @@ compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
 {
 	unsigned i;
 	struct iovec *iov;
-	if (nr_segs >= UIO_MAXIOV)
+	if (nr_segs > UIO_MAXIOV)
 		return -EINVAL;
 	iov = compat_alloc_user_space(nr_segs * sizeof(struct iovec));
 	for (i = 0; i < nr_segs; i++) {
-- 
cgit v0.10.2


From fe10c6abea8bc83291a13e0580b3e4c355710b09 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Thu, 4 May 2006 13:51:45 +0100
Subject: [MMC] Correct mmc_request_done comments

mmc_request_done should be called at the end of handling a request, not
between the data and initial command parts of the request.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c
index 05aa4d6..91c6b10 100644
--- a/drivers/mmc/mmc.c
+++ b/drivers/mmc/mmc.c
@@ -59,13 +59,12 @@ static const unsigned int tacc_mant[] = {
 
 
 /**
- *	mmc_request_done - finish processing an MMC command
- *	@host: MMC host which completed command
- *	@mrq: MMC request which completed
+ *	mmc_request_done - finish processing an MMC request
+ *	@host: MMC host which completed request
+ *	@mrq: MMC request which request
  *
  *	MMC drivers should call this function when they have completed
- *	their processing of a command.  This should be called before the
- *	data part of the command has completed.
+ *	their processing of a request.
  */
 void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
 {
-- 
cgit v0.10.2


From 5b802344357338a4d645beac8ca97470bcbe3542 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <sascha@saschahauer.de>
Date: Thu, 4 May 2006 14:07:42 +0100
Subject: [ARM] 3490/1: i.MX: move uart resources to board files

Patch from Sascha Hauer

This patch moves the i.MX uart resources and the gpio pin setup to the
board files. This allows the boards to decide how many internal uarts
are connected to the outside world and whether they use rts/cts or
not.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-imx/generic.c b/arch/arm/mach-imx/generic.c
index 9d8331b..12ea58a 100644
--- a/arch/arm/mach-imx/generic.c
+++ b/arch/arm/mach-imx/generic.c
@@ -195,56 +195,6 @@ void __init imx_set_mmc_info(struct imxmmc_platform_data *info)
 }
 EXPORT_SYMBOL(imx_set_mmc_info);
 
-static struct resource imx_uart1_resources[] = {
-	[0] = {
-		.start	= 0x00206000,
-		.end	= 0x002060FF,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= (UART1_MINT_RX),
-		.end	= (UART1_MINT_RX),
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		.start	= (UART1_MINT_TX),
-		.end	= (UART1_MINT_TX),
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device imx_uart1_device = {
-	.name		= "imx-uart",
-	.id		= 0,
-	.num_resources	= ARRAY_SIZE(imx_uart1_resources),
-	.resource	= imx_uart1_resources,
-};
-
-static struct resource imx_uart2_resources[] = {
-	[0] = {
-		.start	= 0x00207000,
-		.end	= 0x002070FF,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= (UART2_MINT_RX),
-		.end	= (UART2_MINT_RX),
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		.start	= (UART2_MINT_TX),
-		.end	= (UART2_MINT_TX),
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device imx_uart2_device = {
-	.name		= "imx-uart",
-	.id		= 1,
-	.num_resources	= ARRAY_SIZE(imx_uart2_resources),
-	.resource	= imx_uart2_resources,
-};
-
 static struct imxfb_mach_info imx_fb_info;
 
 void __init set_imx_fb_info(struct imxfb_mach_info *hard_imx_fb_info)
@@ -283,8 +233,6 @@ static struct platform_device imxfb_device = {
 static struct platform_device *devices[] __initdata = {
 	&imx_mmc_device,
 	&imxfb_device,
-	&imx_uart1_device,
-	&imx_uart2_device,
 };
 
 static struct map_desc imx_io_desc[] __initdata = {
diff --git a/arch/arm/mach-imx/mx1ads.c b/arch/arm/mach-imx/mx1ads.c
index e34d0df..e1f6c0b 100644
--- a/arch/arm/mach-imx/mx1ads.c
+++ b/arch/arm/mach-imx/mx1ads.c
@@ -26,6 +26,7 @@
 
 #include <asm/mach/arch.h>
 #include <asm/arch/mmc.h>
+#include <asm/arch/imx-uart.h>
 #include <linux/interrupt.h>
 #include "generic.h"
 
@@ -48,8 +49,70 @@ static struct platform_device cs89x0_device = {
 	.resource	= cs89x0_resources,
 };
 
+static struct imxuart_platform_data uart_pdata = {
+	.flags = IMXUART_HAVE_RTSCTS,
+};
+
+static struct resource imx_uart1_resources[] = {
+	[0] = {
+		.start	= 0x00206000,
+		.end	= 0x002060FF,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= (UART1_MINT_RX),
+		.end	= (UART1_MINT_RX),
+		.flags	= IORESOURCE_IRQ,
+	},
+	[2] = {
+		.start	= (UART1_MINT_TX),
+		.end	= (UART1_MINT_TX),
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device imx_uart1_device = {
+	.name		= "imx-uart",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(imx_uart1_resources),
+	.resource	= imx_uart1_resources,
+	.dev = {
+		.platform_data = &uart_pdata,
+	}
+};
+
+static struct resource imx_uart2_resources[] = {
+	[0] = {
+		.start	= 0x00207000,
+		.end	= 0x002070FF,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= (UART2_MINT_RX),
+		.end	= (UART2_MINT_RX),
+		.flags	= IORESOURCE_IRQ,
+	},
+	[2] = {
+		.start	= (UART2_MINT_TX),
+		.end	= (UART2_MINT_TX),
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device imx_uart2_device = {
+	.name		= "imx-uart",
+	.id		= 1,
+	.num_resources	= ARRAY_SIZE(imx_uart2_resources),
+	.resource	= imx_uart2_resources,
+	.dev = {
+		.platform_data = &uart_pdata,
+	}
+};
+
 static struct platform_device *devices[] __initdata = {
 	&cs89x0_device,
+	&imx_uart1_device,
+	&imx_uart2_device,
 };
 
 #ifdef CONFIG_MMC_IMX
@@ -75,6 +138,17 @@ mx1ads_init(void)
 	imx_gpio_mode(GPIO_PORTB | GPIO_GIUS | GPIO_IN | 20);
 	imx_set_mmc_info(&mx1ads_mmc_info);
 #endif
+
+	imx_gpio_mode(PC9_PF_UART1_CTS);
+	imx_gpio_mode(PC10_PF_UART1_RTS);
+	imx_gpio_mode(PC11_PF_UART1_TXD);
+	imx_gpio_mode(PC12_PF_UART1_RXD);
+
+	imx_gpio_mode(PB28_PF_UART2_CTS);
+	imx_gpio_mode(PB29_PF_UART2_RTS);
+	imx_gpio_mode(PB30_PF_UART2_TXD);
+	imx_gpio_mode(PB31_PF_UART2_RXD);
+
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 }
 
diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c
index c3b7a66..d202eb4 100644
--- a/drivers/serial/imx.c
+++ b/drivers/serial/imx.c
@@ -45,6 +45,7 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/hardware.h>
+#include <asm/arch/imx-uart.h>
 
 /* We've been assigned a range on the "Low-density serial ports" major */
 #define SERIAL_IMX_MAJOR	204
@@ -73,7 +74,8 @@ struct imx_port {
 	struct uart_port	port;
 	struct timer_list	timer;
 	unsigned int		old_status;
-	int txirq,rxirq,rtsirq;
+	int			txirq,rxirq,rtsirq;
+	int			have_rtscts:1;
 };
 
 /*
@@ -491,8 +493,12 @@ imx_set_termios(struct uart_port *port, struct termios *termios,
 		ucr2 = UCR2_SRST | UCR2_IRTS;
 
 	if (termios->c_cflag & CRTSCTS) {
-		ucr2 &= ~UCR2_IRTS;
-		ucr2 |= UCR2_CTSC;
+		if( sport->have_rtscts ) {
+			ucr2 &= ~UCR2_IRTS;
+			ucr2 |= UCR2_CTSC;
+		} else {
+			termios->c_cflag &= ~CRTSCTS;
+		}
 	}
 
 	if (termios->c_cflag & CSTOPB)
@@ -719,27 +725,6 @@ static void __init imx_init_ports(void)
 		imx_ports[i].timer.function = imx_timeout;
 		imx_ports[i].timer.data     = (unsigned long)&imx_ports[i];
 	}
-
-	imx_gpio_mode(PC9_PF_UART1_CTS);
-	imx_gpio_mode(PC10_PF_UART1_RTS);
-	imx_gpio_mode(PC11_PF_UART1_TXD);
-	imx_gpio_mode(PC12_PF_UART1_RXD);
-	imx_gpio_mode(PB28_PF_UART2_CTS);
-	imx_gpio_mode(PB29_PF_UART2_RTS);
-
-	imx_gpio_mode(PB30_PF_UART2_TXD);
-	imx_gpio_mode(PB31_PF_UART2_RXD);
-
-#if 0 /* We don't need these, on the mx1 the _modem_ side of the uart
-       * is implemented.
-       */
-	imx_gpio_mode(PD7_AF_UART2_DTR);
-	imx_gpio_mode(PD8_AF_UART2_DCD);
-	imx_gpio_mode(PD9_AF_UART2_RI);
-	imx_gpio_mode(PD10_AF_UART2_DSR);
-#endif
-
-
 }
 
 #ifdef CONFIG_SERIAL_IMX_CONSOLE
@@ -932,7 +917,14 @@ static int serial_imx_resume(struct platform_device *dev)
 
 static int serial_imx_probe(struct platform_device *dev)
 {
+	struct imxuart_platform_data *pdata;
+
 	imx_ports[dev->id].port.dev = &dev->dev;
+
+	pdata = (struct imxuart_platform_data *)dev->dev.platform_data;
+	if(pdata && (pdata->flags & IMXUART_HAVE_RTSCTS))
+		imx_ports[dev->id].have_rtscts = 1;
+
 	uart_add_one_port(&imx_reg, &imx_ports[dev->id].port);
 	platform_set_drvdata(dev, &imx_ports[dev->id]);
 	return 0;
diff --git a/include/asm-arm/arch-imx/imx-uart.h b/include/asm-arm/arch-imx/imx-uart.h
new file mode 100644
index 0000000..3a685e1
--- /dev/null
+++ b/include/asm-arm/arch-imx/imx-uart.h
@@ -0,0 +1,10 @@
+#ifndef ASMARM_ARCH_UART_H
+#define ASMARM_ARCH_UART_H
+
+#define IMXUART_HAVE_RTSCTS (1<<0)
+
+struct imxuart_platform_data {
+	unsigned int flags;
+};
+
+#endif
-- 
cgit v0.10.2


From 920e70c5c603ada05dd480ca0ccc0ae12a5fdc39 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Thu, 4 May 2006 18:22:51 +0100
Subject: [MMC] Move set_ios debugging into mmc.c

Rather than having every driver duplicate the set_ios debugging,
provide a single version in mmc.c which can be expanded as we
add additional functionality.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/mmc/at91_mci.c b/drivers/mmc/at91_mci.c
index 6061c2d..88f0eef 100644
--- a/drivers/mmc/at91_mci.c
+++ b/drivers/mmc/at91_mci.c
@@ -621,9 +621,6 @@ static void at91_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	struct at91mci_host *host = mmc_priv(mmc);
 	unsigned long at91_master_clock = clk_get_rate(mci_clk);
 
-	DBG("Clock %uHz, busmode %u, powermode %u, Vdd %u\n",
-		ios->clock, ios->bus_mode, ios->power_mode, ios->vdd);
-
 	if (host)
 		host->bus_mode = ios->bus_mode;
 	else
diff --git a/drivers/mmc/au1xmmc.c b/drivers/mmc/au1xmmc.c
index c0326bb..914d62b 100644
--- a/drivers/mmc/au1xmmc.c
+++ b/drivers/mmc/au1xmmc.c
@@ -720,10 +720,6 @@ static void au1xmmc_set_ios(struct mmc_host* mmc, struct mmc_ios* ios)
 {
 	struct au1xmmc_host *host = mmc_priv(mmc);
 
-	DBG("set_ios (power=%u, clock=%uHz, vdd=%u, mode=%u)\n",
-	      host->id, ios->power_mode, ios->clock, ios->vdd,
-	      ios->bus_mode);
-
 	if (ios->power_mode == MMC_POWER_OFF)
 		au1xmmc_set_power(host, 0);
 	else if (ios->power_mode == MMC_POWER_ON) {
diff --git a/drivers/mmc/imxmmc.c b/drivers/mmc/imxmmc.c
index bc27192..79358e22 100644
--- a/drivers/mmc/imxmmc.c
+++ b/drivers/mmc/imxmmc.c
@@ -807,10 +807,6 @@ static void imxmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	struct imxmci_host *host = mmc_priv(mmc);
 	int prescaler;
 
-	dev_dbg(mmc_dev(host->mmc), "clock %u power %u vdd %u width %u\n",
-		ios->clock, ios->power_mode, ios->vdd,
-		(ios->bus_width==MMC_BUS_WIDTH_4)?4:1);
-
 	if( ios->bus_width==MMC_BUS_WIDTH_4 ) {
 		host->actual_bus_width = MMC_BUS_WIDTH_4;
 		imx_gpio_mode(PB11_PF_SD_DAT3);
diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c
index 91c6b10..1ca2c8b 100644
--- a/drivers/mmc/mmc.c
+++ b/drivers/mmc/mmc.c
@@ -69,10 +69,13 @@ static const unsigned int tacc_mant[] = {
 void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq)
 {
 	struct mmc_command *cmd = mrq->cmd;
-	int err = mrq->cmd->error;
-	pr_debug("MMC: req done (%02x): %d: %08x %08x %08x %08x\n",
-		 cmd->opcode, err, cmd->resp[0], cmd->resp[1],
-		 cmd->resp[2], cmd->resp[3]);
+	int err = cmd->error;
+
+	pr_debug("%s: req done (CMD%u): %d/%d/%d: %08x %08x %08x %08x\n",
+		 mmc_hostname(host), cmd->opcode, err,
+		 mrq->data ? mrq->data->error : 0,
+		 mrq->stop ? mrq->stop->error : 0,
+		 cmd->resp[0], cmd->resp[1], cmd->resp[2], cmd->resp[3]);
 
 	if (err && cmd->retries) {
 		cmd->retries--;
@@ -96,8 +99,9 @@ EXPORT_SYMBOL(mmc_request_done);
 void
 mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
 {
-	pr_debug("MMC: starting cmd %02x arg %08x flags %08x\n",
-		 mrq->cmd->opcode, mrq->cmd->arg, mrq->cmd->flags);
+	pr_debug("%s: starting CMD%u arg %08x flags %08x\n",
+		 mmc_hostname(host), mrq->cmd->opcode,
+		 mrq->cmd->arg, mrq->cmd->flags);
 
 	WARN_ON(host->card_busy == NULL);
 
@@ -311,6 +315,18 @@ void mmc_release_host(struct mmc_host *host)
 
 EXPORT_SYMBOL(mmc_release_host);
 
+static inline void mmc_set_ios(struct mmc_host *host)
+{
+	struct mmc_ios *ios = &host->ios;
+
+	pr_debug("%s: clock %uHz busmode %u powermode %u cs %u Vdd %u width %u\n",
+		 mmc_hostname(host), ios->clock, ios->bus_mode,
+		 ios->power_mode, ios->chip_select, ios->vdd,
+		 ios->bus_width);
+	
+	host->ops->set_ios(host, ios);
+}
+
 static int mmc_select_card(struct mmc_host *host, struct mmc_card *card)
 {
 	int err;
@@ -363,7 +379,7 @@ static int mmc_select_card(struct mmc_host *host, struct mmc_card *card)
 		}
 	}
 
-	host->ops->set_ios(host, &host->ios);
+	mmc_set_ios(host);
 
 	return MMC_ERR_NONE;
 }
@@ -414,7 +430,7 @@ static u32 mmc_select_voltage(struct mmc_host *host, u32 ocr)
 		ocr = 3 << bit;
 
 		host->ios.vdd = bit;
-		host->ops->set_ios(host, &host->ios);
+		mmc_set_ios(host);
 	} else {
 		ocr = 0;
 	}
@@ -667,7 +683,7 @@ static void mmc_idle_cards(struct mmc_host *host)
 	struct mmc_command cmd;
 
 	host->ios.chip_select = MMC_CS_HIGH;
-	host->ops->set_ios(host, &host->ios);
+	mmc_set_ios(host);
 
 	mmc_delay(1);
 
@@ -680,7 +696,7 @@ static void mmc_idle_cards(struct mmc_host *host)
 	mmc_delay(1);
 
 	host->ios.chip_select = MMC_CS_DONTCARE;
-	host->ops->set_ios(host, &host->ios);
+	mmc_set_ios(host);
 
 	mmc_delay(1);
 }
@@ -705,13 +721,13 @@ static void mmc_power_up(struct mmc_host *host)
 	host->ios.chip_select = MMC_CS_DONTCARE;
 	host->ios.power_mode = MMC_POWER_UP;
 	host->ios.bus_width = MMC_BUS_WIDTH_1;
-	host->ops->set_ios(host, &host->ios);
+	mmc_set_ios(host);
 
 	mmc_delay(1);
 
 	host->ios.clock = host->f_min;
 	host->ios.power_mode = MMC_POWER_ON;
-	host->ops->set_ios(host, &host->ios);
+	mmc_set_ios(host);
 
 	mmc_delay(2);
 }
@@ -724,7 +740,7 @@ static void mmc_power_off(struct mmc_host *host)
 	host->ios.chip_select = MMC_CS_DONTCARE;
 	host->ios.power_mode = MMC_POWER_OFF;
 	host->ios.bus_width = MMC_BUS_WIDTH_1;
-	host->ops->set_ios(host, &host->ios);
+	mmc_set_ios(host);
 }
 
 static int mmc_send_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr)
@@ -972,7 +988,8 @@ static unsigned int mmc_calculate_clock(struct mmc_host *host)
 		if (!mmc_card_dead(card) && max_dtr > card->csd.max_dtr)
 			max_dtr = card->csd.max_dtr;
 
-	pr_debug("MMC: selected %d.%03dMHz transfer rate\n",
+	pr_debug("%s: selected %d.%03dMHz transfer rate\n",
+		 mmc_hostname(host),
 		 max_dtr / 1000000, (max_dtr / 1000) % 1000);
 
 	return max_dtr;
@@ -1047,7 +1064,7 @@ static void mmc_setup(struct mmc_host *host)
 	} else {
 		host->ios.bus_mode = MMC_BUSMODE_OPENDRAIN;
 		host->ios.clock = host->f_min;
-		host->ops->set_ios(host, &host->ios);
+		mmc_set_ios(host);
 
 		/*
 		 * We should remember the OCR mask from the existing
@@ -1083,7 +1100,7 @@ static void mmc_setup(struct mmc_host *host)
 	 * Ok, now switch to push-pull mode.
 	 */
 	host->ios.bus_mode = MMC_BUSMODE_PUSHPULL;
-	host->ops->set_ios(host, &host->ios);
+	mmc_set_ios(host);
 
 	mmc_read_csds(host);
 
@@ -1129,7 +1146,7 @@ static void mmc_rescan(void *data)
 		 * attached cards and the host support.
 		 */
 		host->ios.clock = mmc_calculate_clock(host);
-		host->ops->set_ios(host, &host->ios);
+		mmc_set_ios(host);
 	}
 
 	mmc_release_host(host);
diff --git a/drivers/mmc/mmci.c b/drivers/mmc/mmci.c
index df7e861e..da8e4d7 100644
--- a/drivers/mmc/mmci.c
+++ b/drivers/mmc/mmci.c
@@ -402,9 +402,6 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	struct mmci_host *host = mmc_priv(mmc);
 	u32 clk = 0, pwr = 0;
 
-	DBG(host, "clock %uHz busmode %u powermode %u Vdd %u\n",
-	    ios->clock, ios->bus_mode, ios->power_mode, ios->vdd);
-
 	if (ios->clock) {
 		if (ios->clock >= host->mclk) {
 			clk = MCI_CLK_BYPASS;
diff --git a/drivers/mmc/pxamci.c b/drivers/mmc/pxamci.c
index 40970c4..f97b472 100644
--- a/drivers/mmc/pxamci.c
+++ b/drivers/mmc/pxamci.c
@@ -365,10 +365,6 @@ static void pxamci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
 	struct pxamci_host *host = mmc_priv(mmc);
 
-	pr_debug("PXAMCI: clock %u power %u vdd %u.%02u\n",
-		 ios->clock, ios->power_mode, ios->vdd / 100,
-		 ios->vdd % 100);
-
 	if (ios->clock) {
 		unsigned int clk = CLOCKRATE / ios->clock;
 		if (CLOCKRATE / clk > ios->clock)
diff --git a/drivers/mmc/sdhci.c b/drivers/mmc/sdhci.c
index bdbfca0..b005328 100644
--- a/drivers/mmc/sdhci.c
+++ b/drivers/mmc/sdhci.c
@@ -570,10 +570,6 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 
 	spin_lock_irqsave(&host->lock, flags);
 
-	DBG("clock %uHz busmode %u powermode %u cs %u Vdd %u width %u\n",
-	     ios->clock, ios->bus_mode, ios->power_mode, ios->chip_select,
-	     ios->vdd, ios->bus_width);
-
 	/*
 	 * Reset the chip on each power off.
 	 * Should clear out any weird states.
diff --git a/drivers/mmc/wbsd.c b/drivers/mmc/wbsd.c
index 511f7b0..39b3d97 100644
--- a/drivers/mmc/wbsd.c
+++ b/drivers/mmc/wbsd.c
@@ -931,10 +931,6 @@ static void wbsd_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	struct wbsd_host *host = mmc_priv(mmc);
 	u8 clk, setup, pwr;
 
-	DBGF("clock %uHz busmode %u powermode %u cs %u Vdd %u width %u\n",
-		ios->clock, ios->bus_mode, ios->power_mode, ios->chip_select,
-		ios->vdd, ios->bus_width);
-
 	spin_lock_bh(&host->lock);
 
 	/*
-- 
cgit v0.10.2


From fed3be9bd56e67c9b9324277b7f95c32e73a75bb Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Thu, 4 May 2006 13:23:40 -0400
Subject: CREDITS file update (Tristan Greaves)

By request from Tristan.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/CREDITS b/CREDITS
index 787564b..6f50be3 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1194,15 +1194,9 @@ S: Brecksville, OH  44141-1334
 S: USA
 
 N: Tristan Greaves
-E: Tristan.Greaves@icl.com
-E: tmg296@ecs.soton.ac.uk
-W: http://www.ecs.soton.ac.uk/~tmg296
+E: tristan@extricate.org
+W: http://www.extricate.org/
 D: Miscellaneous ipv4 sysctl patches
-S: 15 Little Mead
-S: Denmead
-S: Hampshire
-S: PO7 6HS
-S: United Kingdom
 
 N: Michael A. Griffith
 E: grif@cs.ucr.edu
-- 
cgit v0.10.2


From ff10952a547dad934d9ed9afc5cf579ed1ccb53a Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Fri, 5 May 2006 15:11:14 +0100
Subject: [ARM] 3494/1: asm-arm/bug.h needs linux/stddef.h

Patch from Nicolas Pitre

... for the definition of NULL.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/include/asm-arm/bug.h b/include/asm-arm/bug.h
index 7fb0213..5ab8216 100644
--- a/include/asm-arm/bug.h
+++ b/include/asm-arm/bug.h
@@ -2,6 +2,7 @@
 #define _ASMARM_BUG_H
 
 #include <linux/config.h>
+#include <linux/stddef.h>
 
 #ifdef CONFIG_BUG
 #ifdef CONFIG_DEBUG_BUGVERBOSE
-- 
cgit v0.10.2


From 2eb9d3157107497fdccb51e1570fea677f6e3c82 Mon Sep 17 00:00:00 2001
From: Uwe Zeisberger <Uwe_Zeisberger@digi.com>
Date: Fri, 5 May 2006 15:11:14 +0100
Subject: [ARM] 3496/1: more constants for asm-offsets.h

Patch from Uwe Zeisberger

added the following constants:
- MACHINFO_TYPE
- MACHINFO_NAME
- MACHINFO_PHYSIO
- MACHINFO_PGOFFIO
- PROCINFO_INITFUNC
- PROCINFO_MMUFLAGS

and removed their definition from head.S and head-nommu.S

Signed-off-by: Uwe Zeisberger <Uwe_Zeisberger@digi.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index b324dca..45fdf4a 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -95,5 +95,11 @@ int main(void)
   DEFINE(SYS_ERROR0,		0x9f0000);
   BLANK();
   DEFINE(SIZEOF_MACHINE_DESC,	sizeof(struct machine_desc));
+  DEFINE(MACHINFO_TYPE,		offsetof(struct machine_desc, nr));
+  DEFINE(MACHINFO_NAME,		offsetof(struct machine_desc, name));
+  DEFINE(MACHINFO_PHYSIO,	offsetof(struct machine_desc, phys_io));
+  DEFINE(MACHINFO_PGOFFIO,	offsetof(struct machine_desc, io_pg_offst));
+  DEFINE(PROCINFO_INITFUNC,	offsetof(struct proc_info_list, __cpu_flush));
+  DEFINE(PROCINFO_MMUFLAGS,	offsetof(struct proc_info_list, __cpu_mmu_flags));
   return 0; 
 }
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 0bea658..adf62e5e 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -20,12 +20,10 @@
 #include <asm/mach-types.h>
 #include <asm/procinfo.h>
 #include <asm/ptrace.h>
+#include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/system.h>
 
-#define PROCINFO_INITFUNC       12
-#define MACHINFO_TYPE		0
-
 /*
  * Kernel startup entry point.
  * ---------------------------
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 04b66a9..04f7344 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -24,14 +24,6 @@
 #include <asm/thread_info.h>
 #include <asm/system.h>
 
-#define PROCINFO_MMUFLAGS	8
-#define PROCINFO_INITFUNC	12
-
-#define MACHINFO_TYPE		0
-#define MACHINFO_PHYSIO		4
-#define MACHINFO_PGOFFIO	8
-#define MACHINFO_NAME		12
-
 #define KERNEL_RAM_ADDR	(PAGE_OFFSET + TEXT_OFFSET)
 
 /*
-- 
cgit v0.10.2


From 56cf6504fc1c0c221b82cebc16a444b684140fb7 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Fri, 5 May 2006 17:57:52 +0100
Subject: [BLOCK] Fix oops on removal of SD/MMC card

The block layer keeps a reference (driverfs_dev) to the struct
device associated with the block device, and uses it internally
for generating uevents in block_uevent.

Block device uevents include umounting the partition, which can
occur after the backing device has been removed.

Unfortunately, this reference is not counted.  This means that
if the struct device is removed from the device tree, the block
layers reference will become stale.

Guard against this by holding a reference to the struct device
in add_disk(), and only drop the reference when we're releasing
the gendisk kobject - in other words when we can be sure that no
further uevents will be generated for this block device.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Jens Axboe <axboe@suse.de>

diff --git a/block/genhd.c b/block/genhd.c
index 5a8d3bf..d965725 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -182,6 +182,7 @@ static int exact_lock(dev_t dev, void *data)
  */
 void add_disk(struct gendisk *disk)
 {
+	get_device(disk->driverfs_dev);
 	disk->flags |= GENHD_FL_UP;
 	blk_register_region(MKDEV(disk->major, disk->first_minor),
 			    disk->minors, NULL, exact_match, exact_lock, disk);
@@ -427,6 +428,7 @@ static struct attribute * default_attrs[] = {
 static void disk_release(struct kobject * kobj)
 {
 	struct gendisk *disk = to_disk(kobj);
+	put_device(disk->driverfs_dev);
 	kfree(disk->random);
 	kfree(disk->part);
 	free_disk_stats(disk);
-- 
cgit v0.10.2


From b7d7ef87e15dea105be59ec8f14e2f92182dd421 Mon Sep 17 00:00:00 2001
From: "George G. Davis" <davis_g@mvista.com>
Date: Fri, 5 May 2006 22:32:23 +0100
Subject: [ARM] 3499/1: Fix VFP FPSCR corruption for double exception case

Patch from George G. Davis

The ARM VFP FPSCR register is corrupted when a condition flags modifying
VFP instruction is followed by a non-condition flags modifying VFP
instruction and both instructions raise exceptions.  The fix is to
read the current FPSCR in between emulation of these two instructions
and use the current FPSCR value when handling the second exception.

Signed-off-by: George G. Davis <gdavis@mvista.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 37ff814..03486be 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -245,7 +245,7 @@ void VFP9_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 	 */
 	barrier();
 	trigger = fmrx(FPINST2);
-	fpscr = fmrx(FPSCR);
+	orig_fpscr = fpscr = fmrx(FPSCR);
 
  emulate:
 	exceptions = vfp_emulate_instruction(trigger, fpscr, regs);
-- 
cgit v0.10.2


From 99532559dc7a8e686b2cef14c780a7ad5dbd4a31 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Fri, 5 May 2006 22:32:24 +0100
Subject: [ARM] 3500/1: fix PXA27x DMA allocation priority

Patch from Nicolas Pitre

Intel PXA27x developers manual section 5.4.1.1 lists a priority
distribution for the DMA channels differently than what the code
currently assumes.  This patch fixes that.

Noticed by Simon Vogl <vogl@soft.uni-linz.ac.at>

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-pxa/dma.c b/arch/arm/mach-pxa/dma.c
index 458112b..7d8c854 100644
--- a/arch/arm/mach-pxa/dma.c
+++ b/arch/arm/mach-pxa/dma.c
@@ -45,23 +45,16 @@ int pxa_request_dma (char *name, pxa_dma_prio prio,
 
 	local_irq_save(flags);
 
-	/* try grabbing a DMA channel with the requested priority */
-	for (i = prio; i < prio + PXA_DMA_NBCH(prio); i++) {
-		if (!dma_channels[i].name) {
-			found = 1;
-			break;
-		}
-	}
-
-	if (!found) {
-		/* requested prio group is full, try hier priorities */
-		for (i = prio-1; i >= 0; i--) {
+	do {
+		/* try grabbing a DMA channel with the requested priority */
+		pxa_for_each_dma_prio (i, prio) {
 			if (!dma_channels[i].name) {
 				found = 1;
 				break;
 			}
 		}
-	}
+		/* if requested prio group is full, try a hier priority */
+	} while (!found && prio--);
 
 	if (found) {
 		DCSR(i) = DCSR_STARTINTR|DCSR_ENDINTR|DCSR_BUSERR;
diff --git a/include/asm-arm/arch-pxa/dma.h b/include/asm-arm/arch-pxa/dma.h
index 3e88a2a..a008150 100644
--- a/include/asm-arm/arch-pxa/dma.h
+++ b/include/asm-arm/arch-pxa/dma.h
@@ -24,27 +24,29 @@ typedef struct pxa_dma_desc {
 	volatile u32 dcmd;	/* DCMD value for the current transfer */
 } pxa_dma_desc;
 
+typedef enum {
+	DMA_PRIO_HIGH = 0,
+	DMA_PRIO_MEDIUM = 1,
+	DMA_PRIO_LOW = 2
+} pxa_dma_prio;
+
 #if defined(CONFIG_PXA27x)
 
 #define PXA_DMA_CHANNELS	32
-#define PXA_DMA_NBCH(prio)	((prio == DMA_PRIO_LOW) ? 16 : 8)
 
-typedef enum {
-	DMA_PRIO_HIGH = 0,
-	DMA_PRIO_MEDIUM = 8,
-	DMA_PRIO_LOW = 16
-} pxa_dma_prio;
+#define pxa_for_each_dma_prio(ch, prio)					\
+for (									\
+	ch = prio * 4;							\
+	ch != (4 << prio) + 16;						\
+	ch = (ch + 1 == (4 << prio)) ? (prio * 4 + 16) : (ch + 1)	\
+)
 
 #elif defined(CONFIG_PXA25x)
 
 #define PXA_DMA_CHANNELS	16
-#define PXA_DMA_NBCH(prio)	((prio == DMA_PRIO_LOW) ? 8 : 4)
 
-typedef enum {
-	DMA_PRIO_HIGH = 0,
-	DMA_PRIO_MEDIUM = 4,
-	DMA_PRIO_LOW = 8
-} pxa_dma_prio;
+#define pxa_for_each_dma_prio(ch, prio)					\
+	for (ch = prio * 4; ch != (4 << prio); ch++)
 
 #endif
 
-- 
cgit v0.10.2


From 568cb09b9d889b6f2852ede19772b8e9eed36c1e Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Fri, 5 May 2006 22:35:05 +0100
Subject: [ARM] 3495/1: EABI: undefine removed syscalls, but...

Patch from Nicolas Pitre

... but only for user space.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h
index 26f2f48..cbf39a5 100644
--- a/include/asm-arm/unistd.h
+++ b/include/asm-arm/unistd.h
@@ -363,7 +363,7 @@
 /*
  * The following syscalls are obsolete and no longer available for EABI.
  */
-#if defined(__ARM_EABI__)
+#if defined(__ARM_EABI__) && !defined(__KERNEL__)
 #undef __NR_time
 #undef __NR_umount
 #undef __NR_stime
-- 
cgit v0.10.2


From 7c3ceb4fb9667f34f1599a062efecf4cdc4a4ce5 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Fri, 5 May 2006 17:02:09 -0700
Subject: [SCTP]: Allow spillover of receive buffer to avoid deadlock.

This patch fixes a deadlock situation in the receive path by allowing
temporary spillover of the receive buffer.

- If the chunk we receive has a tsn that immediately follows the ctsn,
  accept it even if we run out of receive buffer space and renege data with
  higher TSNs.
- Once we accept one chunk in a packet, accept all the remaining chunks
  even if we run out of receive buffer space.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Mark Butler <butlerm@middle.net>
Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index eba99f3..7f4fea1 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -712,6 +712,7 @@ struct sctp_chunk {
 	__u8 tsn_gap_acked;	/* Is this chunk acked by a GAP ACK? */
 	__s8 fast_retransmit;	 /* Is this chunk fast retransmitted? */
 	__u8 tsn_missing_report; /* Data chunk missing counter. */
+	__u8 data_accepted; 	/* At least 1 chunk in this packet accepted */
 };
 
 void sctp_chunk_hold(struct sctp_chunk *);
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 297b895..cf0c767 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -149,6 +149,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
 		/* This is the first chunk in the packet.  */
 		chunk->singleton = 1;
 		ch = (sctp_chunkhdr_t *) chunk->skb->data;
+		chunk->data_accepted = 0;
 	}
 
         chunk->chunk_hdr = ch;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 2b9a832..f5d131f 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -5151,7 +5151,9 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	int tmp;
 	__u32 tsn;
 	int account_value;
+	struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
 	struct sock *sk = asoc->base.sk;
+	int rcvbuf_over = 0;
 
 	data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data;
 	skb_pull(chunk->skb, sizeof(sctp_datahdr_t));
@@ -5162,10 +5164,16 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	/* ASSERT:  Now skb->data is really the user data.  */
 
 	/*
-	 * if we are established, and we have used up our receive
-	 * buffer memory, drop the frame
-	 */
-	if (asoc->state == SCTP_STATE_ESTABLISHED) {
+	 * If we are established, and we have used up our receive buffer
+	 * memory, think about droping the frame.
+	 * Note that we have an opportunity to improve performance here.
+	 * If we accept one chunk from an skbuff, we have to keep all the
+	 * memory of that skbuff around until the chunk is read into user
+	 * space. Therefore, once we accept 1 chunk we may as well accept all
+	 * remaining chunks in the skbuff. The data_accepted flag helps us do
+	 * that.
+	 */
+	if ((asoc->state == SCTP_STATE_ESTABLISHED) && (!chunk->data_accepted)) {
 		/*
 		 * If the receive buffer policy is 1, then each
 		 * association can allocate up to sk_rcvbuf bytes
@@ -5176,9 +5184,25 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 			account_value = atomic_read(&asoc->rmem_alloc);
 		else
 			account_value = atomic_read(&sk->sk_rmem_alloc);
-
-		if (account_value > sk->sk_rcvbuf)
-			return SCTP_IERROR_IGNORE_TSN;
+		if (account_value > sk->sk_rcvbuf) {
+			/*
+			 * We need to make forward progress, even when we are
+			 * under memory pressure, so we always allow the
+			 * next tsn after the ctsn ack point to be accepted.
+			 * This lets us avoid deadlocks in which we have to
+			 * drop frames that would otherwise let us drain the
+			 * receive queue.
+			 */
+			if ((sctp_tsnmap_get_ctsn(map) + 1) != tsn)
+				return SCTP_IERROR_IGNORE_TSN;
+
+			/*
+			 * We're going to accept the frame but we should renege
+			 * to make space for it. This will send us down that
+			 * path later in this function.
+			 */
+			rcvbuf_over = 1;
+		}
 	}
 
 	/* Process ECN based congestion.
@@ -5226,6 +5250,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	datalen -= sizeof(sctp_data_chunk_t);
 
 	deliver = SCTP_CMD_CHUNK_ULP;
+	chunk->data_accepted = 1;
 
 	/* Think about partial delivery. */
 	if ((datalen >= asoc->rwnd) && (!asoc->ulpq.pd_mode)) {
@@ -5242,7 +5267,8 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	 * large spill over.
 	 */
 	if (!asoc->rwnd || asoc->rwnd_over ||
-	    (datalen > asoc->rwnd + asoc->frag_point)) {
+	    (datalen > asoc->rwnd + asoc->frag_point) ||
+	    rcvbuf_over) {
 
 		/* If this is the next TSN, consider reneging to make
 		 * room.   Note: Playing nice with a confused sender.  A
@@ -5250,8 +5276,8 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 		 * space and in the future we may want to detect and
 		 * do more drastic reneging.
 		 */
-		if (sctp_tsnmap_has_gap(&asoc->peer.tsn_map) &&
-		    (sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + 1) == tsn) {
+		if (sctp_tsnmap_has_gap(map) &&
+		    (sctp_tsnmap_get_ctsn(map) + 1) == tsn) {
 			SCTP_DEBUG_PRINTK("Reneging for tsn:%u\n", tsn);
 			deliver = SCTP_CMD_RENEGE;
 		} else {
-- 
cgit v0.10.2


From 672e7cca17ed6036a1756ed34cf20dbd72d5e5f6 Mon Sep 17 00:00:00 2001
From: Vladislav Yasevich <vladsilav.yasevich@hp.com>
Date: Fri, 5 May 2006 17:03:49 -0700
Subject: [SCTP]: Prevent possible infinite recursion with multiple bundled
 DATA.

There is a rare situation that causes lksctp to go into infinite recursion
and crash the system.  The trigger is a packet that contains at least the
first two DATA fragments of a message bundled together. The recursion is
triggered when the user data buffer is smaller that the full data message.
The problem is that we clone the skb for every fragment in the message.
When reassembling the full message, we try to link skbs from the "first
fragment" clone using the frag_list. However, since the frag_list is shared
between two clones in this rare situation, we end up setting the frag_list
pointer of the second fragment to point to itself.  This causes
sctp_skb_pull() to potentially recurse indefinitely.

Proposed solution is to make a copy of the skb when attempting to link
things using frag_list.

Signed-off-by: Vladislav Yasevich <vladsilav.yasevich@hp.com>
Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 2080b2d..575e556 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -279,6 +279,7 @@ static inline void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq,
 static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag)
 {
 	struct sk_buff *pos;
+	struct sk_buff *new = NULL;
 	struct sctp_ulpevent *event;
 	struct sk_buff *pnext, *last;
 	struct sk_buff *list = skb_shinfo(f_frag)->frag_list;
@@ -297,11 +298,33 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu
 	 */
 	if (last)
 		last->next = pos;
-	else
-		skb_shinfo(f_frag)->frag_list = pos;
+ 	else {
+ 		if (skb_cloned(f_frag)) {
+ 			/* This is a cloned skb, we can't just modify
+ 			 * the frag_list.  We need a new skb to do that.
+ 			 * Instead of calling skb_unshare(), we'll do it
+ 			 * ourselves since we need to delay the free.
+ 			 */
+ 			new = skb_copy(f_frag, GFP_ATOMIC);
+ 			if (!new)
+ 				return NULL;	/* try again later */
+
+ 			new->sk = f_frag->sk;
+
+ 			skb_shinfo(new)->frag_list = pos;
+ 		} else
+ 			skb_shinfo(f_frag)->frag_list = pos;
+ 	}
 
 	/* Remove the first fragment from the reassembly queue.  */
 	__skb_unlink(f_frag, queue);
+
+ 	/* if we did unshare, then free the old skb and re-assign */
+ 	if (new) {
+ 		kfree_skb(f_frag);
+ 		f_frag = new;
+ 	}
+
 	while (pos) {
 
 		pnext = pos->next;
-- 
cgit v0.10.2


From 62b08083ec3dbfd7e533c8d230dd1d8191a6e813 Mon Sep 17 00:00:00 2001
From: Sridhar Samudrala <sri@us.ibm.com>
Date: Fri, 5 May 2006 17:04:43 -0700
Subject: [SCTP]: Fix panic's when receiving fragmented SCTP control chunks.

Use pskb_pull() to handle incoming COOKIE_ECHO and HEARTBEAT chunks that
are received as skb's with fragment list.

Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index f5d131f..8cdba51 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -636,8 +636,9 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 	 */
         chunk->subh.cookie_hdr =
 		(struct sctp_signed_cookie *)chunk->skb->data;
-	skb_pull(chunk->skb,
-		 ntohs(chunk->chunk_hdr->length) - sizeof(sctp_chunkhdr_t));
+	if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) -
+					 sizeof(sctp_chunkhdr_t)))
+		goto nomem;
 
 	/* 5.1 D) Upon reception of the COOKIE ECHO chunk, Endpoint
 	 * "Z" will reply with a COOKIE ACK chunk after building a TCB
@@ -965,7 +966,8 @@ sctp_disposition_t sctp_sf_beat_8_3(const struct sctp_endpoint *ep,
 	 */
 	chunk->subh.hb_hdr = (sctp_heartbeathdr_t *) chunk->skb->data;
 	paylen = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_chunkhdr_t);
-	skb_pull(chunk->skb, paylen);
+	if (!pskb_pull(chunk->skb, paylen))
+		goto nomem;
 
 	reply = sctp_make_heartbeat_ack(asoc, chunk,
 					chunk->subh.hb_hdr, paylen);
@@ -1860,8 +1862,9 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
 	 * are in good shape.
 	 */
         chunk->subh.cookie_hdr = (struct sctp_signed_cookie *)chunk->skb->data;
-	skb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) -
-		 sizeof(sctp_chunkhdr_t));
+	if (!pskb_pull(chunk->skb, ntohs(chunk->chunk_hdr->length) -
+					sizeof(sctp_chunkhdr_t)))
+		goto nomem;
 
 	/* In RFC 2960 5.2.4 3, if both Verification Tags in the State Cookie
 	 * of a duplicate COOKIE ECHO match the Verification Tags of the
-- 
cgit v0.10.2


From 35d63edb1c807bc5317e49592260e84637bc432e Mon Sep 17 00:00:00 2001
From: Sridhar Samudrala <sri@us.ibm.com>
Date: Fri, 5 May 2006 17:05:23 -0700
Subject: [SCTP]: Fix state table entries for chunks received in CLOSED state.

Discard an unexpected chunk in CLOSED state rather can calling BUG().

Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 75ef104..8bcca56 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -366,9 +366,9 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
 	/* SCTP_STATE_EMPTY */ \
 	{.fn = sctp_sf_ootb, .name = "sctp_sf_ootb"}, \
 	/* SCTP_STATE_CLOSED */ \
-	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
 	/* SCTP_STATE_COOKIE_WAIT */ \
-	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
 	/* SCTP_STATE_COOKIE_ECHOED */ \
 	{.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \
 	/* SCTP_STATE_ESTABLISHED */ \
@@ -380,7 +380,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
 	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
 	{.fn = sctp_sf_do_ecne, .name = "sctp_sf_do_ecne"}, \
 	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
-	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
 } /* TYPE_SCTP_ECN_ECNE */
 
 #define TYPE_SCTP_ECN_CWR { \
@@ -401,7 +401,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
 	/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
 	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
 	/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
-	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	{.fn = sctp_sf_discard_chunk, .name = "sctp_sf_discard_chunk"}, \
 } /* TYPE_SCTP_ECN_CWR */
 
 #define TYPE_SCTP_SHUTDOWN_COMPLETE { \
@@ -647,7 +647,7 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
 	/* SCTP_STATE_EMPTY */ \
 	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
 	/* SCTP_STATE_CLOSED */ \
-	{.fn = sctp_sf_bug, .name = "sctp_sf_bug"}, \
+	{.fn = sctp_sf_error_closed, .name = "sctp_sf_error_closed"}, \
 	/* SCTP_STATE_COOKIE_WAIT */ \
 	{.fn = sctp_sf_do_prm_requestheartbeat,		      \
 	 .name = "sctp_sf_do_prm_requestheartbeat"},          \
-- 
cgit v0.10.2


From 1c29fc4989bc2a3838b2837adc12b8aeb0feeede Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Fri, 5 May 2006 17:07:13 -0700
Subject: [BRIDGE]: keep track of received multicast packets

It makes sense to add this simple statistic to keep track of received
multicast packets.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index b0b7f55..bfa4d8c 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -66,6 +66,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
 	}
 
 	if (is_multicast_ether_addr(dest)) {
+		br->statistics.multicast++;
 		br_flood_forward(br, skb, !passedup);
 		if (!passedup)
 			br_pass_frame_up(br, skb);
-- 
cgit v0.10.2


From 134af34632a7b3b0a98a79a2e56bf9cc927e0eac Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 5 May 2006 17:09:13 -0700
Subject: [DCCP]: Fix sock_orphan dead lock

Calling sock_orphan inside bh_lock_sock in dccp_close can lead to dead
locks.  For example, the inet_diag code holds sk_callback_lock without
disabling BH.  If an inbound packet arrives during that admittedly tiny
window, it will cause a dead lock on bh_lock_sock.  Another possible
path would be through sock_wfree if the network device driver frees the
tx skb in process context with BH enabled.

We can fix this by moving sock_orphan out of bh_lock_sock.

The tricky bit is to work out when we need to destroy the socket
ourselves and when it has already been destroyed by someone else.

By moving sock_orphan before the release_sock we can solve this
problem.  This is because as long as we own the socket lock its
state cannot change.

So we simply record the socket state before the release_sock
and then check the state again after we regain the socket lock.
If the socket state has transitioned to DCCP_CLOSED in the time being,
we know that the socket has been destroyed.  Otherwise the socket is
still ours to keep.

This problem was discoverd by Ingo Molnar using his lock validator.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 1ff7328..2e0ee83 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -848,6 +848,7 @@ static int dccp_close_state(struct sock *sk)
 void dccp_close(struct sock *sk, long timeout)
 {
 	struct sk_buff *skb;
+	int state;
 
 	lock_sock(sk);
 
@@ -882,6 +883,11 @@ void dccp_close(struct sock *sk, long timeout)
 	sk_stream_wait_close(sk, timeout);
 
 adjudge_to_death:
+	state = sk->sk_state;
+	sock_hold(sk);
+	sock_orphan(sk);
+	atomic_inc(sk->sk_prot->orphan_count);
+
 	/*
 	 * It is the last release_sock in its life. It will remove backlog.
 	 */
@@ -894,8 +900,9 @@ adjudge_to_death:
 	bh_lock_sock(sk);
 	BUG_TRAP(!sock_owned_by_user(sk));
 
-	sock_hold(sk);
-	sock_orphan(sk);
+	/* Have we already been destroyed by a softirq or backlog? */
+	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
+		goto out;
 
 	/*
 	 * The last release_sock may have processed the CLOSE or RESET
@@ -915,12 +922,12 @@ adjudge_to_death:
 #endif
 	}
 
-	atomic_inc(sk->sk_prot->orphan_count);
 	if (sk->sk_state == DCCP_CLOSED)
 		inet_csk_destroy_sock(sk);
 
 	/* Otherwise, socket is reprieved until protocol close. */
 
+out:
 	bh_unlock_sock(sk);
 	local_bh_enable();
 	sock_put(sk);
-- 
cgit v0.10.2


From f530937b2cccdb131cb459977943c98421ab09b3 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 5 May 2006 17:19:26 -0700
Subject: [NETROM/ROSE]: Kill module init version kernel log messages.

There are out of date and don't tell the user anything useful.
The similar messages which IPV4 and the core networking used
to output were killed a long time ago.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index ecd288b..3669cb9 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1370,8 +1370,6 @@ static struct notifier_block nr_dev_notifier = {
 
 static struct net_device **dev_nr;
 
-static char banner[] __initdata = KERN_INFO "G4KLX NET/ROM for Linux. Version 0.7 for AX25.037 Linux 2.4\n";
-
 static int __init nr_proto_init(void)
 {
 	int i;
@@ -1419,7 +1417,6 @@ static int __init nr_proto_init(void)
 	}
 		
 	register_netdevice_notifier(&nr_dev_notifier);
-	printk(banner);
 
 	ax25_protocol_register(AX25_P_NETROM, nr_route_frame);
 	ax25_linkfail_register(nr_link_failed);
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index ef4538a..55564ef 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1469,8 +1469,6 @@ static struct notifier_block rose_dev_notifier = {
 
 static struct net_device **dev_rose;
 
-static const char banner[] = KERN_INFO "F6FBB/G4KLX ROSE for Linux. Version 0.62 for AX25.037 Linux 2.4\n";
-
 static int __init rose_proto_init(void)
 {
 	int i;
@@ -1519,7 +1517,6 @@ static int __init rose_proto_init(void)
 
 	sock_register(&rose_family_ops);
 	register_netdevice_notifier(&rose_dev_notifier);
-	printk(banner);
 
 	ax25_protocol_register(AX25_P_ROSE, rose_route_frame);
 	ax25_linkfail_register(rose_link_failed);
-- 
cgit v0.10.2


From 5528e568a760442e0ec8fd2dea1f0791875a066b Mon Sep 17 00:00:00 2001
From: John Heffner <jheffner@psc.edu>
Date: Fri, 5 May 2006 17:41:44 -0700
Subject: [TCP]: Fix snd_cwnd adjustments in tcp_highspeed.c

Xiaoliang (David) Wei wrote:
> Hi gurus,
>
>    I am reading the code of tcp_highspeed.c in the kernel and have a
> question on the hstcp_cong_avoid function, specifically the following
> AI part (line 136~143 in net/ipv4/tcp_highspeed.c ):
>
>                /* Do additive increase */
>                if (tp->snd_cwnd < tp->snd_cwnd_clamp) {
>                        tp->snd_cwnd_cnt += ca->ai;
>                        if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
>                                tp->snd_cwnd++;
>                                tp->snd_cwnd_cnt -= tp->snd_cwnd;
>                        }
>                }
>
>    In this part, when (tp->snd_cwnd_cnt == tp->snd_cwnd),
> snd_cwnd_cnt will be -1... snd_cwnd_cnt is defined as u16, will this
> small chance of getting -1 becomes a problem?
> Shall we change it by reversing the order of the cwnd++ and cwnd_cnt -=
> cwnd?

Absolutely correct.  Thanks.

Signed-off-by: John Heffner <jheffner@psc.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index e0e9d13..b72fa55 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -137,8 +137,8 @@ static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
 		if (tp->snd_cwnd < tp->snd_cwnd_clamp) {
 			tp->snd_cwnd_cnt += ca->ai;
 			if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
-				tp->snd_cwnd++;
 				tp->snd_cwnd_cnt -= tp->snd_cwnd;
+				tp->snd_cwnd++;
 			}
 		}
 	}
-- 
cgit v0.10.2