From 8d730cfb50cc77da6d00f941daef440918a1922f Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sun, 18 Jun 2006 11:58:39 +0100
Subject: Basic implementation of 'make headers_install'

This adds a make target which exports a subset of headers which contain
definitions which are useful for system libraries and tools. It uses the
BSD 'unifdef' tool to remove instances of #ifdef __KERNEL__, and uses
sed to remove markers like __user.

Based on an original implementation by Arnd Bergmann <arnd@arndb.de>
Hacked about by David Woodhouse <dwmw2@infradead.org>
Reviewed and cleaned up by Sam Ravnborg <sam@ravnborg.org>

Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/Makefile b/Makefile
index 3494c17..98e5af7 100644
--- a/Makefile
+++ b/Makefile
@@ -856,6 +856,17 @@ depend dep:
 	@echo '*** Warning: make $@ is unnecessary now.'
 
 # ---------------------------------------------------------------------------
+# Kernel headers
+INSTALL_HDR_PATH=$(MODLIB)/abi
+export INSTALL_HDR_PATH
+
+PHONY += headers_install
+headers_install: include/linux/version.h
+	$(Q)unifdef -Ux /dev/null
+	$(Q)rm -rf $(INSTALL_HDR_PATH)/include
+	$(Q)$(MAKE) -rR -f $(srctree)/scripts/Makefile.headersinst obj=include
+
+# ---------------------------------------------------------------------------
 # Modules
 
 ifdef CONFIG_MODULES
@@ -1027,6 +1038,8 @@ help:
 	@echo  '  cscope	  - Generate cscope index'
 	@echo  '  kernelrelease	  - Output the release version string'
 	@echo  '  kernelversion	  - Output the version stored in Makefile'
+	@echo  '  headers_install - Install sanitised kernel headers to INSTALL_HDR_PATH'
+	@echo  '                    (default: /lib/modules/$$VERSION/abi)'
 	@echo  ''
 	@echo  'Static analysers'
 	@echo  '  checkstack      - Generate a list of stack hogs'
diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
new file mode 100644
index 0000000..688f8cb
--- /dev/null
+++ b/scripts/Makefile.headersinst
@@ -0,0 +1,147 @@
+# ==========================================================================
+# Installing headers
+#
+# header-y files will be installed verbatim
+# unifdef-y are the files where unifdef will be run before installing files
+# objhdr-y are generated files that will be installed verbatim
+#
+# ==========================================================================
+
+UNIFDEF := unifdef -U__KERNEL__
+
+# Eliminate the contents of (and inclusions of) compiler.h
+HDRSED  := sed 	-e "s/ inline / __inline__ /g" \
+		-e "s/[[:space:]]__user[[:space:]]\+/ /g" \
+		-e "s/(__user[[:space:]]\+/ (/g" \
+		-e "s/[[:space:]]__force[[:space:]]\+/ /g" \
+		-e "s/(__force[[:space:]]\+/ (/g" \
+		-e "s/[[:space:]]__iomem[[:space:]]\+/ /g" \
+		-e "s/(__iomem[[:space:]]\+/ (/g" \
+		-e "s/[[:space:]]__attribute_const__[[:space:]]\+/\ /g" \
+		-e "s/[[:space:]]__attribute_const__$$//" \
+		-e "/^\#include <linux\/compiler.h>/d"
+
+_dst := $(if $(dst),$(dst),$(obj))
+
+.PHONY: __headersinst
+__headersinst:
+
+
+ifeq (,$(patsubst include/asm/%,,$(obj)/))
+# For producing the generated stuff in include/asm for biarch builds, include
+# both sets of Kbuild files; we'll generate anything which is mentioned in
+# _either_ arch, and recurse into subdirectories which are mentioned in either
+# arch. Since some directories may exist in one but not the other, we must
+# use '-include'.
+GENASM := 1
+archasm	   := $(subst include/asm,asm-$(ARCH),$(obj))
+altarchasm := $(subst include/asm,asm-$(ALTARCH),$(obj))
+-include $(srctree)/include/$(archasm)/Kbuild
+-include $(srctree)/include/$(altarchasm)/Kbuild
+else
+include $(srctree)/$(obj)/Kbuild
+endif
+
+include scripts/Kbuild.include
+
+# If this is include/asm-$(ARCH) and there's no $(ALTARCH), then
+# override $(_dst) so that we install to include/asm directly.
+ifeq ($(obj)$(ALTARCH),include/asm-$(ARCH))
+     _dst := include/asm
+endif
+
+header-y	:= $(sort $(header-y))
+unifdef-y	:= $(sort $(unifdef-y))
+subdir-y	:= $(patsubst %/,%,$(filter %/, $(header-y)))
+header-y	:= $(filter-out %/, $(header-y))
+header-y	:= $(filter-out $(unifdef-y),$(header-y))
+
+ifdef ALTARCH
+ifeq ($(obj),include/asm-$(ARCH))
+altarch-y	:= altarch-dir
+endif
+endif
+
+# Make the definitions visible for recursive make invocations
+export ALTARCH
+export ARCHDEF
+export ALTARCHDEF
+
+quiet_cmd_o_hdr_install   = INSTALL $(_dst)/$@
+      cmd_o_hdr_install   = cp $(objtree)/$(obj)/$@ $(INSTALL_HDR_PATH)/$(_dst)
+
+quiet_cmd_headers_install = INSTALL $(_dst)/$@
+      cmd_headers_install = $(HDRSED) $(srctree)/$(obj)/$@		\
+			    > $(INSTALL_HDR_PATH)/$(_dst)/$@
+
+quiet_cmd_unifdef	  = UNIFDEF $(_dst)/$@
+      cmd_unifdef	  = $(UNIFDEF) $(srctree)/$(obj)/$@ | $(HDRSED)	\
+                            > $(INSTALL_HDR_PATH)/$(_dst)/$@ || :
+
+quiet_cmd_mkdir		  = MKDIR   $@
+      cmd_mkdir		  = mkdir -p $(INSTALL_HDR_PATH)/$@
+
+quiet_cmd_gen		  = GEN     $(_dst)/$@
+      cmd_gen		  = \
+STUBDEF=__ASM_STUB_`echo $@ | tr a-z. A-Z_`;				\
+(echo "/* File autogenerated by 'make headers_install' */" ;		\
+echo "\#ifndef $$STUBDEF" ;						\
+echo "\#define $$STUBDEF" ;						\
+echo "\# if $(ARCHDEF)" ;						\
+if [ -r $(srctree)/include/$(archasm)/$@ ]; then			\
+	echo "\#  include <$(archasm)/$@>" ;				\
+else									\
+	echo "\#  error $(archasm)/$@ does not exist in"		\
+			"the $(ARCH) architecture" ;			\
+fi ;									\
+echo "\# elif $(ALTARCHDEF)" ;						\
+if [ -r $(srctree)/include/$(altarchasm)/$@ ]; then			\
+	echo "\#  include <$(altarchasm)/$@>" ;				\
+else									\
+	echo "\#  error $(altarchasm)/$@ does not exist in"		\
+			"the $(ALTARCH) architecture" ;			\
+fi ;									\
+echo "\# else" ;							\
+echo "\#  warning This machine appears to be"				\
+		 "neither $(ARCH) nor $(ALTARCH)." ;			\
+echo "\# endif" ;							\
+echo "\#endif /* $$STUBDEF */" ;					\
+) > $(INSTALL_HDR_PATH)/$(_dst)/$@
+
+__headersinst: $(subdir-y) $(header-y) $(unifdef-y) $(altarch-y) $(objhdr-y)
+
+.PHONY: $(header-y) $(unifdef-y) $(subdir-y)
+
+# Rules for installing headers
+
+$(objhdr-y) $(subdir-y) $(header-y) $(unifdef-y): $(_dst)
+
+.PHONY: $(_dst)
+$(_dst):
+	$(call cmd,mkdir)
+
+ifdef GENASM
+$(objhdr-y) $(header-y) $(unifdef-y):
+	$(call cmd,gen)
+
+else
+$(objhdr-y):
+	$(call cmd,o_hdr_install)
+
+$(header-y):
+	$(call cmd,headers_install)
+
+$(unifdef-y):
+	$(call cmd,unifdef)
+endif
+
+hdrinst := -rR -f $(srctree)/scripts/Makefile.headersinst obj
+
+.PHONY: altarch-dir
+altarch-dir:
+	$(Q)$(MAKE) $(hdrinst)=include/asm-$(ALTARCH) dst=include/asm-$(ALTARCH)
+	$(Q)$(MAKE) $(hdrinst)=include/asm dst=include/asm
+
+# Recursion
+$(subdir-y):
+	$(Q)$(MAKE) $(hdrinst)=$(obj)/$@ dst=$(_dst)/$@ rel=../$(rel)
-- 
cgit v0.10.2


From 684753599afc76aa8f66c731bafb7204b39265b8 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sun, 18 Jun 2006 12:02:10 +0100
Subject: Basic implementation of 'make headers_check'

Based on the 'headers_install' target, this performs a basic sanity check
on the exported headers -- so far only checking that they do not include
any other headers which aren't selected for import, but easily extendable.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/Makefile b/Makefile
index 98e5af7..dbab1a9 100644
--- a/Makefile
+++ b/Makefile
@@ -866,6 +866,10 @@ headers_install: include/linux/version.h
 	$(Q)rm -rf $(INSTALL_HDR_PATH)/include
 	$(Q)$(MAKE) -rR -f $(srctree)/scripts/Makefile.headersinst obj=include
 
+PHONY += headers_check
+headers_check: headers_install
+	$(Q)$(MAKE) -rR -f $(srctree)/scripts/Makefile.headersinst obj=include HDRCHECK=1
+
 # ---------------------------------------------------------------------------
 # Modules
 
diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index 688f8cb..aa9990a 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -78,6 +78,11 @@ quiet_cmd_unifdef	  = UNIFDEF $(_dst)/$@
       cmd_unifdef	  = $(UNIFDEF) $(srctree)/$(obj)/$@ | $(HDRSED)	\
                             > $(INSTALL_HDR_PATH)/$(_dst)/$@ || :
 
+quiet_cmd_check		  = CHECK   $(_dst)/$@
+      cmd_check		  = $(srctree)/scripts/hdrcheck.sh		\
+                              $(INSTALL_HDR_PATH)/include		\
+			      $(INSTALL_HDR_PATH)/$(_dst)/$@
+
 quiet_cmd_mkdir		  = MKDIR   $@
       cmd_mkdir		  = mkdir -p $(INSTALL_HDR_PATH)/$@
 
@@ -112,6 +117,11 @@ __headersinst: $(subdir-y) $(header-y) $(unifdef-y) $(altarch-y) $(objhdr-y)
 
 .PHONY: $(header-y) $(unifdef-y) $(subdir-y)
 
+ifdef HDRCHECK
+# Rules for checking headers
+$(objhdr-y) $(header-y) $(unifdef-y):
+	$(call cmd,check)
+else
 # Rules for installing headers
 
 $(objhdr-y) $(subdir-y) $(header-y) $(unifdef-y): $(_dst)
@@ -134,6 +144,7 @@ $(header-y):
 $(unifdef-y):
 	$(call cmd,unifdef)
 endif
+endif
 
 hdrinst := -rR -f $(srctree)/scripts/Makefile.headersinst obj
 
diff --git a/scripts/hdrcheck.sh b/scripts/hdrcheck.sh
new file mode 100755
index 0000000..b3bb683
--- /dev/null
+++ b/scripts/hdrcheck.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+for FILE in `grep '^#include <' $2 | cut -f2 -d\< | cut -f1 -d\> | egrep ^linux\|^asm` ; do
+    if [ ! -r $1/$FILE ]; then
+	echo $2 requires $FILE, which does not exist
+	exit 1
+    fi
+done
-- 
cgit v0.10.2


From 8555255f0b426858d8648c6206b70eb906cf4ec7 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sun, 18 Jun 2006 12:14:01 +0100
Subject: Add generic Kbuild files for 'make headers_install'

This adds the Kbuild files listing the files which are to be installed by
the 'headers_install' make target, in generic directories.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/include/Kbuild b/include/Kbuild
new file mode 100644
index 0000000..cb25348
--- /dev/null
+++ b/include/Kbuild
@@ -0,0 +1,2 @@
+header-y += asm-generic/ linux/ scsi/ sound/ mtd/ rdma/ video/
+header-y += asm-$(ARCH)/ 
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
new file mode 100644
index 0000000..70594b2
--- /dev/null
+++ b/include/asm-generic/Kbuild
@@ -0,0 +1,3 @@
+header-y += atomic.h errno-base.h errno.h fcntl.h ioctl.h ipc.h mman.h \
+	signal.h statfs.h
+unifdef-y := resource.h siginfo.h
diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm
new file mode 100644
index 0000000..d8d0bce
--- /dev/null
+++ b/include/asm-generic/Kbuild.asm
@@ -0,0 +1,11 @@
+unifdef-y += a.out.h auxvec.h byteorder.h errno.h fcntl.h ioctl.h	\
+	ioctls.h ipcbuf.h irq.h mman.h msgbuf.h param.h poll.h		\
+	posix_types.h ptrace.h resource.h sembuf.h shmbuf.h shmparam.h	\
+	sigcontext.h siginfo.h signal.h socket.h sockios.h stat.h	\
+	statfs.h termbits.h termios.h timex.h types.h unistd.h user.h
+
+# These really shouldn't be exported
+unifdef-y += atomic.h io.h
+
+# These probably shouldn't be exported
+unifdef-y += elf.h page.h
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
new file mode 100644
index 0000000..5c95aa4
--- /dev/null
+++ b/include/linux/Kbuild
@@ -0,0 +1,63 @@
+header-y := byteorder/ dvb/ hdlc/ isdn/ nfsd/ raid/ sunrpc/ tc_act/	\
+	netfilter/ netfilter_arp/ netfilter_bridge/ netfilter_ipv4/	\
+	netfilter_ipv6/
+
+header-y += affs_fs.h affs_hardblocks.h aio_abi.h a.out.h arcfb.h	\
+	atmapi.h atmbr2684.h atmclip.h atm_eni.h atm_he.h		\
+	atm_idt77105.h atmioc.h atmlec.h atmmpc.h atm_nicstar.h		\
+	atmppp.h atmsap.h atmsvc.h atm_zatm.h auto_fs4.h auxvec.h	\
+	awe_voice.h ax25.h b1lli.h baycom.h bfs_fs.h blkpg.h		\
+	bpqether.h cdk.h chio.h coda_psdev.h coff.h comstats.h		\
+	consolemap.h cycx_cfm.h devfs_fs.h dm-ioctl.h dn.h dqblk_v1.h	\
+	dqblk_v2.h dqblk_xfs.h efs_fs_sb.h elf-fdpic.h elf.h elf-em.h	\
+	fadvise.h fd.h fdreg.h ftape-header-segment.h ftape-vendors.h	\
+	fuse.h futex.h genetlink.h gen_stats.h gigaset_dev.h hdsmart.h	\
+	hpfs_fs.h hysdn_if.h i2c-dev.h i2c-id.h i8k.h icmp.h		\
+	if_arcnet.h if_arp.h if_bonding.h if_cablemodem.h if_fc.h	\
+	if_fddi.h if.h if_hippi.h if_infiniband.h if_packet.h		\
+	if_plip.h if_ppp.h if_slip.h if_strip.h if_tunnel.h in6.h	\
+	in_route.h ioctl.h ip.h ipmi_msgdefs.h ip_mp_alg.h ipsec.h	\
+	ipx.h irda.h isdn_divertif.h iso_fs.h ite_gpio.h ixjuser.h	\
+	jffs2.h keyctl.h limits.h major.h matroxfb.h meye.h minix_fs.h	\
+	mmtimer.h mqueue.h mtio.h ncp_no.h netfilter_arp.h netrom.h	\
+	nfs2.h nfs4_mount.h nfs_mount.h openprom_fs.h param.h		\
+	pci_ids.h pci_regs.h personality.h pfkeyv2.h pg.h pkt_cls.h	\
+	pkt_sched.h posix_types.h ppdev.h prctl.h ps2esdi.h qic117.h	\
+	qnxtypes.h quotaio_v1.h quotaio_v2.h radeonfb.h raw.h		\
+	resource.h rose.h sctp.h smbno.h snmp.h sockios.h som.h		\
+	sound.h stddef.h synclink.h telephony.h termios.h ticable.h	\
+	times.h tiocl.h tipc.h toshiba.h ultrasound.h un.h utime.h	\
+	utsname.h video_decoder.h video_encoder.h videotext.h vt.h	\
+	wavefront.h wireless.h xattr.h x25.h zorro_ids.h
+
+unifdef-y += acct.h adb.h adfs_fs.h agpgart.h apm_bios.h atalk.h	\
+	atmarp.h atmdev.h atm.h atm_tcp.h audit.h auto_fs.h binfmts.h	\
+	capability.h capi.h cciss_ioctl.h cdrom.h cm4000_cs.h		\
+	cn_proc.h coda.h connector.h cramfs_fs.h cuda.h cyclades.h	\
+	dccp.h dirent.h divert.h elfcore.h errno.h errqueue.h		\
+	ethtool.h eventpoll.h ext2_fs.h ext3_fs.h fb.h fcntl.h		\
+	filter.h flat.h fs.h ftape.h gameport.h generic_serial.h	\
+	genhd.h hayesesp.h hdlcdrv.h hdlc.h hdreg.h hiddev.h hpet.h	\
+	i2c-algo-ite.h i2c.h i2o-dev.h icmpv6.h if_bridge.h if_ec.h	\
+	if_eql.h if_ether.h if_frad.h if_ltalk.h if_pppox.h		\
+	if_shaper.h if_tr.h if_tun.h if_vlan.h if_wanpipe.h igmp.h	\
+	inet_diag.h in.h inotify.h input.h ipc.h ipmi.h ipv6.h		\
+	ipv6_route.h isdn.h isdnif.h isdn_ppp.h isicom.h jbd.h		\
+	joystick.h kdev_t.h kd.h kernelcapi.h kernel.h keyboard.h	\
+	llc.h loop.h lp.h mempolicy.h mii.h mman.h mroute.h msdos_fs.h	\
+	msg.h nbd.h ncp_fs.h ncp.h ncp_mount.h netdevice.h		\
+	netfilter_bridge.h netfilter_decnet.h netfilter.h		\
+	netfilter_ipv4.h netfilter_ipv6.h netfilter_logging.h net.h	\
+	netlink.h nfs3.h nfs4.h nfsacl.h nfs_fs.h nfs.h nfs_idmap.h	\
+	n_r3964.h nubus.h nvram.h parport.h patchkey.h pci.h pktcdvd.h	\
+	pmu.h poll.h ppp_defs.h ppp-comp.h ptrace.h qnx4_fs.h quota.h	\
+	random.h reboot.h reiserfs_fs.h reiserfs_xattr.h romfs_fs.h	\
+	route.h rtc.h rtnetlink.h scc.h sched.h sdla.h			\
+	selinux_netlink.h sem.h serial_core.h serial.h serio.h shm.h	\
+	signal.h smb_fs.h smb.h smb_mount.h socket.h sonet.h sonypi.h	\
+	soundcard.h stat.h sysctl.h tcp.h time.h timex.h tty.h types.h	\
+	udf_fs_i.h udp.h uinput.h uio.h unistd.h usb_ch9.h		\
+	usbdevice_fs.h user.h videodev2.h videodev.h wait.h		\
+	wanrouter.h watchdog.h xfrm.h zftape.h
+
+objhdr-y := version.h
diff --git a/include/linux/byteorder/Kbuild b/include/linux/byteorder/Kbuild
new file mode 100644
index 0000000..84a57d4
--- /dev/null
+++ b/include/linux/byteorder/Kbuild
@@ -0,0 +1,2 @@
+unifdef-y += generic.h swabb.h swab.h
+header-y += big_endian.h little_endian.h pdp_endian.h
diff --git a/include/linux/dvb/Kbuild b/include/linux/dvb/Kbuild
new file mode 100644
index 0000000..63973af
--- /dev/null
+++ b/include/linux/dvb/Kbuild
@@ -0,0 +1,2 @@
+header-y += ca.h frontend.h net.h osd.h version.h
+unifdef-y := audio.h dmx.h video.h
diff --git a/include/linux/hdlc/Kbuild b/include/linux/hdlc/Kbuild
new file mode 100644
index 0000000..1fb2644
--- /dev/null
+++ b/include/linux/hdlc/Kbuild
@@ -0,0 +1 @@
+header-y += ioctl.h
diff --git a/include/linux/isdn/Kbuild b/include/linux/isdn/Kbuild
new file mode 100644
index 0000000..c1727c8
--- /dev/null
+++ b/include/linux/isdn/Kbuild
@@ -0,0 +1 @@
+header-y += capicmd.h tpam.h
diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
new file mode 100644
index 0000000..d06311a
--- /dev/null
+++ b/include/linux/netfilter/Kbuild
@@ -0,0 +1,11 @@
+header-y := nf_conntrack_sctp.h nf_conntrack_tuple_common.h		\
+	    nfnetlink_conntrack.h nfnetlink_log.h nfnetlink_queue.h	\
+	    xt_CLASSIFY.h xt_comment.h xt_connbytes.h xt_connmark.h	\
+	    xt_CONNMARK.h xt_conntrack.h xt_dccp.h xt_esp.h		\
+	    xt_helper.h xt_length.h xt_limit.h xt_mac.h xt_mark.h	\
+	    xt_MARK.h xt_multiport.h xt_NFQUEUE.h xt_pkttype.h		\
+	    xt_policy.h xt_realm.h xt_sctp.h xt_state.h xt_string.h	\
+	    xt_tcpmss.h xt_tcpudp.h
+
+unifdef-y := nf_conntrack_common.h nf_conntrack_ftp.h		\
+	nf_conntrack_tcp.h nfnetlink.h x_tables.h xt_physdev.h
diff --git a/include/linux/netfilter_arp/Kbuild b/include/linux/netfilter_arp/Kbuild
new file mode 100644
index 0000000..198ec5e
--- /dev/null
+++ b/include/linux/netfilter_arp/Kbuild
@@ -0,0 +1,2 @@
+header-y := arpt_mangle.h
+unifdef-y := arp_tables.h
diff --git a/include/linux/netfilter_bridge/Kbuild b/include/linux/netfilter_bridge/Kbuild
new file mode 100644
index 0000000..5b1aba6
--- /dev/null
+++ b/include/linux/netfilter_bridge/Kbuild
@@ -0,0 +1,4 @@
+header-y += ebt_among.h ebt_arp.h ebt_arpreply.h ebt_ip.h ebt_limit.h	\
+	ebt_log.h ebt_mark_m.h ebt_mark_t.h ebt_nat.h ebt_pkttype.h	\
+	ebt_redirect.h ebt_stp.h ebt_ulog.h ebt_vlan.h
+unifdef-y := ebtables.h ebt_802_3.h
diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild
new file mode 100644
index 0000000..04e4d27
--- /dev/null
+++ b/include/linux/netfilter_ipv4/Kbuild
@@ -0,0 +1,21 @@
+
+header-y := ip_conntrack_helper.h ip_conntrack_helper_h323_asn1.h	\
+	    ip_conntrack_helper_h323_types.h ip_conntrack_protocol.h	\
+	    ip_conntrack_sctp.h ip_conntrack_tcp.h ip_conntrack_tftp.h	\
+	    ip_nat_pptp.h ipt_addrtype.h ipt_ah.h	\
+	    ipt_CLASSIFY.h ipt_CLUSTERIP.h ipt_comment.h		\
+	    ipt_connbytes.h ipt_connmark.h ipt_CONNMARK.h		\
+	    ipt_conntrack.h ipt_dccp.h ipt_dscp.h ipt_DSCP.h ipt_ecn.h	\
+	    ipt_ECN.h ipt_esp.h ipt_hashlimit.h ipt_helper.h		\
+	    ipt_iprange.h ipt_length.h ipt_limit.h ipt_LOG.h ipt_mac.h	\
+	    ipt_mark.h ipt_MARK.h ipt_multiport.h ipt_NFQUEUE.h		\
+	    ipt_owner.h ipt_physdev.h ipt_pkttype.h ipt_policy.h	\
+	    ipt_realm.h ipt_recent.h ipt_REJECT.h ipt_SAME.h		\
+	    ipt_sctp.h ipt_state.h ipt_string.h ipt_tcpmss.h		\
+	    ipt_TCPMSS.h ipt_tos.h ipt_TOS.h ipt_ttl.h ipt_TTL.h	\
+	    ipt_ULOG.h
+
+unifdef-y := ip_conntrack.h ip_conntrack_h323.h ip_conntrack_irc.h	\
+	ip_conntrack_pptp.h ip_conntrack_proto_gre.h			\
+	ip_conntrack_tuple.h ip_nat.h ip_nat_rule.h ip_queue.h		\
+	ip_tables.h
diff --git a/include/linux/netfilter_ipv6/Kbuild b/include/linux/netfilter_ipv6/Kbuild
new file mode 100644
index 0000000..913ddbf
--- /dev/null
+++ b/include/linux/netfilter_ipv6/Kbuild
@@ -0,0 +1,6 @@
+header-y += ip6t_HL.h ip6t_LOG.h ip6t_MARK.h ip6t_REJECT.h ip6t_ah.h	\
+	ip6t_esp.h ip6t_frag.h ip6t_hl.h ip6t_ipv6header.h		\
+	ip6t_length.h ip6t_limit.h ip6t_mac.h ip6t_mark.h		\
+	ip6t_multiport.h ip6t_opts.h ip6t_owner.h ip6t_policy.h		\
+	ip6t_physdev.h ip6t_rt.h
+unifdef-y := ip6_tables.h
diff --git a/include/linux/nfsd/Kbuild b/include/linux/nfsd/Kbuild
new file mode 100644
index 0000000..c8c5456
--- /dev/null
+++ b/include/linux/nfsd/Kbuild
@@ -0,0 +1,2 @@
+unifdef-y := const.h export.h stats.h syscall.h nfsfh.h debug.h auth.h
+
diff --git a/include/linux/raid/Kbuild b/include/linux/raid/Kbuild
new file mode 100644
index 0000000..73fa27a
--- /dev/null
+++ b/include/linux/raid/Kbuild
@@ -0,0 +1 @@
+header-y += md_p.h md_u.h
diff --git a/include/linux/sunrpc/Kbuild b/include/linux/sunrpc/Kbuild
new file mode 100644
index 0000000..0d1d768
--- /dev/null
+++ b/include/linux/sunrpc/Kbuild
@@ -0,0 +1 @@
+unifdef-y := debug.h
diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild
new file mode 100644
index 0000000..5251a50
--- /dev/null
+++ b/include/linux/tc_act/Kbuild
@@ -0,0 +1 @@
+header-y += tc_gact.h tc_ipt.h tc_mirred.h tc_pedit.h
diff --git a/include/linux/tc_ematch/Kbuild b/include/linux/tc_ematch/Kbuild
new file mode 100644
index 0000000..381e930
--- /dev/null
+++ b/include/linux/tc_ematch/Kbuild
@@ -0,0 +1 @@
+headers-y := tc_em_cmp.h tc_em_meta.h tc_em_nbyte.h tc_em_text.h
diff --git a/include/mtd/Kbuild b/include/mtd/Kbuild
new file mode 100644
index 0000000..e1da2a5
--- /dev/null
+++ b/include/mtd/Kbuild
@@ -0,0 +1,2 @@
+unifdef-y := mtd-abi.h
+header-y := inftl-user.h jffs2-user.h mtd-user.h nftl-user.h
diff --git a/include/rdma/Kbuild b/include/rdma/Kbuild
new file mode 100644
index 0000000..eb710ba
--- /dev/null
+++ b/include/rdma/Kbuild
@@ -0,0 +1 @@
+header-y := ib_user_mad.h
diff --git a/include/scsi/Kbuild b/include/scsi/Kbuild
new file mode 100644
index 0000000..14a033d
--- /dev/null
+++ b/include/scsi/Kbuild
@@ -0,0 +1,2 @@
+header-y += scsi.h
+unifdef-y := scsi_ioctl.h sg.h
diff --git a/include/sound/Kbuild b/include/sound/Kbuild
new file mode 100644
index 0000000..3a5a3df
--- /dev/null
+++ b/include/sound/Kbuild
@@ -0,0 +1,2 @@
+header-y := asound_fm.h hdsp.h hdspm.h sfnt_info.h sscape_ioctl.h
+unifdef-y := asequencer.h asound.h emu10k1.h sb16_csp.h 
diff --git a/include/video/Kbuild b/include/video/Kbuild
new file mode 100644
index 0000000..76a6073
--- /dev/null
+++ b/include/video/Kbuild
@@ -0,0 +1 @@
+unifdef-y := sisfb.h
-- 
cgit v0.10.2


From 6a87a86d064cee49b3c298c558b990b65d736cf4 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sun, 18 Jun 2006 12:17:47 +0100
Subject: Add Kbuild file for PowerPC 'make headers_install'

Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/include/asm-powerpc/Kbuild b/include/asm-powerpc/Kbuild
new file mode 100644
index 0000000..ac61d7e
--- /dev/null
+++ b/include/asm-powerpc/Kbuild
@@ -0,0 +1,10 @@
+include include/asm-generic/Kbuild.asm
+
+unifdef-y += a.out.h asm-compat.h bootx.h byteorder.h cputable.h elf.h	\
+	nvram.h param.h posix_types.h ptrace.h seccomp.h signal.h	\
+	termios.h types.h unistd.h
+
+header-y += auxvec.h ioctls.h mman.h sembuf.h siginfo.h stat.h errno.h	\
+	ipcbuf.h msgbuf.h shmbuf.h socket.h termbits.h fcntl.h ipc.h	\
+	poll.h shmparam.h sockios.h ucontext.h ioctl.h linkage.h	\
+	resource.h sigcontext.h statfs.h
-- 
cgit v0.10.2


From 54863bb0d8782bbc81fe0275f1d20042fa14e32f Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sun, 18 Jun 2006 12:19:11 +0100
Subject: Add Kbuild file for x86_64 'make headers_install'

Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/include/asm-x86_64/Kbuild b/include/asm-x86_64/Kbuild
new file mode 100644
index 0000000..dc4d101
--- /dev/null
+++ b/include/asm-x86_64/Kbuild
@@ -0,0 +1,11 @@
+include include/asm-generic/Kbuild.asm
+
+ALTARCH := i386
+ARCHDEF := defined __x86_64__
+ALTARCHDEF := defined __i386__
+
+header-y += boot.h bootsetup.h cpufeature.h debugreg.h ldt.h \
+	 msr.h prctl.h setup.h sigcontext32.h ucontext.h \
+	 vsyscall32.h
+
+unifdef-y += mce.h mtrr.h vsyscall.h
-- 
cgit v0.10.2


From c509075e711305d591760e4f0e8f57ff178cd501 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sun, 18 Jun 2006 12:19:46 +0100
Subject: Add Kbuild file for i386 'make headers_install'

Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/include/asm-i386/Kbuild b/include/asm-i386/Kbuild
new file mode 100644
index 0000000..c064a8e
--- /dev/null
+++ b/include/asm-i386/Kbuild
@@ -0,0 +1,5 @@
+include include/asm-generic/Kbuild.asm
+
+header-y += boot.h cpufeature.h debugreg.h ldt.h setup.h ucontext.h
+
+unifdef-y += mtrr.h vm86.h
-- 
cgit v0.10.2


From 1e7c34994d809820dbe31220397b62660d8d1514 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sun, 18 Jun 2006 12:22:02 +0100
Subject: Add Kbuild file for S390 'make headers_install'

Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/include/asm-s390/Kbuild b/include/asm-s390/Kbuild
new file mode 100644
index 0000000..ed8955f
--- /dev/null
+++ b/include/asm-s390/Kbuild
@@ -0,0 +1,4 @@
+include include/asm-generic/Kbuild.asm
+
+unifdef-y += cmb.h debug.h
+header-y += dasd.h qeth.h tape390.h ucontext.h vtoc.h z90crypt.h
-- 
cgit v0.10.2


From dc901d6d2596d45f2c398e735125e5123b4e2774 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sun, 18 Jun 2006 12:22:20 +0100
Subject: Add Kbuild file for IA64 'make headers_install'

Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/include/asm-ia64/Kbuild b/include/asm-ia64/Kbuild
new file mode 100644
index 0000000..85d6f80
--- /dev/null
+++ b/include/asm-ia64/Kbuild
@@ -0,0 +1,7 @@
+include include/asm-generic/Kbuild.asm
+
+header-y += break.h fpu.h fpswa.h gcc_intrin.h ia64regs.h		\
+	 intel_intrin.h intrinsics.h perfmon_default_smpl.h	\
+	 ptrace_offsets.h rse.h setup.h ucontext.h
+
+unifdef-y += perfmon.h
-- 
cgit v0.10.2


From a29ee9f3bf49d9a19def177145dcb85fc021c096 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sun, 18 Jun 2006 12:22:49 +0100
Subject: Add Kbuild file for SPARC 'make headers_install'

Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/include/asm-sparc/Kbuild b/include/asm-sparc/Kbuild
new file mode 100644
index 0000000..cecdc61
--- /dev/null
+++ b/include/asm-sparc/Kbuild
@@ -0,0 +1,13 @@
+include include/asm-generic/Kbuild.asm
+
+header-y += apc.h asi.h asmmacro.h auxio.h bitext.h bpp.h \
+	bsderrno.h btfixup.h clock.h contregs.h cpudata.h cypress.h \
+	ebus.h ecc.h eeprom.h fbio.h floppy.h head.h hw_irq.h \
+	idprom.h io-unit.h iommu.h ipc.h jsflash.h \
+	kdebug.h machines.h mbus.h memreg.h mostek.h mpmbox.h msi.h \
+	mxcc.h obio.h openprom.h openpromio.h oplib.h pbm.h pcic.h \
+	pconf.h perfctr.h pgtsrmmu.h pgtsun4.h pgtsun4c.h psr.h reg.h \
+	ross.h sbi.h sbus.h sfp-machine.h smpprim.h \
+	solerrno.h spinlock.h sun4paddr.h sun4prom.h sunbpp.h svr4.h \
+	swift.h sysen.h timer.h traps.h tsunami.h turbosparc.h \
+	vac-ops.h vaddrs.h vfc_ioctls.h viking.h winmacro.h
diff --git a/include/asm-sparc64/Kbuild b/include/asm-sparc64/Kbuild
new file mode 100644
index 0000000..5edf764
--- /dev/null
+++ b/include/asm-sparc64/Kbuild
@@ -0,0 +1,16 @@
+include include/asm-generic/Kbuild.asm
+
+ALTARCH := sparc
+ARCHDEF := defined __sparc__ && defined __arch64__ 
+ALTARCHDEF := defined __sparc__ && !defined __arch64__
+
+header-y += agp.h apb.h asi.h auxio.h bbc.h bpp.h bsderrno.h \
+	chafsr.h chmctrl.h compat.h const.h cpudata.h dcr.h dcu.h \
+	display7seg.h ebus.h envctrl.h estate.h fbio.h fhc.h floppy.h \
+	fpumacro.h head.h hw_irq.h idprom.h iommu.h \
+	ipc.h isa.h kdebug.h lsu.h mostek.h ns87303.h \
+	openprom.h openpromio.h oplib.h parport.h pbm.h pconf.h \
+	perfctr.h pil.h psrcompat.h pstate.h reg.h sbus.h \
+	sfp-machine.h solerrno.h spinlock.h spitfire.h starfire.h \
+	sunbpp.h svr4.h timer.h ttable.h uctx.h upa.h utrap.h vga.h \
+	visasm.h watchdog.h
-- 
cgit v0.10.2


From 57e580f885a62c21fe75e35e9e445fb6fb82509c Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sun, 18 Jun 2006 12:45:15 +0100
Subject: Add Kbuild file for Alpha 'make headers_install'

Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/include/asm-alpha/Kbuild b/include/asm-alpha/Kbuild
new file mode 100644
index 0000000..e57fd57
--- /dev/null
+++ b/include/asm-alpha/Kbuild
@@ -0,0 +1,5 @@
+include include/asm-generic/Kbuild.asm
+
+unifdef-y += console.h fpu.h sysinfo.h
+
+header-y += gentrap.h regdef.h pal.h reg.h
-- 
cgit v0.10.2


From ef4d04b87d16839500a77aa1279f80be7ec4ef2e Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sun, 18 Jun 2006 12:58:53 +0100
Subject: Add empty Kbuild files for 'make headers_install' in remaining
 arches.

These include nothing more than the basic set of files listed in
asm-generic/Kbuild.asm. Any extra arch-specific files will need to be
added.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/include/asm-arm/Kbuild b/include/asm-arm/Kbuild
new file mode 100644
index 0000000..c68e168
--- /dev/null
+++ b/include/asm-arm/Kbuild
@@ -0,0 +1 @@
+include include/asm-generic/Kbuild.asm
diff --git a/include/asm-arm26/Kbuild b/include/asm-arm26/Kbuild
new file mode 100644
index 0000000..c68e168
--- /dev/null
+++ b/include/asm-arm26/Kbuild
@@ -0,0 +1 @@
+include include/asm-generic/Kbuild.asm
diff --git a/include/asm-cris/Kbuild b/include/asm-cris/Kbuild
new file mode 100644
index 0000000..c68e168
--- /dev/null
+++ b/include/asm-cris/Kbuild
@@ -0,0 +1 @@
+include include/asm-generic/Kbuild.asm
diff --git a/include/asm-frv/Kbuild b/include/asm-frv/Kbuild
new file mode 100644
index 0000000..c68e168
--- /dev/null
+++ b/include/asm-frv/Kbuild
@@ -0,0 +1 @@
+include include/asm-generic/Kbuild.asm
diff --git a/include/asm-h8300/Kbuild b/include/asm-h8300/Kbuild
new file mode 100644
index 0000000..c68e168
--- /dev/null
+++ b/include/asm-h8300/Kbuild
@@ -0,0 +1 @@
+include include/asm-generic/Kbuild.asm
diff --git a/include/asm-m32r/Kbuild b/include/asm-m32r/Kbuild
new file mode 100644
index 0000000..c68e168
--- /dev/null
+++ b/include/asm-m32r/Kbuild
@@ -0,0 +1 @@
+include include/asm-generic/Kbuild.asm
diff --git a/include/asm-m68k/Kbuild b/include/asm-m68k/Kbuild
new file mode 100644
index 0000000..c68e168
--- /dev/null
+++ b/include/asm-m68k/Kbuild
@@ -0,0 +1 @@
+include include/asm-generic/Kbuild.asm
diff --git a/include/asm-m68knommu/Kbuild b/include/asm-m68knommu/Kbuild
new file mode 100644
index 0000000..c68e168
--- /dev/null
+++ b/include/asm-m68knommu/Kbuild
@@ -0,0 +1 @@
+include include/asm-generic/Kbuild.asm
diff --git a/include/asm-mips/Kbuild b/include/asm-mips/Kbuild
new file mode 100644
index 0000000..c68e168
--- /dev/null
+++ b/include/asm-mips/Kbuild
@@ -0,0 +1 @@
+include include/asm-generic/Kbuild.asm
diff --git a/include/asm-parisc/Kbuild b/include/asm-parisc/Kbuild
new file mode 100644
index 0000000..c68e168
--- /dev/null
+++ b/include/asm-parisc/Kbuild
@@ -0,0 +1 @@
+include include/asm-generic/Kbuild.asm
diff --git a/include/asm-sh/Kbuild b/include/asm-sh/Kbuild
new file mode 100644
index 0000000..c68e168
--- /dev/null
+++ b/include/asm-sh/Kbuild
@@ -0,0 +1 @@
+include include/asm-generic/Kbuild.asm
diff --git a/include/asm-sh64/Kbuild b/include/asm-sh64/Kbuild
new file mode 100644
index 0000000..c68e168
--- /dev/null
+++ b/include/asm-sh64/Kbuild
@@ -0,0 +1 @@
+include include/asm-generic/Kbuild.asm
diff --git a/include/asm-um/Kbuild b/include/asm-um/Kbuild
new file mode 100644
index 0000000..c68e168
--- /dev/null
+++ b/include/asm-um/Kbuild
@@ -0,0 +1 @@
+include include/asm-generic/Kbuild.asm
diff --git a/include/asm-v850/Kbuild b/include/asm-v850/Kbuild
new file mode 100644
index 0000000..c68e168
--- /dev/null
+++ b/include/asm-v850/Kbuild
@@ -0,0 +1 @@
+include include/asm-generic/Kbuild.asm
diff --git a/include/asm-xtensa/Kbuild b/include/asm-xtensa/Kbuild
new file mode 100644
index 0000000..c68e168
--- /dev/null
+++ b/include/asm-xtensa/Kbuild
@@ -0,0 +1 @@
+include include/asm-generic/Kbuild.asm
-- 
cgit v0.10.2


From 4d1a099828fb3ed52ff666fceb6a7d562d9048b8 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 20 Jun 2006 08:34:40 +0100
Subject: Restrict headers exported to userspace for SPARC and SPARC64

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/include/asm-sparc/Kbuild b/include/asm-sparc/Kbuild
index cecdc61..e2a57fd 100644
--- a/include/asm-sparc/Kbuild
+++ b/include/asm-sparc/Kbuild
@@ -1,13 +1,6 @@
 include include/asm-generic/Kbuild.asm
 
-header-y += apc.h asi.h asmmacro.h auxio.h bitext.h bpp.h \
-	bsderrno.h btfixup.h clock.h contregs.h cpudata.h cypress.h \
-	ebus.h ecc.h eeprom.h fbio.h floppy.h head.h hw_irq.h \
-	idprom.h io-unit.h iommu.h ipc.h jsflash.h \
-	kdebug.h machines.h mbus.h memreg.h mostek.h mpmbox.h msi.h \
-	mxcc.h obio.h openprom.h openpromio.h oplib.h pbm.h pcic.h \
-	pconf.h perfctr.h pgtsrmmu.h pgtsun4.h pgtsun4c.h psr.h reg.h \
-	ross.h sbi.h sbus.h sfp-machine.h smpprim.h \
-	solerrno.h spinlock.h sun4paddr.h sun4prom.h sunbpp.h svr4.h \
-	swift.h sysen.h timer.h traps.h tsunami.h turbosparc.h \
-	vac-ops.h vaddrs.h vfc_ioctls.h viking.h winmacro.h
+unifdef-y += fbio.h perfctr.h psr.h
+header-y += apc.h asi.h auxio.h bpp.h head.h ipc.h jsflash.h	\
+	openpromio.h pbm.h pconf.h pgtsun4.h reg.h traps.h	\
+	turbosparc.h vfc_ioctls.h winmacro.h
diff --git a/include/asm-sparc64/Kbuild b/include/asm-sparc64/Kbuild
index 5edf764..c78d44b 100644
--- a/include/asm-sparc64/Kbuild
+++ b/include/asm-sparc64/Kbuild
@@ -1,16 +1,10 @@
 include include/asm-generic/Kbuild.asm
 
 ALTARCH := sparc
-ARCHDEF := defined __sparc__ && defined __arch64__ 
+ARCHDEF := defined __sparc__ && defined __arch64__
 ALTARCHDEF := defined __sparc__ && !defined __arch64__
 
-header-y += agp.h apb.h asi.h auxio.h bbc.h bpp.h bsderrno.h \
-	chafsr.h chmctrl.h compat.h const.h cpudata.h dcr.h dcu.h \
-	display7seg.h ebus.h envctrl.h estate.h fbio.h fhc.h floppy.h \
-	fpumacro.h head.h hw_irq.h idprom.h iommu.h \
-	ipc.h isa.h kdebug.h lsu.h mostek.h ns87303.h \
-	openprom.h openpromio.h oplib.h parport.h pbm.h pconf.h \
-	perfctr.h pil.h psrcompat.h pstate.h reg.h sbus.h \
-	sfp-machine.h solerrno.h spinlock.h spitfire.h starfire.h \
-	sunbpp.h svr4.h timer.h ttable.h uctx.h upa.h utrap.h vga.h \
-	visasm.h watchdog.h
+unifdef-y := fbio.h perfctr.h
+header-y += apb.h asi.h bbc.h bpp.h display7seg.h envctrl.h floppy.h	\
+	ipc.h kdebug.h mostek.h openprom.h openpromio.h parport.h	\
+	pconf.h psrcompat.h pstate.h reg.h uctx.h utrap.h watchdog.h
-- 
cgit v0.10.2


From 1bca9f2e5bd7f92fe4b34753e57bf4f47d3a2f01 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Thu, 22 Jun 2006 14:24:31 +0100
Subject: Remove <linux/i2c-id.h> and <linux/i2c-algo-ite.h> from userspace
 export

Do not export the following i2c header files to user space:

* i2c-id.h: These IDs are internal to the kernel and user space
  does not need them.
* i2c-algo-ite.h: The driver is broken and planned for removal, so
  let's not export it for now. The extra ioctl defined here can
  be standardized and moved to i2c-core/i2c-dev if really needed.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 5c95aa4..f7252be 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -12,7 +12,7 @@ header-y += affs_fs.h affs_hardblocks.h aio_abi.h a.out.h arcfb.h	\
 	dqblk_v2.h dqblk_xfs.h efs_fs_sb.h elf-fdpic.h elf.h elf-em.h	\
 	fadvise.h fd.h fdreg.h ftape-header-segment.h ftape-vendors.h	\
 	fuse.h futex.h genetlink.h gen_stats.h gigaset_dev.h hdsmart.h	\
-	hpfs_fs.h hysdn_if.h i2c-dev.h i2c-id.h i8k.h icmp.h		\
+	hpfs_fs.h hysdn_if.h i2c-dev.h i8k.h icmp.h			\
 	if_arcnet.h if_arp.h if_bonding.h if_cablemodem.h if_fc.h	\
 	if_fddi.h if.h if_hippi.h if_infiniband.h if_packet.h		\
 	if_plip.h if_ppp.h if_slip.h if_strip.h if_tunnel.h in6.h	\
@@ -38,7 +38,7 @@ unifdef-y += acct.h adb.h adfs_fs.h agpgart.h apm_bios.h atalk.h	\
 	ethtool.h eventpoll.h ext2_fs.h ext3_fs.h fb.h fcntl.h		\
 	filter.h flat.h fs.h ftape.h gameport.h generic_serial.h	\
 	genhd.h hayesesp.h hdlcdrv.h hdlc.h hdreg.h hiddev.h hpet.h	\
-	i2c-algo-ite.h i2c.h i2o-dev.h icmpv6.h if_bridge.h if_ec.h	\
+	i2c.h i2o-dev.h icmpv6.h if_bridge.h if_ec.h			\
 	if_eql.h if_ether.h if_frad.h if_ltalk.h if_pppox.h		\
 	if_shaper.h if_tr.h if_tun.h if_vlan.h if_wanpipe.h igmp.h	\
 	inet_diag.h in.h inotify.h input.h ipc.h ipmi.h ipv6.h		\
-- 
cgit v0.10.2


From 4311fa60b0d1cb5a2f62f646978294bca7b46cbb Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Thu, 22 Jun 2006 15:46:15 -0400
Subject: [SCSI] kmalloc argument switcheroo in recent 53c700 change.

On Wed, Jun 21, 2006 at 07:00:34PM +0000, Linux Kernel wrote:
 > commit 67d59dfdeb21df2c16dcd478b66177e91178ecd0
 > tree ae85703651d81740f4a6cd398f9dd4d6aabe6a2f
 > parent 6db874fbdbedba5e15e76cc03b42f52ea70338c0
 > author James Bottomley <James.Bottomley@steeleye.com> Wed, 14 Jun 2006 07:31:19 -0500
 > committer James Bottomley <jejb@mulgrave.il.steeleye.com> Tue, 20 Jun 2006 05:34:01 -0500
 >
 > [SCSI] 53c700: remove reliance on deprecated cmnd fields
 >  ...
 >
 > +	SDp->hostdata = kmalloc(GFP_KERNEL, sizeof(struct NCR_700_sense));
 > +
 > +	if (!SDp->hostdata)
 > +		return -ENOMEM;

"I'll take reversed arguments for $100 please Alex".

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index 3c683dc..bff0479 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -2044,7 +2044,7 @@ NCR_700_slave_configure(struct scsi_device *SDp)
 	struct NCR_700_Host_Parameters *hostdata = 
 		(struct NCR_700_Host_Parameters *)SDp->host->hostdata[0];
 
-	SDp->hostdata = kmalloc(GFP_KERNEL, sizeof(struct NCR_700_sense));
+	SDp->hostdata = kmalloc(sizeof(struct NCR_700_sense), GFP_KERNEL);
 
 	if (!SDp->hostdata)
 		return -ENOMEM;
-- 
cgit v0.10.2


From c65b1445d153a66ca91b00c1f10187e495c17918 Mon Sep 17 00:00:00 2001
From: Douglas Gilbert <dougg@torque.net>
Date: Tue, 6 Jun 2006 00:11:24 -0400
Subject: [SCSI] scsi_debug version 1.79

  - add 'virtual_gb' parameter to simulate large storage
    (by wrapping in dev_size_mb megabytes of actual ram)
  - add 'no_lun_0' parameter to skip lun 0 on each target
    (but still respond as required to INQUIRY + REPORT LUNS)
  - add well know lu support
  - add MODE SELECT commands support [pages: 0xa and 0x1c]
  - add LOG SENSE command support [pages: 0xd and 0x2f]
  - add READ CAPACITY (16) support
  - increase number of mode pages supported (to read),
    mainly transport specific (SAS) mode (sub)pages
  - add more VPD pages and extend others, including
    ATA information VPD page
  - START STOP UNIT now maintains a state machine
  - READ (16) and WRITE (16) cope with lbas larger
    than 32 bits (needed for the 'virtual_gb' parameter)
  - allow single command transfers up to 32 MB
  - more precise error (sense data) messages

Signed-off-by: Douglas Gilbert <dougg@torque.net>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 5a5d2af..1da8ccd 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -51,18 +51,22 @@
 #include "scsi_logging.h"
 #include "scsi_debug.h"
 
-#define SCSI_DEBUG_VERSION "1.75"
-static const char * scsi_debug_version_date = "20050113";
+#define SCSI_DEBUG_VERSION "1.79"
+static const char * scsi_debug_version_date = "20060604";
 
 /* Additional Sense Code (ASC) used */
-#define NO_ADDED_SENSE 0x0
+#define NO_ADDITIONAL_SENSE 0x0
+#define LOGICAL_UNIT_NOT_READY 0x4
 #define UNRECOVERED_READ_ERR 0x11
+#define PARAMETER_LIST_LENGTH_ERR 0x1a
 #define INVALID_OPCODE 0x20
 #define ADDR_OUT_OF_RANGE 0x21
 #define INVALID_FIELD_IN_CDB 0x24
+#define INVALID_FIELD_IN_PARAM_LIST 0x26
 #define POWERON_RESET 0x29
 #define SAVING_PARAMS_UNSUP 0x39
-#define THRESHHOLD_EXCEEDED 0x5d
+#define THRESHOLD_EXCEEDED 0x5d
+#define LOW_POWER_COND_ON 0x5e
 
 #define SDEBUG_TAGGED_QUEUING 0 /* 0 | MSG_SIMPLE_TAG | MSG_ORDERED_TAG */
 
@@ -81,6 +85,8 @@ static const char * scsi_debug_version_date = "20050113";
 #define DEF_SCSI_LEVEL   5    /* INQUIRY, byte2 [5->SPC-3] */
 #define DEF_PTYPE   0
 #define DEF_D_SENSE   0
+#define DEF_NO_LUN_0   0
+#define DEF_VIRTUAL_GB   0
 
 /* bit mask values for scsi_debug_opts */
 #define SCSI_DEBUG_OPT_NOISE   1
@@ -107,6 +113,7 @@ static const char * scsi_debug_version_date = "20050113";
 /* If REPORT LUNS has luns >= 256 it can choose "flat space" (value 1)
  * or "peripheral device" addressing (value 0) */
 #define SAM2_LUN_ADDRESS_METHOD 0
+#define SAM2_WLUN_REPORT_LUNS 0xc101
 
 static int scsi_debug_add_host = DEF_NUM_HOST;
 static int scsi_debug_delay = DEF_DELAY;
@@ -119,13 +126,16 @@ static int scsi_debug_opts = DEF_OPTS;
 static int scsi_debug_scsi_level = DEF_SCSI_LEVEL;
 static int scsi_debug_ptype = DEF_PTYPE; /* SCSI peripheral type (0==disk) */
 static int scsi_debug_dsense = DEF_D_SENSE;
+static int scsi_debug_no_lun_0 = DEF_NO_LUN_0;
+static int scsi_debug_virtual_gb = DEF_VIRTUAL_GB;
 
 static int scsi_debug_cmnd_count = 0;
 
 #define DEV_READONLY(TGT)      (0)
 #define DEV_REMOVEABLE(TGT)    (0)
 
-static unsigned long sdebug_store_size;	/* in bytes */
+static unsigned int sdebug_store_size;	/* in bytes */
+static unsigned int sdebug_store_sectors;
 static sector_t sdebug_capacity;	/* in sectors */
 
 /* old BIOS stuff, kernel may get rid of them but some mode sense pages
@@ -150,7 +160,9 @@ struct sdebug_dev_info {
 	unsigned int target;
 	unsigned int lun;
 	struct sdebug_host_info *sdbg_host;
+	unsigned int wlun;
 	char reset;
+	char stopped;
 	char used;
 };
 
@@ -194,11 +206,11 @@ static struct scsi_host_template sdebug_driver_template = {
 	.bios_param =		scsi_debug_biosparam,
 	.can_queue =		SCSI_DEBUG_CANQUEUE,
 	.this_id =		7,
-	.sg_tablesize =		64,
-	.cmd_per_lun =		3,
-	.max_sectors =		4096,
+	.sg_tablesize =		256,
+	.cmd_per_lun =		16,
+	.max_sectors =		0xffff,
 	.unchecked_isa_dma = 	0,
-	.use_clustering = 	DISABLE_CLUSTERING,
+	.use_clustering = 	ENABLE_CLUSTERING,
 	.module =		THIS_MODULE,
 };
 
@@ -226,19 +238,32 @@ static struct device_driver sdebug_driverfs_driver = {
 static const int check_condition_result =
 		(DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
 
+static unsigned char ctrl_m_pg[] = {0xa, 10, 2, 0, 0, 0, 0, 0,
+				    0, 0, 0x2, 0x4b};
+static unsigned char iec_m_pg[] = {0x1c, 0xa, 0x08, 0, 0, 0, 0, 0,
+			           0, 0, 0x0, 0x0};
+
 /* function declarations */
 static int resp_inquiry(struct scsi_cmnd * SCpnt, int target,
 			struct sdebug_dev_info * devip);
 static int resp_requests(struct scsi_cmnd * SCpnt,
 			 struct sdebug_dev_info * devip);
+static int resp_start_stop(struct scsi_cmnd * scp,
+			   struct sdebug_dev_info * devip);
 static int resp_readcap(struct scsi_cmnd * SCpnt,
 			struct sdebug_dev_info * devip);
-static int resp_mode_sense(struct scsi_cmnd * SCpnt, int target,
+static int resp_readcap16(struct scsi_cmnd * SCpnt,
+			  struct sdebug_dev_info * devip);
+static int resp_mode_sense(struct scsi_cmnd * scp, int target,
 			   struct sdebug_dev_info * devip);
-static int resp_read(struct scsi_cmnd * SCpnt, int upper_blk, int block,
-		     int num, struct sdebug_dev_info * devip);
-static int resp_write(struct scsi_cmnd * SCpnt, int upper_blk, int block,
-		      int num, struct sdebug_dev_info * devip);
+static int resp_mode_select(struct scsi_cmnd * scp, int mselect6,
+			    struct sdebug_dev_info * devip);
+static int resp_log_sense(struct scsi_cmnd * scp,
+			  struct sdebug_dev_info * devip);
+static int resp_read(struct scsi_cmnd * SCpnt, unsigned long long lba,
+		     unsigned int num, struct sdebug_dev_info * devip);
+static int resp_write(struct scsi_cmnd * SCpnt, unsigned long long lba,
+		      unsigned int num, struct sdebug_dev_info * devip);
 static int resp_report_luns(struct scsi_cmnd * SCpnt,
 			    struct sdebug_dev_info * devip);
 static int fill_from_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr,
@@ -249,8 +274,8 @@ static void timer_intr_handler(unsigned long);
 static struct sdebug_dev_info * devInfoReg(struct scsi_device * sdev);
 static void mk_sense_buffer(struct sdebug_dev_info * devip, int key,
 			    int asc, int asq);
-static int check_reset(struct scsi_cmnd * SCpnt,
-		       struct sdebug_dev_info * devip);
+static int check_readiness(struct scsi_cmnd * SCpnt, int reset_only,
+			   struct sdebug_dev_info * devip);
 static int schedule_resp(struct scsi_cmnd * cmnd,
 			 struct sdebug_dev_info * devip,
 			 done_funct_t done, int scsi_result, int delta_jiff);
@@ -258,8 +283,10 @@ static void __init sdebug_build_parts(unsigned char * ramp);
 static void __init init_all_queued(void);
 static void stop_all_queued(void);
 static int stop_queued_cmnd(struct scsi_cmnd * cmnd);
-static int inquiry_evpd_83(unsigned char * arr, int dev_id_num,
-                           const char * dev_id_str, int dev_id_str_len);
+static int inquiry_evpd_83(unsigned char * arr, int target_dev_id,
+			   int dev_id_num, const char * dev_id_str,
+			   int dev_id_str_len);
+static int inquiry_evpd_88(unsigned char * arr, int target_dev_id);
 static void do_create_driverfs_files(void);
 static void do_remove_driverfs_files(void);
 
@@ -275,18 +302,22 @@ static
 int scsi_debug_queuecommand(struct scsi_cmnd * SCpnt, done_funct_t done)
 {
 	unsigned char *cmd = (unsigned char *) SCpnt->cmnd;
-	int block, upper_blk, num, k;
+	int len, k, j;
+	unsigned int num;
+	unsigned long long lba;
 	int errsts = 0;
-	int target = scmd_id(SCpnt);
+	int target = SCpnt->device->id;
 	struct sdebug_dev_info * devip = NULL;
 	int inj_recovered = 0;
+	int delay_override = 0;
 
 	if (done == NULL)
 		return 0;	/* assume mid level reprocessing command */
 
+	SCpnt->resid = 0;
 	if ((SCSI_DEBUG_OPT_NOISE & scsi_debug_opts) && cmd) {
 		printk(KERN_INFO "scsi_debug: cmd ");
-		for (k = 0, num = SCpnt->cmd_len; k < num; ++k)
+		for (k = 0, len = SCpnt->cmd_len; k < len; ++k)
 			printk("%02x ", (int)cmd[k]);
 		printk("\n");
 	}
@@ -297,7 +328,8 @@ int scsi_debug_queuecommand(struct scsi_cmnd * SCpnt, done_funct_t done)
 				     DID_NO_CONNECT << 16, 0);
         }
 
-	if (SCpnt->device->lun >= scsi_debug_max_luns)
+	if ((SCpnt->device->lun >= scsi_debug_max_luns) &&
+	    (SCpnt->device->lun != SAM2_WLUN_REPORT_LUNS))
 		return schedule_resp(SCpnt, NULL, done,
 				     DID_NO_CONNECT << 16, 0);
 	devip = devInfoReg(SCpnt->device);
@@ -316,118 +348,150 @@ int scsi_debug_queuecommand(struct scsi_cmnd * SCpnt, done_funct_t done)
 			inj_recovered = 1; /* to reads and writes below */
         }
 
+	if (devip->wlun) {
+		switch (*cmd) {
+		case INQUIRY:
+		case REQUEST_SENSE:
+		case TEST_UNIT_READY:
+		case REPORT_LUNS:
+			break;  /* only allowable wlun commands */
+		default:
+			if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)
+				printk(KERN_INFO "scsi_debug: Opcode: 0x%x "
+				       "not supported for wlun\n", *cmd);
+			mk_sense_buffer(devip, ILLEGAL_REQUEST,
+					INVALID_OPCODE, 0);
+			errsts = check_condition_result;
+			return schedule_resp(SCpnt, devip, done, errsts,
+					     0);
+		}
+	}
+
 	switch (*cmd) {
 	case INQUIRY:     /* mandatory, ignore unit attention */
+		delay_override = 1;
 		errsts = resp_inquiry(SCpnt, target, devip);
 		break;
 	case REQUEST_SENSE:	/* mandatory, ignore unit attention */
+		delay_override = 1;
 		errsts = resp_requests(SCpnt, devip);
 		break;
 	case REZERO_UNIT:	/* actually this is REWIND for SSC */
 	case START_STOP:
-		errsts = check_reset(SCpnt, devip);
+		errsts = resp_start_stop(SCpnt, devip);
 		break;
 	case ALLOW_MEDIUM_REMOVAL:
-		if ((errsts = check_reset(SCpnt, devip)))
+		if ((errsts = check_readiness(SCpnt, 1, devip)))
 			break;
 		if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)
 			printk(KERN_INFO "scsi_debug: Medium removal %s\n",
 			        cmd[4] ? "inhibited" : "enabled");
 		break;
 	case SEND_DIAGNOSTIC:     /* mandatory */
-		errsts = check_reset(SCpnt, devip);
+		errsts = check_readiness(SCpnt, 1, devip);
 		break;
 	case TEST_UNIT_READY:     /* mandatory */
-		errsts = check_reset(SCpnt, devip);
+		delay_override = 1;
+		errsts = check_readiness(SCpnt, 0, devip);
 		break;
         case RESERVE:
-		errsts = check_reset(SCpnt, devip);
+		errsts = check_readiness(SCpnt, 1, devip);
                 break;
         case RESERVE_10:
-		errsts = check_reset(SCpnt, devip);
+		errsts = check_readiness(SCpnt, 1, devip);
                 break;
         case RELEASE:
-		errsts = check_reset(SCpnt, devip);
+		errsts = check_readiness(SCpnt, 1, devip);
                 break;
         case RELEASE_10:
-		errsts = check_reset(SCpnt, devip);
+		errsts = check_readiness(SCpnt, 1, devip);
                 break;
 	case READ_CAPACITY:
 		errsts = resp_readcap(SCpnt, devip);
 		break;
+	case SERVICE_ACTION_IN:
+		if (SAI_READ_CAPACITY_16 != cmd[1]) {
+			mk_sense_buffer(devip, ILLEGAL_REQUEST,
+					INVALID_OPCODE, 0);
+			errsts = check_condition_result;
+			break;
+		}
+		errsts = resp_readcap16(SCpnt, devip);
+		break;
 	case READ_16:
 	case READ_12:
 	case READ_10:
 	case READ_6:
-		if ((errsts = check_reset(SCpnt, devip)))
+		if ((errsts = check_readiness(SCpnt, 0, devip)))
 			break;
-		upper_blk = 0;
 		if ((*cmd) == READ_16) {
-			upper_blk = cmd[5] + (cmd[4] << 8) +
-				    (cmd[3] << 16) + (cmd[2] << 24);
-			block = cmd[9] + (cmd[8] << 8) +
-				(cmd[7] << 16) + (cmd[6] << 24);
+			for (lba = 0, j = 0; j < 8; ++j) {
+				if (j > 0)
+					lba <<= 8;
+				lba += cmd[2 + j];
+			}
 			num = cmd[13] + (cmd[12] << 8) +
 				(cmd[11] << 16) + (cmd[10] << 24);
 		} else if ((*cmd) == READ_12) {
-			block = cmd[5] + (cmd[4] << 8) +
+			lba = cmd[5] + (cmd[4] << 8) +
 				(cmd[3] << 16) + (cmd[2] << 24);
 			num = cmd[9] + (cmd[8] << 8) +
 				(cmd[7] << 16) + (cmd[6] << 24);
 		} else if ((*cmd) == READ_10) {
-			block = cmd[5] + (cmd[4] << 8) +
+			lba = cmd[5] + (cmd[4] << 8) +
 				(cmd[3] << 16) + (cmd[2] << 24);
 			num = cmd[8] + (cmd[7] << 8);
-		} else {
-			block = cmd[3] + (cmd[2] << 8) +
+		} else {	/* READ (6) */
+			lba = cmd[3] + (cmd[2] << 8) +
 				((cmd[1] & 0x1f) << 16);
-			num = cmd[4];
+			num = (0 == cmd[4]) ? 256 : cmd[4];
 		}
-		errsts = resp_read(SCpnt, upper_blk, block, num, devip);
+		errsts = resp_read(SCpnt, lba, num, devip);
 		if (inj_recovered && (0 == errsts)) {
 			mk_sense_buffer(devip, RECOVERED_ERROR,
-					THRESHHOLD_EXCEEDED, 0);
+					THRESHOLD_EXCEEDED, 0);
 			errsts = check_condition_result;
 		}
 		break;
 	case REPORT_LUNS:	/* mandatory, ignore unit attention */
+		delay_override = 1;
 		errsts = resp_report_luns(SCpnt, devip);
 		break;
 	case VERIFY:		/* 10 byte SBC-2 command */
-		errsts = check_reset(SCpnt, devip);
+		errsts = check_readiness(SCpnt, 0, devip);
 		break;
 	case WRITE_16:
 	case WRITE_12:
 	case WRITE_10:
 	case WRITE_6:
-		if ((errsts = check_reset(SCpnt, devip)))
+		if ((errsts = check_readiness(SCpnt, 0, devip)))
 			break;
-		upper_blk = 0;
 		if ((*cmd) == WRITE_16) {
-			upper_blk = cmd[5] + (cmd[4] << 8) +
-				    (cmd[3] << 16) + (cmd[2] << 24);
-			block = cmd[9] + (cmd[8] << 8) +
-				(cmd[7] << 16) + (cmd[6] << 24);
+			for (lba = 0, j = 0; j < 8; ++j) {
+				if (j > 0)
+					lba <<= 8;
+				lba += cmd[2 + j];
+			}
 			num = cmd[13] + (cmd[12] << 8) +
 				(cmd[11] << 16) + (cmd[10] << 24);
 		} else if ((*cmd) == WRITE_12) {
-			block = cmd[5] + (cmd[4] << 8) +
+			lba = cmd[5] + (cmd[4] << 8) +
 				(cmd[3] << 16) + (cmd[2] << 24);
 			num = cmd[9] + (cmd[8] << 8) +
 				(cmd[7] << 16) + (cmd[6] << 24);
 		} else if ((*cmd) == WRITE_10) {
-			block = cmd[5] + (cmd[4] << 8) +
+			lba = cmd[5] + (cmd[4] << 8) +
 				(cmd[3] << 16) + (cmd[2] << 24);
 			num = cmd[8] + (cmd[7] << 8);
-		} else {
-			block = cmd[3] + (cmd[2] << 8) +
+		} else {	/* WRITE (6) */
+			lba = cmd[3] + (cmd[2] << 8) +
 				((cmd[1] & 0x1f) << 16);
-			num = cmd[4];
+			num = (0 == cmd[4]) ? 256 : cmd[4];
 		}
-		errsts = resp_write(SCpnt, upper_blk, block, num, devip);
+		errsts = resp_write(SCpnt, lba, num, devip);
 		if (inj_recovered && (0 == errsts)) {
 			mk_sense_buffer(devip, RECOVERED_ERROR,
-					THRESHHOLD_EXCEEDED, 0);
+					THRESHOLD_EXCEEDED, 0);
 			errsts = check_condition_result;
 		}
 		break;
@@ -435,20 +499,31 @@ int scsi_debug_queuecommand(struct scsi_cmnd * SCpnt, done_funct_t done)
 	case MODE_SENSE_10:
 		errsts = resp_mode_sense(SCpnt, target, devip);
 		break;
+	case MODE_SELECT:
+		errsts = resp_mode_select(SCpnt, 1, devip);
+		break;
+	case MODE_SELECT_10:
+		errsts = resp_mode_select(SCpnt, 0, devip);
+		break;
+	case LOG_SENSE:
+		errsts = resp_log_sense(SCpnt, devip);
+		break;
 	case SYNCHRONIZE_CACHE:
-		errsts = check_reset(SCpnt, devip);
+		delay_override = 1;
+		errsts = check_readiness(SCpnt, 0, devip);
 		break;
 	default:
 		if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)
 			printk(KERN_INFO "scsi_debug: Opcode: 0x%x not "
 			       "supported\n", *cmd);
-		if ((errsts = check_reset(SCpnt, devip)))
+		if ((errsts = check_readiness(SCpnt, 1, devip)))
 			break;	/* Unit attention takes precedence */
 		mk_sense_buffer(devip, ILLEGAL_REQUEST, INVALID_OPCODE, 0);
 		errsts = check_condition_result;
 		break;
 	}
-	return schedule_resp(SCpnt, devip, done, errsts, scsi_debug_delay);
+	return schedule_resp(SCpnt, devip, done, errsts,
+			     (delay_override ? 0 : scsi_debug_delay));
 }
 
 static int scsi_debug_ioctl(struct scsi_device *dev, int cmd, void __user *arg)
@@ -460,7 +535,8 @@ static int scsi_debug_ioctl(struct scsi_device *dev, int cmd, void __user *arg)
 	/* return -ENOTTY; // correct return but upsets fdisk */
 }
 
-static int check_reset(struct scsi_cmnd * SCpnt, struct sdebug_dev_info * devip)
+static int check_readiness(struct scsi_cmnd * SCpnt, int reset_only,
+			   struct sdebug_dev_info * devip)
 {
 	if (devip->reset) {
 		if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)
@@ -470,6 +546,14 @@ static int check_reset(struct scsi_cmnd * SCpnt, struct sdebug_dev_info * devip)
 		mk_sense_buffer(devip, UNIT_ATTENTION, POWERON_RESET, 0);
 		return check_condition_result;
 	}
+	if ((0 == reset_only) && devip->stopped) {
+		if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)
+			printk(KERN_INFO "scsi_debug: Reporting Not "
+			       "ready: initializing command required\n");
+		mk_sense_buffer(devip, NOT_READY, LOGICAL_UNIT_NOT_READY,
+				0x2);
+		return check_condition_result;
+	}
 	return 0;
 }
 
@@ -493,7 +577,10 @@ static int fill_from_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr,
 		req_len = scp->request_bufflen;
 		act_len = (req_len < arr_len) ? req_len : arr_len;
 		memcpy(scp->request_buffer, arr, act_len);
-		scp->resid = req_len - act_len;
+		if (scp->resid)
+			scp->resid -= act_len;
+		else
+			scp->resid = req_len - act_len;
 		return 0;
 	}
 	sgpnt = (struct scatterlist *)scp->request_buffer;
@@ -516,7 +603,10 @@ static int fill_from_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr,
 		}
 		req_len += sgpnt->length;
 	}
-	scp->resid = req_len - act_len;
+	if (scp->resid)
+		scp->resid -= act_len;
+	else
+		scp->resid = req_len - act_len;
 	return 0;
 }
 
@@ -567,12 +657,14 @@ static const char * inq_vendor_id = "Linux   ";
 static const char * inq_product_id = "scsi_debug      ";
 static const char * inq_product_rev = "0004";
 
-static int inquiry_evpd_83(unsigned char * arr, int dev_id_num,
-			   const char * dev_id_str, int dev_id_str_len)
+static int inquiry_evpd_83(unsigned char * arr, int target_dev_id,
+			   int dev_id_num, const char * dev_id_str,
+			   int dev_id_str_len)
 {
-	int num;
+	int num, port_a;
+	char b[32];
 
-	/* Two identification descriptors: */
+	port_a = target_dev_id + 1;
 	/* T10 vendor identifier field format (faked) */
 	arr[0] = 0x2;	/* ASCII */
 	arr[1] = 0x1;
@@ -583,25 +675,246 @@ static int inquiry_evpd_83(unsigned char * arr, int dev_id_num,
 	num = 8 + 16 + dev_id_str_len;
 	arr[3] = num;
 	num += 4;
-	/* NAA IEEE registered identifier (faked) */
-	arr[num] = 0x1;	/* binary */
-	arr[num + 1] = 0x3;
-	arr[num + 2] = 0x0;
-	arr[num + 3] = 0x8;
-	arr[num + 4] = 0x51;	/* ieee company id=0x123456 (faked) */
-	arr[num + 5] = 0x23;
-	arr[num + 6] = 0x45;
-	arr[num + 7] = 0x60;
-	arr[num + 8] = (dev_id_num >> 24);
-	arr[num + 9] = (dev_id_num >> 16) & 0xff;
-	arr[num + 10] = (dev_id_num >> 8) & 0xff;
-	arr[num + 11] = dev_id_num & 0xff;
-	return num + 12;
+	if (dev_id_num >= 0) {
+		/* NAA-5, Logical unit identifier (binary) */
+		arr[num++] = 0x1;	/* binary (not necessarily sas) */
+		arr[num++] = 0x3;	/* PIV=0, lu, naa */
+		arr[num++] = 0x0;
+		arr[num++] = 0x8;
+		arr[num++] = 0x53;  /* naa-5 ieee company id=0x333333 (fake) */
+		arr[num++] = 0x33;
+		arr[num++] = 0x33;
+		arr[num++] = 0x30;
+		arr[num++] = (dev_id_num >> 24);
+		arr[num++] = (dev_id_num >> 16) & 0xff;
+		arr[num++] = (dev_id_num >> 8) & 0xff;
+		arr[num++] = dev_id_num & 0xff;
+		/* Target relative port number */
+		arr[num++] = 0x61;	/* proto=sas, binary */
+		arr[num++] = 0x94;	/* PIV=1, target port, rel port */
+		arr[num++] = 0x0;	/* reserved */
+		arr[num++] = 0x4;	/* length */
+		arr[num++] = 0x0;	/* reserved */
+		arr[num++] = 0x0;	/* reserved */
+		arr[num++] = 0x0;
+		arr[num++] = 0x1;	/* relative port A */
+	}
+	/* NAA-5, Target port identifier */
+	arr[num++] = 0x61;	/* proto=sas, binary */
+	arr[num++] = 0x93;	/* piv=1, target port, naa */
+	arr[num++] = 0x0;
+	arr[num++] = 0x8;
+	arr[num++] = 0x52;	/* naa-5, company id=0x222222 (fake) */
+	arr[num++] = 0x22;
+	arr[num++] = 0x22;
+	arr[num++] = 0x20;
+	arr[num++] = (port_a >> 24);
+	arr[num++] = (port_a >> 16) & 0xff;
+	arr[num++] = (port_a >> 8) & 0xff;
+	arr[num++] = port_a & 0xff;
+	/* NAA-5, Target device identifier */
+	arr[num++] = 0x61;	/* proto=sas, binary */
+	arr[num++] = 0xa3;	/* piv=1, target device, naa */
+	arr[num++] = 0x0;
+	arr[num++] = 0x8;
+	arr[num++] = 0x52;	/* naa-5, company id=0x222222 (fake) */
+	arr[num++] = 0x22;
+	arr[num++] = 0x22;
+	arr[num++] = 0x20;
+	arr[num++] = (target_dev_id >> 24);
+	arr[num++] = (target_dev_id >> 16) & 0xff;
+	arr[num++] = (target_dev_id >> 8) & 0xff;
+	arr[num++] = target_dev_id & 0xff;
+	/* SCSI name string: Target device identifier */
+	arr[num++] = 0x63;	/* proto=sas, UTF-8 */
+	arr[num++] = 0xa8;	/* piv=1, target device, SCSI name string */
+	arr[num++] = 0x0;
+	arr[num++] = 24;
+	memcpy(arr + num, "naa.52222220", 12);
+	num += 12;
+	snprintf(b, sizeof(b), "%08X", target_dev_id);
+	memcpy(arr + num, b, 8);
+	num += 8;
+	memset(arr + num, 0, 4);
+	num += 4;
+	return num;
+}
+
+
+static unsigned char vpd84_data[] = {
+/* from 4th byte */ 0x22,0x22,0x22,0x0,0xbb,0x0,
+    0x22,0x22,0x22,0x0,0xbb,0x1,
+    0x22,0x22,0x22,0x0,0xbb,0x2,
+};
+
+static int inquiry_evpd_84(unsigned char * arr)
+{
+	memcpy(arr, vpd84_data, sizeof(vpd84_data));
+	return sizeof(vpd84_data);
+}
+
+static int inquiry_evpd_85(unsigned char * arr)
+{
+	int num = 0;
+	const char * na1 = "https://www.kernel.org/config";
+	const char * na2 = "http://www.kernel.org/log";
+	int plen, olen;
+
+	arr[num++] = 0x1;	/* lu, storage config */
+	arr[num++] = 0x0;	/* reserved */
+	arr[num++] = 0x0;
+	olen = strlen(na1);
+	plen = olen + 1;
+	if (plen % 4)
+		plen = ((plen / 4) + 1) * 4;
+	arr[num++] = plen;	/* length, null termianted, padded */
+	memcpy(arr + num, na1, olen);
+	memset(arr + num + olen, 0, plen - olen);
+	num += plen;
+
+	arr[num++] = 0x4;	/* lu, logging */
+	arr[num++] = 0x0;	/* reserved */
+	arr[num++] = 0x0;
+	olen = strlen(na2);
+	plen = olen + 1;
+	if (plen % 4)
+		plen = ((plen / 4) + 1) * 4;
+	arr[num++] = plen;	/* length, null terminated, padded */
+	memcpy(arr + num, na2, olen);
+	memset(arr + num + olen, 0, plen - olen);
+	num += plen;
+
+	return num;
+}
+
+/* SCSI ports VPD page */
+static int inquiry_evpd_88(unsigned char * arr, int target_dev_id)
+{
+	int num = 0;
+	int port_a, port_b;
+
+	port_a = target_dev_id + 1;
+	port_b = port_a + 1;
+	arr[num++] = 0x0;	/* reserved */
+	arr[num++] = 0x0;	/* reserved */
+	arr[num++] = 0x0;
+	arr[num++] = 0x1;	/* relative port 1 (primary) */
+	memset(arr + num, 0, 6);
+	num += 6;
+	arr[num++] = 0x0;
+	arr[num++] = 12;	/* length tp descriptor */
+	/* naa-5 target port identifier (A) */
+	arr[num++] = 0x61;	/* proto=sas, binary */
+	arr[num++] = 0x93;	/* PIV=1, target port, NAA */
+	arr[num++] = 0x0;	/* reserved */
+	arr[num++] = 0x8;	/* length */
+	arr[num++] = 0x52;	/* NAA-5, company_id=0x222222 (fake) */
+	arr[num++] = 0x22;
+	arr[num++] = 0x22;
+	arr[num++] = 0x20;
+	arr[num++] = (port_a >> 24);
+	arr[num++] = (port_a >> 16) & 0xff;
+	arr[num++] = (port_a >> 8) & 0xff;
+	arr[num++] = port_a & 0xff;
+
+	arr[num++] = 0x0;	/* reserved */
+	arr[num++] = 0x0;	/* reserved */
+	arr[num++] = 0x0;
+	arr[num++] = 0x2;	/* relative port 2 (secondary) */
+	memset(arr + num, 0, 6);
+	num += 6;
+	arr[num++] = 0x0;
+	arr[num++] = 12;	/* length tp descriptor */
+	/* naa-5 target port identifier (B) */
+	arr[num++] = 0x61;	/* proto=sas, binary */
+	arr[num++] = 0x93;	/* PIV=1, target port, NAA */
+	arr[num++] = 0x0;	/* reserved */
+	arr[num++] = 0x8;	/* length */
+	arr[num++] = 0x52;	/* NAA-5, company_id=0x222222 (fake) */
+	arr[num++] = 0x22;
+	arr[num++] = 0x22;
+	arr[num++] = 0x20;
+	arr[num++] = (port_b >> 24);
+	arr[num++] = (port_b >> 16) & 0xff;
+	arr[num++] = (port_b >> 8) & 0xff;
+	arr[num++] = port_b & 0xff;
+
+	return num;
+}
+
+
+static unsigned char vpd89_data[] = {
+/* from 4th byte */ 0,0,0,0,
+'l','i','n','u','x',' ',' ',' ',
+'S','A','T',' ','s','c','s','i','_','d','e','b','u','g',' ',' ',
+'1','2','3','4',
+0x34,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
+0xec,0,0,0,
+0x5a,0xc,0xff,0x3f,0x37,0xc8,0x10,0,0,0,0,0,0x3f,0,0,0,
+0,0,0,0,0x58,0x58,0x58,0x58,0x58,0x58,0x58,0x58,0x20,0x20,0x20,0x20,
+0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0,0,0,0x40,0x4,0,0x2e,0x33,
+0x38,0x31,0x20,0x20,0x20,0x20,0x54,0x53,0x38,0x33,0x30,0x30,0x33,0x31,
+0x53,0x41,
+0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
+0x20,0x20,
+0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
+0x10,0x80,
+0,0,0,0x2f,0,0,0,0x2,0,0x2,0x7,0,0xff,0xff,0x1,0,
+0x3f,0,0xc1,0xff,0x3e,0,0x10,0x1,0xb0,0xf8,0x50,0x9,0,0,0x7,0,
+0x3,0,0x78,0,0x78,0,0xf0,0,0x78,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0x2,0,0,0,0,0,0,0,
+0x7e,0,0x1b,0,0x6b,0x34,0x1,0x7d,0x3,0x40,0x69,0x34,0x1,0x3c,0x3,0x40,
+0x7f,0x40,0,0,0,0,0xfe,0xfe,0,0,0,0,0,0xfe,0,0,
+0,0,0,0,0,0,0,0,0xb0,0xf8,0x50,0x9,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x1,0,0xb0,0xf8,0x50,0x9,0xb0,0xf8,0x50,0x9,0x20,0x20,0x2,0,0xb6,0x42,
+0,0x80,0x8a,0,0x6,0x3c,0xa,0x3c,0xff,0xff,0xc6,0x7,0,0x1,0,0x8,
+0xf0,0xf,0,0x10,0x2,0,0x30,0,0,0,0,0,0,0,0x6,0xfe,
+0,0,0x2,0,0x50,0,0x8a,0,0x4f,0x95,0,0,0x21,0,0xb,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa5,0x51,
+};
+
+static int inquiry_evpd_89(unsigned char * arr)
+{
+	memcpy(arr, vpd89_data, sizeof(vpd89_data));
+	return sizeof(vpd89_data);
+}
+
+
+static unsigned char vpdb0_data[] = {
+	/* from 4th byte */ 0,0,0,4,
+	0,0,0x4,0,
+	0,0,0,64,
+};
+
+static int inquiry_evpd_b0(unsigned char * arr)
+{
+	memcpy(arr, vpdb0_data, sizeof(vpdb0_data));
+	if (sdebug_store_sectors > 0x400) {
+		arr[4] = (sdebug_store_sectors >> 24) & 0xff;
+		arr[5] = (sdebug_store_sectors >> 16) & 0xff;
+		arr[6] = (sdebug_store_sectors >> 8) & 0xff;
+		arr[7] = sdebug_store_sectors & 0xff;
+	}
+	return sizeof(vpdb0_data);
 }
 
 
 #define SDEBUG_LONG_INQ_SZ 96
-#define SDEBUG_MAX_INQ_ARR_SZ 128
+#define SDEBUG_MAX_INQ_ARR_SZ 584
 
 static int resp_inquiry(struct scsi_cmnd * scp, int target,
 			struct sdebug_dev_info * devip)
@@ -609,64 +922,113 @@ static int resp_inquiry(struct scsi_cmnd * scp, int target,
 	unsigned char pq_pdt;
 	unsigned char arr[SDEBUG_MAX_INQ_ARR_SZ];
 	unsigned char *cmd = (unsigned char *)scp->cmnd;
-	int alloc_len;
+	int alloc_len, n;
 
 	alloc_len = (cmd[3] << 8) + cmd[4];
 	memset(arr, 0, SDEBUG_MAX_INQ_ARR_SZ);
-	pq_pdt = (scsi_debug_ptype & 0x1f);
+	if (devip->wlun)
+		pq_pdt = 0x1e;	/* present, wlun */
+	else if (scsi_debug_no_lun_0 && (0 == devip->lun))
+		pq_pdt = 0x7f;	/* not present, no device type */
+	else
+		pq_pdt = (scsi_debug_ptype & 0x1f);
 	arr[0] = pq_pdt;
 	if (0x2 & cmd[1]) {  /* CMDDT bit set */
 		mk_sense_buffer(devip, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB,
 			       	0);
 		return check_condition_result;
 	} else if (0x1 & cmd[1]) {  /* EVPD bit set */
-		int dev_id_num, len;
-		char dev_id_str[6];
+		int lu_id_num, target_dev_id, len;
+		char lu_id_str[6];
+		int host_no = devip->sdbg_host->shost->host_no;
 		
-		dev_id_num = ((devip->sdbg_host->shost->host_no + 1) * 2000) +
-			     (devip->target * 1000) + devip->lun;
-		len = scnprintf(dev_id_str, 6, "%d", dev_id_num);
+		lu_id_num = devip->wlun ? -1 : (((host_no + 1) * 2000) +
+			    (devip->target * 1000) + devip->lun);
+		target_dev_id = ((host_no + 1) * 2000) +
+				 (devip->target * 1000) - 3;
+		len = scnprintf(lu_id_str, 6, "%d", lu_id_num);
 		if (0 == cmd[2]) { /* supported vital product data pages */
-			arr[3] = 3;
-			arr[4] = 0x0; /* this page */
-			arr[5] = 0x80; /* unit serial number */
-			arr[6] = 0x83; /* device identification */
+			arr[1] = cmd[2];	/*sanity */
+			n = 4;
+			arr[n++] = 0x0;   /* this page */
+			arr[n++] = 0x80;  /* unit serial number */
+			arr[n++] = 0x83;  /* device identification */
+			arr[n++] = 0x84;  /* software interface ident. */
+			arr[n++] = 0x85;  /* management network addresses */
+			arr[n++] = 0x86;  /* extended inquiry */
+			arr[n++] = 0x87;  /* mode page policy */
+			arr[n++] = 0x88;  /* SCSI ports */
+			arr[n++] = 0x89;  /* ATA information */
+			arr[n++] = 0xb0;  /* Block limits (SBC) */
+			arr[3] = n - 4;	  /* number of supported VPD pages */
 		} else if (0x80 == cmd[2]) { /* unit serial number */
-			arr[1] = 0x80;
+			arr[1] = cmd[2];	/*sanity */
 			arr[3] = len;
-			memcpy(&arr[4], dev_id_str, len);
+			memcpy(&arr[4], lu_id_str, len);
 		} else if (0x83 == cmd[2]) { /* device identification */
-			arr[1] = 0x83;
-			arr[3] = inquiry_evpd_83(&arr[4], dev_id_num,
-						 dev_id_str, len);
+			arr[1] = cmd[2];	/*sanity */
+			arr[3] = inquiry_evpd_83(&arr[4], target_dev_id,
+						 lu_id_num, lu_id_str, len);
+		} else if (0x84 == cmd[2]) { /* Software interface ident. */
+			arr[1] = cmd[2];	/*sanity */
+			arr[3] = inquiry_evpd_84(&arr[4]);
+		} else if (0x85 == cmd[2]) { /* Management network addresses */
+			arr[1] = cmd[2];	/*sanity */
+			arr[3] = inquiry_evpd_85(&arr[4]);
+		} else if (0x86 == cmd[2]) { /* extended inquiry */
+			arr[1] = cmd[2];	/*sanity */
+			arr[3] = 0x3c;	/* number of following entries */
+			arr[4] = 0x0;   /* no protection stuff */
+			arr[5] = 0x7;   /* head of q, ordered + simple q's */
+		} else if (0x87 == cmd[2]) { /* mode page policy */
+			arr[1] = cmd[2];	/*sanity */
+			arr[3] = 0x8;	/* number of following entries */
+			arr[4] = 0x2;	/* disconnect-reconnect mp */
+			arr[6] = 0x80;	/* mlus, shared */
+			arr[8] = 0x18;	 /* protocol specific lu */
+			arr[10] = 0x82;	 /* mlus, per initiator port */
+		} else if (0x88 == cmd[2]) { /* SCSI Ports */
+			arr[1] = cmd[2];	/*sanity */
+			arr[3] = inquiry_evpd_88(&arr[4], target_dev_id);
+		} else if (0x89 == cmd[2]) { /* ATA information */
+			arr[1] = cmd[2];        /*sanity */
+			n = inquiry_evpd_89(&arr[4]);
+			arr[2] = (n >> 8);
+			arr[3] = (n & 0xff);
+		} else if (0xb0 == cmd[2]) { /* Block limits (SBC) */
+			arr[1] = cmd[2];        /*sanity */
+			arr[3] = inquiry_evpd_b0(&arr[4]);
 		} else {
 			/* Illegal request, invalid field in cdb */
 			mk_sense_buffer(devip, ILLEGAL_REQUEST,
 					INVALID_FIELD_IN_CDB, 0);
 			return check_condition_result;
 		}
+		len = min(((arr[2] << 8) + arr[3]) + 4, alloc_len);
 		return fill_from_dev_buffer(scp, arr,
-			    min(alloc_len, SDEBUG_MAX_INQ_ARR_SZ));
+			    min(len, SDEBUG_MAX_INQ_ARR_SZ));
 	}
 	/* drops through here for a standard inquiry */
 	arr[1] = DEV_REMOVEABLE(target) ? 0x80 : 0;	/* Removable disk */
 	arr[2] = scsi_debug_scsi_level;
 	arr[3] = 2;    /* response_data_format==2 */
 	arr[4] = SDEBUG_LONG_INQ_SZ - 5;
-	arr[6] = 0x1; /* claim: ADDR16 */
+	arr[6] = 0x10; /* claim: MultiP */
 	/* arr[6] |= 0x40; ... claim: EncServ (enclosure services) */
-	arr[7] = 0x3a; /* claim: WBUS16, SYNC, LINKED + CMDQUE */
+	arr[7] = 0xa; /* claim: LINKED + CMDQUE */
 	memcpy(&arr[8], inq_vendor_id, 8);
 	memcpy(&arr[16], inq_product_id, 16);
 	memcpy(&arr[32], inq_product_rev, 4);
 	/* version descriptors (2 bytes each) follow */
-	arr[58] = 0x0; arr[59] = 0x40; /* SAM-2 */
-	arr[60] = 0x3; arr[61] = 0x0;  /* SPC-3 */
+	arr[58] = 0x0; arr[59] = 0x77; /* SAM-3 ANSI */
+	arr[60] = 0x3; arr[61] = 0x14;  /* SPC-3 ANSI */
+	n = 62;
 	if (scsi_debug_ptype == 0) {
-		arr[62] = 0x1; arr[63] = 0x80; /* SBC */
+		arr[n++] = 0x3; arr[n++] = 0x3d; /* SBC-2 ANSI */
 	} else if (scsi_debug_ptype == 1) {
-		arr[62] = 0x2; arr[63] = 0x00; /* SSC */
+		arr[n++] = 0x3; arr[n++] = 0x60; /* SSC-2 no version */
 	}
+	arr[n++] = 0xc; arr[n++] = 0xf;  /* SAS-1.1 rev 10 */
 	return fill_from_dev_buffer(scp, arr,
 			    min(alloc_len, SDEBUG_LONG_INQ_SZ));
 }
@@ -677,46 +1039,141 @@ static int resp_requests(struct scsi_cmnd * scp,
 	unsigned char * sbuff;
 	unsigned char *cmd = (unsigned char *)scp->cmnd;
 	unsigned char arr[SDEBUG_SENSE_LEN];
+	int want_dsense;
 	int len = 18;
 
-	memset(arr, 0, SDEBUG_SENSE_LEN);
+	memset(arr, 0, sizeof(arr));
 	if (devip->reset == 1)
-		mk_sense_buffer(devip, 0, NO_ADDED_SENSE, 0);
+		mk_sense_buffer(devip, 0, NO_ADDITIONAL_SENSE, 0);
+	want_dsense = !!(cmd[1] & 1) || scsi_debug_dsense;
 	sbuff = devip->sense_buff;
-	if ((cmd[1] & 1) && (! scsi_debug_dsense)) {
-		/* DESC bit set and sense_buff in fixed format */
-		arr[0] = 0x72;
-		arr[1] = sbuff[2];     /* sense key */
-		arr[2] = sbuff[12];    /* asc */
-		arr[3] = sbuff[13];    /* ascq */
-		len = 8;
-	} else
+	if ((iec_m_pg[2] & 0x4) && (6 == (iec_m_pg[3] & 0xf))) {
+		if (want_dsense) {
+			arr[0] = 0x72;
+			arr[1] = 0x0;		/* NO_SENSE in sense_key */
+			arr[2] = THRESHOLD_EXCEEDED;
+			arr[3] = 0xff;		/* TEST set and MRIE==6 */
+		} else {
+			arr[0] = 0x70;
+			arr[2] = 0x0;		/* NO_SENSE in sense_key */
+			arr[7] = 0xa;   	/* 18 byte sense buffer */
+			arr[12] = THRESHOLD_EXCEEDED;
+			arr[13] = 0xff;		/* TEST set and MRIE==6 */
+		}
+	} else if (devip->stopped) {
+		if (want_dsense) {
+			arr[0] = 0x72;
+			arr[1] = 0x0;		/* NO_SENSE in sense_key */
+			arr[2] = LOW_POWER_COND_ON;
+			arr[3] = 0x0;		/* TEST set and MRIE==6 */
+		} else {
+			arr[0] = 0x70;
+			arr[2] = 0x0;		/* NO_SENSE in sense_key */
+			arr[7] = 0xa;   	/* 18 byte sense buffer */
+			arr[12] = LOW_POWER_COND_ON;
+			arr[13] = 0x0;		/* TEST set and MRIE==6 */
+		}
+	} else {
 		memcpy(arr, sbuff, SDEBUG_SENSE_LEN);
-	mk_sense_buffer(devip, 0, NO_ADDED_SENSE, 0);
+		if ((cmd[1] & 1) && (! scsi_debug_dsense)) {
+			/* DESC bit set and sense_buff in fixed format */
+			memset(arr, 0, sizeof(arr));
+			arr[0] = 0x72;
+			arr[1] = sbuff[2];     /* sense key */
+			arr[2] = sbuff[12];    /* asc */
+			arr[3] = sbuff[13];    /* ascq */
+			len = 8;
+		}
+	}
+	mk_sense_buffer(devip, 0, NO_ADDITIONAL_SENSE, 0);
 	return fill_from_dev_buffer(scp, arr, len);
 }
 
+static int resp_start_stop(struct scsi_cmnd * scp,
+			   struct sdebug_dev_info * devip)
+{
+	unsigned char *cmd = (unsigned char *)scp->cmnd;
+	int power_cond, errsts, start;
+
+	if ((errsts = check_readiness(scp, 1, devip)))
+		return errsts;
+	power_cond = (cmd[4] & 0xf0) >> 4;
+	if (power_cond) {
+		mk_sense_buffer(devip, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB,
+			       	0);
+		return check_condition_result;
+	}
+	start = cmd[4] & 1;
+	if (start == devip->stopped)
+		devip->stopped = !start;
+	return 0;
+}
+
 #define SDEBUG_READCAP_ARR_SZ 8
 static int resp_readcap(struct scsi_cmnd * scp,
 			struct sdebug_dev_info * devip)
 {
 	unsigned char arr[SDEBUG_READCAP_ARR_SZ];
-	unsigned long capac;
+	unsigned int capac;
 	int errsts;
 
-	if ((errsts = check_reset(scp, devip)))
+	if ((errsts = check_readiness(scp, 1, devip)))
 		return errsts;
+	/* following just in case virtual_gb changed */
+	if (scsi_debug_virtual_gb > 0) {
+		sdebug_capacity = 2048 * 1024;
+		sdebug_capacity *= scsi_debug_virtual_gb;
+	} else
+		sdebug_capacity = sdebug_store_sectors;
 	memset(arr, 0, SDEBUG_READCAP_ARR_SZ);
-	capac = (unsigned long)sdebug_capacity - 1;
-	arr[0] = (capac >> 24);
-	arr[1] = (capac >> 16) & 0xff;
-	arr[2] = (capac >> 8) & 0xff;
-	arr[3] = capac & 0xff;
+	if (sdebug_capacity < 0xffffffff) {
+		capac = (unsigned int)sdebug_capacity - 1;
+		arr[0] = (capac >> 24);
+		arr[1] = (capac >> 16) & 0xff;
+		arr[2] = (capac >> 8) & 0xff;
+		arr[3] = capac & 0xff;
+	} else {
+		arr[0] = 0xff;
+		arr[1] = 0xff;
+		arr[2] = 0xff;
+		arr[3] = 0xff;
+	}
 	arr[6] = (SECT_SIZE_PER(target) >> 8) & 0xff;
 	arr[7] = SECT_SIZE_PER(target) & 0xff;
 	return fill_from_dev_buffer(scp, arr, SDEBUG_READCAP_ARR_SZ);
 }
 
+#define SDEBUG_READCAP16_ARR_SZ 32
+static int resp_readcap16(struct scsi_cmnd * scp,
+			  struct sdebug_dev_info * devip)
+{
+	unsigned char *cmd = (unsigned char *)scp->cmnd;
+	unsigned char arr[SDEBUG_READCAP16_ARR_SZ];
+	unsigned long long capac;
+	int errsts, k, alloc_len;
+
+	if ((errsts = check_readiness(scp, 1, devip)))
+		return errsts;
+	alloc_len = ((cmd[10] << 24) + (cmd[11] << 16) + (cmd[12] << 8)
+		     + cmd[13]);
+	/* following just in case virtual_gb changed */
+	if (scsi_debug_virtual_gb > 0) {
+		sdebug_capacity = 2048 * 1024;
+		sdebug_capacity *= scsi_debug_virtual_gb;
+	} else
+		sdebug_capacity = sdebug_store_sectors;
+	memset(arr, 0, SDEBUG_READCAP16_ARR_SZ);
+	capac = sdebug_capacity - 1;
+	for (k = 0; k < 8; ++k, capac >>= 8)
+		arr[7 - k] = capac & 0xff;
+	arr[8] = (SECT_SIZE_PER(target) >> 24) & 0xff;
+	arr[9] = (SECT_SIZE_PER(target) >> 16) & 0xff;
+	arr[10] = (SECT_SIZE_PER(target) >> 8) & 0xff;
+	arr[11] = SECT_SIZE_PER(target) & 0xff;
+	return fill_from_dev_buffer(scp, arr,
+				    min(alloc_len, SDEBUG_READCAP16_ARR_SZ));
+}
+
 /* <<Following mode page info copied from ST318451LW>> */
 
 static int resp_err_recov_pg(unsigned char * p, int pcontrol, int target)
@@ -772,27 +1229,98 @@ static int resp_caching_pg(unsigned char * p, int pcontrol, int target)
 
 static int resp_ctrl_m_pg(unsigned char * p, int pcontrol, int target)
 { 	/* Control mode page for mode_sense */
-	unsigned char ctrl_m_pg[] = {0xa, 10, 2, 0, 0, 0, 0, 0,
+	unsigned char ch_ctrl_m_pg[] = {/* 0xa, 10, */ 0x6, 0, 0, 0, 0, 0,
+				        0, 0, 0, 0};
+	unsigned char d_ctrl_m_pg[] = {0xa, 10, 2, 0, 0, 0, 0, 0,
 				     0, 0, 0x2, 0x4b};
 
 	if (scsi_debug_dsense)
 		ctrl_m_pg[2] |= 0x4;
+	else
+		ctrl_m_pg[2] &= ~0x4;
 	memcpy(p, ctrl_m_pg, sizeof(ctrl_m_pg));
 	if (1 == pcontrol)
-		memset(p + 2, 0, sizeof(ctrl_m_pg) - 2);
+		memcpy(p + 2, ch_ctrl_m_pg, sizeof(ch_ctrl_m_pg));
+	else if (2 == pcontrol)
+		memcpy(p, d_ctrl_m_pg, sizeof(d_ctrl_m_pg));
 	return sizeof(ctrl_m_pg);
 }
 
+
 static int resp_iec_m_pg(unsigned char * p, int pcontrol, int target)
 {	/* Informational Exceptions control mode page for mode_sense */
-	unsigned char iec_m_pg[] = {0x1c, 0xa, 0x08, 0, 0, 0, 0, 0,
-				    0, 0, 0x0, 0x0};
+	unsigned char ch_iec_m_pg[] = {/* 0x1c, 0xa, */ 0x4, 0xf, 0, 0, 0, 0,
+				       0, 0, 0x0, 0x0};
+	unsigned char d_iec_m_pg[] = {0x1c, 0xa, 0x08, 0, 0, 0, 0, 0,
+				      0, 0, 0x0, 0x0};
+
 	memcpy(p, iec_m_pg, sizeof(iec_m_pg));
 	if (1 == pcontrol)
-		memset(p + 2, 0, sizeof(iec_m_pg) - 2);
+		memcpy(p + 2, ch_iec_m_pg, sizeof(ch_iec_m_pg));
+	else if (2 == pcontrol)
+		memcpy(p, d_iec_m_pg, sizeof(d_iec_m_pg));
 	return sizeof(iec_m_pg);
 }
 
+static int resp_sas_sf_m_pg(unsigned char * p, int pcontrol, int target)
+{	/* SAS SSP mode page - short format for mode_sense */
+	unsigned char sas_sf_m_pg[] = {0x19, 0x6,
+		0x6, 0x0, 0x7, 0xd0, 0x0, 0x0};
+
+	memcpy(p, sas_sf_m_pg, sizeof(sas_sf_m_pg));
+	if (1 == pcontrol)
+		memset(p + 2, 0, sizeof(sas_sf_m_pg) - 2);
+	return sizeof(sas_sf_m_pg);
+}
+
+
+static int resp_sas_pcd_m_spg(unsigned char * p, int pcontrol, int target,
+			      int target_dev_id)
+{	/* SAS phy control and discover mode page for mode_sense */
+	unsigned char sas_pcd_m_pg[] = {0x59, 0x1, 0, 0x64, 0, 0x6, 0, 2,
+		    0, 0, 0, 0, 0x10, 0x9, 0x8, 0x0,
+		    0x52, 0x22, 0x22, 0x20, 0x0, 0x0, 0x0, 0x0,
+		    0x51, 0x11, 0x11, 0x10, 0x0, 0x0, 0x0, 0x1,
+		    0x2, 0, 0, 0, 0, 0, 0, 0,
+		    0x88, 0x99, 0, 0, 0, 0, 0, 0,
+		    0, 0, 0, 0, 0, 0, 0, 0,
+		    0, 1, 0, 0, 0x10, 0x9, 0x8, 0x0,
+		    0x52, 0x22, 0x22, 0x20, 0x0, 0x0, 0x0, 0x0,
+		    0x51, 0x11, 0x11, 0x10, 0x0, 0x0, 0x0, 0x1,
+		    0x3, 0, 0, 0, 0, 0, 0, 0,
+		    0x88, 0x99, 0, 0, 0, 0, 0, 0,
+		    0, 0, 0, 0, 0, 0, 0, 0,
+		};
+	int port_a, port_b;
+
+	port_a = target_dev_id + 1;
+	port_b = port_a + 1;
+	memcpy(p, sas_pcd_m_pg, sizeof(sas_pcd_m_pg));
+	p[20] = (port_a >> 24);
+	p[21] = (port_a >> 16) & 0xff;
+	p[22] = (port_a >> 8) & 0xff;
+	p[23] = port_a & 0xff;
+	p[48 + 20] = (port_b >> 24);
+	p[48 + 21] = (port_b >> 16) & 0xff;
+	p[48 + 22] = (port_b >> 8) & 0xff;
+	p[48 + 23] = port_b & 0xff;
+	if (1 == pcontrol)
+		memset(p + 4, 0, sizeof(sas_pcd_m_pg) - 4);
+	return sizeof(sas_pcd_m_pg);
+}
+
+static int resp_sas_sha_m_spg(unsigned char * p, int pcontrol)
+{	/* SAS SSP shared protocol specific port mode subpage */
+	unsigned char sas_sha_m_pg[] = {0x59, 0x2, 0, 0xc, 0, 0x6, 0x10, 0,
+		    0, 0, 0, 0, 0, 0, 0, 0,
+		};
+
+	memcpy(p, sas_sha_m_pg, sizeof(sas_sha_m_pg));
+	if (1 == pcontrol)
+		memset(p + 4, 0, sizeof(sas_sha_m_pg) - 4);
+	return sizeof(sas_sha_m_pg);
+}
+
 #define SDEBUG_MAX_MSENSE_SZ 256
 
 static int resp_mode_sense(struct scsi_cmnd * scp, int target,
@@ -801,12 +1329,12 @@ static int resp_mode_sense(struct scsi_cmnd * scp, int target,
 	unsigned char dbd;
 	int pcontrol, pcode, subpcode;
 	unsigned char dev_spec;
-	int alloc_len, msense_6, offset, len, errsts;
+	int alloc_len, msense_6, offset, len, errsts, target_dev_id;
 	unsigned char * ap;
 	unsigned char arr[SDEBUG_MAX_MSENSE_SZ];
 	unsigned char *cmd = (unsigned char *)scp->cmnd;
 
-	if ((errsts = check_reset(scp, devip)))
+	if ((errsts = check_readiness(scp, 1, devip)))
 		return errsts;
 	dbd = cmd[1] & 0x8;
 	pcontrol = (cmd[2] & 0xc0) >> 6;
@@ -820,6 +1348,8 @@ static int resp_mode_sense(struct scsi_cmnd * scp, int target,
 			       	0);
 		return check_condition_result;
 	}
+	target_dev_id = ((devip->sdbg_host->shost->host_no + 1) * 2000) +
+			(devip->target * 1000) - 3;
 	dev_spec = DEV_READONLY(target) ? 0x80 : 0x0;
 	if (msense_6) {
 		arr[2] = dev_spec;
@@ -830,7 +1360,8 @@ static int resp_mode_sense(struct scsi_cmnd * scp, int target,
 	}
 	ap = arr + offset;
 
-	if (0 != subpcode) { /* TODO: Control Extension page */
+	if ((subpcode > 0x0) && (subpcode < 0xff) && (0x19 != pcode)) {
+		/* TODO: Control Extension page */
 		mk_sense_buffer(devip, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB,
 			       	0);
 		return check_condition_result;
@@ -856,17 +1387,45 @@ static int resp_mode_sense(struct scsi_cmnd * scp, int target,
 		len = resp_ctrl_m_pg(ap, pcontrol, target);
 		offset += len;
 		break;
+	case 0x19:	/* if spc==1 then sas phy, control+discover */
+		if ((subpcode > 0x2) && (subpcode < 0xff)) {
+		        mk_sense_buffer(devip, ILLEGAL_REQUEST,
+					INVALID_FIELD_IN_CDB, 0);
+			return check_condition_result;
+	        }
+		len = 0;
+		if ((0x0 == subpcode) || (0xff == subpcode))
+			len += resp_sas_sf_m_pg(ap + len, pcontrol, target);
+		if ((0x1 == subpcode) || (0xff == subpcode))
+			len += resp_sas_pcd_m_spg(ap + len, pcontrol, target,
+						  target_dev_id);
+		if ((0x2 == subpcode) || (0xff == subpcode))
+			len += resp_sas_sha_m_spg(ap + len, pcontrol);
+		offset += len;
+		break;
 	case 0x1c:	/* Informational Exceptions Mode page, all devices */
 		len = resp_iec_m_pg(ap, pcontrol, target);
 		offset += len;
 		break;
 	case 0x3f:	/* Read all Mode pages */
-		len = resp_err_recov_pg(ap, pcontrol, target);
-		len += resp_disconnect_pg(ap + len, pcontrol, target);
-		len += resp_format_pg(ap + len, pcontrol, target);
-		len += resp_caching_pg(ap + len, pcontrol, target);
-		len += resp_ctrl_m_pg(ap + len, pcontrol, target);
-		len += resp_iec_m_pg(ap + len, pcontrol, target);
+		if ((0 == subpcode) || (0xff == subpcode)) {
+			len = resp_err_recov_pg(ap, pcontrol, target);
+			len += resp_disconnect_pg(ap + len, pcontrol, target);
+			len += resp_format_pg(ap + len, pcontrol, target);
+			len += resp_caching_pg(ap + len, pcontrol, target);
+			len += resp_ctrl_m_pg(ap + len, pcontrol, target);
+			len += resp_sas_sf_m_pg(ap + len, pcontrol, target);
+			if (0xff == subpcode) {
+				len += resp_sas_pcd_m_spg(ap + len, pcontrol,
+						  target, target_dev_id);
+				len += resp_sas_sha_m_spg(ap + len, pcontrol);
+			}
+			len += resp_iec_m_pg(ap + len, pcontrol, target);
+		} else {
+			mk_sense_buffer(devip, ILLEGAL_REQUEST,
+					INVALID_FIELD_IN_CDB, 0);
+			return check_condition_result;
+                }
 		offset += len;
 		break;
 	default:
@@ -883,71 +1442,274 @@ static int resp_mode_sense(struct scsi_cmnd * scp, int target,
 	return fill_from_dev_buffer(scp, arr, min(alloc_len, offset));
 }
 
-static int resp_read(struct scsi_cmnd * SCpnt, int upper_blk, int block,
-		     int num, struct sdebug_dev_info * devip)
+#define SDEBUG_MAX_MSELECT_SZ 512
+
+static int resp_mode_select(struct scsi_cmnd * scp, int mselect6,
+			    struct sdebug_dev_info * devip)
+{
+	int pf, sp, ps, md_len, bd_len, off, spf, pg_len;
+	int param_len, res, errsts, mpage;
+	unsigned char arr[SDEBUG_MAX_MSELECT_SZ];
+	unsigned char *cmd = (unsigned char *)scp->cmnd;
+
+	if ((errsts = check_readiness(scp, 1, devip)))
+		return errsts;
+	memset(arr, 0, sizeof(arr));
+	pf = cmd[1] & 0x10;
+	sp = cmd[1] & 0x1;
+	param_len = mselect6 ? cmd[4] : ((cmd[7] << 8) + cmd[8]);
+	if ((0 == pf) || sp || (param_len > SDEBUG_MAX_MSELECT_SZ)) {
+		mk_sense_buffer(devip, ILLEGAL_REQUEST,
+				INVALID_FIELD_IN_CDB, 0);
+		return check_condition_result;
+	}
+        res = fetch_to_dev_buffer(scp, arr, param_len);
+        if (-1 == res)
+                return (DID_ERROR << 16);
+        else if ((res < param_len) &&
+                 (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts))
+                printk(KERN_INFO "scsi_debug: mode_select: cdb indicated=%d, "
+                       " IO sent=%d bytes\n", param_len, res);
+	md_len = mselect6 ? (arr[0] + 1) : ((arr[0] << 8) + arr[1] + 2);
+	bd_len = mselect6 ? arr[3] : ((arr[6] << 8) + arr[7]);
+	if ((md_len > 2) || (0 != bd_len)) {
+		mk_sense_buffer(devip, ILLEGAL_REQUEST,
+				INVALID_FIELD_IN_PARAM_LIST, 0);
+		return check_condition_result;
+	}
+	off = bd_len + (mselect6 ? 4 : 8);
+	mpage = arr[off] & 0x3f;
+	ps = !!(arr[off] & 0x80);
+	if (ps) {
+		mk_sense_buffer(devip, ILLEGAL_REQUEST,
+				INVALID_FIELD_IN_PARAM_LIST, 0);
+		return check_condition_result;
+	}
+	spf = !!(arr[off] & 0x40);
+	pg_len = spf ? ((arr[off + 2] << 8) + arr[off + 3] + 4) :
+		       (arr[off + 1] + 2);
+	if ((pg_len + off) > param_len) {
+		mk_sense_buffer(devip, ILLEGAL_REQUEST,
+				PARAMETER_LIST_LENGTH_ERR, 0);
+		return check_condition_result;
+	}
+	switch (mpage) {
+	case 0xa:      /* Control Mode page */
+		if (ctrl_m_pg[1] == arr[off + 1]) {
+			memcpy(ctrl_m_pg + 2, arr + off + 2,
+			       sizeof(ctrl_m_pg) - 2);
+			scsi_debug_dsense = !!(ctrl_m_pg[2] & 0x4);
+			return 0;
+		}
+		break;
+	case 0x1c:      /* Informational Exceptions Mode page */
+		if (iec_m_pg[1] == arr[off + 1]) {
+			memcpy(iec_m_pg + 2, arr + off + 2,
+			       sizeof(iec_m_pg) - 2);
+			return 0;
+		}
+		break;
+	default:
+		break;
+	}
+	mk_sense_buffer(devip, ILLEGAL_REQUEST,
+			INVALID_FIELD_IN_PARAM_LIST, 0);
+	return check_condition_result;
+}
+
+static int resp_temp_l_pg(unsigned char * arr)
+{
+	unsigned char temp_l_pg[] = {0x0, 0x0, 0x3, 0x2, 0x0, 38,
+				     0x0, 0x1, 0x3, 0x2, 0x0, 65,
+		};
+
+        memcpy(arr, temp_l_pg, sizeof(temp_l_pg));
+        return sizeof(temp_l_pg);
+}
+
+static int resp_ie_l_pg(unsigned char * arr)
+{
+	unsigned char ie_l_pg[] = {0x0, 0x0, 0x3, 0x3, 0x0, 0x0, 38,
+		};
+
+        memcpy(arr, ie_l_pg, sizeof(ie_l_pg));
+	if (iec_m_pg[2] & 0x4) {	/* TEST bit set */
+		arr[4] = THRESHOLD_EXCEEDED;
+		arr[5] = 0xff;
+	}
+        return sizeof(ie_l_pg);
+}
+
+#define SDEBUG_MAX_LSENSE_SZ 512
+
+static int resp_log_sense(struct scsi_cmnd * scp,
+                          struct sdebug_dev_info * devip)
+{
+	int ppc, sp, pcontrol, pcode, alloc_len, errsts, len, n;
+	unsigned char arr[SDEBUG_MAX_LSENSE_SZ];
+	unsigned char *cmd = (unsigned char *)scp->cmnd;
+
+	if ((errsts = check_readiness(scp, 1, devip)))
+		return errsts;
+	memset(arr, 0, sizeof(arr));
+	ppc = cmd[1] & 0x2;
+	sp = cmd[1] & 0x1;
+	if (ppc || sp) {
+		mk_sense_buffer(devip, ILLEGAL_REQUEST,
+				INVALID_FIELD_IN_CDB, 0);
+		return check_condition_result;
+	}
+	pcontrol = (cmd[2] & 0xc0) >> 6;
+	pcode = cmd[2] & 0x3f;
+	alloc_len = (cmd[7] << 8) + cmd[8];
+	arr[0] = pcode;
+	switch (pcode) {
+	case 0x0:	/* Supported log pages log page */
+		n = 4;
+		arr[n++] = 0x0;		/* this page */
+		arr[n++] = 0xd;		/* Temperature */
+		arr[n++] = 0x2f;	/* Informational exceptions */
+		arr[3] = n - 4;
+		break;
+	case 0xd:	/* Temperature log page */
+		arr[3] = resp_temp_l_pg(arr + 4);
+		break;
+	case 0x2f:	/* Informational exceptions log page */
+		arr[3] = resp_ie_l_pg(arr + 4);
+		break;
+	default:
+		mk_sense_buffer(devip, ILLEGAL_REQUEST,
+				INVALID_FIELD_IN_CDB, 0);
+		return check_condition_result;
+	}
+	len = min(((arr[2] << 8) + arr[3]) + 4, alloc_len);
+	return fill_from_dev_buffer(scp, arr,
+		    min(len, SDEBUG_MAX_INQ_ARR_SZ));
+}
+
+static int resp_read(struct scsi_cmnd * SCpnt, unsigned long long lba,
+		     unsigned int num, struct sdebug_dev_info * devip)
 {
 	unsigned long iflags;
+	unsigned int block, from_bottom;
+	unsigned long long u;
 	int ret;
 
-	if (upper_blk || (block + num > sdebug_capacity)) {
+	if (lba + num > sdebug_capacity) {
 		mk_sense_buffer(devip, ILLEGAL_REQUEST, ADDR_OUT_OF_RANGE,
 				0);
 		return check_condition_result;
 	}
+	/* transfer length excessive (tie in to block limits VPD page) */
+	if (num > sdebug_store_sectors) {
+		mk_sense_buffer(devip, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB,
+				0);
+		return check_condition_result;
+	}
 	if ((SCSI_DEBUG_OPT_MEDIUM_ERR & scsi_debug_opts) &&
-	    (block <= OPT_MEDIUM_ERR_ADDR) &&
-	    ((block + num) > OPT_MEDIUM_ERR_ADDR)) {
+	    (lba <= OPT_MEDIUM_ERR_ADDR) &&
+	    ((lba + num) > OPT_MEDIUM_ERR_ADDR)) {
+		/* claim unrecoverable read error */
 		mk_sense_buffer(devip, MEDIUM_ERROR, UNRECOVERED_READ_ERR,
 				0);
-		/* claim unrecoverable read error */
+		/* set info field and valid bit for fixed descriptor */
+		if (0x70 == (devip->sense_buff[0] & 0x7f)) {
+			devip->sense_buff[0] |= 0x80;	/* Valid bit */
+			ret = OPT_MEDIUM_ERR_ADDR;
+			devip->sense_buff[3] = (ret >> 24) & 0xff;
+			devip->sense_buff[4] = (ret >> 16) & 0xff;
+			devip->sense_buff[5] = (ret >> 8) & 0xff;
+			devip->sense_buff[6] = ret & 0xff;
+		}
 		return check_condition_result;
 	}
 	read_lock_irqsave(&atomic_rw, iflags);
-	ret = fill_from_dev_buffer(SCpnt, fake_storep + (block * SECT_SIZE),
-			   	   num * SECT_SIZE);
+	if ((lba + num) <= sdebug_store_sectors)
+		ret = fill_from_dev_buffer(SCpnt,
+					   fake_storep + (lba * SECT_SIZE),
+			   		   num * SECT_SIZE);
+	else {
+		/* modulo when one arg is 64 bits needs do_div() */
+		u = lba;
+		block = do_div(u, sdebug_store_sectors);
+		from_bottom = 0;
+		if ((block + num) > sdebug_store_sectors)
+			from_bottom = (block + num) - sdebug_store_sectors;
+		ret = fill_from_dev_buffer(SCpnt,
+					   fake_storep + (block * SECT_SIZE),
+			   		   (num - from_bottom) * SECT_SIZE);
+		if ((0 == ret) && (from_bottom > 0))
+			ret = fill_from_dev_buffer(SCpnt, fake_storep,
+						   from_bottom * SECT_SIZE);
+	}
 	read_unlock_irqrestore(&atomic_rw, iflags);
 	return ret;
 }
 
-static int resp_write(struct scsi_cmnd * SCpnt, int upper_blk, int block,
-		      int num, struct sdebug_dev_info * devip)
+static int resp_write(struct scsi_cmnd * SCpnt, unsigned long long lba,
+		      unsigned int num, struct sdebug_dev_info * devip)
 {
 	unsigned long iflags;
+	unsigned int block, to_bottom;
+	unsigned long long u;
 	int res;
 
-	if (upper_blk || (block + num > sdebug_capacity)) {
+	if (lba + num > sdebug_capacity) {
 		mk_sense_buffer(devip, ILLEGAL_REQUEST, ADDR_OUT_OF_RANGE,
 			       	0);
 		return check_condition_result;
 	}
+	/* transfer length excessive (tie in to block limits VPD page) */
+	if (num > sdebug_store_sectors) {
+		mk_sense_buffer(devip, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB,
+				0);
+		return check_condition_result;
+	}
 
 	write_lock_irqsave(&atomic_rw, iflags);
-	res = fetch_to_dev_buffer(SCpnt, fake_storep + (block * SECT_SIZE),
-			   	  num * SECT_SIZE);
+	if ((lba + num) <= sdebug_store_sectors)
+		res = fetch_to_dev_buffer(SCpnt,
+					  fake_storep + (lba * SECT_SIZE),
+			   		  num * SECT_SIZE);
+	else {
+		/* modulo when one arg is 64 bits needs do_div() */
+		u = lba;
+		block = do_div(u, sdebug_store_sectors);
+		to_bottom = 0;
+		if ((block + num) > sdebug_store_sectors)
+			to_bottom = (block + num) - sdebug_store_sectors;
+		res = fetch_to_dev_buffer(SCpnt,
+					  fake_storep + (block * SECT_SIZE),
+			   		  (num - to_bottom) * SECT_SIZE);
+		if ((0 == res) && (to_bottom > 0))
+			res = fetch_to_dev_buffer(SCpnt, fake_storep,
+						  to_bottom * SECT_SIZE);
+	}
 	write_unlock_irqrestore(&atomic_rw, iflags);
 	if (-1 == res)
 		return (DID_ERROR << 16);
 	else if ((res < (num * SECT_SIZE)) &&
 		 (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts))
-		printk(KERN_INFO "scsi_debug: write: cdb indicated=%d, "
+		printk(KERN_INFO "scsi_debug: write: cdb indicated=%u, "
 		       " IO sent=%d bytes\n", num * SECT_SIZE, res);
 	return 0;
 }
 
-#define SDEBUG_RLUN_ARR_SZ 128
+#define SDEBUG_RLUN_ARR_SZ 256
 
 static int resp_report_luns(struct scsi_cmnd * scp,
 			    struct sdebug_dev_info * devip)
 {
 	unsigned int alloc_len;
-	int lun_cnt, i, upper;
+	int lun_cnt, i, upper, num, n, wlun, lun;
 	unsigned char *cmd = (unsigned char *)scp->cmnd;
 	int select_report = (int)cmd[2];
 	struct scsi_lun *one_lun;
 	unsigned char arr[SDEBUG_RLUN_ARR_SZ];
+	unsigned char * max_addr;
 
 	alloc_len = cmd[9] + (cmd[8] << 8) + (cmd[7] << 16) + (cmd[6] << 24);
-	if ((alloc_len < 16) || (select_report > 2)) {
+	if ((alloc_len < 4) || (select_report > 2)) {
 		mk_sense_buffer(devip, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB,
 			       	0);
 		return check_condition_result;
@@ -955,18 +1717,37 @@ static int resp_report_luns(struct scsi_cmnd * scp,
 	/* can produce response with up to 16k luns (lun 0 to lun 16383) */
 	memset(arr, 0, SDEBUG_RLUN_ARR_SZ);
 	lun_cnt = scsi_debug_max_luns;
-	arr[2] = ((sizeof(struct scsi_lun) * lun_cnt) >> 8) & 0xff;
-	arr[3] = (sizeof(struct scsi_lun) * lun_cnt) & 0xff;
-	lun_cnt = min((int)((SDEBUG_RLUN_ARR_SZ - 8) /
-			    sizeof(struct scsi_lun)), lun_cnt);
+	if (1 == select_report)
+		lun_cnt = 0;
+	else if (scsi_debug_no_lun_0 && (lun_cnt > 0))
+		--lun_cnt;
+	wlun = (select_report > 0) ? 1 : 0;
+	num = lun_cnt + wlun;
+	arr[2] = ((sizeof(struct scsi_lun) * num) >> 8) & 0xff;
+	arr[3] = (sizeof(struct scsi_lun) * num) & 0xff;
+	n = min((int)((SDEBUG_RLUN_ARR_SZ - 8) /
+			    sizeof(struct scsi_lun)), num);
+	if (n < num) {
+		wlun = 0;
+		lun_cnt = n;
+	}
 	one_lun = (struct scsi_lun *) &arr[8];
-	for (i = 0; i < lun_cnt; i++) {
-		upper = (i >> 8) & 0x3f;
+	max_addr = arr + SDEBUG_RLUN_ARR_SZ;
+	for (i = 0, lun = (scsi_debug_no_lun_0 ? 1 : 0);
+             ((i < lun_cnt) && ((unsigned char *)(one_lun + i) < max_addr));
+	     i++, lun++) {
+		upper = (lun >> 8) & 0x3f;
 		if (upper)
 			one_lun[i].scsi_lun[0] =
 			    (upper | (SAM2_LUN_ADDRESS_METHOD << 6));
-		one_lun[i].scsi_lun[1] = i & 0xff;
+		one_lun[i].scsi_lun[1] = lun & 0xff;
+	}
+	if (wlun) {
+		one_lun[i].scsi_lun[0] = (SAM2_WLUN_REPORT_LUNS >> 8) & 0xff;
+		one_lun[i].scsi_lun[1] = SAM2_WLUN_REPORT_LUNS & 0xff;
+		i++;
 	}
+	alloc_len = (unsigned char *)(one_lun + i) - arr;
 	return fill_from_dev_buffer(scp, arr,
 				    min((int)alloc_len, SDEBUG_RLUN_ARR_SZ));
 }
@@ -1002,7 +1783,8 @@ static void timer_intr_handler(unsigned long indx)
 static int scsi_debug_slave_alloc(struct scsi_device * sdp)
 {
 	if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)
-		sdev_printk(KERN_INFO, sdp, "scsi_debug: slave_alloc\n");
+		printk(KERN_INFO "scsi_debug: slave_alloc <%u %u %u %u>\n",
+		       sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
 	return 0;
 }
 
@@ -1011,7 +1793,8 @@ static int scsi_debug_slave_configure(struct scsi_device * sdp)
 	struct sdebug_dev_info * devip;
 
 	if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)
-		sdev_printk(KERN_INFO, sdp, "scsi_debug: slave_configure\n");
+		printk(KERN_INFO "scsi_debug: slave_configure <%u %u %u %u>\n",
+		       sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
 	if (sdp->host->max_cmd_len != SCSI_DEBUG_MAX_CMD_LEN)
 		sdp->host->max_cmd_len = SCSI_DEBUG_MAX_CMD_LEN;
 	devip = devInfoReg(sdp);
@@ -1019,6 +1802,7 @@ static int scsi_debug_slave_configure(struct scsi_device * sdp)
 	if (sdp->host->cmd_per_lun)
 		scsi_adjust_queue_depth(sdp, SDEBUG_TAGGED_QUEUING,
 					sdp->host->cmd_per_lun);
+	blk_queue_max_segment_size(sdp->request_queue, 256 * 1024);
 	return 0;
 }
 
@@ -1028,7 +1812,8 @@ static void scsi_debug_slave_destroy(struct scsi_device * sdp)
 				(struct sdebug_dev_info *)sdp->hostdata;
 
 	if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)
-		sdev_printk(KERN_INFO, sdp, "scsi_debug: slave_destroy\n");
+		printk(KERN_INFO "scsi_debug: slave_destroy <%u %u %u %u>\n",
+		       sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
 	if (devip) {
 		/* make this slot avaliable for re-use */
 		devip->used = 0;
@@ -1085,6 +1870,8 @@ static struct sdebug_dev_info * devInfoReg(struct scsi_device * sdev)
 			open_devip->sense_buff[0] = 0x70;
 			open_devip->sense_buff[7] = 0xa;
 		}
+		if (sdev->lun == SAM2_WLUN_REPORT_LUNS)
+			open_devip->wlun = SAM2_WLUN_REPORT_LUNS & 0xff;
 		return open_devip;
         }
         return NULL;
@@ -1273,7 +2060,7 @@ static void __init sdebug_build_parts(unsigned char * ramp)
 		printk(KERN_WARNING "scsi_debug:build_parts: reducing "
 				    "partitions to %d\n", SDEBUG_MAX_PARTS);
 	}
-	num_sectors = (int)(sdebug_store_size / SECT_SIZE);
+	num_sectors = (int)sdebug_store_sectors;
 	sectors_per_part = (num_sectors - sdebug_sectors_per)
 			   / scsi_debug_num_parts;
 	heads_by_sects = sdebug_heads * sdebug_sectors_per;
@@ -1316,9 +2103,9 @@ static int schedule_resp(struct scsi_cmnd * cmnd,
 		if (scsi_result) {
 			struct scsi_device * sdp = cmnd->device;
 
-			sdev_printk(KERN_INFO, sdp,
-				"non-zero result=0x%x\n",
-			       	scsi_result);
+			printk(KERN_INFO "scsi_debug:    <%u %u %u %u> "
+			       "non-zero result=0x%x\n", sdp->host->host_no,
+			       sdp->channel, sdp->id, sdp->lun, scsi_result);
 		}
 	}
 	if (cmnd && devip) {
@@ -1365,21 +2152,19 @@ static int schedule_resp(struct scsi_cmnd * cmnd,
 	}
 }
 
-/* Set 'perm' (4th argument) to 0 to disable module_param's definition
- * of sysfs parameters (which module_param doesn't yet support).
- * Sysfs parameters defined explicitly below.
- */
-module_param_named(add_host, scsi_debug_add_host, int, 0); /* perm=0644 */
-module_param_named(delay, scsi_debug_delay, int, 0); /* perm=0644 */
-module_param_named(dev_size_mb, scsi_debug_dev_size_mb, int, 0);
-module_param_named(dsense, scsi_debug_dsense, int, 0);
-module_param_named(every_nth, scsi_debug_every_nth, int, 0);
-module_param_named(max_luns, scsi_debug_max_luns, int, 0);
-module_param_named(num_parts, scsi_debug_num_parts, int, 0);
-module_param_named(num_tgts, scsi_debug_num_tgts, int, 0);
-module_param_named(opts, scsi_debug_opts, int, 0); /* perm=0644 */
-module_param_named(ptype, scsi_debug_ptype, int, 0);
-module_param_named(scsi_level, scsi_debug_scsi_level, int, 0);
+module_param_named(add_host, scsi_debug_add_host, int, S_IRUGO | S_IWUSR);
+module_param_named(delay, scsi_debug_delay, int, S_IRUGO | S_IWUSR);
+module_param_named(dev_size_mb, scsi_debug_dev_size_mb, int, S_IRUGO);
+module_param_named(dsense, scsi_debug_dsense, int, S_IRUGO | S_IWUSR);
+module_param_named(every_nth, scsi_debug_every_nth, int, S_IRUGO | S_IWUSR);
+module_param_named(max_luns, scsi_debug_max_luns, int, S_IRUGO | S_IWUSR);
+module_param_named(no_lun_0, scsi_debug_no_lun_0, int, S_IRUGO | S_IWUSR);
+module_param_named(num_parts, scsi_debug_num_parts, int, S_IRUGO);
+module_param_named(num_tgts, scsi_debug_num_tgts, int, S_IRUGO | S_IWUSR);
+module_param_named(opts, scsi_debug_opts, int, S_IRUGO | S_IWUSR);
+module_param_named(ptype, scsi_debug_ptype, int, S_IRUGO | S_IWUSR);
+module_param_named(scsi_level, scsi_debug_scsi_level, int, S_IRUGO);
+module_param_named(virtual_gb, scsi_debug_virtual_gb, int, S_IRUGO | S_IWUSR);
 
 MODULE_AUTHOR("Eric Youngdale + Douglas Gilbert");
 MODULE_DESCRIPTION("SCSI debug adapter driver");
@@ -1388,15 +2173,17 @@ MODULE_VERSION(SCSI_DEBUG_VERSION);
 
 MODULE_PARM_DESC(add_host, "0..127 hosts allowed(def=1)");
 MODULE_PARM_DESC(delay, "# of jiffies to delay response(def=1)");
-MODULE_PARM_DESC(dev_size_mb, "size in MB of ram shared by devs");
-MODULE_PARM_DESC(dsense, "use descriptor sense format(def: fixed)");
+MODULE_PARM_DESC(dev_size_mb, "size in MB of ram shared by devs(def=8)");
+MODULE_PARM_DESC(dsense, "use descriptor sense format(def=0 -> fixed)");
 MODULE_PARM_DESC(every_nth, "timeout every nth command(def=100)");
-MODULE_PARM_DESC(max_luns, "number of SCSI LUNs per target to simulate");
+MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)");
+MODULE_PARM_DESC(no_lun_0, "no LU number 0 (def=0 -> have lun 0)");
 MODULE_PARM_DESC(num_parts, "number of partitions(def=0)");
-MODULE_PARM_DESC(num_tgts, "number of SCSI targets per host to simulate");
-MODULE_PARM_DESC(opts, "1->noise, 2->medium_error, 4->...");
+MODULE_PARM_DESC(num_tgts, "number of targets per host to simulate(def=1)");
+MODULE_PARM_DESC(opts, "1->noise, 2->medium_error, 4->... (def=0)");
 MODULE_PARM_DESC(ptype, "SCSI peripheral type(def=0[disk])");
 MODULE_PARM_DESC(scsi_level, "SCSI level to simulate(def=5[SPC-3])");
+MODULE_PARM_DESC(virtual_gb, "virtual gigabyte size (def=0 -> use dev_size_mb)");
 
 
 static char sdebug_info[256];
@@ -1548,6 +2335,24 @@ static ssize_t sdebug_dsense_store(struct device_driver * ddp,
 DRIVER_ATTR(dsense, S_IRUGO | S_IWUSR, sdebug_dsense_show,
 	    sdebug_dsense_store);
 
+static ssize_t sdebug_no_lun_0_show(struct device_driver * ddp, char * buf)
+{
+        return scnprintf(buf, PAGE_SIZE, "%d\n", scsi_debug_no_lun_0);
+}
+static ssize_t sdebug_no_lun_0_store(struct device_driver * ddp,
+				     const char * buf, size_t count)
+{
+        int n;
+
+	if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n >= 0)) {
+		scsi_debug_no_lun_0 = n;
+		return count;
+	}
+	return -EINVAL;
+}
+DRIVER_ATTR(no_lun_0, S_IRUGO | S_IWUSR, sdebug_no_lun_0_show,
+	    sdebug_no_lun_0_store);
+
 static ssize_t sdebug_num_tgts_show(struct device_driver * ddp, char * buf)
 {
         return scnprintf(buf, PAGE_SIZE, "%d\n", scsi_debug_num_tgts);
@@ -1623,6 +2428,29 @@ static ssize_t sdebug_scsi_level_show(struct device_driver * ddp, char * buf)
 }
 DRIVER_ATTR(scsi_level, S_IRUGO, sdebug_scsi_level_show, NULL);
 
+static ssize_t sdebug_virtual_gb_show(struct device_driver * ddp, char * buf)
+{
+        return scnprintf(buf, PAGE_SIZE, "%d\n", scsi_debug_virtual_gb);
+}
+static ssize_t sdebug_virtual_gb_store(struct device_driver * ddp,
+				       const char * buf, size_t count)
+{
+        int n;
+
+	if ((count > 0) && (1 == sscanf(buf, "%d", &n)) && (n >= 0)) {
+		scsi_debug_virtual_gb = n;
+		if (scsi_debug_virtual_gb > 0) {
+			sdebug_capacity = 2048 * 1024;
+			sdebug_capacity *= scsi_debug_virtual_gb;
+		} else
+			sdebug_capacity = sdebug_store_sectors;
+		return count;
+	}
+	return -EINVAL;
+}
+DRIVER_ATTR(virtual_gb, S_IRUGO | S_IWUSR, sdebug_virtual_gb_show,
+	    sdebug_virtual_gb_store);
+
 static ssize_t sdebug_add_host_show(struct device_driver * ddp, char * buf)
 {
         return scnprintf(buf, PAGE_SIZE, "%d\n", scsi_debug_add_host);
@@ -1692,14 +2520,19 @@ static void do_remove_driverfs_files(void)
 
 static int __init scsi_debug_init(void)
 {
-	unsigned long sz;
+	unsigned int sz;
 	int host_to_add;
 	int k;
 
 	if (scsi_debug_dev_size_mb < 1)
 		scsi_debug_dev_size_mb = 1;  /* force minimum 1 MB ramdisk */
-	sdebug_store_size = (unsigned long)scsi_debug_dev_size_mb * 1048576;
-	sdebug_capacity = sdebug_store_size / SECT_SIZE;
+	sdebug_store_size = (unsigned int)scsi_debug_dev_size_mb * 1048576;
+	sdebug_store_sectors = sdebug_store_size / SECT_SIZE;
+	if (scsi_debug_virtual_gb > 0) {
+		sdebug_capacity = 2048 * 1024;
+		sdebug_capacity *= scsi_debug_virtual_gb;
+	} else
+		sdebug_capacity = sdebug_store_sectors;
 
 	/* play around with geometry, don't waste too much on track 0 */
 	sdebug_heads = 8;
@@ -1813,7 +2646,7 @@ static int sdebug_add_adapter(void)
         struct sdebug_dev_info *sdbg_devinfo;
         struct list_head *lh, *lh_sf;
 
-        sdbg_host = kzalloc(sizeof(*sdbg_host), GFP_KERNEL);
+        sdbg_host = kzalloc(sizeof(*sdbg_host),GFP_KERNEL);
 
         if (NULL == sdbg_host) {
                 printk(KERN_ERR "%s: out of memory at line %d\n",
@@ -1825,7 +2658,7 @@ static int sdebug_add_adapter(void)
 
 	devs_per_host = scsi_debug_num_tgts * scsi_debug_max_luns;
         for (k = 0; k < devs_per_host; k++) {
-                sdbg_devinfo = kzalloc(sizeof(*sdbg_devinfo), GFP_KERNEL);
+                sdbg_devinfo = kzalloc(sizeof(*sdbg_devinfo),GFP_KERNEL);
                 if (NULL == sdbg_devinfo) {
                         printk(KERN_ERR "%s: out of memory at line %d\n",
                                __FUNCTION__, __LINE__);
@@ -1906,7 +2739,7 @@ static int sdebug_driver_probe(struct device * dev)
 		hpnt->max_id = scsi_debug_num_tgts + 1;
 	else
 		hpnt->max_id = scsi_debug_num_tgts;
-	hpnt->max_lun = scsi_debug_max_luns;
+	hpnt->max_lun = SAM2_WLUN_REPORT_LUNS;	/* = scsi_debug_max_luns; */
 
         error = scsi_add_host(hpnt, &sdbg_host->dev);
         if (error) {
@@ -1960,7 +2793,7 @@ static void sdebug_max_tgts_luns(void)
 			hpnt->max_id = scsi_debug_num_tgts + 1;
 		else
 			hpnt->max_id = scsi_debug_num_tgts;
-		hpnt->max_lun = scsi_debug_max_luns;
+		hpnt->max_lun = SAM2_WLUN_REPORT_LUNS; /* scsi_debug_max_luns; */
 	}
 	spin_unlock(&sdebug_host_list_lock);
 }
-- 
cgit v0.10.2


From cefbda2d6cd9bf78a93768130729a6d142588d67 Mon Sep 17 00:00:00 2001
From: Dave C Boutcher <boutcher@cs.umn.edu>
Date: Mon, 12 Jun 2006 21:22:51 -0500
Subject: [SCSI] ibmvscsi: treat busy and error conditions separately

This patch fixes a condition where ibmvscsi treats a transport error as a
"busy" condition, so no errors were returned to the scsi mid-layer.
In a RAID environment this means that I/O hung rather than failing
over.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 944fc12..669ea4f 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -535,6 +535,7 @@ static int ibmvscsi_send_srp_event(struct srp_event_struct *evt_struct,
 				   struct ibmvscsi_host_data *hostdata)
 {
 	u64 *crq_as_u64 = (u64 *) &evt_struct->crq;
+	int request_status;
 	int rc;
 
 	/* If we have exhausted our request limit, just fail this request.
@@ -542,9 +543,18 @@ static int ibmvscsi_send_srp_event(struct srp_event_struct *evt_struct,
 	 * (such as task management requests) that the mid layer may think we
 	 * can handle more requests (can_queue) when we actually can't
 	 */
-	if ((evt_struct->crq.format == VIOSRP_SRP_FORMAT) &&
-	    (atomic_dec_if_positive(&hostdata->request_limit) < 0))
-		goto send_error;
+	if (evt_struct->crq.format == VIOSRP_SRP_FORMAT) {
+		request_status =
+			atomic_dec_if_positive(&hostdata->request_limit);
+		/* If request limit was -1 when we started, it is now even
+		 * less than that
+		 */
+		if (request_status < -1)
+			goto send_error;
+		/* Otherwise, if we have run out of requests */
+		else if (request_status < 0)
+			goto send_busy;
+	}
 
 	/* Copy the IU into the transfer area */
 	*evt_struct->xfer_iu = evt_struct->iu;
@@ -567,11 +577,23 @@ static int ibmvscsi_send_srp_event(struct srp_event_struct *evt_struct,
 
 	return 0;
 
- send_error:
+ send_busy:
 	unmap_cmd_data(&evt_struct->iu.srp.cmd, evt_struct, hostdata->dev);
 
 	free_event_struct(&hostdata->pool, evt_struct);
  	return SCSI_MLQUEUE_HOST_BUSY;
+
+ send_error:
+	unmap_cmd_data(&evt_struct->iu.srp.cmd, evt_struct, hostdata->dev);
+
+	if (evt_struct->cmnd != NULL) {
+		evt_struct->cmnd->result = DID_ERROR << 16;
+		evt_struct->cmnd_done(evt_struct->cmnd);
+	} else if (evt_struct->done)
+		evt_struct->done(evt_struct);
+
+	free_event_struct(&hostdata->pool, evt_struct);
+	return 0;
 }
 
 /**
@@ -1184,27 +1206,37 @@ void ibmvscsi_handle_crq(struct viosrp_crq *crq,
 		return;
 	case 0xFF:	/* Hypervisor telling us the connection is closed */
 		scsi_block_requests(hostdata->host);
+		atomic_set(&hostdata->request_limit, 0);
 		if (crq->format == 0x06) {
 			/* We need to re-setup the interpartition connection */
 			printk(KERN_INFO
 			       "ibmvscsi: Re-enabling adapter!\n");
-			atomic_set(&hostdata->request_limit, -1);
 			purge_requests(hostdata, DID_REQUEUE);
-			if (ibmvscsi_reenable_crq_queue(&hostdata->queue,
-							hostdata) == 0)
-				if (ibmvscsi_send_crq(hostdata,
-						      0xC001000000000000LL, 0))
+			if ((ibmvscsi_reenable_crq_queue(&hostdata->queue,
+							hostdata) == 0) ||
+			    (ibmvscsi_send_crq(hostdata,
+					       0xC001000000000000LL, 0))) {
+					atomic_set(&hostdata->request_limit,
+						   -1);
 					printk(KERN_ERR
-					       "ibmvscsi: transmit error after"
+					       "ibmvscsi: error after"
 					       " enable\n");
+			}
 		} else {
 			printk(KERN_INFO
 			       "ibmvscsi: Virtual adapter failed rc %d!\n",
 			       crq->format);
 
-			atomic_set(&hostdata->request_limit, -1);
 			purge_requests(hostdata, DID_ERROR);
-			ibmvscsi_reset_crq_queue(&hostdata->queue, hostdata);
+			if ((ibmvscsi_reset_crq_queue(&hostdata->queue,
+							hostdata)) ||
+			    (ibmvscsi_send_crq(hostdata,
+					       0xC001000000000000LL, 0))) {
+					atomic_set(&hostdata->request_limit,
+						   -1);
+					printk(KERN_ERR
+					       "ibmvscsi: error after reset\n");
+			}
 		}
 		scsi_unblock_requests(hostdata->host);
 		return;
@@ -1467,6 +1499,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	struct Scsi_Host *host;
 	struct device *dev = &vdev->dev;
 	unsigned long wait_switch = 0;
+	int rc;
 
 	vdev->dev.driver_data = NULL;
 
@@ -1484,8 +1517,8 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	atomic_set(&hostdata->request_limit, -1);
 	hostdata->host->max_sectors = 32 * 8; /* default max I/O 32 pages */
 
-	if (ibmvscsi_init_crq_queue(&hostdata->queue, hostdata,
-				    max_requests) != 0) {
+	rc = ibmvscsi_init_crq_queue(&hostdata->queue, hostdata, max_requests);
+	if (rc != 0 && rc != H_RESOURCE) {
 		printk(KERN_ERR "ibmvscsi: couldn't initialize crq\n");
 		goto init_crq_failed;
 	}
@@ -1505,7 +1538,8 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	 * to fail if the other end is not acive.  In that case we don't
 	 * want to scan
 	 */
-	if (ibmvscsi_send_crq(hostdata, 0xC001000000000000LL, 0) == 0) {
+	if (ibmvscsi_send_crq(hostdata, 0xC001000000000000LL, 0) == 0
+	    || rc == H_RESOURCE) {
 		/*
 		 * Wait around max init_timeout secs for the adapter to finish
 		 * initializing. When we are done initializing, we will have a
diff --git a/drivers/scsi/ibmvscsi/rpa_vscsi.c b/drivers/scsi/ibmvscsi/rpa_vscsi.c
index 1a9992b..242b887 100644
--- a/drivers/scsi/ibmvscsi/rpa_vscsi.c
+++ b/drivers/scsi/ibmvscsi/rpa_vscsi.c
@@ -208,6 +208,7 @@ int ibmvscsi_init_crq_queue(struct crq_queue *queue,
 			    int max_requests)
 {
 	int rc;
+	int retrc;
 	struct vio_dev *vdev = to_vio_dev(hostdata->dev);
 
 	queue->msgs = (struct viosrp_crq *)get_zeroed_page(GFP_KERNEL);
@@ -226,7 +227,7 @@ int ibmvscsi_init_crq_queue(struct crq_queue *queue,
 	gather_partition_info();
 	set_adapter_info(hostdata);
 
-	rc = plpar_hcall_norets(H_REG_CRQ,
+	retrc = rc = plpar_hcall_norets(H_REG_CRQ,
 				vdev->unit_address,
 				queue->msg_token, PAGE_SIZE);
 	if (rc == H_RESOURCE)
@@ -263,7 +264,7 @@ int ibmvscsi_init_crq_queue(struct crq_queue *queue,
 	tasklet_init(&hostdata->srp_task, (void *)ibmvscsi_task,
 		     (unsigned long)hostdata);
 
-	return 0;
+	return retrc;
 
       req_irq_failed:
 	do {
-- 
cgit v0.10.2


From 0e98936c924e2329f876b0b7791b45249c2e2129 Mon Sep 17 00:00:00 2001
From: Sumant Patro <sumantp@lsil.com>
Date: Tue, 20 Jun 2006 15:32:37 -0700
Subject: [SCSI] megaraid_sas: zcr with fix

The patch adds support for a ZCR controller (Device ID : 0x413).

It also has a critical bug fix :

Disable controller interrupt before firing INIT cmd to FW.  Interrupt
is enabled after required initialization is over. This is done to
ensure that driver is ready to handle interrupts when it is generated
by the controller.

Signed-off-by: Sumant Patro <Sumant.Patro@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/Documentation/scsi/ChangeLog.megaraid_sas b/Documentation/scsi/ChangeLog.megaraid_sas
index 0a85a7e..d9e5960 100644
--- a/Documentation/scsi/ChangeLog.megaraid_sas
+++ b/Documentation/scsi/ChangeLog.megaraid_sas
@@ -1,4 +1,20 @@
 
+1 Release Date    : Sun May 14 22:49:52 PDT 2006 - Sumant Patro <Sumant.Patro@lsil.com>
+2 Current Version : 00.00.03.01
+3 Older Version   : 00.00.02.04
+
+i.	Added support for ZCR controller.
+
+		New device id 0x413 added.
+
+ii.	Bug fix : Disable controller interrupt before firing INIT cmd to FW.
+
+		Interrupt is enabled after required initialization is over.
+		This is done to ensure that driver is ready to handle interrupts when
+		it is generated by the controller.
+
+		-Sumant Patro <Sumant.Patro@lsil.com>
+
 1 Release Date    : Wed Feb 03 14:31:44 PST 2006 - Sumant Patro <Sumant.Patro@lsil.com>
 2 Current Version : 00.00.02.04
 3 Older Version   : 00.00.02.04 
diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
index 0c9516f..3e6f9fe 100644
--- a/drivers/scsi/megaraid/megaraid_sas.c
+++ b/drivers/scsi/megaraid/megaraid_sas.c
@@ -10,7 +10,7 @@
  *	   2 of the License, or (at your option) any later version.
  *
  * FILE		: megaraid_sas.c
- * Version	: v00.00.02.04
+ * Version	: v00.00.03.01
  *
  * Authors:
  * 	Sreenivas Bagalkote	<Sreenivas.Bagalkote@lsil.com>
@@ -55,19 +55,25 @@ static struct pci_device_id megasas_pci_table[] = {
 
 	{
 	 PCI_VENDOR_ID_LSI_LOGIC,
-	 PCI_DEVICE_ID_LSI_SAS1064R, // xscale IOP
+	 PCI_DEVICE_ID_LSI_SAS1064R, /* xscale IOP */
 	 PCI_ANY_ID,
 	 PCI_ANY_ID,
 	 },
 	{
 	 PCI_VENDOR_ID_LSI_LOGIC,
-	 PCI_DEVICE_ID_LSI_SAS1078R, // ppc IOP
+	 PCI_DEVICE_ID_LSI_SAS1078R, /* ppc IOP */
 	 PCI_ANY_ID,
 	 PCI_ANY_ID,
 	},
 	{
+	 PCI_VENDOR_ID_LSI_LOGIC,
+	 PCI_DEVICE_ID_LSI_VERDE_ZCR,	/* xscale IOP, vega */
+	 PCI_ANY_ID,
+	 PCI_ANY_ID,
+	 },
+	{
 	 PCI_VENDOR_ID_DELL,
-	 PCI_DEVICE_ID_DELL_PERC5, // xscale IOP
+	 PCI_DEVICE_ID_DELL_PERC5, /* xscale IOP */
 	 PCI_ANY_ID,
 	 PCI_ANY_ID,
 	 },
@@ -289,9 +295,14 @@ static struct megasas_instance_template megasas_instance_template_ppc = {
  * @regs:			MFI register set
  */
 static inline void
-megasas_disable_intr(struct megasas_register_set __iomem * regs)
+megasas_disable_intr(struct megasas_instance *instance)
 {
 	u32 mask = 0x1f; 
+	struct megasas_register_set __iomem *regs = instance->reg_set;
+
+	if(instance->pdev->device == PCI_DEVICE_ID_LSI_SAS1078R)
+		mask = 0xffffffff;
+
 	writel(mask, &regs->outbound_intr_mask);
 
 	/* Dummy readl to force pci flush */
@@ -1260,7 +1271,7 @@ megasas_transition_to_ready(struct megasas_instance* instance)
 			/*
 			 * Bring it to READY state; assuming max wait 2 secs
 			 */
-			megasas_disable_intr(instance->reg_set);
+			megasas_disable_intr(instance);
 			writel(MFI_INIT_READY, &instance->reg_set->inbound_doorbell);
 
 			max_wait = 10;
@@ -1757,6 +1768,11 @@ static int megasas_init_mfi(struct megasas_instance *instance)
 	init_frame->data_xfer_len = sizeof(struct megasas_init_queue_info);
 
 	/*
+	 * disable the intr before firing the init frame to FW
+	 */
+	megasas_disable_intr(instance);
+
+	/*
 	 * Issue the init frame in polled mode
 	 */
 	if (megasas_issue_polled(instance, cmd)) {
@@ -2234,7 +2250,7 @@ megasas_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	megasas_mgmt_info.max_index--;
 
 	pci_set_drvdata(pdev, NULL);
-	megasas_disable_intr(instance->reg_set);
+	megasas_disable_intr(instance);
 	free_irq(instance->pdev->irq, instance);
 
 	megasas_release_mfi(instance);
@@ -2364,7 +2380,7 @@ static void megasas_detach_one(struct pci_dev *pdev)
 
 	pci_set_drvdata(instance->pdev, NULL);
 
-	megasas_disable_intr(instance->reg_set);
+	megasas_disable_intr(instance);
 
 	free_irq(instance->pdev->irq, instance);
 
diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
index 927d6ff..3531a14 100644
--- a/drivers/scsi/megaraid/megaraid_sas.h
+++ b/drivers/scsi/megaraid/megaraid_sas.h
@@ -18,9 +18,16 @@
 /**
  * MegaRAID SAS Driver meta data
  */
-#define MEGASAS_VERSION				"00.00.02.04"
-#define MEGASAS_RELDATE				"Feb 03, 2006"
-#define MEGASAS_EXT_VERSION			"Fri Feb 03 14:31:44 PST 2006"
+#define MEGASAS_VERSION				"00.00.03.01"
+#define MEGASAS_RELDATE				"May 14, 2006"
+#define MEGASAS_EXT_VERSION			"Sun May 14 22:49:52 PDT 2006"
+
+/*
+ * Device IDs
+ */
+#define	PCI_DEVICE_ID_LSI_SAS1078R		0x0060
+#define	PCI_DEVICE_ID_LSI_VERDE_ZCR		0x0413
+
 /*
  * =====================================
  * MegaRAID SAS MFI firmware definitions
@@ -554,7 +561,11 @@ struct megasas_ctrl_info {
 #define MFI_POLL_TIMEOUT_SECS			10
 
 #define MFI_REPLY_1078_MESSAGE_INTERRUPT	0x80000000
-#define PCI_DEVICE_ID_LSI_SAS1078R		0x00000060
+
+/*
+* register set for both 1068 and 1078 controllers
+* structure extended for 1078 registers
+*/
  
 struct megasas_register_set {
 	u32 	reserved_0[4];			/*0000h*/
@@ -1150,10 +1161,10 @@ struct compat_megasas_iocpacket {
 	struct compat_iovec sgl[MAX_IOCTL_SGE];
 } __attribute__ ((packed));
 
+#define MEGASAS_IOC_FIRMWARE32	_IOWR('M', 1, struct compat_megasas_iocpacket)
 #endif
 
 #define MEGASAS_IOC_FIRMWARE	_IOWR('M', 1, struct megasas_iocpacket)
-#define MEGASAS_IOC_FIRMWARE32	_IOWR('M', 1, struct compat_megasas_iocpacket)
 #define MEGASAS_IOC_GET_AEN	_IOW('M', 3, struct megasas_aen)
 
 struct megasas_mgmt_info {
-- 
cgit v0.10.2


From 9ba0883cfc5ab69820c05f1bf2b7711bb0a0103c Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Thu, 22 Jun 2006 09:19:51 +0200
Subject: [SCSI] HP XP devinfo update

According to Anthony Cheung all HP XP arrays with "OPEN-"
types support REPORT_LUN. So there is no reason why we
shouldn't use it.

Signed-off-by: Anthony Cheung <anthony.cheung@hp.com>
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index fb5cb4c..3d0429b 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -162,7 +162,7 @@ static struct {
 	{"HITACHI", "DISK-SUBSYSTEM", "*", BLIST_ATTACH_PQ3 | BLIST_SPARSELUN | BLIST_LARGELUN},
 	{"HITACHI", "OPEN-E", "*", BLIST_ATTACH_PQ3 | BLIST_SPARSELUN | BLIST_LARGELUN},
 	{"HP", "A6189A", NULL, BLIST_SPARSELUN | BLIST_LARGELUN},	/* HP VA7400 */
-	{"HP", "OPEN-", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, /* HP XP Arrays */
+	{"HP", "OPEN-", "*", BLIST_REPORTLUN2}, /* HP XP Arrays */
 	{"HP", "NetRAID-4M", NULL, BLIST_FORCELUN},
 	{"HP", "HSV100", NULL, BLIST_REPORTLUN2 | BLIST_NOSTARTONADD},
 	{"HP", "C1557A", NULL, BLIST_FORCELUN},
-- 
cgit v0.10.2


From f89d0a4e1d01168f20f9e8273de7dfc094b2a430 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Thu, 22 Jun 2006 11:45:00 +0200
Subject: [SCSI] aic79xx: remove slave_destroy

Even with the latest fixes aic79xx still occasionally triggers the
BUG_ON in slave_destroy. Rather than trying to figure out the various
levels of interaction here I've decided to remove the callback altogether.

The primary reason for the slave_alloc / slave_destroy is to keep an
index of pointers to the sdevs associated with a given target.
However, by changing the arguments to the affected functions slightly
it's possible to avoid the use of that index entirely.
The only performance penalty we'll incur is in writing the
information for /proc/scsi/XXX, as we'll have to recurse over all
available sdevs to find the correct ones. But I doubt that reading
from /proc is in any way time-critical.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/aic7xxx/aic79xx.h b/drivers/scsi/aic7xxx/aic79xx.h
index eb77456..df3346b 100644
--- a/drivers/scsi/aic7xxx/aic79xx.h
+++ b/drivers/scsi/aic7xxx/aic79xx.h
@@ -1487,6 +1487,7 @@ typedef enum {
 } ahd_queue_alg;
 
 void			ahd_set_tags(struct ahd_softc *ahd,
+				     struct scsi_cmnd *cmd,
 				     struct ahd_devinfo *devinfo,
 				     ahd_queue_alg alg);
 
diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c
index 801fc81..a1e8ca7 100644
--- a/drivers/scsi/aic7xxx/aic79xx_core.c
+++ b/drivers/scsi/aic7xxx/aic79xx_core.c
@@ -1090,7 +1090,7 @@ ahd_handle_seqint(struct ahd_softc *ahd, u_int intstat)
 
 			/* Notify XPT */
 			ahd_send_async(ahd, devinfo.channel, devinfo.target,
-				       CAM_LUN_WILDCARD, AC_SENT_BDR, NULL);
+				       CAM_LUN_WILDCARD, AC_SENT_BDR);
 
 			/*
 			 * Allow the sequencer to continue with
@@ -3062,7 +3062,7 @@ ahd_set_syncrate(struct ahd_softc *ahd, struct ahd_devinfo *devinfo,
 		tinfo->curr.ppr_options = ppr_options;
 
 		ahd_send_async(ahd, devinfo->channel, devinfo->target,
-			       CAM_LUN_WILDCARD, AC_TRANSFER_NEG, NULL);
+			       CAM_LUN_WILDCARD, AC_TRANSFER_NEG);
 		if (bootverbose) {
 			if (offset != 0) {
 				int options;
@@ -3184,7 +3184,7 @@ ahd_set_width(struct ahd_softc *ahd, struct ahd_devinfo *devinfo,
 
 		tinfo->curr.width = width;
 		ahd_send_async(ahd, devinfo->channel, devinfo->target,
-			       CAM_LUN_WILDCARD, AC_TRANSFER_NEG, NULL);
+			       CAM_LUN_WILDCARD, AC_TRANSFER_NEG);
 		if (bootverbose) {
 			printf("%s: target %d using %dbit transfers\n",
 			       ahd_name(ahd), devinfo->target,
@@ -3211,12 +3211,14 @@ ahd_set_width(struct ahd_softc *ahd, struct ahd_devinfo *devinfo,
  * Update the current state of tagged queuing for a given target.
  */
 void
-ahd_set_tags(struct ahd_softc *ahd, struct ahd_devinfo *devinfo,
-	     ahd_queue_alg alg)
+ahd_set_tags(struct ahd_softc *ahd, struct scsi_cmnd *cmd,
+	     struct ahd_devinfo *devinfo, ahd_queue_alg alg)
 {
-	ahd_platform_set_tags(ahd, devinfo, alg);
+	struct scsi_device *sdev = cmd->device;
+
+	ahd_platform_set_tags(ahd, sdev, devinfo, alg);
 	ahd_send_async(ahd, devinfo->channel, devinfo->target,
-		       devinfo->lun, AC_TRANSFER_NEG, &alg);
+		       devinfo->lun, AC_TRANSFER_NEG);
 }
 
 static void
@@ -4746,7 +4748,7 @@ ahd_handle_msg_reject(struct ahd_softc *ahd, struct ahd_devinfo *devinfo)
 			printf("(%s:%c:%d:%d): refuses tagged commands.  "
 			       "Performing non-tagged I/O\n", ahd_name(ahd),
 			       devinfo->channel, devinfo->target, devinfo->lun);
-			ahd_set_tags(ahd, devinfo, AHD_QUEUE_NONE);
+			ahd_set_tags(ahd, scb->io_ctx, devinfo, AHD_QUEUE_NONE);
 			mask = ~0x23;
 		} else {
 			printf("(%s:%c:%d:%d): refuses %s tagged commands.  "
@@ -4754,7 +4756,7 @@ ahd_handle_msg_reject(struct ahd_softc *ahd, struct ahd_devinfo *devinfo)
 			       ahd_name(ahd), devinfo->channel, devinfo->target,
 			       devinfo->lun, tag_type == MSG_ORDERED_TASK
 			       ? "ordered" : "head of queue");
-			ahd_set_tags(ahd, devinfo, AHD_QUEUE_BASIC);
+			ahd_set_tags(ahd, scb->io_ctx, devinfo, AHD_QUEUE_BASIC);
 			mask = ~0x03;
 		}
 
@@ -5098,7 +5100,7 @@ ahd_handle_devreset(struct ahd_softc *ahd, struct ahd_devinfo *devinfo,
 	
 	if (status != CAM_SEL_TIMEOUT)
 		ahd_send_async(ahd, devinfo->channel, devinfo->target,
-			       CAM_LUN_WILDCARD, AC_SENT_BDR, NULL);
+			       CAM_LUN_WILDCARD, AC_SENT_BDR);
 
 	if (message != NULL && bootverbose)
 		printf("%s: %s on %c:%d. %d SCBs aborted\n", ahd_name(ahd),
@@ -7952,7 +7954,7 @@ ahd_reset_channel(struct ahd_softc *ahd, char channel, int initiate_reset)
 #endif
 	/* Notify the XPT that a bus reset occurred */
 	ahd_send_async(ahd, devinfo.channel, CAM_TARGET_WILDCARD,
-		       CAM_LUN_WILDCARD, AC_BUS_RESET, NULL);
+		       CAM_LUN_WILDCARD, AC_BUS_RESET);
 
 	/*
 	 * Revert to async/narrow transfers until we renegotiate.
diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c
index e0ccdf3..b244c71 100644
--- a/drivers/scsi/aic7xxx/aic79xx_osm.c
+++ b/drivers/scsi/aic7xxx/aic79xx_osm.c
@@ -484,7 +484,6 @@ ahd_linux_target_alloc(struct scsi_target *starget)
 	struct seeprom_config *sc = ahd->seep_config;
 	unsigned long flags;
 	struct scsi_target **ahd_targp = ahd_linux_target_in_softc(starget);
-	struct ahd_linux_target *targ = scsi_transport_target_data(starget);
 	struct ahd_devinfo devinfo;
 	struct ahd_initiator_tinfo *tinfo;
 	struct ahd_tmode_tstate *tstate;
@@ -495,7 +494,6 @@ ahd_linux_target_alloc(struct scsi_target *starget)
 	BUG_ON(*ahd_targp != NULL);
 
 	*ahd_targp = starget;
-	memset(targ, 0, sizeof(*targ));
 
 	if (sc) {
 		int flags = sc->device_flags[starget->id];
@@ -551,15 +549,11 @@ ahd_linux_slave_alloc(struct scsi_device *sdev)
 {
 	struct	ahd_softc *ahd =
 		*((struct ahd_softc **)sdev->host->hostdata);
-	struct scsi_target *starget = sdev->sdev_target;
-	struct ahd_linux_target *targ = scsi_transport_target_data(starget);
 	struct ahd_linux_device *dev;
 
 	if (bootverbose)
 		printf("%s: Slave Alloc %d\n", ahd_name(ahd), sdev->id);
 
-	BUG_ON(targ->sdev[sdev->lun] != NULL);
-
 	dev = scsi_transport_device_data(sdev);
 	memset(dev, 0, sizeof(*dev));
 
@@ -576,8 +570,6 @@ ahd_linux_slave_alloc(struct scsi_device *sdev)
 	 */
 	dev->maxtags = 0;
 	
-	targ->sdev[sdev->lun] = sdev;
-
 	return (0);
 }
 
@@ -599,23 +591,6 @@ ahd_linux_slave_configure(struct scsi_device *sdev)
 	return 0;
 }
 
-static void
-ahd_linux_slave_destroy(struct scsi_device *sdev)
-{
-	struct	ahd_softc *ahd;
-	struct	ahd_linux_device *dev = scsi_transport_device_data(sdev);
-	struct  ahd_linux_target *targ = scsi_transport_target_data(sdev->sdev_target);
-
-	ahd = *((struct ahd_softc **)sdev->host->hostdata);
-	if (bootverbose)
-		printf("%s: Slave Destroy %d\n", ahd_name(ahd), sdev->id);
-
-	BUG_ON(dev->active);
-
-	targ->sdev[sdev->lun] = NULL;
-
-}
-
 #if defined(__i386__)
 /*
  * Return the disk geometry for the given SCSI device.
@@ -822,7 +797,6 @@ struct scsi_host_template aic79xx_driver_template = {
 	.use_clustering		= ENABLE_CLUSTERING,
 	.slave_alloc		= ahd_linux_slave_alloc,
 	.slave_configure	= ahd_linux_slave_configure,
-	.slave_destroy		= ahd_linux_slave_destroy,
 	.target_alloc		= ahd_linux_target_alloc,
 	.target_destroy		= ahd_linux_target_destroy,
 };
@@ -1249,20 +1223,13 @@ void
 ahd_platform_free(struct ahd_softc *ahd)
 {
 	struct scsi_target *starget;
-	int i, j;
+	int i;
 
 	if (ahd->platform_data != NULL) {
 		/* destroy all of the device and target objects */
 		for (i = 0; i < AHD_NUM_TARGETS; i++) {
 			starget = ahd->platform_data->starget[i];
 			if (starget != NULL) {
-				for (j = 0; j < AHD_NUM_LUNS; j++) {
-					struct ahd_linux_target *targ =
-						scsi_transport_target_data(starget);
-					if (targ->sdev[j] == NULL)
-						continue;
-					targ->sdev[j] = NULL;
-				}
 				ahd->platform_data->starget[i] = NULL;
 			}
 		}
@@ -1318,20 +1285,13 @@ ahd_platform_freeze_devq(struct ahd_softc *ahd, struct scb *scb)
 }
 
 void
-ahd_platform_set_tags(struct ahd_softc *ahd, struct ahd_devinfo *devinfo,
-		      ahd_queue_alg alg)
+ahd_platform_set_tags(struct ahd_softc *ahd, struct scsi_device *sdev,
+		      struct ahd_devinfo *devinfo, ahd_queue_alg alg)
 {
-	struct scsi_target *starget;
-	struct ahd_linux_target *targ;
 	struct ahd_linux_device *dev;
-	struct scsi_device *sdev;
 	int was_queuing;
 	int now_queuing;
 
-	starget = ahd->platform_data->starget[devinfo->target];
-	targ = scsi_transport_target_data(starget);
-	BUG_ON(targ == NULL);
-	sdev = targ->sdev[devinfo->lun];
 	if (sdev == NULL)
 		return;
 
@@ -1467,11 +1427,15 @@ ahd_linux_device_queue_depth(struct scsi_device *sdev)
 	tags = ahd_linux_user_tagdepth(ahd, &devinfo);
 	if (tags != 0 && sdev->tagged_supported != 0) {
 
-		ahd_set_tags(ahd, &devinfo, AHD_QUEUE_TAGGED);
+		ahd_platform_set_tags(ahd, sdev, &devinfo, AHD_QUEUE_TAGGED);
+		ahd_send_async(ahd, devinfo.channel, devinfo.target,
+			       devinfo.lun, AC_TRANSFER_NEG);
 		ahd_print_devinfo(ahd, &devinfo);
 		printf("Tagged Queuing enabled.  Depth %d\n", tags);
 	} else {
-		ahd_set_tags(ahd, &devinfo, AHD_QUEUE_NONE);
+		ahd_platform_set_tags(ahd, sdev, &devinfo, AHD_QUEUE_NONE);
+		ahd_send_async(ahd, devinfo.channel, devinfo.target,
+			       devinfo.lun, AC_TRANSFER_NEG);
 	}
 }
 
@@ -1629,7 +1593,7 @@ ahd_linux_isr(int irq, void *dev_id, struct pt_regs * regs)
 
 void
 ahd_send_async(struct ahd_softc *ahd, char channel,
-	       u_int target, u_int lun, ac_code code, void *arg)
+	       u_int target, u_int lun, ac_code code)
 {
 	switch (code) {
 	case AC_TRANSFER_NEG:
@@ -1956,7 +1920,7 @@ ahd_linux_handle_scsi_status(struct ahd_softc *ahd,
 			}
 			ahd_set_transaction_status(scb, CAM_REQUEUE_REQ);
 			ahd_set_scsi_status(scb, SCSI_STATUS_OK);
-			ahd_platform_set_tags(ahd, &devinfo,
+			ahd_platform_set_tags(ahd, sdev, &devinfo,
 				     (dev->flags & AHD_DEV_Q_BASIC)
 				   ? AHD_QUEUE_BASIC : AHD_QUEUE_TAGGED);
 			break;
@@ -1966,7 +1930,7 @@ ahd_linux_handle_scsi_status(struct ahd_softc *ahd,
 		 * as if the target returned BUSY SCSI status.
 		 */
 		dev->openings = 1;
-		ahd_platform_set_tags(ahd, &devinfo,
+		ahd_platform_set_tags(ahd, sdev, &devinfo,
 			     (dev->flags & AHD_DEV_Q_BASIC)
 			   ? AHD_QUEUE_BASIC : AHD_QUEUE_TAGGED);
 		ahd_set_scsi_status(scb, SCSI_STATUS_BUSY);
@@ -2778,8 +2742,6 @@ ahd_linux_init(void)
 	if (!ahd_linux_transport_template)
 		return -ENODEV;
 
-	scsi_transport_reserve_target(ahd_linux_transport_template,
-				      sizeof(struct ahd_linux_target));
 	scsi_transport_reserve_device(ahd_linux_transport_template,
 				      sizeof(struct ahd_linux_device));
 
diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.h b/drivers/scsi/aic7xxx/aic79xx_osm.h
index 2b83316..b2e65e1 100644
--- a/drivers/scsi/aic7xxx/aic79xx_osm.h
+++ b/drivers/scsi/aic7xxx/aic79xx_osm.h
@@ -263,7 +263,6 @@ typedef enum {
 	AHD_DEV_PERIODIC_OTAG	 = 0x40, /* Send OTAG to prevent starvation */
 } ahd_linux_dev_flags;
 
-struct ahd_linux_target;
 struct ahd_linux_device {
 	TAILQ_ENTRY(ahd_linux_device) links;
 
@@ -343,12 +342,6 @@ struct ahd_linux_device {
 #define AHD_OTAG_THRESH	500
 };
 
-struct ahd_linux_target {
-	struct scsi_device	 *sdev[AHD_NUM_LUNS];
-	struct ahd_transinfo	  last_tinfo;
-	struct ahd_softc	 *ahd;
-};
-
 /********************* Definitions Required by the Core ***********************/
 /*
  * Number of SG segments we require.  So long as the S/G segments for
@@ -865,7 +858,7 @@ ahd_freeze_scb(struct scb *scb)
         }
 }
 
-void	ahd_platform_set_tags(struct ahd_softc *ahd,
+void	ahd_platform_set_tags(struct ahd_softc *ahd, struct scsi_device *sdev,
 			      struct ahd_devinfo *devinfo, ahd_queue_alg);
 int	ahd_platform_abort_scbs(struct ahd_softc *ahd, int target,
 				char channel, int lun, u_int tag,
@@ -874,7 +867,7 @@ irqreturn_t
 	ahd_linux_isr(int irq, void *dev_id, struct pt_regs * regs);
 void	ahd_done(struct ahd_softc*, struct scb*);
 void	ahd_send_async(struct ahd_softc *, char channel,
-		       u_int target, u_int lun, ac_code, void *);
+		       u_int target, u_int lun, ac_code);
 void	ahd_print_path(struct ahd_softc *, struct scb *);
 
 #ifdef CONFIG_PCI
diff --git a/drivers/scsi/aic7xxx/aic79xx_proc.c b/drivers/scsi/aic7xxx/aic79xx_proc.c
index 24fd59a..c5f0ee5 100644
--- a/drivers/scsi/aic7xxx/aic79xx_proc.c
+++ b/drivers/scsi/aic7xxx/aic79xx_proc.c
@@ -47,7 +47,7 @@ static int	copy_info(struct info_str *info, char *fmt, ...);
 static void	ahd_dump_target_state(struct ahd_softc *ahd,
 				      struct info_str *info,
 				      u_int our_id, char channel,
-				      u_int target_id, u_int target_offset);
+				      u_int target_id);
 static void	ahd_dump_device_state(struct info_str *info,
 				      struct scsi_device *sdev);
 static int	ahd_proc_write_seeprom(struct ahd_softc *ahd,
@@ -204,10 +204,8 @@ ahd_format_transinfo(struct info_str *info, struct ahd_transinfo *tinfo)
 
 static void
 ahd_dump_target_state(struct ahd_softc *ahd, struct info_str *info,
-		      u_int our_id, char channel, u_int target_id,
-		      u_int target_offset)
+		      u_int our_id, char channel, u_int target_id)
 {
-	struct	ahd_linux_target *targ;
 	struct  scsi_target *starget;
 	struct	ahd_initiator_tinfo *tinfo;
 	struct	ahd_tmode_tstate *tstate;
@@ -218,10 +216,9 @@ ahd_dump_target_state(struct ahd_softc *ahd, struct info_str *info,
 	copy_info(info, "Target %d Negotiation Settings\n", target_id);
 	copy_info(info, "\tUser: ");
 	ahd_format_transinfo(info, &tinfo->user);
-	starget = ahd->platform_data->starget[target_offset];
+	starget = ahd->platform_data->starget[target_id];
 	if (starget == NULL)
 		return;
-	targ = scsi_transport_target_data(starget);
 
 	copy_info(info, "\tGoal: ");
 	ahd_format_transinfo(info, &tinfo->goal);
@@ -231,7 +228,7 @@ ahd_dump_target_state(struct ahd_softc *ahd, struct info_str *info,
 	for (lun = 0; lun < AHD_NUM_LUNS; lun++) {
 		struct scsi_device *dev;
 
-		dev = targ->sdev[lun];
+		dev = scsi_device_lookup_by_target(starget, lun);
 
 		if (dev == NULL)
 			continue;
@@ -355,7 +352,7 @@ ahd_linux_proc_info(struct Scsi_Host *shost, char *buffer, char **start,
 	copy_info(&info, "Allocated SCBs: %d, SG List Length: %d\n\n",
 		  ahd->scb_data.numscbs, AHD_NSEG);
 
-	max_targ = 15;
+	max_targ = 16;
 
 	if (ahd->seep_config == NULL)
 		copy_info(&info, "No Serial EEPROM\n");
@@ -373,12 +370,12 @@ ahd_linux_proc_info(struct Scsi_Host *shost, char *buffer, char **start,
 	copy_info(&info, "\n");
 
 	if ((ahd->features & AHD_WIDE) == 0)
-		max_targ = 7;
+		max_targ = 8;
 
-	for (i = 0; i <= max_targ; i++) {
+	for (i = 0; i < max_targ; i++) {
 
 		ahd_dump_target_state(ahd, &info, ahd->our_id, 'A',
-				      /*target_id*/i, /*target_offset*/i);
+				      /*target_id*/i);
 	}
 	retval = info.pos > info.offset ? info.pos - info.offset : 0;
 done:
-- 
cgit v0.10.2


From 03aba2f79594ca94d159c8bab454de9bcc385b76 Mon Sep 17 00:00:00 2001
From: Luben Tuikov <ltuikov@yahoo.com>
Date: Fri, 23 Jun 2006 09:39:09 -0700
Subject: [SCSI] sd/scsi_lib simplify sd_rw_intr and scsi_io_completion

This patch simplifies "good_bytes" computation in sd_rw_intr().
sd: "good_bytes" computation is always done in terms of the resolution
of the device's medium, since after that it is the number of good bytes
we pass around and other layers/contexts (as opposed ot sd) can translate
that to their own resolution (block layer:512).  It also makes
scsi_io_completion() processing more straightforward, eliminating the
3rd argument to the function.

It also fixes a couple of bugs like not checking return value,
using "break" instead of "return;", etc.

I've been running with this patch for some time now on a
test (do-it-all) system.

Signed-off-by: Luben Tuikov <ltuikov@yahoo.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 3d04a9f..4c4add5 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -855,8 +855,7 @@ static void scsi_release_buffers(struct scsi_cmnd *cmd)
  *		b) We can just use scsi_requeue_command() here.  This would
  *		   be used if we just wanted to retry, for example.
  */
-void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes,
-			unsigned int block_bytes)
+void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 {
 	int result = cmd->result;
 	int this_count = cmd->bufflen;
@@ -921,87 +920,72 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes,
 	 * Next deal with any sectors which we were able to correctly
 	 * handle.
 	 */
-	if (good_bytes >= 0) {
-		SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d bytes done.\n",
+	if (good_bytes > 0) {
+		SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, "
+					      "%d bytes done.\n",
 					      req->nr_sectors, good_bytes));
 		SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n", cmd->use_sg));
 
 		if (clear_errors)
 			req->errors = 0;
-		/*
-		 * If multiple sectors are requested in one buffer, then
-		 * they will have been finished off by the first command.
-		 * If not, then we have a multi-buffer command.
-		 *
-		 * If block_bytes != 0, it means we had a medium error
-		 * of some sort, and that we want to mark some number of
-		 * sectors as not uptodate.  Thus we want to inhibit
-		 * requeueing right here - we will requeue down below
-		 * when we handle the bad sectors.
-		 */
 
-		/*
-		 * If the command completed without error, then either
-		 * finish off the rest of the command, or start a new one.
+		/* A number of bytes were successfully read.  If there
+		 * is leftovers and there is some kind of error
+		 * (result != 0), retry the rest.
 		 */
-		if (scsi_end_request(cmd, 1, good_bytes, result == 0) == NULL)
+		if (scsi_end_request(cmd, 1, good_bytes, !!result) == NULL)
 			return;
 	}
-	/*
-	 * Now, if we were good little boys and girls, Santa left us a request
-	 * sense buffer.  We can extract information from this, so we
-	 * can choose a block to remap, etc.
+
+	/* good_bytes = 0, or (inclusive) there were leftovers and
+	 * result = 0, so scsi_end_request couldn't retry.
 	 */
 	if (sense_valid && !sense_deferred) {
 		switch (sshdr.sense_key) {
 		case UNIT_ATTENTION:
 			if (cmd->device->removable) {
-				/* detected disc change.  set a bit 
+				/* Detected disc change.  Set a bit
 				 * and quietly refuse further access.
 				 */
 				cmd->device->changed = 1;
-				scsi_end_request(cmd, 0,
-						this_count, 1);
+				scsi_end_request(cmd, 0, this_count, 1);
 				return;
 			} else {
-				/*
-				* Must have been a power glitch, or a
-				* bus reset.  Could not have been a
-				* media change, so we just retry the
-				* request and see what happens.  
-				*/
+				/* Must have been a power glitch, or a
+				 * bus reset.  Could not have been a
+				 * media change, so we just retry the
+				 * request and see what happens.
+				 */
 				scsi_requeue_command(q, cmd);
 				return;
 			}
 			break;
 		case ILLEGAL_REQUEST:
-			/*
-		 	* If we had an ILLEGAL REQUEST returned, then we may
-		 	* have performed an unsupported command.  The only
-		 	* thing this should be would be a ten byte read where
-			* only a six byte read was supported.  Also, on a
-			* system where READ CAPACITY failed, we may have read
-			* past the end of the disk.
-		 	*/
+			/* If we had an ILLEGAL REQUEST returned, then
+			 * we may have performed an unsupported
+			 * command.  The only thing this should be
+			 * would be a ten byte read where only a six
+			 * byte read was supported.  Also, on a system
+			 * where READ CAPACITY failed, we may have
+			 * read past the end of the disk.
+			 */
 			if ((cmd->device->use_10_for_rw &&
 			    sshdr.asc == 0x20 && sshdr.ascq == 0x00) &&
 			    (cmd->cmnd[0] == READ_10 ||
 			     cmd->cmnd[0] == WRITE_10)) {
 				cmd->device->use_10_for_rw = 0;
-				/*
-				 * This will cause a retry with a 6-byte
-				 * command.
+				/* This will cause a retry with a
+				 * 6-byte command.
 				 */
 				scsi_requeue_command(q, cmd);
-				result = 0;
+				return;
 			} else {
 				scsi_end_request(cmd, 0, this_count, 1);
 				return;
 			}
 			break;
 		case NOT_READY:
-			/*
-			 * If the device is in the process of becoming
+			/* If the device is in the process of becoming
 			 * ready, or has a temporary blockage, retry.
 			 */
 			if (sshdr.asc == 0x04) {
@@ -1021,7 +1005,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes,
 			}
 			if (!(req->flags & REQ_QUIET)) {
 				scmd_printk(KERN_INFO, cmd,
-					   "Device not ready: ");
+					    "Device not ready: ");
 				scsi_print_sense_hdr("", &sshdr);
 			}
 			scsi_end_request(cmd, 0, this_count, 1);
@@ -1029,21 +1013,21 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes,
 		case VOLUME_OVERFLOW:
 			if (!(req->flags & REQ_QUIET)) {
 				scmd_printk(KERN_INFO, cmd,
-					   "Volume overflow, CDB: ");
+					    "Volume overflow, CDB: ");
 				__scsi_print_command(cmd->data_cmnd);
 				scsi_print_sense("", cmd);
 			}
-			scsi_end_request(cmd, 0, block_bytes, 1);
+			/* See SSC3rXX or current. */
+			scsi_end_request(cmd, 0, this_count, 1);
 			return;
 		default:
 			break;
 		}
-	}			/* driver byte != 0 */
+	}
 	if (host_byte(result) == DID_RESET) {
-		/*
-		 * Third party bus reset or reset for error
-		 * recovery reasons.  Just retry the request
-		 * and see what happens.  
+		/* Third party bus reset or reset for error recovery
+		 * reasons.  Just retry the request and see what
+		 * happens.
 		 */
 		scsi_requeue_command(q, cmd);
 		return;
@@ -1051,21 +1035,13 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes,
 	if (result) {
 		if (!(req->flags & REQ_QUIET)) {
 			scmd_printk(KERN_INFO, cmd,
-				   "SCSI error: return code = 0x%x\n", result);
-
+				    "SCSI error: return code = 0x%08x\n",
+				    result);
 			if (driver_byte(result) & DRIVER_SENSE)
 				scsi_print_sense("", cmd);
 		}
-		/*
-		 * Mark a single buffer as not uptodate.  Queue the remainder.
-		 * We sometimes get this cruft in the event that a medium error
-		 * isn't properly reported.
-		 */
-		block_bytes = req->hard_cur_sectors << 9;
-		if (!block_bytes)
-			block_bytes = req->data_len;
-		scsi_end_request(cmd, 0, block_bytes, 1);
 	}
+	scsi_end_request(cmd, 0, this_count, !result);
 }
 EXPORT_SYMBOL(scsi_io_completion);
 
@@ -1169,7 +1145,7 @@ static void scsi_blk_pc_done(struct scsi_cmnd *cmd)
 	 * successfully. Since this is a REQ_BLOCK_PC command the
 	 * caller should check the request's errors value
 	 */
-	scsi_io_completion(cmd, cmd->bufflen, 0);
+	scsi_io_completion(cmd, cmd->bufflen);
 }
 
 static void scsi_setup_blk_pc_cmnd(struct scsi_cmnd *cmd)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 3541990..f899ff0 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -891,11 +891,10 @@ static struct block_device_operations sd_fops = {
 static void sd_rw_intr(struct scsi_cmnd * SCpnt)
 {
 	int result = SCpnt->result;
-	int this_count = SCpnt->request_bufflen;
-	int good_bytes = (result == 0 ? this_count : 0);
-	sector_t block_sectors = 1;
-	u64 first_err_block;
-	sector_t error_sector;
+ 	unsigned int xfer_size = SCpnt->request_bufflen;
+ 	unsigned int good_bytes = result ? 0 : xfer_size;
+ 	u64 start_lba = SCpnt->request->sector;
+ 	u64 bad_lba;
 	struct scsi_sense_hdr sshdr;
 	int sense_valid = 0;
 	int sense_deferred = 0;
@@ -906,7 +905,6 @@ static void sd_rw_intr(struct scsi_cmnd * SCpnt)
 		if (sense_valid)
 			sense_deferred = scsi_sense_is_deferred(&sshdr);
 	}
-
 #ifdef CONFIG_SCSI_LOGGING
 	SCSI_LOG_HLCOMPLETE(1, printk("sd_rw_intr: %s: res=0x%x\n", 
 				SCpnt->request->rq_disk->disk_name, result));
@@ -916,89 +914,72 @@ static void sd_rw_intr(struct scsi_cmnd * SCpnt)
 				sshdr.sense_key, sshdr.asc, sshdr.ascq));
 	}
 #endif
-	/*
-	   Handle MEDIUM ERRORs that indicate partial success.  Since this is a
-	   relatively rare error condition, no care is taken to avoid
-	   unnecessary additional work such as memcpy's that could be avoided.
-	 */
-	if (driver_byte(result) != 0 &&
-		 sense_valid && !sense_deferred) {
-		switch (sshdr.sense_key) {
-		case MEDIUM_ERROR:
-			if (!blk_fs_request(SCpnt->request))
-				break;
-			info_valid = scsi_get_sense_info_fld(
-				SCpnt->sense_buffer, SCSI_SENSE_BUFFERSIZE,
-				&first_err_block);
-			/*
-			 * May want to warn and skip if following cast results
-			 * in actual truncation (if sector_t < 64 bits)
-			 */
-			error_sector = (sector_t)first_err_block;
-			if (SCpnt->request->bio != NULL)
-				block_sectors = bio_sectors(SCpnt->request->bio);
-			switch (SCpnt->device->sector_size) {
-			case 1024:
-				error_sector <<= 1;
-				if (block_sectors < 2)
-					block_sectors = 2;
-				break;
-			case 2048:
-				error_sector <<= 2;
-				if (block_sectors < 4)
-					block_sectors = 4;
-				break;
-			case 4096:
-				error_sector <<=3;
-				if (block_sectors < 8)
-					block_sectors = 8;
-				break;
-			case 256:
-				error_sector >>= 1;
-				break;
-			default:
-				break;
-			}
+	if (driver_byte(result) != DRIVER_SENSE &&
+	    (!sense_valid || sense_deferred))
+		goto out;
 
-			error_sector &= ~(block_sectors - 1);
-			good_bytes = (error_sector - SCpnt->request->sector) << 9;
-			if (good_bytes < 0 || good_bytes >= this_count)
-				good_bytes = 0;
+	switch (sshdr.sense_key) {
+	case HARDWARE_ERROR:
+	case MEDIUM_ERROR:
+		if (!blk_fs_request(SCpnt->request))
+			goto out;
+		info_valid = scsi_get_sense_info_fld(SCpnt->sense_buffer,
+						     SCSI_SENSE_BUFFERSIZE,
+						     &bad_lba);
+		if (!info_valid)
+			goto out;
+		if (xfer_size <= SCpnt->device->sector_size)
+			goto out;
+		switch (SCpnt->device->sector_size) {
+		case 256:
+			start_lba <<= 1;
 			break;
-
-		case RECOVERED_ERROR: /* an error occurred, but it recovered */
-		case NO_SENSE: /* LLDD got sense data */
-			/*
-			 * Inform the user, but make sure that it's not treated
-			 * as a hard error.
-			 */
-			scsi_print_sense("sd", SCpnt);
-			SCpnt->result = 0;
-			memset(SCpnt->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
-			good_bytes = this_count;
+		case 512:
 			break;
-
-		case ILLEGAL_REQUEST:
-			if (SCpnt->device->use_10_for_rw &&
-			    (SCpnt->cmnd[0] == READ_10 ||
-			     SCpnt->cmnd[0] == WRITE_10))
-				SCpnt->device->use_10_for_rw = 0;
-			if (SCpnt->device->use_10_for_ms &&
-			    (SCpnt->cmnd[0] == MODE_SENSE_10 ||
-			     SCpnt->cmnd[0] == MODE_SELECT_10))
-				SCpnt->device->use_10_for_ms = 0;
+		case 1024:
+			start_lba >>= 1;
+			break;
+		case 2048:
+			start_lba >>= 2;
+			break;
+		case 4096:
+			start_lba >>= 3;
 			break;
-
 		default:
+			/* Print something here with limiting frequency. */
+			goto out;
 			break;
 		}
+		/* This computation should always be done in terms of
+		 * the resolution of the device's medium.
+		 */
+		good_bytes = (bad_lba - start_lba)*SCpnt->device->sector_size;
+		break;
+	case RECOVERED_ERROR:
+	case NO_SENSE:
+		/* Inform the user, but make sure that it's not treated
+		 * as a hard error.
+		 */
+		scsi_print_sense("sd", SCpnt);
+		SCpnt->result = 0;
+		memset(SCpnt->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
+		good_bytes = xfer_size;
+		break;
+	case ILLEGAL_REQUEST:
+		if (SCpnt->device->use_10_for_rw &&
+		    (SCpnt->cmnd[0] == READ_10 ||
+		     SCpnt->cmnd[0] == WRITE_10))
+			SCpnt->device->use_10_for_rw = 0;
+		if (SCpnt->device->use_10_for_ms &&
+		    (SCpnt->cmnd[0] == MODE_SENSE_10 ||
+		     SCpnt->cmnd[0] == MODE_SELECT_10))
+			SCpnt->device->use_10_for_ms = 0;
+		break;
+	default:
+		break;
 	}
-	/*
-	 * This calls the generic completion function, now that we know
-	 * how many actual sectors finished, and how many sectors we need
-	 * to say have failed.
-	 */
-	scsi_io_completion(SCpnt, good_bytes, block_sectors << 9);
+ out:
+	scsi_io_completion(SCpnt, good_bytes);
 }
 
 static int media_not_present(struct scsi_disk *sdkp,
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index ebf6579..fd94408 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -292,7 +292,7 @@ static void rw_intr(struct scsi_cmnd * SCpnt)
 	 * how many actual sectors finished, and how many sectors we need
 	 * to say have failed.
 	 */
-	scsi_io_completion(SCpnt, good_bytes, block_sectors << 9);
+	scsi_io_completion(SCpnt, good_bytes);
 }
 
 static int sr_init_command(struct scsi_cmnd * SCpnt)
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index e46cd40..371f70d 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -143,7 +143,7 @@ struct scsi_cmnd {
 
 extern struct scsi_cmnd *scsi_get_command(struct scsi_device *, gfp_t);
 extern void scsi_put_command(struct scsi_cmnd *);
-extern void scsi_io_completion(struct scsi_cmnd *, unsigned int, unsigned int);
+extern void scsi_io_completion(struct scsi_cmnd *, unsigned int);
 extern void scsi_finish_command(struct scsi_cmnd *cmd);
 extern void scsi_req_abort_cmd(struct scsi_cmnd *cmd);
 
-- 
cgit v0.10.2


From 9ea7290902abcf22f796e9aeae4dc2e71d3f7e67 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 23 Jun 2006 14:25:34 -0400
Subject: [SCSI] SCSI core: Allow QUIESCE -> CANCEL sdev transition

We have to be able to remove SCSI devices even when they are suspended, so
QUIESCE -> CANCEL must be a legal state transition.  This patch (as727)
adds the transition to the state machine.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 4c4add5..68e0d7d 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2026,6 +2026,7 @@ scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
 		switch (oldstate) {
 		case SDEV_CREATED:
 		case SDEV_RUNNING:
+		case SDEV_QUIESCE:
 		case SDEV_OFFLINE:
 		case SDEV_BLOCK:
 			break;
-- 
cgit v0.10.2


From a7a167bf7e2d196fc33f292e7b02e90fee03bc9a Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Fri, 23 Jun 2006 16:10:29 -0700
Subject: [SCSI] qla2xxx: Rework firmware-trace facilities.

- Defer firmware dump-data raw-to-textual conversion to
  user-space.
- Add module parameter (ql2xallocfwdump) to allow for per-HBA
  allocations of firmware dump memory.
- Dump request and response queue data as per firmware group
  request.
- Add extended firmware trace support for ISP24XX/ISP54XX chips.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index e96d58d..78eec67 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -16,15 +16,16 @@ qla2x00_sysfs_read_fw_dump(struct kobject *kobj, char *buf, loff_t off,
 {
 	struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
 	    struct device, kobj)));
+	char *rbuf = (char *)ha->fw_dump;
 
 	if (ha->fw_dump_reading == 0)
 		return 0;
-	if (off > ha->fw_dump_buffer_len)
-		return 0;
-	if (off + count > ha->fw_dump_buffer_len)
-		count = ha->fw_dump_buffer_len - off;
+	if (off > ha->fw_dump_len)
+                return 0;
+	if (off + count > ha->fw_dump_len)
+		count = ha->fw_dump_len - off;
 
-	memcpy(buf, &ha->fw_dump_buffer[off], count);
+	memcpy(buf, &rbuf[off], count);
 
 	return (count);
 }
@@ -36,7 +37,6 @@ qla2x00_sysfs_write_fw_dump(struct kobject *kobj, char *buf, loff_t off,
 	struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
 	    struct device, kobj)));
 	int reading;
-	uint32_t dump_size;
 
 	if (off != 0)
 		return (0);
@@ -44,46 +44,27 @@ qla2x00_sysfs_write_fw_dump(struct kobject *kobj, char *buf, loff_t off,
 	reading = simple_strtol(buf, NULL, 10);
 	switch (reading) {
 	case 0:
-		if (ha->fw_dump_reading == 1) {
-			qla_printk(KERN_INFO, ha,
-			    "Firmware dump cleared on (%ld).\n", ha->host_no);
+		if (!ha->fw_dump_reading)
+			break;
 
-			vfree(ha->fw_dump_buffer);
-			ha->fw_dump_buffer = NULL;
-			ha->fw_dump_reading = 0;
-			ha->fw_dumped = 0;
-		}
+		qla_printk(KERN_INFO, ha,
+		    "Firmware dump cleared on (%ld).\n", ha->host_no);
+
+		ha->fw_dump_reading = 0;
+		ha->fw_dumped = 0;
 		break;
 	case 1:
 		if (ha->fw_dumped && !ha->fw_dump_reading) {
 			ha->fw_dump_reading = 1;
 
-			if (IS_QLA24XX(ha) || IS_QLA54XX(ha))
-				dump_size = FW_DUMP_SIZE_24XX;
-			else {
-				dump_size = FW_DUMP_SIZE_1M;
-				if (ha->fw_memory_size < 0x20000)
-					dump_size = FW_DUMP_SIZE_128K;
-				else if (ha->fw_memory_size < 0x80000)
-					dump_size = FW_DUMP_SIZE_512K;
-			}
-			ha->fw_dump_buffer = (char *)vmalloc(dump_size);
-			if (ha->fw_dump_buffer == NULL) {
-				qla_printk(KERN_WARNING, ha,
-				    "Unable to allocate memory for firmware "
-				    "dump buffer (%d).\n", dump_size);
-
-				ha->fw_dump_reading = 0;
-				return (count);
-			}
 			qla_printk(KERN_INFO, ha,
-			    "Firmware dump ready for read on (%ld).\n",
+			    "Raw firmware dump ready for read on (%ld).\n",
 			    ha->host_no);
-			memset(ha->fw_dump_buffer, 0, dump_size);
-			ha->isp_ops.ascii_fw_dump(ha);
-			ha->fw_dump_buffer_len = strlen(ha->fw_dump_buffer);
 		}
 		break;
+	case 2:
+		qla2x00_alloc_fw_dump(ha);
+		break;
 	}
 	return (count);
 }
diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c
index 74e5471..f6ed696 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.c
+++ b/drivers/scsi/qla2xxx/qla_dbg.c
@@ -8,7 +8,34 @@
 
 #include <linux/delay.h>
 
-static int qla_uprintf(char **, char *, ...);
+static inline void
+qla2xxx_prep_dump(scsi_qla_host_t *ha, struct qla2xxx_fw_dump *fw_dump)
+{
+	fw_dump->fw_major_version = htonl(ha->fw_major_version);
+	fw_dump->fw_minor_version = htonl(ha->fw_minor_version);
+	fw_dump->fw_subminor_version = htonl(ha->fw_subminor_version);
+	fw_dump->fw_attributes = htonl(ha->fw_attributes);
+
+	fw_dump->vendor = htonl(ha->pdev->vendor);
+	fw_dump->device = htonl(ha->pdev->device);
+	fw_dump->subsystem_vendor = htonl(ha->pdev->subsystem_vendor);
+	fw_dump->subsystem_device = htonl(ha->pdev->subsystem_device);
+}
+
+static inline void *
+qla2xxx_copy_queues(scsi_qla_host_t *ha, void *ptr)
+{
+	/* Request queue. */
+	memcpy(ptr, ha->request_ring, ha->request_q_length *
+	    sizeof(request_t));
+
+	/* Response queue. */
+	ptr += ha->request_q_length * sizeof(request_t);
+	memcpy(ptr, ha->response_ring, ha->response_q_length  *
+	    sizeof(response_t));
+
+	return ptr + (ha->response_q_length * sizeof(response_t));
+}
 
 /**
  * qla2300_fw_dump() - Dumps binary data from the 2300 firmware.
@@ -49,10 +76,11 @@ qla2300_fw_dump(scsi_qla_host_t *ha, int hardware_locked)
 		    "request...\n", ha->fw_dump);
 		goto qla2300_fw_dump_failed;
 	}
-	fw = ha->fw_dump;
+	fw = &ha->fw_dump->isp.isp23;
+	qla2xxx_prep_dump(ha, ha->fw_dump);
 
 	rval = QLA_SUCCESS;
-	fw->hccr = RD_REG_WORD(&reg->hccr);
+	fw->hccr = htons(RD_REG_WORD(&reg->hccr));
 
 	/* Pause RISC. */
 	WRT_REG_WORD(&reg->hccr, HCCR_PAUSE_RISC);
@@ -73,85 +101,86 @@ qla2300_fw_dump(scsi_qla_host_t *ha, int hardware_locked)
 	if (rval == QLA_SUCCESS) {
 		dmp_reg = (uint16_t __iomem *)(reg + 0);
 		for (cnt = 0; cnt < sizeof(fw->pbiu_reg) / 2; cnt++)
-			fw->pbiu_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->pbiu_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x10);
 		for (cnt = 0; cnt < sizeof(fw->risc_host_reg) / 2; cnt++)
-			fw->risc_host_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_host_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x40);
 		for (cnt = 0; cnt < sizeof(fw->mailbox_reg) / 2; cnt++)
-			fw->mailbox_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->mailbox_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->ctrl_status, 0x40);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->resp_dma_reg) / 2; cnt++)
-			fw->resp_dma_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->resp_dma_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->ctrl_status, 0x50);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->dma_reg) / 2; cnt++)
-			fw->dma_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->dma_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->ctrl_status, 0x00);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0xA0);
 		for (cnt = 0; cnt < sizeof(fw->risc_hdw_reg) / 2; cnt++)
-			fw->risc_hdw_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_hdw_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->pcr, 0x2000);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->risc_gp0_reg) / 2; cnt++)
-			fw->risc_gp0_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_gp0_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->pcr, 0x2200);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->risc_gp1_reg) / 2; cnt++)
-			fw->risc_gp1_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_gp1_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->pcr, 0x2400);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->risc_gp2_reg) / 2; cnt++)
-			fw->risc_gp2_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_gp2_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->pcr, 0x2600);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->risc_gp3_reg) / 2; cnt++)
-			fw->risc_gp3_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_gp3_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->pcr, 0x2800);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->risc_gp4_reg) / 2; cnt++)
-			fw->risc_gp4_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_gp4_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->pcr, 0x2A00);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->risc_gp5_reg) / 2; cnt++)
-			fw->risc_gp5_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_gp5_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->pcr, 0x2C00);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->risc_gp6_reg) / 2; cnt++)
-			fw->risc_gp6_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_gp6_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->pcr, 0x2E00);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->risc_gp7_reg) / 2; cnt++)
-			fw->risc_gp7_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_gp7_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->ctrl_status, 0x10);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->frame_buf_hdw_reg) / 2; cnt++)
-			fw->frame_buf_hdw_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->frame_buf_hdw_reg[cnt] =
+			    htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->ctrl_status, 0x20);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->fpm_b0_reg) / 2; cnt++)
-			fw->fpm_b0_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->fpm_b0_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->ctrl_status, 0x30);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->fpm_b1_reg) / 2; cnt++)
-			fw->fpm_b1_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->fpm_b1_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		/* Reset RISC. */
 		WRT_REG_WORD(&reg->ctrl_status, CSR_ISP_SOFT_RESET);
@@ -226,7 +255,7 @@ qla2300_fw_dump(scsi_qla_host_t *ha, int hardware_locked)
 
 		if (test_and_clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags)) {
 			rval = mb0 & MBS_MASK;
-			fw->risc_ram[cnt] = mb2;
+			fw->risc_ram[cnt] = htons(mb2);
 		} else {
 			rval = QLA_FUNCTION_FAILED;
 		}
@@ -285,7 +314,7 @@ qla2300_fw_dump(scsi_qla_host_t *ha, int hardware_locked)
 
 		if (test_and_clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags)) {
 			rval = mb0 & MBS_MASK;
-			fw->stack_ram[cnt] = mb2;
+			fw->stack_ram[cnt] = htons(mb2);
 		} else {
 			rval = QLA_FUNCTION_FAILED;
 		}
@@ -345,12 +374,15 @@ qla2300_fw_dump(scsi_qla_host_t *ha, int hardware_locked)
 
 		if (test_and_clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags)) {
 			rval = mb0 & MBS_MASK;
-			fw->data_ram[cnt] = mb2;
+			fw->data_ram[cnt] = htons(mb2);
 		} else {
 			rval = QLA_FUNCTION_FAILED;
 		}
 	}
 
+	if (rval == QLA_SUCCESS)
+		qla2xxx_copy_queues(ha, &fw->data_ram[cnt]);
+
 	if (rval != QLA_SUCCESS) {
 		qla_printk(KERN_WARNING, ha,
 		    "Failed to dump firmware (%x)!!!\n", rval);
@@ -369,193 +401,6 @@ qla2300_fw_dump_failed:
 }
 
 /**
- * qla2300_ascii_fw_dump() - Converts a binary firmware dump to ASCII.
- * @ha: HA context
- */
-void
-qla2300_ascii_fw_dump(scsi_qla_host_t *ha)
-{
-	uint32_t cnt;
-	char *uiter;
-	char fw_info[30];
-	struct qla2300_fw_dump *fw;
-	uint32_t data_ram_cnt;
-
-	uiter = ha->fw_dump_buffer;
-	fw = ha->fw_dump;
-
-	qla_uprintf(&uiter, "%s Firmware Version %s\n", ha->model_number,
-	    ha->isp_ops.fw_version_str(ha, fw_info));
-
-	qla_uprintf(&uiter, "\n[==>BEG]\n");
-
-	qla_uprintf(&uiter, "HCCR Register:\n%04x\n\n", fw->hccr);
-
-	qla_uprintf(&uiter, "PBIU Registers:");
-	for (cnt = 0; cnt < sizeof (fw->pbiu_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->pbiu_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nReqQ-RspQ-Risc2Host Status registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_host_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_host_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nMailbox Registers:");
-	for (cnt = 0; cnt < sizeof (fw->mailbox_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->mailbox_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nAuto Request Response DMA Registers:");
-	for (cnt = 0; cnt < sizeof (fw->resp_dma_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->resp_dma_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nDMA Registers:");
-	for (cnt = 0; cnt < sizeof (fw->dma_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->dma_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC Hardware Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_hdw_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_hdw_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP0 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_gp0_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_gp0_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP1 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_gp1_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_gp1_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP2 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_gp2_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_gp2_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP3 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_gp3_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_gp3_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP4 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_gp4_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_gp4_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP5 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_gp5_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_gp5_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP6 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_gp6_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_gp6_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP7 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_gp7_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_gp7_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nFrame Buffer Hardware Registers:");
-	for (cnt = 0; cnt < sizeof (fw->frame_buf_hdw_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->frame_buf_hdw_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nFPM B0 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->fpm_b0_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->fpm_b0_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nFPM B1 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->fpm_b1_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->fpm_b1_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nCode RAM Dump:");
-	for (cnt = 0; cnt < sizeof (fw->risc_ram) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n%04x: ", cnt + 0x0800);
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_ram[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nStack RAM Dump:");
-	for (cnt = 0; cnt < sizeof (fw->stack_ram) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n%05x: ", cnt + 0x10000);
-		}
-		qla_uprintf(&uiter, "%04x ", fw->stack_ram[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nData RAM Dump:");
-	data_ram_cnt = ha->fw_memory_size - 0x11000 + 1;
-	for (cnt = 0; cnt < data_ram_cnt; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n%05x: ", cnt + 0x11000);
-		}
-		qla_uprintf(&uiter, "%04x ", fw->data_ram[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\n[<==END] ISP Debug Dump.");
-}
-
-/**
  * qla2100_fw_dump() - Dumps binary data from the 2100/2200 firmware.
  * @ha: HA context
  * @hardware_locked: Called with the hardware_lock
@@ -591,10 +436,11 @@ qla2100_fw_dump(scsi_qla_host_t *ha, int hardware_locked)
 		    "request...\n", ha->fw_dump);
 		goto qla2100_fw_dump_failed;
 	}
-	fw = ha->fw_dump;
+	fw = &ha->fw_dump->isp.isp21;
+	qla2xxx_prep_dump(ha, ha->fw_dump);
 
 	rval = QLA_SUCCESS;
-	fw->hccr = RD_REG_WORD(&reg->hccr);
+	fw->hccr = htons(RD_REG_WORD(&reg->hccr));
 
 	/* Pause RISC. */
 	WRT_REG_WORD(&reg->hccr, HCCR_PAUSE_RISC);
@@ -608,79 +454,81 @@ qla2100_fw_dump(scsi_qla_host_t *ha, int hardware_locked)
 	if (rval == QLA_SUCCESS) {
 		dmp_reg = (uint16_t __iomem *)(reg + 0);
 		for (cnt = 0; cnt < sizeof(fw->pbiu_reg) / 2; cnt++)
-			fw->pbiu_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->pbiu_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x10);
 		for (cnt = 0; cnt < ha->mbx_count; cnt++) {
 			if (cnt == 8) {
-				dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0xe0);
+				dmp_reg = (uint16_t __iomem *)
+					((uint8_t __iomem *)reg + 0xe0);
 			}
-			fw->mailbox_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->mailbox_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 		}
 
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x20);
 		for (cnt = 0; cnt < sizeof(fw->dma_reg) / 2; cnt++)
-			fw->dma_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->dma_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->ctrl_status, 0x00);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0xA0);
 		for (cnt = 0; cnt < sizeof(fw->risc_hdw_reg) / 2; cnt++)
-			fw->risc_hdw_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_hdw_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->pcr, 0x2000);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->risc_gp0_reg) / 2; cnt++)
-			fw->risc_gp0_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_gp0_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->pcr, 0x2100);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->risc_gp1_reg) / 2; cnt++)
-			fw->risc_gp1_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_gp1_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->pcr, 0x2200);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->risc_gp2_reg) / 2; cnt++)
-			fw->risc_gp2_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_gp2_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->pcr, 0x2300);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->risc_gp3_reg) / 2; cnt++)
-			fw->risc_gp3_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_gp3_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->pcr, 0x2400);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->risc_gp4_reg) / 2; cnt++)
-			fw->risc_gp4_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_gp4_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->pcr, 0x2500);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->risc_gp5_reg) / 2; cnt++)
-			fw->risc_gp5_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_gp5_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->pcr, 0x2600);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->risc_gp6_reg) / 2; cnt++)
-			fw->risc_gp6_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_gp6_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->pcr, 0x2700);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->risc_gp7_reg) / 2; cnt++)
-			fw->risc_gp7_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->risc_gp7_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->ctrl_status, 0x10);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->frame_buf_hdw_reg) / 2; cnt++)
-			fw->frame_buf_hdw_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->frame_buf_hdw_reg[cnt] =
+			    htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->ctrl_status, 0x20);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->fpm_b0_reg) / 2; cnt++)
-			fw->fpm_b0_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->fpm_b0_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		WRT_REG_WORD(&reg->ctrl_status, 0x30);
 		dmp_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->fpm_b1_reg) / 2; cnt++)
-			fw->fpm_b1_reg[cnt] = RD_REG_WORD(dmp_reg++);
+			fw->fpm_b1_reg[cnt] = htons(RD_REG_WORD(dmp_reg++));
 
 		/* Reset the ISP. */
 		WRT_REG_WORD(&reg->ctrl_status, CSR_ISP_SOFT_RESET);
@@ -755,12 +603,15 @@ qla2100_fw_dump(scsi_qla_host_t *ha, int hardware_locked)
 
 		if (test_and_clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags)) {
 			rval = mb0 & MBS_MASK;
-			fw->risc_ram[cnt] = mb2;
+			fw->risc_ram[cnt] = htons(mb2);
 		} else {
 			rval = QLA_FUNCTION_FAILED;
 		}
 	}
 
+	if (rval == QLA_SUCCESS)
+		qla2xxx_copy_queues(ha, &fw->risc_ram[cnt]);
+
 	if (rval != QLA_SUCCESS) {
 		qla_printk(KERN_WARNING, ha,
 		    "Failed to dump firmware (%x)!!!\n", rval);
@@ -778,179 +629,6 @@ qla2100_fw_dump_failed:
 		spin_unlock_irqrestore(&ha->hardware_lock, flags);
 }
 
-/**
- * qla2100_ascii_fw_dump() - Converts a binary firmware dump to ASCII.
- * @ha: HA context
- */
-void
-qla2100_ascii_fw_dump(scsi_qla_host_t *ha)
-{
-	uint32_t cnt;
-	char *uiter;
-	char fw_info[30];
-	struct qla2100_fw_dump *fw;
-
-	uiter = ha->fw_dump_buffer;
-	fw = ha->fw_dump;
-
-	qla_uprintf(&uiter, "%s Firmware Version %s\n", ha->model_number,
-	    ha->isp_ops.fw_version_str(ha, fw_info));
-
-	qla_uprintf(&uiter, "\n[==>BEG]\n");
-
-	qla_uprintf(&uiter, "HCCR Register:\n%04x\n\n", fw->hccr);
-
-	qla_uprintf(&uiter, "PBIU Registers:");
-	for (cnt = 0; cnt < sizeof (fw->pbiu_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->pbiu_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nMailbox Registers:");
-	for (cnt = 0; cnt < sizeof (fw->mailbox_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->mailbox_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nDMA Registers:");
-	for (cnt = 0; cnt < sizeof (fw->dma_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->dma_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC Hardware Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_hdw_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_hdw_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP0 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_gp0_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_gp0_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP1 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_gp1_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_gp1_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP2 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_gp2_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_gp2_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP3 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_gp3_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_gp3_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP4 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_gp4_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_gp4_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP5 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_gp5_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_gp5_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP6 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_gp6_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_gp6_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP7 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->risc_gp7_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_gp7_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nFrame Buffer Hardware Registers:");
-	for (cnt = 0; cnt < sizeof (fw->frame_buf_hdw_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->frame_buf_hdw_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nFPM B0 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->fpm_b0_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->fpm_b0_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nFPM B1 Registers:");
-	for (cnt = 0; cnt < sizeof (fw->fpm_b1_reg) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n");
-		}
-		qla_uprintf(&uiter, "%04x ", fw->fpm_b1_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC SRAM:");
-	for (cnt = 0; cnt < sizeof (fw->risc_ram) / 2; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n%04x: ", cnt + 0x1000);
-		}
-		qla_uprintf(&uiter, "%04x ", fw->risc_ram[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\n[<==END] ISP Debug Dump.");
-
-	return;
-}
-
-static int
-qla_uprintf(char **uiter, char *fmt, ...)
-{
-	int	iter, len;
-	char	buf[128];
-	va_list	args;
-
-	va_start(args, fmt);
-	len = vsprintf(buf, fmt, args);
-	va_end(args);
-
-	for (iter = 0; iter < len; iter++, *uiter += 1)
-		*uiter[0] = buf[iter];
-
-	return (len);
-}
-
-
 void
 qla24xx_fw_dump(scsi_qla_host_t *ha, int hardware_locked)
 {
@@ -967,6 +645,7 @@ qla24xx_fw_dump(scsi_qla_host_t *ha, int hardware_locked)
 	unsigned long	flags;
 	struct qla24xx_fw_dump *fw;
 	uint32_t	ext_mem_cnt;
+	void		*eft;
 
 	risc_address = ext_mem_cnt = 0;
 	memset(mb, 0, sizeof(mb));
@@ -987,10 +666,11 @@ qla24xx_fw_dump(scsi_qla_host_t *ha, int hardware_locked)
 		    "request...\n", ha->fw_dump);
 		goto qla24xx_fw_dump_failed;
 	}
-	fw = ha->fw_dump;
+	fw = &ha->fw_dump->isp.isp24;
+	qla2xxx_prep_dump(ha, ha->fw_dump);
 
 	rval = QLA_SUCCESS;
-	fw->host_status = RD_REG_DWORD(&reg->host_status);
+	fw->host_status = htonl(RD_REG_DWORD(&reg->host_status));
 
 	/* Pause RISC. */
 	if ((RD_REG_DWORD(&reg->hccr) & HCCRX_RISC_PAUSE) == 0) {
@@ -1012,7 +692,7 @@ qla24xx_fw_dump(scsi_qla_host_t *ha, int hardware_locked)
 		/* Host interface registers. */
 		dmp_reg = (uint32_t __iomem *)(reg + 0);
 		for (cnt = 0; cnt < sizeof(fw->host_reg) / 4; cnt++)
-			fw->host_reg[cnt] = RD_REG_DWORD(dmp_reg++);
+			fw->host_reg[cnt] = htonl(RD_REG_DWORD(dmp_reg++));
 
 		/* Disable interrupts. */
 		WRT_REG_DWORD(&reg->ictrl, 0);
@@ -1024,470 +704,471 @@ qla24xx_fw_dump(scsi_qla_host_t *ha, int hardware_locked)
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xF0);
 		WRT_REG_DWORD(dmp_reg, 0xB0000000);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xFC);
-		fw->shadow_reg[0] = RD_REG_DWORD(dmp_reg);
+		fw->shadow_reg[0] = htonl(RD_REG_DWORD(dmp_reg));
 
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xF0);
 		WRT_REG_DWORD(dmp_reg, 0xB0100000);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xFC);
-		fw->shadow_reg[1] = RD_REG_DWORD(dmp_reg);
+		fw->shadow_reg[1] = htonl(RD_REG_DWORD(dmp_reg));
 
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xF0);
 		WRT_REG_DWORD(dmp_reg, 0xB0200000);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xFC);
-		fw->shadow_reg[2] = RD_REG_DWORD(dmp_reg);
+		fw->shadow_reg[2] = htonl(RD_REG_DWORD(dmp_reg));
 
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xF0);
 		WRT_REG_DWORD(dmp_reg, 0xB0300000);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xFC);
-		fw->shadow_reg[3] = RD_REG_DWORD(dmp_reg);
+		fw->shadow_reg[3] = htonl(RD_REG_DWORD(dmp_reg));
 
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xF0);
 		WRT_REG_DWORD(dmp_reg, 0xB0400000);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xFC);
-		fw->shadow_reg[4] = RD_REG_DWORD(dmp_reg);
+		fw->shadow_reg[4] = htonl(RD_REG_DWORD(dmp_reg));
 
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xF0);
 		WRT_REG_DWORD(dmp_reg, 0xB0500000);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xFC);
-		fw->shadow_reg[5] = RD_REG_DWORD(dmp_reg);
+		fw->shadow_reg[5] = htonl(RD_REG_DWORD(dmp_reg));
 
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xF0);
 		WRT_REG_DWORD(dmp_reg, 0xB0600000);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xFC);
-		fw->shadow_reg[6] = RD_REG_DWORD(dmp_reg);
+		fw->shadow_reg[6] = htonl(RD_REG_DWORD(dmp_reg));
 
 		/* Mailbox registers. */
 		mbx_reg = (uint16_t __iomem *)((uint8_t __iomem *)reg + 0x80);
 		for (cnt = 0; cnt < sizeof(fw->mailbox_reg) / 2; cnt++)
-			fw->mailbox_reg[cnt] = RD_REG_WORD(mbx_reg++);
+			fw->mailbox_reg[cnt] = htons(RD_REG_WORD(mbx_reg++));
 
 		/* Transfer sequence registers. */
 		iter_reg = fw->xseq_gp_reg;
 		WRT_REG_DWORD(&reg->iobase_addr, 0xBF00);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xBF10);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xBF20);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xBF30);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xBF40);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xBF50);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xBF60);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xBF70);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xBFE0);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < sizeof(fw->xseq_0_reg) / 4; cnt++)
-			fw->xseq_0_reg[cnt] = RD_REG_DWORD(dmp_reg++);
+			fw->xseq_0_reg[cnt] = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xBFF0);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < sizeof(fw->xseq_1_reg) / 4; cnt++)
-			fw->xseq_1_reg[cnt] = RD_REG_DWORD(dmp_reg++);
+			fw->xseq_1_reg[cnt] = htonl(RD_REG_DWORD(dmp_reg++));
 
 		/* Receive sequence registers. */
 		iter_reg = fw->rseq_gp_reg;
 		WRT_REG_DWORD(&reg->iobase_addr, 0xFF00);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xFF10);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xFF20);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xFF30);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xFF40);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xFF50);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xFF60);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xFF70);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xFFD0);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < sizeof(fw->rseq_0_reg) / 4; cnt++)
-			fw->rseq_0_reg[cnt] = RD_REG_DWORD(dmp_reg++);
+			fw->rseq_0_reg[cnt] = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xFFE0);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < sizeof(fw->rseq_1_reg) / 4; cnt++)
-			fw->rseq_1_reg[cnt] = RD_REG_DWORD(dmp_reg++);
+			fw->rseq_1_reg[cnt] = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0xFFF0);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < sizeof(fw->rseq_2_reg) / 4; cnt++)
-			fw->rseq_2_reg[cnt] = RD_REG_DWORD(dmp_reg++);
+			fw->rseq_2_reg[cnt] = htonl(RD_REG_DWORD(dmp_reg++));
 
 		/* Command DMA registers. */
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7100);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < sizeof(fw->cmd_dma_reg) / 4; cnt++)
-			fw->cmd_dma_reg[cnt] = RD_REG_DWORD(dmp_reg++);
+			fw->cmd_dma_reg[cnt] = htonl(RD_REG_DWORD(dmp_reg++));
 
 		/* Queues. */
 		iter_reg = fw->req0_dma_reg;
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7200);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 8; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xE4);
 		for (cnt = 0; cnt < 7; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		iter_reg = fw->resp0_dma_reg;
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7300);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 8; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xE4);
 		for (cnt = 0; cnt < 7; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		iter_reg = fw->req1_dma_reg;
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7400);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 8; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xE4);
 		for (cnt = 0; cnt < 7; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		/* Transmit DMA registers. */
 		iter_reg = fw->xmt0_dma_reg;
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7600);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7610);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		iter_reg = fw->xmt1_dma_reg;
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7620);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7630);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		iter_reg = fw->xmt2_dma_reg;
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7640);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7650);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		iter_reg = fw->xmt3_dma_reg;
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7660);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7670);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		iter_reg = fw->xmt4_dma_reg;
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7680);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7690);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x76A0);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < sizeof(fw->xmt_data_dma_reg) / 4; cnt++)
-			fw->xmt_data_dma_reg[cnt] = RD_REG_DWORD(dmp_reg++);
+			fw->xmt_data_dma_reg[cnt] =
+			    htonl(RD_REG_DWORD(dmp_reg++));
 
 		/* Receive DMA registers. */
 		iter_reg = fw->rcvt0_data_dma_reg;
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7700);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7710);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		iter_reg = fw->rcvt1_data_dma_reg;
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7720);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x7730);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		/* RISC registers. */
 		iter_reg = fw->risc_gp_reg;
 		WRT_REG_DWORD(&reg->iobase_addr, 0x0F00);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x0F10);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x0F20);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x0F30);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x0F40);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x0F50);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x0F60);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x0F70);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		/* Local memory controller registers. */
 		iter_reg = fw->lmc_reg;
 		WRT_REG_DWORD(&reg->iobase_addr, 0x3000);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x3010);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x3020);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x3030);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x3040);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x3050);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x3060);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		/* Fibre Protocol Module registers. */
 		iter_reg = fw->fpm_hdw_reg;
 		WRT_REG_DWORD(&reg->iobase_addr, 0x4000);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x4010);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x4020);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x4030);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x4040);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x4050);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x4060);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x4070);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x4080);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x4090);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x40A0);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x40B0);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		/* Frame Buffer registers. */
 		iter_reg = fw->fb_hdw_reg;
 		WRT_REG_DWORD(&reg->iobase_addr, 0x6000);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x6010);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x6020);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x6030);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x6040);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x6100);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x6130);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x6150);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x6170);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x6190);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		WRT_REG_DWORD(&reg->iobase_addr, 0x61B0);
 		dmp_reg = (uint32_t __iomem *)((uint8_t __iomem *)reg + 0xC0);
 		for (cnt = 0; cnt < 16; cnt++)
-			*iter_reg++ = RD_REG_DWORD(dmp_reg++);
+			*iter_reg++ = htonl(RD_REG_DWORD(dmp_reg++));
 
 		/* Reset RISC. */
 		WRT_REG_DWORD(&reg->ctrl_status,
@@ -1577,7 +1258,7 @@ qla24xx_fw_dump(scsi_qla_host_t *ha, int hardware_locked)
 
 		if (test_and_clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags)) {
 			rval = mb[0] & MBS_MASK;
-			fw->code_ram[cnt] = (mb[3] << 16) | mb[2];
+			fw->code_ram[cnt] = htonl((mb[3] << 16) | mb[2]);
 		} else {
 			rval = QLA_FUNCTION_FAILED;
 		}
@@ -1627,12 +1308,18 @@ qla24xx_fw_dump(scsi_qla_host_t *ha, int hardware_locked)
 
 		if (test_and_clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags)) {
 			rval = mb[0] & MBS_MASK;
-			fw->ext_mem[cnt] = (mb[3] << 16) | mb[2];
+			fw->ext_mem[cnt] = htonl((mb[3] << 16) | mb[2]);
 		} else {
 			rval = QLA_FUNCTION_FAILED;
 		}
 	}
 
+	if (rval == QLA_SUCCESS) {
+		eft = qla2xxx_copy_queues(ha, &fw->ext_mem[cnt]);
+		if (ha->eft)
+			memcpy(eft, ha->eft, ntohl(ha->fw_dump->eft_size));
+	}
+
 	if (rval != QLA_SUCCESS) {
 		qla_printk(KERN_WARNING, ha,
 		    "Failed to dump firmware (%x)!!!\n", rval);
@@ -1650,252 +1337,6 @@ qla24xx_fw_dump_failed:
 		spin_unlock_irqrestore(&ha->hardware_lock, flags);
 }
 
-void
-qla24xx_ascii_fw_dump(scsi_qla_host_t *ha)
-{
-	uint32_t cnt;
-	char *uiter;
-	struct qla24xx_fw_dump *fw;
-	uint32_t ext_mem_cnt;
-
-	uiter = ha->fw_dump_buffer;
-	fw = ha->fw_dump;
-
-	qla_uprintf(&uiter, "ISP FW Version %d.%02d.%02d Attributes %04x\n",
-	    ha->fw_major_version, ha->fw_minor_version,
-	    ha->fw_subminor_version, ha->fw_attributes);
-
-	qla_uprintf(&uiter, "\nR2H Status Register\n%04x\n", fw->host_status);
-
-	qla_uprintf(&uiter, "\nHost Interface Registers");
-	for (cnt = 0; cnt < sizeof(fw->host_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->host_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nShadow Registers");
-	for (cnt = 0; cnt < sizeof(fw->shadow_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->shadow_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nMailbox Registers");
-	for (cnt = 0; cnt < sizeof(fw->mailbox_reg) / 2; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->mailbox_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nXSEQ GP Registers");
-	for (cnt = 0; cnt < sizeof(fw->xseq_gp_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->xseq_gp_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nXSEQ-0 Registers");
-	for (cnt = 0; cnt < sizeof(fw->xseq_0_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->xseq_0_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nXSEQ-1 Registers");
-	for (cnt = 0; cnt < sizeof(fw->xseq_1_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->xseq_1_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRSEQ GP Registers");
-	for (cnt = 0; cnt < sizeof(fw->rseq_gp_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->rseq_gp_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRSEQ-0 Registers");
-	for (cnt = 0; cnt < sizeof(fw->rseq_0_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->rseq_0_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRSEQ-1 Registers");
-	for (cnt = 0; cnt < sizeof(fw->rseq_1_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->rseq_1_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRSEQ-2 Registers");
-	for (cnt = 0; cnt < sizeof(fw->rseq_2_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->rseq_2_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nCommand DMA Registers");
-	for (cnt = 0; cnt < sizeof(fw->cmd_dma_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->cmd_dma_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRequest0 Queue DMA Channel Registers");
-	for (cnt = 0; cnt < sizeof(fw->req0_dma_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->req0_dma_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nResponse0 Queue DMA Channel Registers");
-	for (cnt = 0; cnt < sizeof(fw->resp0_dma_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->resp0_dma_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRequest1 Queue DMA Channel Registers");
-	for (cnt = 0; cnt < sizeof(fw->req1_dma_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->req1_dma_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nXMT0 Data DMA Registers");
-	for (cnt = 0; cnt < sizeof(fw->xmt0_dma_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->xmt0_dma_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nXMT1 Data DMA Registers");
-	for (cnt = 0; cnt < sizeof(fw->xmt1_dma_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->xmt1_dma_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nXMT2 Data DMA Registers");
-	for (cnt = 0; cnt < sizeof(fw->xmt2_dma_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->xmt2_dma_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nXMT3 Data DMA Registers");
-	for (cnt = 0; cnt < sizeof(fw->xmt3_dma_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->xmt3_dma_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nXMT4 Data DMA Registers");
-	for (cnt = 0; cnt < sizeof(fw->xmt4_dma_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->xmt4_dma_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nXMT Data DMA Common Registers");
-	for (cnt = 0; cnt < sizeof(fw->xmt_data_dma_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->xmt_data_dma_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRCV Thread 0 Data DMA Registers");
-	for (cnt = 0; cnt < sizeof(fw->rcvt0_data_dma_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->rcvt0_data_dma_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRCV Thread 1 Data DMA Registers");
-	for (cnt = 0; cnt < sizeof(fw->rcvt1_data_dma_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->rcvt1_data_dma_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nRISC GP Registers");
-	for (cnt = 0; cnt < sizeof(fw->risc_gp_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->risc_gp_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nLMC Registers");
-	for (cnt = 0; cnt < sizeof(fw->lmc_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->lmc_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nFPM Hardware Registers");
-	for (cnt = 0; cnt < sizeof(fw->fpm_hdw_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->fpm_hdw_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nFB Hardware Registers");
-	for (cnt = 0; cnt < sizeof(fw->fb_hdw_reg) / 4; cnt++) {
-		if (cnt % 8 == 0)
-			qla_uprintf(&uiter, "\n");
-
-		qla_uprintf(&uiter, "%08x ", fw->fb_hdw_reg[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nCode RAM");
-	for (cnt = 0; cnt < sizeof (fw->code_ram) / 4; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n%08x: ", cnt + 0x20000);
-		}
-		qla_uprintf(&uiter, "%08x ", fw->code_ram[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n\nExternal Memory");
-	ext_mem_cnt = ha->fw_memory_size - 0x100000 + 1;
-	for (cnt = 0; cnt < ext_mem_cnt; cnt++) {
-		if (cnt % 8 == 0) {
-			qla_uprintf(&uiter, "\n%08x: ", cnt + 0x100000);
-		}
-		qla_uprintf(&uiter, "%08x ", fw->ext_mem[cnt]);
-	}
-
-	qla_uprintf(&uiter, "\n[<==END] ISP Debug Dump");
-}
-
-
 /****************************************************************************/
 /*                         Driver Debug Functions.                          */
 /****************************************************************************/
diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h
index ab6afea..ace6afd 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.h
+++ b/drivers/scsi/qla2xxx/qla_dbg.h
@@ -176,9 +176,6 @@
 /*
  * Firmware Dump structure definition
  */
-#define FW_DUMP_SIZE_128K	0xBC000
-#define FW_DUMP_SIZE_512K	0x2FC000
-#define FW_DUMP_SIZE_1M		0x5FC000
 
 struct qla2300_fw_dump {
 	uint16_t hccr;
@@ -224,8 +221,6 @@ struct qla2100_fw_dump {
 	uint16_t risc_ram[0xf000];
 };
 
-#define FW_DUMP_SIZE_24XX	0x2B0000
-
 struct qla24xx_fw_dump {
 	uint32_t host_status;
 	uint32_t host_reg[32];
@@ -257,3 +252,39 @@ struct qla24xx_fw_dump {
 	uint32_t code_ram[0x2000];
 	uint32_t ext_mem[1];
 };
+
+#define EFT_NUM_BUFFERS		4
+#define EFT_BYTES_PER_BUFFER	0x4000
+#define EFT_SIZE		((EFT_BYTES_PER_BUFFER) * (EFT_NUM_BUFFERS))
+
+struct qla2xxx_fw_dump {
+	uint8_t signature[4];
+	uint32_t version;
+
+	uint32_t fw_major_version;
+	uint32_t fw_minor_version;
+	uint32_t fw_subminor_version;
+	uint32_t fw_attributes;
+
+	uint32_t vendor;
+	uint32_t device;
+	uint32_t subsystem_vendor;
+	uint32_t subsystem_device;
+
+	uint32_t fixed_size;
+	uint32_t mem_size;
+	uint32_t req_q_size;
+	uint32_t rsp_q_size;
+
+	uint32_t eft_size;
+	uint32_t eft_addr_l;
+	uint32_t eft_addr_h;
+
+	uint32_t header_size;
+
+	union {
+		struct qla2100_fw_dump isp21;
+		struct qla2300_fw_dump isp23;
+		struct qla24xx_fw_dump isp24;
+	} isp;
+};
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index d6f6579..653e22e 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -608,6 +608,7 @@ typedef struct {
 #define MBC_SERDES_PARAMS		0x10	/* Serdes Tx Parameters. */
 #define MBC_GET_IOCB_STATUS		0x12	/* Get IOCB status command. */
 #define MBC_GET_TIMEOUT_PARAMS		0x22	/* Get FW timeouts. */
+#define MBC_TRACE_CONTROL		0x27	/* Trace control command. */
 #define MBC_GEN_SYSTEM_ERROR		0x2a	/* Generate System Error. */
 #define MBC_SET_TIMEOUT_PARAMS		0x32	/* Set FW timeouts. */
 #define MBC_MID_INITIALIZE_FIRMWARE	0x48	/* MID Initialize firmware. */
@@ -618,6 +619,9 @@ typedef struct {
 #define MBC_GET_LINK_PRIV_STATS		0x6d	/* Get link & private data. */
 #define MBC_SET_VENDOR_ID		0x76	/* Set Vendor ID. */
 
+#define TC_ENABLE			4
+#define TC_DISABLE			5
+
 /* Firmware return data sizes */
 #define FCAL_MAP_SIZE	128
 
@@ -1997,7 +2001,6 @@ struct isp_operations {
 		uint32_t);
 
 	void (*fw_dump) (struct scsi_qla_host *, int);
-	void (*ascii_fw_dump) (struct scsi_qla_host *);
 
 	int (*beacon_on) (struct scsi_qla_host *);
 	int (*beacon_off) (struct scsi_qla_host *);
@@ -2303,11 +2306,12 @@ typedef struct scsi_qla_host {
 	uint16_t	fw_seriallink_options24[4];
 
 	/* Firmware dump information. */
-	void		*fw_dump;
+	struct qla2xxx_fw_dump *fw_dump;
+	uint32_t	fw_dump_len;
 	int		fw_dumped;
 	int		fw_dump_reading;
-	char		*fw_dump_buffer;
-	int		fw_dump_buffer_len;
+	dma_addr_t	eft_dma;
+	void		*eft;
 
 	uint8_t		host_str[16];
 	uint32_t	pci_attr;
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 164d53c..fec1e1c 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -51,6 +51,8 @@ extern int qla2x00_abort_isp(scsi_qla_host_t *);
 extern void qla2x00_update_fcport(scsi_qla_host_t *, fc_port_t *);
 extern void qla2x00_reg_remote_port(scsi_qla_host_t *, fc_port_t *);
 
+extern void qla2x00_alloc_fw_dump(scsi_qla_host_t *);
+
 /*
  * Global Data in qla_os.c source file.
  */
@@ -61,6 +63,7 @@ extern int qlport_down_retry;
 extern int ql2xplogiabsentdevice;
 extern int ql2xloginretrycount;
 extern int ql2xfdmienable;
+extern int ql2xallocfwdump;
 
 extern void qla2x00_sp_compl(scsi_qla_host_t *, srb_t *);
 
@@ -204,6 +207,9 @@ qla2x00_set_serdes_params(scsi_qla_host_t *, uint16_t, uint16_t, uint16_t);
 extern int
 qla2x00_stop_firmware(scsi_qla_host_t *);
 
+extern int
+qla2x00_trace_control(scsi_qla_host_t *, uint16_t, dma_addr_t, uint16_t);
+
 /*
  * Global Function Prototypes in qla_isr.c source file.
  */
@@ -254,9 +260,6 @@ extern int qla24xx_write_optrom_data(struct scsi_qla_host *, uint8_t *,
 extern void qla2100_fw_dump(scsi_qla_host_t *, int);
 extern void qla2300_fw_dump(scsi_qla_host_t *, int);
 extern void qla24xx_fw_dump(scsi_qla_host_t *, int);
-extern void qla2100_ascii_fw_dump(scsi_qla_host_t *);
-extern void qla2300_ascii_fw_dump(scsi_qla_host_t *);
-extern void qla24xx_ascii_fw_dump(scsi_qla_host_t *);
 extern void qla2x00_dump_regs(scsi_qla_host_t *);
 extern void qla2x00_dump_buffer(uint8_t *, uint32_t);
 extern void qla2x00_print_scsi_cmd(struct scsi_cmnd *);
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index aef093d..880de6f 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -770,29 +770,104 @@ qla24xx_chip_diag(scsi_qla_host_t *ha)
 	return rval;
 }
 
-static void
+void
 qla2x00_alloc_fw_dump(scsi_qla_host_t *ha)
 {
-	uint32_t dump_size = 0;
+	int rval;
+	uint32_t dump_size, fixed_size, mem_size, req_q_size, rsp_q_size,
+	    eft_size;
+	dma_addr_t eft_dma;
+	void *eft;
+
+	if (ha->fw_dump) {
+		qla_printk(KERN_WARNING, ha,
+		    "Firmware dump previously allocated.\n");
+		return;
+	}
 
 	ha->fw_dumped = 0;
+	fixed_size = mem_size = eft_size = 0;
 	if (IS_QLA2100(ha) || IS_QLA2200(ha)) {
-		dump_size = sizeof(struct qla2100_fw_dump);
+		fixed_size = sizeof(struct qla2100_fw_dump);
 	} else if (IS_QLA23XX(ha)) {
-		dump_size = sizeof(struct qla2300_fw_dump);
-		dump_size += (ha->fw_memory_size - 0x11000) * sizeof(uint16_t);
-        } else if (IS_QLA24XX(ha) || IS_QLA54XX(ha)) {
-		dump_size = sizeof(struct qla24xx_fw_dump);
-		dump_size += (ha->fw_memory_size - 0x100000) * sizeof(uint32_t);
+		fixed_size = offsetof(struct qla2300_fw_dump, data_ram);
+		mem_size = (ha->fw_memory_size - 0x11000 + 1) *
+		    sizeof(uint16_t);
+	} else if (IS_QLA24XX(ha) || IS_QLA54XX(ha)) {
+		fixed_size = offsetof(struct qla24xx_fw_dump, ext_mem);
+		mem_size = (ha->fw_memory_size - 0x100000 + 1) *
+		    sizeof(uint32_t);
+
+		/* Allocate memory for Extended Trace Buffer. */
+		eft = dma_alloc_coherent(&ha->pdev->dev, EFT_SIZE, &eft_dma,
+		    GFP_KERNEL);
+		if (!eft) {
+			qla_printk(KERN_WARNING, ha, "Unable to allocate "
+			    "(%d KB) for EFT.\n", EFT_SIZE / 1024);
+			goto cont_alloc;
+		}
+
+		rval = qla2x00_trace_control(ha, TC_ENABLE, eft_dma,
+		    EFT_NUM_BUFFERS);
+		if (rval) {
+			qla_printk(KERN_WARNING, ha, "Unable to initialize "
+			    "EFT (%d).\n", rval);
+			dma_free_coherent(&ha->pdev->dev, EFT_SIZE, eft,
+			    eft_dma);
+			goto cont_alloc;
+		}
+
+		qla_printk(KERN_INFO, ha, "Allocated (%d KB) for EFT...\n",
+		    EFT_SIZE / 1024);
+
+		eft_size = EFT_SIZE;
+		memset(eft, 0, eft_size);
+		ha->eft_dma = eft_dma;
+		ha->eft = eft;
 	}
+cont_alloc:
+	req_q_size = ha->request_q_length * sizeof(request_t);
+	rsp_q_size = ha->response_q_length * sizeof(response_t);
+
+	dump_size = offsetof(struct qla2xxx_fw_dump, isp);
+	dump_size += fixed_size + mem_size + req_q_size + rsp_q_size +
+	    eft_size;
 
 	ha->fw_dump = vmalloc(dump_size);
-	if (ha->fw_dump)
-		qla_printk(KERN_INFO, ha, "Allocated (%d KB) for firmware "
-		    "dump...\n", dump_size / 1024);
-	else
+	if (!ha->fw_dump) {
 		qla_printk(KERN_WARNING, ha, "Unable to allocate (%d KB) for "
 		    "firmware dump!!!\n", dump_size / 1024);
+
+		if (ha->eft) {
+			dma_free_coherent(&ha->pdev->dev, eft_size, ha->eft,
+			    ha->eft_dma);
+			ha->eft = NULL;
+			ha->eft_dma = 0;
+		}
+		return;
+	}
+
+	qla_printk(KERN_INFO, ha, "Allocated (%d KB) for firmware dump...\n",
+	    dump_size / 1024);
+
+	ha->fw_dump_len = dump_size;
+	ha->fw_dump->signature[0] = 'Q';
+	ha->fw_dump->signature[1] = 'L';
+	ha->fw_dump->signature[2] = 'G';
+	ha->fw_dump->signature[3] = 'C';
+	ha->fw_dump->version = __constant_htonl(1);
+
+	ha->fw_dump->fixed_size = htonl(fixed_size);
+	ha->fw_dump->mem_size = htonl(mem_size);
+	ha->fw_dump->req_q_size = htonl(req_q_size);
+	ha->fw_dump->rsp_q_size = htonl(rsp_q_size);
+
+	ha->fw_dump->eft_size = htonl(eft_size);
+	ha->fw_dump->eft_addr_l = htonl(LSD(ha->eft_dma));
+	ha->fw_dump->eft_addr_h = htonl(MSD(ha->eft_dma));
+
+	ha->fw_dump->header_size =
+	    htonl(offsetof(struct qla2xxx_fw_dump, isp));
 }
 
 /**
@@ -810,8 +885,6 @@ qla2x00_resize_request_q(scsi_qla_host_t *ha)
 	dma_addr_t request_dma;
 	request_t *request_ring;
 
-	qla2x00_alloc_fw_dump(ha);
-
 	/* Valid only on recent ISPs. */
 	if (IS_QLA2100(ha) || IS_QLA2200(ha))
 		return;
@@ -883,6 +956,9 @@ qla2x00_setup_chip(scsi_qla_host_t *ha)
 				    &ha->fw_subminor_version,
 				    &ha->fw_attributes, &ha->fw_memory_size);
 				qla2x00_resize_request_q(ha);
+
+				if (ql2xallocfwdump)
+					qla2x00_alloc_fw_dump(ha);
 			}
 		} else {
 			DEBUG2(printk(KERN_INFO
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index d6cb3bd..39ddd38 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -2460,3 +2460,45 @@ qla2x00_stop_firmware(scsi_qla_host_t *ha)
 
 	return rval;
 }
+
+int
+qla2x00_trace_control(scsi_qla_host_t *ha, uint16_t ctrl, dma_addr_t eft_dma,
+    uint16_t buffers)
+{
+	int rval;
+	mbx_cmd_t mc;
+	mbx_cmd_t *mcp = &mc;
+
+	if (!IS_QLA24XX(ha) && !IS_QLA54XX(ha))
+		return QLA_FUNCTION_FAILED;
+
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no));
+
+	mcp->mb[0] = MBC_TRACE_CONTROL;
+	mcp->mb[1] = ctrl;
+	mcp->out_mb = MBX_1|MBX_0;
+	mcp->in_mb = MBX_1|MBX_0;
+	if (ctrl == TC_ENABLE) {
+		mcp->mb[2] = LSW(eft_dma);
+		mcp->mb[3] = MSW(eft_dma);
+		mcp->mb[4] = LSW(MSD(eft_dma));
+		mcp->mb[5] = MSW(MSD(eft_dma));
+		mcp->mb[6] = buffers;
+		mcp->mb[7] = buffers;
+		mcp->out_mb |= MBX_7|MBX_6|MBX_5|MBX_4|MBX_3|MBX_2;
+	}
+	mcp->tov = 30;
+	mcp->flags = 0;
+	rval = qla2x00_mailbox_command(ha, mcp);
+
+	if (rval != QLA_SUCCESS) {
+		DEBUG2_3_11(printk("%s(%ld): failed=%x mb[0]=%x mb[1]=%x.\n",
+		    __func__, ha->host_no, rval, mcp->mb[0], mcp->mb[1]));
+	} else {
+		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no));
+	}
+
+	return rval;
+}
+
+
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index ccaad0b..c16154c 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -54,6 +54,13 @@ module_param(ql2xloginretrycount, int, S_IRUGO|S_IRUSR);
 MODULE_PARM_DESC(ql2xloginretrycount,
 		"Specify an alternate value for the NVRAM login retry count.");
 
+int ql2xallocfwdump = 1;
+module_param(ql2xallocfwdump, int, S_IRUGO|S_IRUSR);
+MODULE_PARM_DESC(ql2xallocfwdump,
+		"Option to enable allocation of memory for a firmware dump "
+		"during HBA initialization.  Memory allocation requirements "
+		"vary by ISP type.  Default is 1 - allocate memory.");
+
 static void qla2x00_free_device(scsi_qla_host_t *);
 
 static void qla2x00_config_dma_addressing(scsi_qla_host_t *ha);
@@ -1405,7 +1412,6 @@ static int qla2x00_probe_one(struct pci_dev *pdev)
 	ha->isp_ops.read_nvram		= qla2x00_read_nvram_data;
 	ha->isp_ops.write_nvram		= qla2x00_write_nvram_data;
 	ha->isp_ops.fw_dump		= qla2100_fw_dump;
-	ha->isp_ops.ascii_fw_dump	= qla2100_ascii_fw_dump;
 	ha->isp_ops.read_optrom		= qla2x00_read_optrom_data;
 	ha->isp_ops.write_optrom	= qla2x00_write_optrom_data;
 	if (IS_QLA2100(ha)) {
@@ -1432,7 +1438,6 @@ static int qla2x00_probe_one(struct pci_dev *pdev)
 		ha->isp_ops.pci_config = qla2300_pci_config;
 		ha->isp_ops.intr_handler = qla2300_intr_handler;
 		ha->isp_ops.fw_dump = qla2300_fw_dump;
-		ha->isp_ops.ascii_fw_dump = qla2300_ascii_fw_dump;
 		ha->isp_ops.beacon_on = qla2x00_beacon_on;
 		ha->isp_ops.beacon_off = qla2x00_beacon_off;
 		ha->isp_ops.beacon_blink = qla2x00_beacon_blink;
@@ -1469,7 +1474,6 @@ static int qla2x00_probe_one(struct pci_dev *pdev)
 		ha->isp_ops.read_nvram = qla24xx_read_nvram_data;
 		ha->isp_ops.write_nvram = qla24xx_write_nvram_data;
 		ha->isp_ops.fw_dump = qla24xx_fw_dump;
-		ha->isp_ops.ascii_fw_dump = qla24xx_ascii_fw_dump;
 		ha->isp_ops.read_optrom	= qla24xx_read_optrom_data;
 		ha->isp_ops.write_optrom = qla24xx_write_optrom_data;
 		ha->isp_ops.beacon_on = qla24xx_beacon_on;
@@ -1678,6 +1682,9 @@ qla2x00_free_device(scsi_qla_host_t *ha)
 		kthread_stop(t);
 	}
 
+	if (ha->eft)
+		qla2x00_trace_control(ha, TC_DISABLE, 0, 0);
+
 	/* Stop currently executing firmware. */
 	qla2x00_stop_firmware(ha);
 
@@ -2012,6 +2019,13 @@ qla2x00_mem_free(scsi_qla_host_t *ha)
 	/* free sp pool */
 	qla2x00_free_sp_pool(ha);
 
+	if (ha->fw_dump) {
+		if (ha->eft)
+			dma_free_coherent(&ha->pdev->dev,
+			    ntohl(ha->fw_dump->eft_size), ha->eft, ha->eft_dma);
+		vfree(ha->fw_dump);
+	}
+
 	if (ha->sns_cmd)
 		dma_free_coherent(&ha->pdev->dev, sizeof(struct sns_cmd_pkt),
 		    ha->sns_cmd, ha->sns_cmd_dma);
@@ -2043,6 +2057,8 @@ qla2x00_mem_free(scsi_qla_host_t *ha)
 		    (ha->request_q_length + 1) * sizeof(request_t),
 		    ha->request_ring, ha->request_dma);
 
+	ha->eft = NULL;
+	ha->eft_dma = 0;
 	ha->sns_cmd = NULL;
 	ha->sns_cmd_dma = 0;
 	ha->ct_sns = NULL;
@@ -2071,13 +2087,9 @@ qla2x00_mem_free(scsi_qla_host_t *ha)
 	}
 	INIT_LIST_HEAD(&ha->fcports);
 
-	vfree(ha->fw_dump);
-	vfree(ha->fw_dump_buffer);
-
 	ha->fw_dump = NULL;
 	ha->fw_dumped = 0;
 	ha->fw_dump_reading = 0;
-	ha->fw_dump_buffer = NULL;
 
 	vfree(ha->optrom_buffer);
 }
-- 
cgit v0.10.2


From 395e0808fad28b08b982dd9f299fe4723e7c579b Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Fri, 23 Jun 2006 16:10:34 -0700
Subject: [SCSI] qla2xxx: Resync with latest HBA SSID specification -- 2.2j.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_devtbl.h b/drivers/scsi/qla2xxx/qla_devtbl.h
index a8fc0ff..dd43541 100644
--- a/drivers/scsi/qla2xxx/qla_devtbl.h
+++ b/drivers/scsi/qla2xxx/qla_devtbl.h
@@ -1,4 +1,4 @@
-#define QLA_MODEL_NAMES         0x4A
+#define QLA_MODEL_NAMES		0x57
 
 /*
  * Adapter model names and descriptions.
@@ -76,6 +76,19 @@ static char *qla2x00_model_name[QLA_MODEL_NAMES*2] = {
 	"QLE2440",	"PCI-Express to 4Gb FC, Single Channel",	/* 0x145 */
 	"QLE2464",	"PCI-Express to 4Gb FC, Quad Channel",		/* 0x146 */
 	"QLA2440",	"PCI-X 2.0 to 4Gb FC, Single Channel",		/* 0x147 */
-	" ",		" ",						/* 0x148 */
+	"HP AE369A",	"PCI-X 2.0 to 4Gb FC, Dual Channel",		/* 0x148 */
 	"QLA2340",	"Sun 133MHz PCI-X to 2Gb FC, Single Channel",	/* 0x149 */
+	" ",		" ",						/* 0x14a */
+	" ",		" ",						/* 0x14b */
+	"QMC2432M",	"IBM eServer BC 4Gb FC Expansion Card CFFE",	/* 0x14c */
+	"QMC2422M",	"IBM eServer BC 4Gb FC Expansion Card CFFX",	/* 0x14d */
+	"QLE220",	"Sun PCI-Express to 4Gb FC, Single Channel",	/* 0x14e */
+	" ",		" ",						/* 0x14f */
+	" ",		" ",						/* 0x150 */
+	" ",		" ",						/* 0x151 */
+	"QME2462",	"PCI-Express to 4Gb FC, Dual Channel Mezz HBA",	/* 0x152 */
+	"QMH2462",	"PCI-Express to 4Gb FC, Dual Channel Mezz HBA",	/* 0x153 */
+	" ",		" ",						/* 0x154 */
+	"QLE220",	"PCI-Express to 4Gb FC, Single Channel",	/* 0x155 */
+	"QLE220",	"PCI-Express to 4Gb FC, Single Channel",	/* 0x156 */
 };
-- 
cgit v0.10.2


From d4c760c2119fca982f335d83ff9095479c5d6737 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Fri, 23 Jun 2006 16:10:39 -0700
Subject: [SCSI] qla2xxx: Add NVRAM 'Disable Serdes' bit support.

The host section of ISP24xx NVRAMs contain a new bit which
allows a user to selectively disable ports of an HBA.  These
ports (hosts) will not be presented to the midlayer.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 653e22e..dfcd3a2 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2044,6 +2044,7 @@ typedef struct scsi_qla_host {
 		uint32_t	enable_led_scheme	:1;
 		uint32_t	msi_enabled		:1;
 		uint32_t	msix_enabled		:1;
+		uint32_t	disable_serdes		:1;
 	} flags;
 
 	atomic_t	loop_state;
diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
index 3af4786..af023f5 100644
--- a/drivers/scsi/qla2xxx/qla_fw.h
+++ b/drivers/scsi/qla2xxx/qla_fw.h
@@ -141,7 +141,7 @@ struct nvram_24xx {
 	 * BIT 2  = Enable Memory Map BIOS
 	 * BIT 3  = Enable Selectable Boot
 	 * BIT 4  = Disable RISC code load
-	 * BIT 5  =
+	 * BIT 5  = Disable Serdes
 	 * BIT 6  =
 	 * BIT 7  =
 	 *
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 880de6f..2ccdd84 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -89,6 +89,17 @@ qla2x00_initialize_adapter(scsi_qla_host_t *ha)
 
 	ha->isp_ops.nvram_config(ha);
 
+	if (ha->flags.disable_serdes) {
+		/* Mask HBA via NVRAM settings? */
+		qla_printk(KERN_INFO, ha, "Masking HBA WWPN "
+		    "%02x%02x%02x%02x%02x%02x%02x%02x (via NVRAM).\n",
+		    ha->port_name[0], ha->port_name[1],
+		    ha->port_name[2], ha->port_name[3],
+		    ha->port_name[4], ha->port_name[5],
+		    ha->port_name[6], ha->port_name[7]);
+		return QLA_FUNCTION_FAILED;
+	}
+
 	qla_printk(KERN_INFO, ha, "Verifying loaded RISC code...\n");
 
 	retry = 10;
@@ -1639,6 +1650,7 @@ qla2x00_nvram_config(scsi_qla_host_t *ha)
 	ha->flags.enable_lip_full_login = ((nv->host_p[1] & BIT_2) ? 1 : 0);
 	ha->flags.enable_target_reset = ((nv->host_p[1] & BIT_3) ? 1 : 0);
 	ha->flags.enable_led_scheme = (nv->special_options[1] & BIT_4) ? 1 : 0;
+	ha->flags.disable_serdes = 0;
 
 	ha->operating_mode =
 	    (icb->add_firmware_options[0] & (BIT_6 | BIT_5 | BIT_4)) >> 4;
@@ -3450,6 +3462,7 @@ qla24xx_nvram_config(scsi_qla_host_t *ha)
 	ha->flags.enable_lip_full_login = 1;
 	ha->flags.enable_target_reset = 1;
 	ha->flags.enable_led_scheme = 0;
+	ha->flags.disable_serdes = le32_to_cpu(nv->host_p) & BIT_5 ? 1: 0;
 
 	ha->operating_mode = (le32_to_cpu(icb->firmware_options_2) &
 	    (BIT_6 | BIT_5 | BIT_4)) >> 4;
-- 
cgit v0.10.2


From 8baa51a6f027a2f581930da12f867d8054907656 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Fri, 23 Jun 2006 16:10:44 -0700
Subject: [SCSI] qla2xxx: Honour 'skip process-login' option during
 fabric-login IOCB.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 39ddd38..3b16d6f 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -1458,6 +1458,8 @@ qla24xx_login_fabric(scsi_qla_host_t *ha, uint16_t loop_id, uint8_t domain,
 	lg->control_flags = __constant_cpu_to_le16(LCF_COMMAND_PLOGI);
 	if (opt & BIT_0)
 		lg->control_flags |= __constant_cpu_to_le16(LCF_COND_PLOGI);
+	if (opt & BIT_1)
+		lg->control_flags |= __constant_cpu_to_le16(LCF_SKIP_PRLI);
 	lg->port_id[0] = al_pa;
 	lg->port_id[1] = area;
 	lg->port_id[2] = domain;
-- 
cgit v0.10.2


From 88729e53a4798df20e7a7ef68e0a816f4a268da4 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Fri, 23 Jun 2006 16:10:50 -0700
Subject: [SCSI] qla2xxx: Add DMI (Diagnostics Monitoring Interface) support.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 78eec67..1a766b2 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -338,6 +338,53 @@ static struct bin_attribute sysfs_vpd_attr = {
 	.write = qla2x00_sysfs_write_vpd,
 };
 
+static ssize_t
+qla2x00_sysfs_read_sfp(struct kobject *kobj, char *buf, loff_t off,
+    size_t count)
+{
+	struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+	    struct device, kobj)));
+	uint16_t iter, addr, offset;
+	int rval;
+
+	if (!capable(CAP_SYS_ADMIN) || off != 0 || count != SFP_DEV_SIZE * 2)
+		return 0;
+
+	addr = 0xa0;
+	for (iter = 0, offset = 0; iter < (SFP_DEV_SIZE * 2) / SFP_BLOCK_SIZE;
+	    iter++, offset += SFP_BLOCK_SIZE) {
+		if (iter == 4) {
+			/* Skip to next device address. */
+			addr = 0xa2;
+			offset = 0;
+		}
+
+		rval = qla2x00_read_sfp(ha, ha->sfp_data_dma, addr, offset,
+		    SFP_BLOCK_SIZE);
+		if (rval != QLA_SUCCESS) {
+			qla_printk(KERN_WARNING, ha,
+			    "Unable to read SFP data (%x/%x/%x).\n", rval,
+			    addr, offset);
+			count = 0;
+			break;
+		}
+		memcpy(buf, ha->sfp_data, SFP_BLOCK_SIZE);
+		buf += SFP_BLOCK_SIZE;
+	}
+
+	return count;
+}
+
+static struct bin_attribute sysfs_sfp_attr = {
+	.attr = {
+		.name = "sfp",
+		.mode = S_IRUSR | S_IWUSR,
+		.owner = THIS_MODULE,
+	},
+	.size = SFP_DEV_SIZE * 2,
+	.read = qla2x00_sysfs_read_sfp,
+};
+
 void
 qla2x00_alloc_sysfs_attr(scsi_qla_host_t *ha)
 {
@@ -349,6 +396,9 @@ qla2x00_alloc_sysfs_attr(scsi_qla_host_t *ha)
 	sysfs_create_bin_file(&host->shost_gendev.kobj,
 	    &sysfs_optrom_ctl_attr);
 	sysfs_create_bin_file(&host->shost_gendev.kobj, &sysfs_vpd_attr);
+	if (IS_QLA24XX(ha) || IS_QLA54XX(ha))
+		sysfs_create_bin_file(&host->shost_gendev.kobj,
+		    &sysfs_sfp_attr);
 }
 
 void
@@ -362,6 +412,9 @@ qla2x00_free_sysfs_attr(scsi_qla_host_t *ha)
 	sysfs_remove_bin_file(&host->shost_gendev.kobj,
 	    &sysfs_optrom_ctl_attr);
 	sysfs_remove_bin_file(&host->shost_gendev.kobj, &sysfs_vpd_attr);
+	if (IS_QLA24XX(ha) || IS_QLA54XX(ha))
+		sysfs_remove_bin_file(&host->shost_gendev.kobj,
+		    &sysfs_sfp_attr);
 
 	if (ha->beacon_blink_led == 1)
 		ha->isp_ops.beacon_off(ha);
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index dfcd3a2..bb0b5f5 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -610,6 +610,7 @@ typedef struct {
 #define MBC_GET_TIMEOUT_PARAMS		0x22	/* Get FW timeouts. */
 #define MBC_TRACE_CONTROL		0x27	/* Trace control command. */
 #define MBC_GEN_SYSTEM_ERROR		0x2a	/* Generate System Error. */
+#define MBC_READ_SFP			0x31	/* Read SFP Data. */
 #define MBC_SET_TIMEOUT_PARAMS		0x32	/* Set FW timeouts. */
 #define MBC_MID_INITIALIZE_FIRMWARE	0x48	/* MID Initialize firmware. */
 #define MBC_MID_GET_VP_DATABASE		0x49	/* MID Get VP Database. */
@@ -2242,6 +2243,11 @@ typedef struct scsi_qla_host {
 	struct sns_cmd_pkt	*sns_cmd;
 	dma_addr_t		sns_cmd_dma;
 
+#define SFP_DEV_SIZE	256
+#define SFP_BLOCK_SIZE	64
+	void			*sfp_data;
+	dma_addr_t		sfp_data_dma;
+
 	struct task_struct	*dpc_thread;
 	uint8_t dpc_active;                  /* DPC routine is active */
 
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index fec1e1c..4edb5d1 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -210,6 +210,9 @@ qla2x00_stop_firmware(scsi_qla_host_t *);
 extern int
 qla2x00_trace_control(scsi_qla_host_t *, uint16_t, dma_addr_t, uint16_t);
 
+extern int
+qla2x00_read_sfp(scsi_qla_host_t *, dma_addr_t, uint16_t, uint16_t, uint16_t);
+
 /*
  * Global Function Prototypes in qla_isr.c source file.
  */
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 3b16d6f..f5e226f 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -2503,4 +2503,40 @@ qla2x00_trace_control(scsi_qla_host_t *ha, uint16_t ctrl, dma_addr_t eft_dma,
 	return rval;
 }
 
+int
+qla2x00_read_sfp(scsi_qla_host_t *ha, dma_addr_t sfp_dma, uint16_t addr,
+    uint16_t off, uint16_t count)
+{
+	int rval;
+	mbx_cmd_t mc;
+	mbx_cmd_t *mcp = &mc;
 
+	if (!IS_QLA24XX(ha) && !IS_QLA54XX(ha))
+		return QLA_FUNCTION_FAILED;
+
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no));
+
+	mcp->mb[0] = MBC_READ_SFP;
+	mcp->mb[1] = addr;
+	mcp->mb[2] = MSW(sfp_dma);
+	mcp->mb[3] = LSW(sfp_dma);
+	mcp->mb[6] = MSW(MSD(sfp_dma));
+	mcp->mb[7] = LSW(MSD(sfp_dma));
+	mcp->mb[8] = count;
+	mcp->mb[9] = off;
+	mcp->mb[10] = 0;
+	mcp->out_mb = MBX_10|MBX_9|MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_1|MBX_0;
+	mcp->in_mb = MBX_0;
+	mcp->tov = 30;
+	mcp->flags = 0;
+	rval = qla2x00_mailbox_command(ha, mcp);
+
+	if (rval != QLA_SUCCESS) {
+		DEBUG2_3_11(printk("%s(%ld): failed=%x (%x).\n", __func__,
+		    ha->host_no, rval, mcp->mb[0]));
+	} else {
+		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no));
+	}
+
+	return rval;
+}
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index c16154c..da84ee2 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1979,6 +1979,26 @@ qla2x00_mem_alloc(scsi_qla_host_t *ha)
 				continue;
 			}
 			memset(ha->ct_sns, 0, sizeof(struct ct_sns_pkt));
+
+			if (IS_QLA24XX(ha) || IS_QLA54XX(ha)) {
+				/*
+				 * Get consistent memory allocated for SFP
+				 * block.
+				 */
+				ha->sfp_data = dma_pool_alloc(ha->s_dma_pool,
+				    GFP_KERNEL, &ha->sfp_data_dma);
+				if (ha->sfp_data == NULL) {
+					qla_printk(KERN_WARNING, ha,
+					    "Memory Allocation failed - "
+					    "sfp_data\n");
+
+					qla2x00_mem_free(ha);
+					msleep(100);
+
+					continue;
+				}
+				memset(ha->sfp_data, 0, SFP_BLOCK_SIZE);
+			}
 		}
 
 		/* Done all allocations without any error. */
@@ -2034,6 +2054,9 @@ qla2x00_mem_free(scsi_qla_host_t *ha)
 		dma_free_coherent(&ha->pdev->dev, sizeof(struct ct_sns_pkt),
 		    ha->ct_sns, ha->ct_sns_dma);
 
+	if (ha->sfp_data)
+		dma_pool_free(ha->s_dma_pool, ha->sfp_data, ha->sfp_data_dma);
+
 	if (ha->ms_iocb)
 		dma_pool_free(ha->s_dma_pool, ha->ms_iocb, ha->ms_iocb_dma);
 
-- 
cgit v0.10.2


From 7914d004bcff3e59ca7c0d628f9862fe6655429c Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Fri, 23 Jun 2006 16:10:55 -0700
Subject: [SCSI] qla2xxx: Create an VPD sysfs entry for supported ISPs only.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 1a766b2..87f90c4 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -294,9 +294,6 @@ qla2x00_sysfs_read_vpd(struct kobject *kobj, char *buf, loff_t off,
 	if (!capable(CAP_SYS_ADMIN) || off != 0)
 		return 0;
 
-	if (!IS_QLA24XX(ha) && !IS_QLA54XX(ha))
-		return -ENOTSUPP;
-
 	/* Read NVRAM. */
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	ha->isp_ops.read_nvram(ha, (uint8_t *)buf, ha->vpd_base, ha->vpd_size);
@@ -316,9 +313,6 @@ qla2x00_sysfs_write_vpd(struct kobject *kobj, char *buf, loff_t off,
 	if (!capable(CAP_SYS_ADMIN) || off != 0 || count != ha->vpd_size)
 		return 0;
 
-	if (!IS_QLA24XX(ha) && !IS_QLA54XX(ha))
-		return -ENOTSUPP;
-
 	/* Write NVRAM. */
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 	ha->isp_ops.write_nvram(ha, (uint8_t *)buf, ha->vpd_base, count);
@@ -395,10 +389,12 @@ qla2x00_alloc_sysfs_attr(scsi_qla_host_t *ha)
 	sysfs_create_bin_file(&host->shost_gendev.kobj, &sysfs_optrom_attr);
 	sysfs_create_bin_file(&host->shost_gendev.kobj,
 	    &sysfs_optrom_ctl_attr);
-	sysfs_create_bin_file(&host->shost_gendev.kobj, &sysfs_vpd_attr);
-	if (IS_QLA24XX(ha) || IS_QLA54XX(ha))
+	if (IS_QLA24XX(ha) || IS_QLA54XX(ha)) {
+		sysfs_create_bin_file(&host->shost_gendev.kobj,
+		    &sysfs_vpd_attr);
 		sysfs_create_bin_file(&host->shost_gendev.kobj,
 		    &sysfs_sfp_attr);
+	}
 }
 
 void
@@ -411,10 +407,12 @@ qla2x00_free_sysfs_attr(scsi_qla_host_t *ha)
 	sysfs_remove_bin_file(&host->shost_gendev.kobj, &sysfs_optrom_attr);
 	sysfs_remove_bin_file(&host->shost_gendev.kobj,
 	    &sysfs_optrom_ctl_attr);
-	sysfs_remove_bin_file(&host->shost_gendev.kobj, &sysfs_vpd_attr);
-	if (IS_QLA24XX(ha) || IS_QLA54XX(ha))
+	if (IS_QLA24XX(ha) || IS_QLA54XX(ha)) {
+		sysfs_remove_bin_file(&host->shost_gendev.kobj,
+		    &sysfs_vpd_attr);
 		sysfs_remove_bin_file(&host->shost_gendev.kobj,
 		    &sysfs_sfp_attr);
+	}
 
 	if (ha->beacon_blink_led == 1)
 		ha->isp_ops.beacon_off(ha);
-- 
cgit v0.10.2


From 7469059d52c5f762890cc060ef3f0452c097b91e Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Fri, 23 Jun 2006 16:11:00 -0700
Subject: [SCSI] qla2xxx: Remove no-op IOCTL codes and macros.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 4edb5d1..9ff3298 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -286,13 +286,6 @@ extern void *qla24xx_prep_ms_fdmi_iocb(scsi_qla_host_t *, uint32_t, uint32_t);
 extern int qla2x00_fdmi_register(scsi_qla_host_t *);
 
 /*
- * Global Function Prototypes in qla_xioctl.c source file.
- */
-#define qla2x00_enqueue_aen(ha, cmd, mode)	do { } while (0)
-#define qla2x00_alloc_ioctl_mem(ha)		(0)
-#define qla2x00_free_ioctl_mem(ha)		do { } while (0)
-
-/*
  * Global Function Prototypes in qla_attr.c source file.
  */
 struct class_device_attribute;
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index b28ac0a..b9f01f7 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -395,10 +395,6 @@ qla2x00_async_event(scsi_qla_host_t *ha, uint16_t *mb)
 		set_bit(REGISTER_FC4_NEEDED, &ha->dpc_flags);
 
 		ha->flags.management_server_logged_in = 0;
-
-		/* Update AEN queue. */
-		qla2x00_enqueue_aen(ha, MBA_LIP_OCCURRED, NULL);
-
 		break;
 
 	case MBA_LOOP_UP:		/* Loop Up Event */
@@ -418,9 +414,6 @@ qla2x00_async_event(scsi_qla_host_t *ha, uint16_t *mb)
 		    link_speed);
 
 		ha->flags.management_server_logged_in = 0;
-
-		/* Update AEN queue. */
-		qla2x00_enqueue_aen(ha, MBA_LOOP_UP, NULL);
 		break;
 
 	case MBA_LOOP_DOWN:		/* Loop Down Event */
@@ -439,9 +432,6 @@ qla2x00_async_event(scsi_qla_host_t *ha, uint16_t *mb)
 		ha->link_data_rate = LDR_UNKNOWN;
 		if (ql2xfdmienable)
 			set_bit(REGISTER_FDMI_NEEDED, &ha->dpc_flags);
-
-		/* Update AEN queue. */
-		qla2x00_enqueue_aen(ha, MBA_LOOP_DOWN, NULL);
 		break;
 
 	case MBA_LIP_RESET:		/* LIP reset occurred */
@@ -460,10 +450,6 @@ qla2x00_async_event(scsi_qla_host_t *ha, uint16_t *mb)
 
 		ha->operating_mode = LOOP;
 		ha->flags.management_server_logged_in = 0;
-
-		/* Update AEN queue. */
-		qla2x00_enqueue_aen(ha, MBA_LIP_RESET, NULL);
-
 		break;
 
 	case MBA_POINT_TO_POINT:	/* Point-to-Point */
@@ -545,9 +531,6 @@ qla2x00_async_event(scsi_qla_host_t *ha, uint16_t *mb)
 
 		set_bit(LOOP_RESYNC_NEEDED, &ha->dpc_flags);
 		set_bit(LOCAL_LOOP_UPDATE, &ha->dpc_flags);
-
-		/* Update AEN queue. */
-		qla2x00_enqueue_aen(ha, MBA_PORT_UPDATE, NULL);
 		break;
 
 	case MBA_RSCN_UPDATE:		/* State Change Registration */
@@ -584,9 +567,6 @@ qla2x00_async_event(scsi_qla_host_t *ha, uint16_t *mb)
 
 		set_bit(LOOP_RESYNC_NEEDED, &ha->dpc_flags);
 		set_bit(RSCN_UPDATE, &ha->dpc_flags);
-
-		/* Update AEN queue. */
-		qla2x00_enqueue_aen(ha, MBA_RSCN_UPDATE, &mb[0]);
 		break;
 
 	/* case MBA_RIO_RESPONSE: */
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index da84ee2..0c6e334 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1906,17 +1906,6 @@ qla2x00_mem_alloc(scsi_qla_host_t *ha)
 		}
 		memset(ha->init_cb, 0, ha->init_cb_size);
 
-		/* Allocate ioctl related memory. */
-		if (qla2x00_alloc_ioctl_mem(ha)) {
-			qla_printk(KERN_WARNING, ha,
-			    "Memory Allocation failed - ioctl_mem\n");
-
-			qla2x00_mem_free(ha);
-			msleep(100);
-
-			continue;
-		}
-
 		if (qla2x00_allocate_sp_pool(ha)) {
 			qla_printk(KERN_WARNING, ha,
 			    "Memory Allocation failed - "
@@ -2033,9 +2022,6 @@ qla2x00_mem_free(scsi_qla_host_t *ha)
 		return;
 	}
 
-	/* free ioctl memory */
-	qla2x00_free_ioctl_mem(ha);
-
 	/* free sp pool */
 	qla2x00_free_sp_pool(ha);
 
-- 
cgit v0.10.2


From 744f11fdb1118c9306303529263e5ed09b463a0f Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Fri, 23 Jun 2006 16:11:05 -0700
Subject: [SCSI] qla2xxx: Cleanup DEBUG macro usage.

- macro usage statements should terminate with a ';'
- remove unused macros.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h
index ace6afd..2cea009 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.h
+++ b/drivers/scsi/qla2xxx/qla_dbg.h
@@ -37,122 +37,110 @@
 /*
 * Macros use for debugging the driver.
 */
-#undef ENTER_TRACE
-#if defined(ENTER_TRACE)
-#define ENTER(x)	do { printk("qla2100 : Entering %s()\n", x); } while (0)
-#define LEAVE(x)	do { printk("qla2100 : Leaving %s()\n", x);  } while (0)
-#define ENTER_INTR(x)	do { printk("qla2100 : Entering %s()\n", x); } while (0)
-#define LEAVE_INTR(x)	do { printk("qla2100 : Leaving %s()\n", x);  } while (0)
-#else
-#define ENTER(x)	do {} while (0)
-#define LEAVE(x)	do {} while (0)
-#define ENTER_INTR(x) 	do {} while (0)
-#define LEAVE_INTR(x)   do {} while (0)
-#endif
 
 #if  DEBUG_QLA2100
-#define DEBUG(x)	do {x;} while (0);
+#define DEBUG(x)	do {x;} while (0)
 #else
-#define DEBUG(x)	do {} while (0);
+#define DEBUG(x)	do {} while (0)
 #endif
 
 #if defined(QL_DEBUG_LEVEL_1)
-#define DEBUG1(x)	do {x;} while (0);
+#define DEBUG1(x)	do {x;} while (0)
 #else
-#define DEBUG1(x)	do {} while (0);
+#define DEBUG1(x)	do {} while (0)
 #endif
 
 #if defined(QL_DEBUG_LEVEL_2)
-#define DEBUG2(x)       do {x;} while (0);
-#define DEBUG2_3(x)     do {x;} while (0);
-#define DEBUG2_3_11(x)  do {x;} while (0);
-#define DEBUG2_9_10(x)    do {x;} while (0);
-#define DEBUG2_11(x)    do {x;} while (0);
-#define DEBUG2_13(x)    do {x;} while (0);
+#define DEBUG2(x)       do {x;} while (0)
+#define DEBUG2_3(x)     do {x;} while (0)
+#define DEBUG2_3_11(x)  do {x;} while (0)
+#define DEBUG2_9_10(x)    do {x;} while (0)
+#define DEBUG2_11(x)    do {x;} while (0)
+#define DEBUG2_13(x)    do {x;} while (0)
 #else
-#define DEBUG2(x)	do {} while (0);
+#define DEBUG2(x)	do {} while (0)
 #endif
 
 #if defined(QL_DEBUG_LEVEL_3)
-#define DEBUG3(x)	do {x;} while (0);
-#define DEBUG2_3(x)	do {x;} while (0);
-#define DEBUG2_3_11(x)	do {x;} while (0);
-#define DEBUG3_11(x)	do {x;} while (0);
+#define DEBUG3(x)	do {x;} while (0)
+#define DEBUG2_3(x)	do {x;} while (0)
+#define DEBUG2_3_11(x)	do {x;} while (0)
+#define DEBUG3_11(x)	do {x;} while (0)
 #else
-#define DEBUG3(x)	do {} while (0);
+#define DEBUG3(x)	do {} while (0)
   #if !defined(QL_DEBUG_LEVEL_2)
-  #define DEBUG2_3(x)	do {} while (0);
+  #define DEBUG2_3(x)	do {} while (0)
   #endif
 #endif
 
 #if defined(QL_DEBUG_LEVEL_4)
-#define DEBUG4(x)	do {x;} while (0);
+#define DEBUG4(x)	do {x;} while (0)
 #else
-#define DEBUG4(x)	do {} while (0);
+#define DEBUG4(x)	do {} while (0)
 #endif
 
 #if defined(QL_DEBUG_LEVEL_5)
-#define DEBUG5(x)          do {x;} while (0);
+#define DEBUG5(x)          do {x;} while (0)
 #else
-#define DEBUG5(x)	do {} while (0);
+#define DEBUG5(x)	do {} while (0)
 #endif
 
 #if defined(QL_DEBUG_LEVEL_7)
-#define DEBUG7(x)          do {x;} while (0);
+#define DEBUG7(x)          do {x;} while (0)
 #else
-#define DEBUG7(x)	   do {} while (0);
+#define DEBUG7(x)	   do {} while (0)
 #endif
 
 #if defined(QL_DEBUG_LEVEL_9)
-#define DEBUG9(x)       do {x;} while (0);
-#define DEBUG9_10(x)    do {x;} while (0);
-#define DEBUG2_9_10(x)	do {x;} while (0);
+#define DEBUG9(x)       do {x;} while (0)
+#define DEBUG9_10(x)    do {x;} while (0)
+#define DEBUG2_9_10(x)	do {x;} while (0)
 #else
-#define DEBUG9(x)	do {} while (0);
+#define DEBUG9(x)	do {} while (0)
 #endif
 
 #if defined(QL_DEBUG_LEVEL_10)
-#define DEBUG10(x)      do {x;} while (0);
-#define DEBUG2_9_10(x)	do {x;} while (0);
-#define DEBUG9_10(x)	do {x;} while (0);
+#define DEBUG10(x)      do {x;} while (0)
+#define DEBUG2_9_10(x)	do {x;} while (0)
+#define DEBUG9_10(x)	do {x;} while (0)
 #else
-#define DEBUG10(x)	do {} while (0);
+#define DEBUG10(x)	do {} while (0)
   #if !defined(DEBUG2_9_10)
-  #define DEBUG2_9_10(x)	do {} while (0);
+  #define DEBUG2_9_10(x)	do {} while (0)
   #endif
   #if !defined(DEBUG9_10)
-  #define DEBUG9_10(x)	do {} while (0);
+  #define DEBUG9_10(x)	do {} while (0)
   #endif
 #endif
 
 #if defined(QL_DEBUG_LEVEL_11)
-#define DEBUG11(x)      do{x;} while(0);
+#define DEBUG11(x)      do{x;} while(0)
 #if !defined(DEBUG2_11)
-#define DEBUG2_11(x)    do{x;} while(0);
+#define DEBUG2_11(x)    do{x;} while(0)
 #endif
 #if !defined(DEBUG2_3_11)
-#define DEBUG2_3_11(x)  do{x;} while(0);
+#define DEBUG2_3_11(x)  do{x;} while(0)
 #endif
 #if !defined(DEBUG3_11)
-#define DEBUG3_11(x)    do{x;} while(0);
+#define DEBUG3_11(x)    do{x;} while(0)
 #endif
 #else
-#define DEBUG11(x)	do{} while(0);
+#define DEBUG11(x)	do{} while(0)
   #if !defined(QL_DEBUG_LEVEL_2)
-  #define DEBUG2_11(x)	do{} while(0);
+  #define DEBUG2_11(x)	do{} while(0)
     #if !defined(QL_DEBUG_LEVEL_3)
-    #define DEBUG2_3_11(x) do{} while(0);
+    #define DEBUG2_3_11(x) do{} while(0)
     #endif
   #endif
   #if !defined(QL_DEBUG_LEVEL_3)
-  #define DEBUG3_11(x)	do{} while(0);
+  #define DEBUG3_11(x)	do{} while(0)
   #endif
 #endif
 
 #if defined(QL_DEBUG_LEVEL_12)
-#define DEBUG12(x)      do {x;} while (0);
+#define DEBUG12(x)      do {x;} while (0)
 #else
-#define DEBUG12(x)	do {} while (0);
+#define DEBUG12(x)	do {} while (0)
 #endif
 
 #if defined(QL_DEBUG_LEVEL_13)
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 2ccdd84..101cf12 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -3137,14 +3137,14 @@ qla2x00_abort_isp(scsi_qla_host_t *ha)
 					ha->isp_abort_cnt--;
 					DEBUG(printk("qla%ld: ISP abort - "
 					    "retry remaining %d\n",
-					    ha->host_no, ha->isp_abort_cnt);)
+					    ha->host_no, ha->isp_abort_cnt));
 					status = 1;
 				}
 			} else {
 				ha->isp_abort_cnt = MAX_RETRIES_OF_ISP_ABORT;
 				DEBUG(printk("qla2x00(%ld): ISP error recovery "
 				    "- retrying (%d) more times\n",
-				    ha->host_no, ha->isp_abort_cnt);)
+				    ha->host_no, ha->isp_abort_cnt));
 				set_bit(ISP_ABORT_RETRY, &ha->dpc_flags);
 				status = 1;
 			}
@@ -3158,7 +3158,7 @@ qla2x00_abort_isp(scsi_qla_host_t *ha)
 	} else {
 		DEBUG(printk(KERN_INFO
 				"qla2x00_abort_isp(%ld): exiting.\n",
-				ha->host_no);)
+				ha->host_no));
 	}
 
 	return(status);
@@ -3234,7 +3234,7 @@ qla2x00_restart_isp(scsi_qla_host_t *ha)
 		clear_bit(RESET_MARKER_NEEDED, &ha->dpc_flags);
 		if (!(status = qla2x00_fw_ready(ha))) {
 			DEBUG(printk("%s(): Start configure loop, "
-			    "status = %d\n", __func__, status);)
+			    "status = %d\n", __func__, status));
 
 			/* Issue a marker after FW becomes ready. */
 			qla2x00_marker(ha, 0, 0, MK_SYNC_ALL);
@@ -3258,7 +3258,7 @@ qla2x00_restart_isp(scsi_qla_host_t *ha)
 
 		DEBUG(printk("%s(): Configure loop done, status = 0x%x\n",
 				__func__,
-				status);)
+				status));
 	}
 	return (status);
 }
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index b9f01f7..795bf15 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1432,8 +1432,8 @@ qla24xx_ms_entry(scsi_qla_host_t *ha, struct ct_entry_24xx *pkt)
 	DEBUG3(printk("%s(%ld): pkt=%p pkthandle=%d.\n",
 	    __func__, ha->host_no, pkt, pkt->handle));
 
-	DEBUG9(printk("%s: ct pkt dump:\n", __func__);)
-	DEBUG9(qla2x00_dump_buffer((void *)pkt, sizeof(struct ct_entry_24xx));)
+	DEBUG9(printk("%s: ct pkt dump:\n", __func__));
+	DEBUG9(qla2x00_dump_buffer((void *)pkt, sizeof(struct ct_entry_24xx)));
 
 	/* Validate handle. */
  	if (pkt->handle < MAX_OUTSTANDING_COMMANDS)
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index f5e226f..879f281 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -13,13 +13,13 @@ qla2x00_mbx_sem_timeout(unsigned long data)
 {
 	struct semaphore	*sem_ptr = (struct semaphore *)data;
 
-	DEBUG11(printk("qla2x00_sem_timeout: entered.\n");)
+	DEBUG11(printk("qla2x00_sem_timeout: entered.\n"));
 
 	if (sem_ptr != NULL) {
 		up(sem_ptr);
 	}
 
-	DEBUG11(printk("qla2x00_mbx_sem_timeout: exiting.\n");)
+	DEBUG11(printk("qla2x00_mbx_sem_timeout: exiting.\n"));
 }
 
 /*
@@ -61,7 +61,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *ha, mbx_cmd_t *mcp)
 	rval = QLA_SUCCESS;
 	abort_active = test_bit(ABORT_ISP_ACTIVE, &ha->dpc_flags);
 
-	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no);)
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no));
 
 	/*
 	 * Wait for active mailbox commands to finish by waiting at most tov
@@ -72,7 +72,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *ha, mbx_cmd_t *mcp)
 		if (qla2x00_down_timeout(&ha->mbx_cmd_sem, mcp->tov * HZ)) {
 			/* Timeout occurred. Return error. */
 			DEBUG2_3_11(printk("%s(%ld): cmd access timeout. "
-			    "Exiting.\n", __func__, ha->host_no);)
+			    "Exiting.\n", __func__, ha->host_no));
 			return QLA_FUNCTION_TIMEOUT;
 		}
 	}
@@ -86,7 +86,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *ha, mbx_cmd_t *mcp)
 		spin_lock_irqsave(&ha->mbx_reg_lock, mbx_flags);
 
 	DEBUG11(printk("scsi(%ld): prepare to issue mbox cmd=0x%x.\n",
-	    ha->host_no, mcp->mb[0]);)
+	    ha->host_no, mcp->mb[0]));
 
 	spin_lock_irqsave(&ha->hardware_lock, flags);
 
@@ -131,14 +131,14 @@ qla2x00_mailbox_command(scsi_qla_host_t *ha, mbx_cmd_t *mcp)
 
 	/* Unlock mbx registers and wait for interrupt */
 	DEBUG11(printk("%s(%ld): going to unlock irq & waiting for interrupt. "
-	    "jiffies=%lx.\n", __func__, ha->host_no, jiffies);)
+	    "jiffies=%lx.\n", __func__, ha->host_no, jiffies));
 
 	/* Wait for mbx cmd completion until timeout */
 
 	if (!abort_active && io_lock_on) {
 		/* sleep on completion semaphore */
 		DEBUG11(printk("%s(%ld): INTERRUPT MODE. Initializing timer.\n",
-		    __func__, ha->host_no);)
+		    __func__, ha->host_no));
 
 		init_timer(&tmp_intr_timer);
 		tmp_intr_timer.data = (unsigned long)&ha->mbx_intr_sem;
@@ -147,11 +147,11 @@ qla2x00_mailbox_command(scsi_qla_host_t *ha, mbx_cmd_t *mcp)
 		    (void (*)(unsigned long))qla2x00_mbx_sem_timeout;
 
 		DEBUG11(printk("%s(%ld): Adding timer.\n", __func__,
-		    ha->host_no);)
+		    ha->host_no));
 		add_timer(&tmp_intr_timer);
 
 		DEBUG11(printk("%s(%ld): going to unlock & sleep. "
-		    "time=0x%lx.\n", __func__, ha->host_no, jiffies);)
+		    "time=0x%lx.\n", __func__, ha->host_no, jiffies));
 
 		set_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
 
@@ -170,14 +170,14 @@ qla2x00_mailbox_command(scsi_qla_host_t *ha, mbx_cmd_t *mcp)
 		down(&ha->mbx_intr_sem);
 
 		DEBUG11(printk("%s(%ld): waking up. time=0x%lx\n", __func__,
-		    ha->host_no, jiffies);)
+		    ha->host_no, jiffies));
 		clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
 
 		/* delete the timer */
 		del_timer(&tmp_intr_timer);
 	} else {
 		DEBUG3_11(printk("%s(%ld): cmd=%x POLLING MODE.\n", __func__,
-		    ha->host_no, command);)
+		    ha->host_no, command));
 
 		if (IS_QLA24XX(ha) || IS_QLA54XX(ha))
 			WRT_REG_DWORD(&reg->isp24.hccr, HCCRX_SET_HOST_INT);
@@ -209,7 +209,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *ha, mbx_cmd_t *mcp)
 		uint16_t *iptr2;
 
 		DEBUG3_11(printk("%s(%ld): cmd %x completed.\n", __func__,
-		    ha->host_no, command);)
+		    ha->host_no, command));
 
 		/* Got interrupt. Clear the flag. */
 		ha->flags.mbox_int = 0;
@@ -266,7 +266,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *ha, mbx_cmd_t *mcp)
 
 	if (!abort_active) {
 		DEBUG11(printk("%s(%ld): checking for additional resp "
-		    "interrupt.\n", __func__, ha->host_no);)
+		    "interrupt.\n", __func__, ha->host_no));
 
 		/* polling mode for non isp_abort commands. */
 		qla2x00_poll(ha);
@@ -277,9 +277,9 @@ qla2x00_mailbox_command(scsi_qla_host_t *ha, mbx_cmd_t *mcp)
 		if (!io_lock_on || (mcp->flags & IOCTL_CMD)) {
 			/* not in dpc. schedule it for dpc to take over. */
 			DEBUG(printk("%s(%ld): timeout schedule "
-			    "isp_abort_needed.\n", __func__, ha->host_no);)
+			    "isp_abort_needed.\n", __func__, ha->host_no));
 			DEBUG2_3_11(printk("%s(%ld): timeout schedule "
-			    "isp_abort_needed.\n", __func__, ha->host_no);)
+			    "isp_abort_needed.\n", __func__, ha->host_no));
 			qla_printk(KERN_WARNING, ha,
 			    "Mailbox command timeout occured. Scheduling ISP "
 			    "abort.\n");
@@ -288,9 +288,9 @@ qla2x00_mailbox_command(scsi_qla_host_t *ha, mbx_cmd_t *mcp)
 		} else if (!abort_active) {
 			/* call abort directly since we are in the DPC thread */
 			DEBUG(printk("%s(%ld): timeout calling abort_isp\n",
-			    __func__, ha->host_no);)
+			    __func__, ha->host_no));
 			DEBUG2_3_11(printk("%s(%ld): timeout calling "
-			    "abort_isp\n", __func__, ha->host_no);)
+			    "abort_isp\n", __func__, ha->host_no));
 			qla_printk(KERN_WARNING, ha,
 			    "Mailbox command timeout occured. Issuing ISP "
 			    "abort.\n");
@@ -303,9 +303,9 @@ qla2x00_mailbox_command(scsi_qla_host_t *ha, mbx_cmd_t *mcp)
 			}
 			clear_bit(ABORT_ISP_ACTIVE, &ha->dpc_flags);
 			DEBUG(printk("%s(%ld): finished abort_isp\n", __func__,
-			    ha->host_no);)
+			    ha->host_no));
 			DEBUG2_3_11(printk("%s(%ld): finished abort_isp\n",
-			    __func__, ha->host_no);)
+			    __func__, ha->host_no));
 		}
 	}
 
@@ -316,9 +316,9 @@ qla2x00_mailbox_command(scsi_qla_host_t *ha, mbx_cmd_t *mcp)
 	if (rval) {
 		DEBUG2_3_11(printk("%s(%ld): **** FAILED. mbx0=%x, mbx1=%x, "
 		    "mbx2=%x, cmd=%x ****\n", __func__, ha->host_no,
-		    mcp->mb[0], mcp->mb[1], mcp->mb[2], command);)
+		    mcp->mb[0], mcp->mb[1], mcp->mb[2], command));
 	} else {
-		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no);)
+		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no));
 	}
 
 	return rval;
@@ -394,7 +394,7 @@ qla2x00_execute_fw(scsi_qla_host_t *ha, uint32_t risc_addr)
 	mbx_cmd_t mc;
 	mbx_cmd_t *mcp = &mc;
 
-	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no);)
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no));
 
 	mcp->mb[0] = MBC_EXECUTE_FIRMWARE;
 	mcp->out_mb = MBX_0;
@@ -424,10 +424,10 @@ qla2x00_execute_fw(scsi_qla_host_t *ha, uint32_t risc_addr)
 	} else {
 		if (IS_QLA24XX(ha) || IS_QLA54XX(ha)) {
 			DEBUG11(printk("%s(%ld): done exchanges=%x.\n",
-			    __func__, ha->host_no, mcp->mb[1]);)
+			    __func__, ha->host_no, mcp->mb[1]));
 		} else {
 			DEBUG11(printk("%s(%ld): done.\n", __func__,
-			    ha->host_no);)
+			    ha->host_no));
 		}
 	}
 
@@ -611,7 +611,7 @@ qla2x00_mbx_reg_test(scsi_qla_host_t *ha)
 	mbx_cmd_t mc;
 	mbx_cmd_t *mcp = &mc;
 
-	DEBUG11(printk("qla2x00_mbx_reg_test(%ld): entered.\n", ha->host_no);)
+	DEBUG11(printk("qla2x00_mbx_reg_test(%ld): entered.\n", ha->host_no));
 
 	mcp->mb[0] = MBC_MAILBOX_REGISTER_TEST;
 	mcp->mb[1] = 0xAAAA;
@@ -639,11 +639,11 @@ qla2x00_mbx_reg_test(scsi_qla_host_t *ha)
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
 		DEBUG2_3_11(printk("qla2x00_mbx_reg_test(%ld): failed=%x.\n",
-		    ha->host_no, rval);)
+		    ha->host_no, rval));
 	} else {
 		/*EMPTY*/
 		DEBUG11(printk("qla2x00_mbx_reg_test(%ld): done.\n",
-		    ha->host_no);)
+		    ha->host_no));
 	}
 
 	return rval;
@@ -671,7 +671,7 @@ qla2x00_verify_checksum(scsi_qla_host_t *ha, uint32_t risc_addr)
 	mbx_cmd_t mc;
 	mbx_cmd_t *mcp = &mc;
 
-	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no);)
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no));
 
 	mcp->mb[0] = MBC_VERIFY_CHECKSUM;
 	mcp->out_mb = MBX_0;
@@ -694,9 +694,9 @@ qla2x00_verify_checksum(scsi_qla_host_t *ha, uint32_t risc_addr)
 	if (rval != QLA_SUCCESS) {
 		DEBUG2_3_11(printk("%s(%ld): failed=%x chk sum=%x.\n", __func__,
 		    ha->host_no, rval, (IS_QLA24XX(ha) || IS_QLA54XX(ha) ?
-		    (mcp->mb[2] << 16) | mcp->mb[1]: mcp->mb[1]));)
+		    (mcp->mb[2] << 16) | mcp->mb[1]: mcp->mb[1])));
 	} else {
-		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no);)
+		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no));
 	}
 
 	return rval;
@@ -743,9 +743,9 @@ qla2x00_issue_iocb(scsi_qla_host_t *ha, void*  buffer, dma_addr_t phys_addr,
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
 		DEBUG(printk("qla2x00_issue_iocb(%ld): failed rval 0x%x\n",
-		    ha->host_no, rval);)
+		    ha->host_no, rval));
 		DEBUG2(printk("qla2x00_issue_iocb(%ld): failed rval 0x%x\n",
-		    ha->host_no, rval);)
+		    ha->host_no, rval));
 	} else {
 		sts_entry_t *sts_entry = (sts_entry_t *) buffer;
 
@@ -781,7 +781,7 @@ qla2x00_abort_command(scsi_qla_host_t *ha, srb_t *sp)
 	mbx_cmd_t	mc;
 	mbx_cmd_t	*mcp = &mc;
 
-	DEBUG11(printk("qla2x00_abort_command(%ld): entered.\n", ha->host_no);)
+	DEBUG11(printk("qla2x00_abort_command(%ld): entered.\n", ha->host_no));
 
 	fcport = sp->fcport;
 
@@ -813,11 +813,11 @@ qla2x00_abort_command(scsi_qla_host_t *ha, srb_t *sp)
 
 	if (rval != QLA_SUCCESS) {
 		DEBUG2_3_11(printk("qla2x00_abort_command(%ld): failed=%x.\n",
-		    ha->host_no, rval);)
+		    ha->host_no, rval));
 	} else {
 		sp->flags |= SRB_ABORT_PENDING;
 		DEBUG11(printk("qla2x00_abort_command(%ld): done.\n",
-		    ha->host_no);)
+		    ha->host_no));
 	}
 
 	return rval;
@@ -848,7 +848,7 @@ qla2x00_abort_target(fc_port_t *fcport)
 	if (fcport == NULL)
 		return 0;
 
-	DEBUG11(printk("%s(%ld): entered.\n", __func__, fcport->ha->host_no);)
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, fcport->ha->host_no));
 
 	ha = fcport->ha;
 	mcp->mb[0] = MBC_ABORT_TARGET;
@@ -872,11 +872,11 @@ qla2x00_abort_target(fc_port_t *fcport)
 
 	if (rval != QLA_SUCCESS) {
 		DEBUG2_3_11(printk("qla2x00_abort_target(%ld): failed=%x.\n",
-		    ha->host_no, rval);)
+		    ha->host_no, rval));
 	} else {
 		/*EMPTY*/
 		DEBUG11(printk("qla2x00_abort_target(%ld): done.\n",
-		    ha->host_no);)
+		    ha->host_no));
 	}
 
 	return rval;
@@ -912,7 +912,7 @@ qla2x00_get_adapter_id(scsi_qla_host_t *ha, uint16_t *id, uint8_t *al_pa,
 	mbx_cmd_t *mcp = &mc;
 
 	DEBUG11(printk("qla2x00_get_adapter_id(%ld): entered.\n",
-	    ha->host_no);)
+	    ha->host_no));
 
 	mcp->mb[0] = MBC_GET_ADAPTER_LOOP_ID;
 	mcp->out_mb = MBX_0;
@@ -933,11 +933,11 @@ qla2x00_get_adapter_id(scsi_qla_host_t *ha, uint16_t *id, uint8_t *al_pa,
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
 		DEBUG2_3_11(printk("qla2x00_get_adapter_id(%ld): failed=%x.\n",
-		    ha->host_no, rval);)
+		    ha->host_no, rval));
 	} else {
 		/*EMPTY*/
 		DEBUG11(printk("qla2x00_get_adapter_id(%ld): done.\n",
-		    ha->host_no);)
+		    ha->host_no));
 	}
 
 	return rval;
@@ -968,7 +968,7 @@ qla2x00_get_retry_cnt(scsi_qla_host_t *ha, uint8_t *retry_cnt, uint8_t *tov,
 	mbx_cmd_t *mcp = &mc;
 
 	DEBUG11(printk("qla2x00_get_retry_cnt(%ld): entered.\n",
-			ha->host_no);)
+			ha->host_no));
 
 	mcp->mb[0] = MBC_GET_RETRY_COUNT;
 	mcp->out_mb = MBX_0;
@@ -980,7 +980,7 @@ qla2x00_get_retry_cnt(scsi_qla_host_t *ha, uint8_t *retry_cnt, uint8_t *tov,
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
 		DEBUG2_3_11(printk("qla2x00_get_retry_cnt(%ld): failed = %x.\n",
-		    ha->host_no, mcp->mb[0]);)
+		    ha->host_no, mcp->mb[0]));
 	} else {
 		/* Convert returned data and check our values. */
 		*r_a_tov = mcp->mb[3] / 2;
@@ -992,7 +992,7 @@ qla2x00_get_retry_cnt(scsi_qla_host_t *ha, uint8_t *retry_cnt, uint8_t *tov,
 		}
 
 		DEBUG11(printk("qla2x00_get_retry_cnt(%ld): done. mb3=%d "
-		    "ratov=%d.\n", ha->host_no, mcp->mb[3], ratov);)
+		    "ratov=%d.\n", ha->host_no, mcp->mb[3], ratov));
 	}
 
 	return rval;
@@ -1023,7 +1023,7 @@ qla2x00_init_firmware(scsi_qla_host_t *ha, uint16_t size)
 	mbx_cmd_t *mcp = &mc;
 
 	DEBUG11(printk("qla2x00_init_firmware(%ld): entered.\n",
-	    ha->host_no);)
+	    ha->host_no));
 
 	mcp->mb[0] = MBC_INITIALIZE_FIRMWARE;
 	mcp->mb[2] = MSW(ha->init_cb_dma);
@@ -1043,11 +1043,11 @@ qla2x00_init_firmware(scsi_qla_host_t *ha, uint16_t size)
 		/*EMPTY*/
 		DEBUG2_3_11(printk("qla2x00_init_firmware(%ld): failed=%x "
 		    "mb0=%x.\n",
-		    ha->host_no, rval, mcp->mb[0]);)
+		    ha->host_no, rval, mcp->mb[0]));
 	} else {
 		/*EMPTY*/
 		DEBUG11(printk("qla2x00_init_firmware(%ld): done.\n",
-		    ha->host_no);)
+		    ha->host_no));
 	}
 
 	return rval;
@@ -1079,7 +1079,7 @@ qla2x00_get_port_database(scsi_qla_host_t *ha, fc_port_t *fcport, uint8_t opt)
 	struct port_database_24xx *pd24;
 	dma_addr_t pd_dma;
 
-	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no);)
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no));
 
 	pd24 = NULL;
 	pd = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &pd_dma);
@@ -1220,7 +1220,7 @@ qla2x00_get_firmware_state(scsi_qla_host_t *ha, uint16_t *dptr)
 	mbx_cmd_t *mcp = &mc;
 
 	DEBUG11(printk("qla2x00_get_firmware_state(%ld): entered.\n",
-	    ha->host_no);)
+	    ha->host_no));
 
 	mcp->mb[0] = MBC_GET_FIRMWARE_STATE;
 	mcp->out_mb = MBX_0;
@@ -1235,11 +1235,11 @@ qla2x00_get_firmware_state(scsi_qla_host_t *ha, uint16_t *dptr)
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
 		DEBUG2_3_11(printk("qla2x00_get_firmware_state(%ld): "
-		    "failed=%x.\n", ha->host_no, rval);)
+		    "failed=%x.\n", ha->host_no, rval));
 	} else {
 		/*EMPTY*/
 		DEBUG11(printk("qla2x00_get_firmware_state(%ld): done.\n",
-		    ha->host_no);)
+		    ha->host_no));
 	}
 
 	return rval;
@@ -1272,7 +1272,7 @@ qla2x00_get_port_name(scsi_qla_host_t *ha, uint16_t loop_id, uint8_t *name,
 	mbx_cmd_t *mcp = &mc;
 
 	DEBUG11(printk("qla2x00_get_port_name(%ld): entered.\n",
-	    ha->host_no);)
+	    ha->host_no));
 
 	mcp->mb[0] = MBC_GET_PORT_NAME;
 	mcp->out_mb = MBX_1|MBX_0;
@@ -1292,7 +1292,7 @@ qla2x00_get_port_name(scsi_qla_host_t *ha, uint16_t loop_id, uint8_t *name,
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
 		DEBUG2_3_11(printk("qla2x00_get_port_name(%ld): failed=%x.\n",
-		    ha->host_no, rval);)
+		    ha->host_no, rval));
 	} else {
 		if (name != NULL) {
 			/* This function returns name in big endian. */
@@ -1307,7 +1307,7 @@ qla2x00_get_port_name(scsi_qla_host_t *ha, uint16_t loop_id, uint8_t *name,
 		}
 
 		DEBUG11(printk("qla2x00_get_port_name(%ld): done.\n",
-		    ha->host_no);)
+		    ha->host_no));
 	}
 
 	return rval;
@@ -1335,7 +1335,7 @@ qla2x00_lip_reset(scsi_qla_host_t *ha)
 	mbx_cmd_t mc;
 	mbx_cmd_t *mcp = &mc;
 
-	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no);)
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no));
 
 	if (IS_QLA24XX(ha) || IS_QLA54XX(ha)) {
 		mcp->mb[0] = MBC_LIP_FULL_LOGIN;
@@ -1364,10 +1364,10 @@ qla2x00_lip_reset(scsi_qla_host_t *ha)
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
 		DEBUG2_3_11(printk("%s(%ld): failed=%x.\n",
-		    __func__, ha->host_no, rval);)
+		    __func__, ha->host_no, rval));
 	} else {
 		/*EMPTY*/
-		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no);)
+		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no));
 	}
 
 	return rval;
@@ -1400,10 +1400,10 @@ qla2x00_send_sns(scsi_qla_host_t *ha, dma_addr_t sns_phys_address,
 	mbx_cmd_t *mcp = &mc;
 
 	DEBUG11(printk("qla2x00_send_sns(%ld): entered.\n",
-	    ha->host_no);)
+	    ha->host_no));
 
 	DEBUG11(printk("qla2x00_send_sns: retry cnt=%d ratov=%d total "
-	    "tov=%d.\n", ha->retry_count, ha->login_timeout, mcp->tov);)
+	    "tov=%d.\n", ha->retry_count, ha->login_timeout, mcp->tov));
 
 	mcp->mb[0] = MBC_SEND_SNS_COMMAND;
 	mcp->mb[1] = cmd_size;
@@ -1421,12 +1421,12 @@ qla2x00_send_sns(scsi_qla_host_t *ha, dma_addr_t sns_phys_address,
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
 		DEBUG(printk("qla2x00_send_sns(%ld): failed=%x mb[0]=%x "
-		    "mb[1]=%x.\n", ha->host_no, rval, mcp->mb[0], mcp->mb[1]);)
+		    "mb[1]=%x.\n", ha->host_no, rval, mcp->mb[0], mcp->mb[1]));
 		DEBUG2_3_11(printk("qla2x00_send_sns(%ld): failed=%x mb[0]=%x "
-		    "mb[1]=%x.\n", ha->host_no, rval, mcp->mb[0], mcp->mb[1]);)
+		    "mb[1]=%x.\n", ha->host_no, rval, mcp->mb[0], mcp->mb[1]));
 	} else {
 		/*EMPTY*/
-		DEBUG11(printk("qla2x00_send_sns(%ld): done.\n", ha->host_no);)
+		DEBUG11(printk("qla2x00_send_sns(%ld): done.\n", ha->host_no));
 	}
 
 	return rval;
@@ -1442,7 +1442,7 @@ qla24xx_login_fabric(scsi_qla_host_t *ha, uint16_t loop_id, uint8_t domain,
 	dma_addr_t	lg_dma;
 	uint32_t	iop[2];
 
-	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no);)
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no));
 
 	lg = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &lg_dma);
 	if (lg == NULL) {
@@ -1466,7 +1466,7 @@ qla24xx_login_fabric(scsi_qla_host_t *ha, uint16_t loop_id, uint8_t domain,
 	rval = qla2x00_issue_iocb(ha, lg, lg_dma, 0);
 	if (rval != QLA_SUCCESS) {
 		DEBUG2_3_11(printk("%s(%ld): failed to issue Login IOCB "
-		    "(%x).\n", __func__, ha->host_no, rval);)
+		    "(%x).\n", __func__, ha->host_no, rval));
 	} else if (lg->entry_status != 0) {
 		DEBUG2_3_11(printk("%s(%ld): failed to complete IOCB "
 		    "-- error status (%x).\n", __func__, ha->host_no,
@@ -1507,7 +1507,7 @@ qla24xx_login_fabric(scsi_qla_host_t *ha, uint16_t loop_id, uint8_t domain,
 			break;
 		}
 	} else {
-		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no);)
+		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no));
 
 		iop[0] = le32_to_cpu(lg->io_parameter[0]);
 
@@ -1561,7 +1561,7 @@ qla2x00_login_fabric(scsi_qla_host_t *ha, uint16_t loop_id, uint8_t domain,
 	mbx_cmd_t mc;
 	mbx_cmd_t *mcp = &mc;
 
-	DEBUG11(printk("qla2x00_login_fabric(%ld): entered.\n", ha->host_no);)
+	DEBUG11(printk("qla2x00_login_fabric(%ld): entered.\n", ha->host_no));
 
 	mcp->mb[0] = MBC_LOGIN_FABRIC_PORT;
 	mcp->out_mb = MBX_3|MBX_2|MBX_1|MBX_0;
@@ -1606,11 +1606,11 @@ qla2x00_login_fabric(scsi_qla_host_t *ha, uint16_t loop_id, uint8_t domain,
 		/*EMPTY*/
 		DEBUG2_3_11(printk("qla2x00_login_fabric(%ld): failed=%x "
 		    "mb[0]=%x mb[1]=%x mb[2]=%x.\n", ha->host_no, rval,
-		    mcp->mb[0], mcp->mb[1], mcp->mb[2]);)
+		    mcp->mb[0], mcp->mb[1], mcp->mb[2]));
 	} else {
 		/*EMPTY*/
 		DEBUG11(printk("qla2x00_login_fabric(%ld): done.\n",
-		    ha->host_no);)
+		    ha->host_no));
 	}
 
 	return rval;
@@ -1645,7 +1645,7 @@ qla2x00_login_local_device(scsi_qla_host_t *ha, fc_port_t *fcport,
 		    fcport->d_id.b.domain, fcport->d_id.b.area,
 		    fcport->d_id.b.al_pa, mb_ret, opt);
 
-	DEBUG3(printk("%s(%ld): entered.\n", __func__, ha->host_no);)
+	DEBUG3(printk("%s(%ld): entered.\n", __func__, ha->host_no));
 
 	mcp->mb[0] = MBC_LOGIN_LOOP_PORT;
 	if (HAS_EXTENDED_IDS(ha))
@@ -1679,13 +1679,13 @@ qla2x00_login_local_device(scsi_qla_host_t *ha, fc_port_t *fcport,
 
 		DEBUG(printk("%s(%ld): failed=%x mb[0]=%x mb[1]=%x "
 		    "mb[6]=%x mb[7]=%x.\n", __func__, ha->host_no, rval,
-		    mcp->mb[0], mcp->mb[1], mcp->mb[6], mcp->mb[7]);)
+		    mcp->mb[0], mcp->mb[1], mcp->mb[6], mcp->mb[7]));
 		DEBUG2_3(printk("%s(%ld): failed=%x mb[0]=%x mb[1]=%x "
 		    "mb[6]=%x mb[7]=%x.\n", __func__, ha->host_no, rval,
-		    mcp->mb[0], mcp->mb[1], mcp->mb[6], mcp->mb[7]);)
+		    mcp->mb[0], mcp->mb[1], mcp->mb[6], mcp->mb[7]));
 	} else {
 		/*EMPTY*/
-		DEBUG3(printk("%s(%ld): done.\n", __func__, ha->host_no);)
+		DEBUG3(printk("%s(%ld): done.\n", __func__, ha->host_no));
 	}
 
 	return (rval);
@@ -1699,7 +1699,7 @@ qla24xx_fabric_logout(scsi_qla_host_t *ha, uint16_t loop_id, uint8_t domain,
 	struct logio_entry_24xx *lg;
 	dma_addr_t	lg_dma;
 
-	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no);)
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no));
 
 	lg = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &lg_dma);
 	if (lg == NULL) {
@@ -1720,7 +1720,7 @@ qla24xx_fabric_logout(scsi_qla_host_t *ha, uint16_t loop_id, uint8_t domain,
 	rval = qla2x00_issue_iocb(ha, lg, lg_dma, 0);
 	if (rval != QLA_SUCCESS) {
 		DEBUG2_3_11(printk("%s(%ld): failed to issue Logout IOCB "
-		    "(%x).\n", __func__, ha->host_no, rval);)
+		    "(%x).\n", __func__, ha->host_no, rval));
 	} else if (lg->entry_status != 0) {
 		DEBUG2_3_11(printk("%s(%ld): failed to complete IOCB "
 		    "-- error status (%x).\n", __func__, ha->host_no,
@@ -1731,10 +1731,10 @@ qla24xx_fabric_logout(scsi_qla_host_t *ha, uint16_t loop_id, uint8_t domain,
 		    "-- completion status (%x)  ioparam=%x/%x.\n", __func__,
 		    ha->host_no, le16_to_cpu(lg->comp_status),
 		    le32_to_cpu(lg->io_parameter[0]),
-		    le32_to_cpu(lg->io_parameter[1]));)
+		    le32_to_cpu(lg->io_parameter[1])));
 	} else {
 		/*EMPTY*/
-		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no);)
+		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no));
 	}
 
 	dma_pool_free(ha->s_dma_pool, lg, lg_dma);
@@ -1767,7 +1767,7 @@ qla2x00_fabric_logout(scsi_qla_host_t *ha, uint16_t loop_id, uint8_t domain,
 	mbx_cmd_t *mcp = &mc;
 
 	DEBUG11(printk("qla2x00_fabric_logout(%ld): entered.\n",
-	    ha->host_no);)
+	    ha->host_no));
 
 	mcp->mb[0] = MBC_LOGOUT_FABRIC_PORT;
 	mcp->out_mb = MBX_1|MBX_0;
@@ -1787,11 +1787,11 @@ qla2x00_fabric_logout(scsi_qla_host_t *ha, uint16_t loop_id, uint8_t domain,
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
 		DEBUG2_3_11(printk("qla2x00_fabric_logout(%ld): failed=%x "
-		    "mbx1=%x.\n", ha->host_no, rval, mcp->mb[1]);)
+		    "mbx1=%x.\n", ha->host_no, rval, mcp->mb[1]));
 	} else {
 		/*EMPTY*/
 		DEBUG11(printk("qla2x00_fabric_logout(%ld): done.\n",
-		    ha->host_no);)
+		    ha->host_no));
 	}
 
 	return rval;
@@ -1820,7 +1820,7 @@ qla2x00_full_login_lip(scsi_qla_host_t *ha)
 	mbx_cmd_t *mcp = &mc;
 
 	DEBUG11(printk("qla2x00_full_login_lip(%ld): entered.\n",
-	    ha->host_no);)
+	    ha->host_no));
 
 	mcp->mb[0] = MBC_LIP_FULL_LOGIN;
 	mcp->mb[1] = 0;
@@ -1835,11 +1835,11 @@ qla2x00_full_login_lip(scsi_qla_host_t *ha)
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
 		DEBUG2_3_11(printk("qla2x00_full_login_lip(%ld): failed=%x.\n",
-		    ha->host_no, rval);)
+		    ha->host_no, rval));
 	} else {
 		/*EMPTY*/
 		DEBUG11(printk("qla2x00_full_login_lip(%ld): done.\n",
-		    ha->host_no);)
+		    ha->host_no));
 	}
 
 	return rval;
@@ -1866,7 +1866,7 @@ qla2x00_get_id_list(scsi_qla_host_t *ha, void *id_list, dma_addr_t id_list_dma,
 	mbx_cmd_t *mcp = &mc;
 
 	DEBUG11(printk("qla2x00_get_id_list(%ld): entered.\n",
-	    ha->host_no);)
+	    ha->host_no));
 
 	if (id_list == NULL)
 		return QLA_FUNCTION_FAILED;
@@ -1895,11 +1895,11 @@ qla2x00_get_id_list(scsi_qla_host_t *ha, void *id_list, dma_addr_t id_list_dma,
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
 		DEBUG2_3_11(printk("qla2x00_get_id_list(%ld): failed=%x.\n",
-		    ha->host_no, rval);)
+		    ha->host_no, rval));
 	} else {
 		*entries = mcp->mb[1];
 		DEBUG11(printk("qla2x00_get_id_list(%ld): done.\n",
-		    ha->host_no);)
+		    ha->host_no));
 	}
 
 	return rval;
@@ -1938,7 +1938,7 @@ qla2x00_get_resource_cnts(scsi_qla_host_t *ha, uint16_t *cur_xchg_cnt,
 	if (rval != QLA_SUCCESS) {
 		/*EMPTY*/
 		DEBUG2_3_11(printk("%s(%ld): failed = %x.\n", __func__,
-		    ha->host_no, mcp->mb[0]);)
+		    ha->host_no, mcp->mb[0]));
 	} else {
 		DEBUG11(printk("%s(%ld): done. mb1=%x mb2=%x mb3=%x mb6=%x "
 		    "mb7=%x mb10=%x.\n", __func__, ha->host_no,
@@ -2047,7 +2047,7 @@ qla2x00_get_link_status(scsi_qla_host_t *ha, uint16_t loop_id,
 	link_stat_t *stat_buf;
 	dma_addr_t stat_buf_dma;
 
-	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no);)
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no));
 
 	stat_buf = dma_pool_alloc(ha->s_dma_pool, GFP_ATOMIC, &stat_buf_dma);
 	if (stat_buf == NULL) {
@@ -2085,7 +2085,7 @@ qla2x00_get_link_status(scsi_qla_host_t *ha, uint16_t loop_id,
 	if (rval == QLA_SUCCESS) {
 		if (mcp->mb[0] != MBS_COMMAND_COMPLETE) {
 			DEBUG2_3_11(printk("%s(%ld): cmd failed. mbx0=%x.\n",
-			    __func__, ha->host_no, mcp->mb[0]);)
+			    __func__, ha->host_no, mcp->mb[0]));
 			status[0] = mcp->mb[0];
 			rval = BIT_1;
 		} else {
@@ -2110,12 +2110,12 @@ qla2x00_get_link_status(scsi_qla_host_t *ha, uint16_t loop_id,
 			    stat_buf->loss_sync_cnt, stat_buf->loss_sig_cnt,
 			    stat_buf->prim_seq_err_cnt,
 			    stat_buf->inval_xmit_word_cnt,
-			    stat_buf->inval_crc_cnt);)
+			    stat_buf->inval_crc_cnt));
 		}
 	} else {
 		/* Failed. */
 		DEBUG2_3_11(printk("%s(%ld): failed=%x.\n", __func__,
-		    ha->host_no, rval);)
+		    ha->host_no, rval));
 		rval = BIT_1;
 	}
 
@@ -2134,7 +2134,7 @@ qla24xx_get_isp_stats(scsi_qla_host_t *ha, uint32_t *dwbuf, uint32_t dwords,
 	uint32_t *sbuf, *siter;
 	dma_addr_t sbuf_dma;
 
-	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no);)
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no));
 
 	if (dwords > (DMA_POOL_SIZE / 4)) {
 		DEBUG2_3_11(printk("%s(%ld): Unabled to retrieve %d DWORDs "
@@ -2198,7 +2198,7 @@ qla24xx_abort_command(scsi_qla_host_t *ha, srb_t *sp)
 	dma_addr_t	abt_dma;
 	uint32_t	handle;
 
-	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no);)
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, ha->host_no));
 
 	fcport = sp->fcport;
 
@@ -2231,7 +2231,7 @@ qla24xx_abort_command(scsi_qla_host_t *ha, srb_t *sp)
 	rval = qla2x00_issue_iocb(ha, abt, abt_dma, 0);
 	if (rval != QLA_SUCCESS) {
 		DEBUG2_3_11(printk("%s(%ld): failed to issue IOCB (%x).\n",
-		    __func__, ha->host_no, rval);)
+		    __func__, ha->host_no, rval));
 	} else if (abt->entry_status != 0) {
 		DEBUG2_3_11(printk("%s(%ld): failed to complete IOCB "
 		    "-- error status (%x).\n", __func__, ha->host_no,
@@ -2240,10 +2240,10 @@ qla24xx_abort_command(scsi_qla_host_t *ha, srb_t *sp)
 	} else if (abt->nport_handle != __constant_cpu_to_le16(0)) {
 		DEBUG2_3_11(printk("%s(%ld): failed to complete IOCB "
 		    "-- completion status (%x).\n", __func__, ha->host_no,
-		    le16_to_cpu(abt->nport_handle));)
+		    le16_to_cpu(abt->nport_handle)));
 		rval = QLA_FUNCTION_FAILED;
 	} else {
-		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no);)
+		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no));
 		sp->flags |= SRB_ABORT_PENDING;
 	}
 
@@ -2270,7 +2270,7 @@ qla24xx_abort_target(fc_port_t *fcport)
 	if (fcport == NULL)
 		return 0;
 
-	DEBUG11(printk("%s(%ld): entered.\n", __func__, fcport->ha->host_no);)
+	DEBUG11(printk("%s(%ld): entered.\n", __func__, fcport->ha->host_no));
 
 	ha = fcport->ha;
 	tsk = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &tsk_dma);
@@ -2292,7 +2292,7 @@ qla24xx_abort_target(fc_port_t *fcport)
 	rval = qla2x00_issue_iocb(ha, tsk, tsk_dma, 0);
 	if (rval != QLA_SUCCESS) {
 		DEBUG2_3_11(printk("%s(%ld): failed to issue Target Reset IOCB "
-		    "(%x).\n", __func__, ha->host_no, rval);)
+		    "(%x).\n", __func__, ha->host_no, rval));
 		goto atarget_done;
 	} else if (tsk->p.sts.entry_status != 0) {
 		DEBUG2_3_11(printk("%s(%ld): failed to complete IOCB "
@@ -2304,7 +2304,7 @@ qla24xx_abort_target(fc_port_t *fcport)
 	    __constant_cpu_to_le16(CS_COMPLETE)) {
 		DEBUG2_3_11(printk("%s(%ld): failed to complete IOCB "
 		    "-- completion status (%x).\n", __func__,
-		    ha->host_no, le16_to_cpu(tsk->p.sts.comp_status));)
+		    ha->host_no, le16_to_cpu(tsk->p.sts.comp_status)));
 		rval = QLA_FUNCTION_FAILED;
 		goto atarget_done;
 	}
@@ -2313,9 +2313,9 @@ qla24xx_abort_target(fc_port_t *fcport)
 	rval = qla2x00_marker(ha, fcport->loop_id, 0, MK_SYNC_ID);
 	if (rval != QLA_SUCCESS) {
 		DEBUG2_3_11(printk("%s(%ld): failed to issue Marker IOCB "
-		    "(%x).\n", __func__, ha->host_no, rval);)
+		    "(%x).\n", __func__, ha->host_no, rval));
 	} else {
-		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no);)
+		DEBUG11(printk("%s(%ld): done.\n", __func__, ha->host_no));
 	}
 
 atarget_done:
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 0c6e334..fedf2f1 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -631,7 +631,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
 
 		DEBUG2(printk("%s(%ld): aborting sp %p from RISC. pid=%ld.\n",
 		    __func__, ha->host_no, sp, serial));
-		DEBUG3(qla2x00_print_scsi_cmd(cmd);)
+		DEBUG3(qla2x00_print_scsi_cmd(cmd));
 
 		spin_unlock_irqrestore(&ha->hardware_lock, flags);
 		if (ha->isp_ops.abort_command(ha, sp)) {
@@ -773,7 +773,7 @@ qla2xxx_eh_device_reset(struct scsi_cmnd *cmd)
 #endif
 	} else {
 		DEBUG2(printk(KERN_INFO
-		    "%s failed: loop not ready\n",__func__);)
+		    "%s failed: loop not ready\n",__func__));
 	}
 
 	if (ret == FAILED) {
@@ -1028,12 +1028,12 @@ qla2x00_loop_reset(scsi_qla_host_t *ha)
 		/* Empty */
 		DEBUG2_3(printk("%s(%ld): **** FAILED ****\n",
 				__func__,
-				ha->host_no);)
+				ha->host_no));
 	} else {
 		/* Empty */
 		DEBUG3(printk("%s(%ld): exiting normally.\n",
 				__func__,
-				ha->host_no);)
+				ha->host_no));
 	}
 
 	return(status);
-- 
cgit v0.10.2


From 0181944fe647cae18d545ac1167df3d15d393701 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Fri, 23 Jun 2006 16:11:10 -0700
Subject: [SCSI] qla2xxx: Add support for extended error logging.

Similar in form to QLogic's standard offering -- via
the 'extended_error_logging' module parameter.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h
index 2cea009..5334253 100644
--- a/drivers/scsi/qla2xxx/qla_dbg.h
+++ b/drivers/scsi/qla2xxx/qla_dbg.h
@@ -38,11 +38,7 @@
 * Macros use for debugging the driver.
 */
 
-#if  DEBUG_QLA2100
-#define DEBUG(x)	do {x;} while (0)
-#else
-#define DEBUG(x)	do {} while (0)
-#endif
+#define DEBUG(x)	do { if (extended_error_logging) { x; } } while (0)
 
 #if defined(QL_DEBUG_LEVEL_1)
 #define DEBUG1(x)	do {x;} while (0)
@@ -50,27 +46,18 @@
 #define DEBUG1(x)	do {} while (0)
 #endif
 
-#if defined(QL_DEBUG_LEVEL_2)
-#define DEBUG2(x)       do {x;} while (0)
-#define DEBUG2_3(x)     do {x;} while (0)
-#define DEBUG2_3_11(x)  do {x;} while (0)
-#define DEBUG2_9_10(x)    do {x;} while (0)
-#define DEBUG2_11(x)    do {x;} while (0)
-#define DEBUG2_13(x)    do {x;} while (0)
-#else
-#define DEBUG2(x)	do {} while (0)
-#endif
+#define DEBUG2(x)	do { if (extended_error_logging) { x; } } while (0)
+#define DEBUG2_3(x)	do { if (extended_error_logging) { x; } } while (0)
+#define DEBUG2_3_11(x)	do { if (extended_error_logging) { x; } } while (0)
+#define DEBUG2_9_10(x)	do { if (extended_error_logging) { x; } } while (0)
+#define DEBUG2_11(x)	do { if (extended_error_logging) { x; } } while (0)
+#define DEBUG2_13(x)	do { if (extended_error_logging) { x; } } while (0)
 
 #if defined(QL_DEBUG_LEVEL_3)
 #define DEBUG3(x)	do {x;} while (0)
-#define DEBUG2_3(x)	do {x;} while (0)
-#define DEBUG2_3_11(x)	do {x;} while (0)
 #define DEBUG3_11(x)	do {x;} while (0)
 #else
 #define DEBUG3(x)	do {} while (0)
-  #if !defined(QL_DEBUG_LEVEL_2)
-  #define DEBUG2_3(x)	do {} while (0)
-  #endif
 #endif
 
 #if defined(QL_DEBUG_LEVEL_4)
@@ -94,20 +81,15 @@
 #if defined(QL_DEBUG_LEVEL_9)
 #define DEBUG9(x)       do {x;} while (0)
 #define DEBUG9_10(x)    do {x;} while (0)
-#define DEBUG2_9_10(x)	do {x;} while (0)
 #else
 #define DEBUG9(x)	do {} while (0)
 #endif
 
 #if defined(QL_DEBUG_LEVEL_10)
 #define DEBUG10(x)      do {x;} while (0)
-#define DEBUG2_9_10(x)	do {x;} while (0)
 #define DEBUG9_10(x)	do {x;} while (0)
 #else
 #define DEBUG10(x)	do {} while (0)
-  #if !defined(DEBUG2_9_10)
-  #define DEBUG2_9_10(x)	do {} while (0)
-  #endif
   #if !defined(DEBUG9_10)
   #define DEBUG9_10(x)	do {} while (0)
   #endif
@@ -115,23 +97,11 @@
 
 #if defined(QL_DEBUG_LEVEL_11)
 #define DEBUG11(x)      do{x;} while(0)
-#if !defined(DEBUG2_11)
-#define DEBUG2_11(x)    do{x;} while(0)
-#endif
-#if !defined(DEBUG2_3_11)
-#define DEBUG2_3_11(x)  do{x;} while(0)
-#endif
 #if !defined(DEBUG3_11)
 #define DEBUG3_11(x)    do{x;} while(0)
 #endif
 #else
 #define DEBUG11(x)	do{} while(0)
-  #if !defined(QL_DEBUG_LEVEL_2)
-  #define DEBUG2_11(x)	do{} while(0)
-    #if !defined(QL_DEBUG_LEVEL_3)
-    #define DEBUG2_3_11(x) do{} while(0)
-    #endif
-  #endif
   #if !defined(QL_DEBUG_LEVEL_3)
   #define DEBUG3_11(x)	do{} while(0)
   #endif
@@ -145,14 +115,8 @@
 
 #if defined(QL_DEBUG_LEVEL_13)
 #define DEBUG13(x)      do {x;} while (0)
-#if !defined(DEBUG2_13)
-#define DEBUG2_13(x)    do {x;} while(0)
-#endif
 #else
 #define DEBUG13(x)	do {} while (0)
-#if !defined(QL_DEBUG_LEVEL_2)
-#define DEBUG2_13(x)	do {} while(0)
-#endif
 #endif
 
 #if defined(QL_DEBUG_LEVEL_14)
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 9ff3298..750aef1 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -64,6 +64,7 @@ extern int ql2xplogiabsentdevice;
 extern int ql2xloginretrycount;
 extern int ql2xfdmienable;
 extern int ql2xallocfwdump;
+extern int extended_error_logging;
 
 extern void qla2x00_sp_compl(scsi_qla_host_t *, srb_t *);
 
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 101cf12..761dd1f 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -1642,6 +1642,8 @@ qla2x00_nvram_config(scsi_qla_host_t *ha)
 	/*
 	 * Set host adapter parameters.
 	 */
+	if (nv->host_p[0] & BIT_7)
+		extended_error_logging = 1;
 	ha->flags.disable_risc_code_load = ((nv->host_p[0] & BIT_4) ? 1 : 0);
 	/* Always load RISC code on non ISP2[12]00 chips. */
 	if (!IS_QLA2100(ha) && !IS_QLA2200(ha))
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index fedf2f1..a15b609 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -61,6 +61,12 @@ MODULE_PARM_DESC(ql2xallocfwdump,
 		"during HBA initialization.  Memory allocation requirements "
 		"vary by ISP type.  Default is 1 - allocate memory.");
 
+int extended_error_logging;
+module_param(extended_error_logging, int, S_IRUGO|S_IRUSR);
+MODULE_PARM_DESC(extended_error_logging,
+		"Option to enable extended error logging, "
+		"Default is 0 - no logging. 1 - log errors.");
+
 static void qla2x00_free_device(scsi_qla_host_t *);
 
 static void qla2x00_config_dma_addressing(scsi_qla_host_t *ha);
@@ -2691,9 +2697,9 @@ qla2x00_module_init(void)
 
 	/* Derive version string. */
 	strcpy(qla2x00_version_str, QLA2XXX_VERSION);
-#if DEBUG_QLA2100
-	strcat(qla2x00_version_str, "-debug");
-#endif
+	if (extended_error_logging)
+		strcat(qla2x00_version_str, "-debug");
+
 	qla2xxx_transport_template =
 	    fc_attach_transport(&qla2xxx_transport_functions);
 	if (!qla2xxx_transport_template)
-- 
cgit v0.10.2


From 7d7abc77cc469ff41dde8eea606e1dab88d4ba86 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Fri, 23 Jun 2006 16:11:17 -0700
Subject: [SCSI] qla2xxx: Correct 'loop-down' determination logic in
 qla2x00_fw_ready().

As there is no point in failing the initialization process when
firmware informs the host software that it could not transition
beyond a CONFIG_WAIT nor WAIT_FOR_LOGIN state.  Previous logic
would mark such conditions as a general *failure* and subsequently
tear-down the scsi-host during initialization.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 761dd1f..c7ab668 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -1273,8 +1273,7 @@ qla2x00_fw_ready(scsi_qla_host_t *ha)
 			rval = QLA_FUNCTION_FAILED;
 
 			if (atomic_read(&ha->loop_down_timer) &&
-			    (fw_state >= FSTATE_LOSS_OF_SYNC ||
-				fw_state == FSTATE_WAIT_AL_PA)) {
+			    fw_state != FSTATE_READY) {
 				/* Loop down. Timeout on min_wait for states
 				 * other than Wait for Login.
 				 */
-- 
cgit v0.10.2


From 7ee613970947bdf5f722bb01547c97f4fc80fe9f Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Fri, 23 Jun 2006 16:11:22 -0700
Subject: [SCSI] qla2xxx: Convert from pci_module_init() to
 pci_register_driver().

Also remove qla2xxx_probe_one/qla2xxx_remove_one stubs previously
used with external firmware module loaders.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index a15b609..3516e36 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1337,7 +1337,8 @@ qla24xx_disable_intrs(scsi_qla_host_t *ha)
 /*
  * PCI driver interface
  */
-static int qla2x00_probe_one(struct pci_dev *pdev)
+static int __devinit
+qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int	ret = -ENODEV;
 	device_reg_t __iomem *reg;
@@ -1650,7 +1651,8 @@ probe_out:
 	return ret;
 }
 
-static void qla2x00_remove_one(struct pci_dev *pdev)
+static void __devexit
+qla2x00_remove_one(struct pci_dev *pdev)
 {
 	scsi_qla_host_t *ha;
 
@@ -2644,40 +2646,16 @@ static struct pci_device_id qla2xxx_pci_tbl[] = {
 };
 MODULE_DEVICE_TABLE(pci, qla2xxx_pci_tbl);
 
-static int __devinit
-qla2xxx_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
-{
-	return qla2x00_probe_one(pdev);
-}
-
-static void __devexit
-qla2xxx_remove_one(struct pci_dev *pdev)
-{
-	qla2x00_remove_one(pdev);
-}
-
 static struct pci_driver qla2xxx_pci_driver = {
 	.name		= QLA2XXX_DRIVER_NAME,
 	.driver		= {
 		.owner		= THIS_MODULE,
 	},
 	.id_table	= qla2xxx_pci_tbl,
-	.probe		= qla2xxx_probe_one,
-	.remove		= __devexit_p(qla2xxx_remove_one),
+	.probe		= qla2x00_probe_one,
+	.remove		= __devexit_p(qla2x00_remove_one),
 };
 
-static inline int
-qla2x00_pci_module_init(void)
-{
-	return pci_module_init(&qla2xxx_pci_driver);
-}
-
-static inline void
-qla2x00_pci_module_exit(void)
-{
-	pci_unregister_driver(&qla2xxx_pci_driver);
-}
-
 /**
  * qla2x00_module_init - Module initialization.
  **/
@@ -2706,7 +2684,7 @@ qla2x00_module_init(void)
 		return -ENODEV;
 
 	printk(KERN_INFO "QLogic Fibre Channel HBA Driver\n");
-	ret = qla2x00_pci_module_init();
+	ret = pci_register_driver(&qla2xxx_pci_driver);
 	if (ret) {
 		kmem_cache_destroy(srb_cachep);
 		fc_release_transport(qla2xxx_transport_template);
@@ -2720,7 +2698,7 @@ qla2x00_module_init(void)
 static void __exit
 qla2x00_module_exit(void)
 {
-	qla2x00_pci_module_exit();
+	pci_unregister_driver(&qla2xxx_pci_driver);
 	qla2x00_release_firmware();
 	kmem_cache_destroy(srb_cachep);
 	fc_release_transport(qla2xxx_transport_template);
-- 
cgit v0.10.2


From 3ea66e28c20e3b23749c9001c58b37ad44263442 Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Fri, 23 Jun 2006 16:11:27 -0700
Subject: [SCSI] qla2xxx: Correctly set the firmware NOS/OLS timeout during
 initialization.

Original code incorrectly assigned it to the driver's
link-down-timeout value (a value in seconds).

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
index af023f5..a0a722c 100644
--- a/drivers/scsi/qla2xxx/qla_fw.h
+++ b/drivers/scsi/qla2xxx/qla_fw.h
@@ -278,7 +278,7 @@ struct init_cb_24xx {
 	uint16_t response_q_length;
 	uint16_t request_q_length;
 
-	uint16_t link_down_timeout;		/* Milliseconds. */
+	uint16_t link_down_on_nos;		/* Milliseconds. */
 
 	uint16_t prio_request_q_length;
 
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index c7ab668..ce32322 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -3379,7 +3379,6 @@ qla24xx_nvram_config(scsi_qla_host_t *ha)
 		nv->node_name[6] = 0x55;
 		nv->node_name[7] = 0x86;
 		nv->login_retry_count = __constant_cpu_to_le16(8);
-		nv->link_down_timeout = __constant_cpu_to_le16(200);
 		nv->interrupt_delay_timer = __constant_cpu_to_le16(0);
 		nv->login_timeout = __constant_cpu_to_le16(0);
 		nv->firmware_options_1 =
@@ -3408,7 +3407,7 @@ qla24xx_nvram_config(scsi_qla_host_t *ha)
 		*dptr1++ = *dptr2++;
 
 	icb->login_retry_count = nv->login_retry_count;
-	icb->link_down_timeout = nv->link_down_timeout;
+	icb->link_down_on_nos = nv->link_down_on_nos;
 
 	/* Copy 2nd segment. */
 	dptr1 = (uint8_t *)&icb->interrupt_delay_timer;
-- 
cgit v0.10.2


From 76c1534e0bd78e9a7662edcf5c994bac63d939fd Mon Sep 17 00:00:00 2001
From: Andrew Vasquez <andrew.vasquez@qlogic.com>
Date: Fri, 23 Jun 2006 16:11:32 -0700
Subject: [SCSI] qla2xxx: Update version number to 8.01.05-k3.

Signed-off-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h
index 6b31552..d2d6834 100644
--- a/drivers/scsi/qla2xxx/qla_version.h
+++ b/drivers/scsi/qla2xxx/qla_version.h
@@ -7,7 +7,7 @@
 /*
  * Driver version
  */
-#define QLA2XXX_VERSION      "8.01.05-k2"
+#define QLA2XXX_VERSION      "8.01.05-k3"
 
 #define QLA_DRIVER_MAJOR_VER	8
 #define QLA_DRIVER_MINOR_VER	1
-- 
cgit v0.10.2


From 041976fb6ae851cd18d96584a5d6361b564c9974 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sun, 25 Jun 2006 01:58:51 -0700
Subject: [SCSI] lpfc: sparse NULL warnings

From: Randy Dunlap <rdunlap@xenotime.net>

Fix sparse warnings: use NULL instead of 0 for pointers:
drivers/scsi/lpfc/lpfc_els.c:827:56: warning: Using plain integer as NULL pointer
drivers/scsi/lpfc/lpfc_els.c:2781:18: warning: Using plain integer as NULL pointer
drivers/scsi/lpfc/lpfc_els.c:2782:18: warning: Using plain integer as NULL pointer
drivers/scsi/lpfc/lpfc_init.c:951:21: warning: Using plain integer as NULL pointer
drivers/scsi/lpfc/lpfc_init.c:956:20: warning: Using plain integer as NULL pointer

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Acked-by: James Smart <James.Smart@Emulex.Com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 283b7d8..4126fd8 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -821,7 +821,7 @@ lpfc_issue_els_plogi(struct lpfc_hba * phba, uint32_t did, uint8_t retry)
 	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
 
 	cmdsize = (sizeof (uint32_t) + sizeof (struct serv_parm));
-	elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, 0, did,
+	elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, NULL, did,
 								ELS_CMD_PLOGI);
 	if (!elsiocb)
 		return 1;
@@ -2791,8 +2791,8 @@ lpfc_els_rsp_rps_acc(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
 
 	ndlp = (struct lpfc_nodelist *) pmb->context2;
 	xri = (uint16_t) ((unsigned long)(pmb->context1));
-	pmb->context1 = 0;
-	pmb->context2 = 0;
+	pmb->context1 = NULL;
+	pmb->context2 = NULL;
 
 	if (mb->mbxStatus) {
 		mempool_free( pmb, phba->mbox_mem_pool);
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 908d0f2..c8db857 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -939,12 +939,12 @@ lpfc_get_hba_model_desc(struct lpfc_hba * phba, uint8_t * mdp, uint8_t * descp)
 					"10-port ", "PCIe"};
 			break;
 		default:
-			m = (typeof(m)){ 0 };
+			m = (typeof(m)){ NULL };
 			break;
 		}
 		break;
 	default:
-		m = (typeof(m)){ 0 };
+		m = (typeof(m)){ NULL };
 		break;
 	}
 
-- 
cgit v0.10.2


From 3e7196cf6070821ff8246b15dfd219ffa6409062 Mon Sep 17 00:00:00 2001
From: GOTO Masanori <gotom@sanori.org>
Date: Sun, 25 Jun 2006 01:58:56 -0700
Subject: [SCSI] Add scsi_add_host() failure handling for nsp32

Add scsi_add_host() failure handling for nsp32
and silence warning.
  drivers/scsi/nsp32.c:2888: warning: ignoring return value of 'Scsi_add_host', declared with attribute warn_unused_result

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Signed-off-by: GOTO Masanori <gotom@sanori.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/nsp32.c b/drivers/scsi/nsp32.c
index 5c55e15..159fd5d 100644
--- a/drivers/scsi/nsp32.c
+++ b/drivers/scsi/nsp32.c
@@ -2886,12 +2886,19 @@ static int nsp32_detect(struct scsi_host_template *sht)
         }
 
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
-	scsi_add_host (host, &PCIDEV->dev);
+	ret = scsi_add_host(host, &PCIDEV->dev);
+	if (ret) {
+		nsp32_msg(KERN_ERR, "failed to add scsi host");
+		goto free_region;
+	}
 	scsi_scan_host(host);
 #endif
 	pci_set_drvdata(PCIDEV, host);
 	return DETECT_OK;
 
+ free_region:
+	release_region(host->io_port, host->n_io_port);
+
  free_irq:
 	free_irq(host->irq, data);
 
-- 
cgit v0.10.2


From 8d55a786febd077f3a0db9f0672dfa1288b452af Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Sun, 25 Jun 2006 01:58:58 -0700
Subject: [SCSI] Bogus disk geometry on large disks

We currently stuff a truncated size into the geometry logic and return the
result which can produce bizarre reports for a 4Tb array.  Since that
mapping logic isn't useful for disks that big don't try and map this way at
all.

Signed-off-by: Alan Cox <alan@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c
index b78354f..cd68a66 100644
--- a/drivers/scsi/scsicam.c
+++ b/drivers/scsi/scsicam.c
@@ -57,6 +57,7 @@ EXPORT_SYMBOL(scsi_bios_ptable);
 int scsicam_bios_param(struct block_device *bdev, sector_t capacity, int *ip)
 {
 	unsigned char *p;
+	u64 capacity64 = capacity;	/* Suppress gcc warning */
 	int ret;
 
 	p = scsi_bios_ptable(bdev);
@@ -68,7 +69,7 @@ int scsicam_bios_param(struct block_device *bdev, sector_t capacity, int *ip)
 			       (unsigned int *)ip + 0, (unsigned int *)ip + 1);
 	kfree(p);
 
-	if (ret == -1) {
+	if (ret == -1 && capacity64 < (1ULL << 32)) {
 		/* pick some standard mapping with at most 1024 cylinders,
 		   and at most 62 sectors per track - this works up to
 		   7905 MB */
-- 
cgit v0.10.2


From 99d19bb75b88cc997d3cd611903908714c735981 Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Sun, 25 Jun 2006 01:58:54 -0700
Subject: [SCSI] random: remove redundant SA_SAMPLE_RANDOM from NinjaSCSI

The scsi layer is already calling add_disk_randomness in scsi_end_request.

Signed-off-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/nsp32.c b/drivers/scsi/nsp32.c
index 159fd5d..4190788 100644
--- a/drivers/scsi/nsp32.c
+++ b/drivers/scsi/nsp32.c
@@ -2866,8 +2866,7 @@ static int nsp32_detect(struct scsi_host_template *sht)
 	 */
 	nsp32_do_bus_reset(data);
 
-	ret = request_irq(host->irq, do_nsp32_isr,
-			  SA_SHIRQ | SA_SAMPLE_RANDOM, "nsp32", data);
+	ret = request_irq(host->irq, do_nsp32_isr, SA_SHIRQ, "nsp32", data);
 	if (ret < 0) {
 		nsp32_msg(KERN_ERR, "Unable to allocate IRQ for NinjaSCSI32 "
 			  "SCSI PCI controller. Interrupt: %d", host->irq);
diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c
index 231f9c3..bb81218 100644
--- a/drivers/scsi/pcmcia/nsp_cs.c
+++ b/drivers/scsi/pcmcia/nsp_cs.c
@@ -1623,7 +1623,7 @@ static int nsp_cs_probe(struct pcmcia_device *link)
 	/* Interrupt handler */
 	link->irq.Handler	 = &nspintr;
 	link->irq.Instance       = info;
-	link->irq.Attributes     |= (SA_SHIRQ | SA_SAMPLE_RANDOM);
+	link->irq.Attributes     |= SA_SHIRQ;
 
 	/* General socket configuration */
 	link->conf.Attributes	 = CONF_ENABLE_IRQ;
-- 
cgit v0.10.2


From 12e9b5fb96028aab26af65bb06d2dee56c428a6e Mon Sep 17 00:00:00 2001
From: "Salyzyn, Mark" <mark_salyzyn@adaptec.com>
Date: Mon, 26 Jun 2006 08:37:39 -0400
Subject: [SCSI] aacraid: remove x86_64 IOMMU dependent code

This may seem like a DILLIGAF, but after chatting with the F/W folks,
there is no harm in dropping the page calculation as denoted in the
enclosed patch for these older adapters in this new age of 4GB+ memory
sticks. Any resource optimization within the old-old-old adapters for
systems with less than 4G of memory is of little consequence. The
existing AAC_QUIRK_31BIT flag in linit.c should look after the rest of
the legacy hardware DMA limitations.

Signed-off-by: Mark Salyzyn <aacraid@adaptec.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c
index 35b0a6e..1cd3584 100644
--- a/drivers/scsi/aacraid/comminit.c
+++ b/drivers/scsi/aacraid/comminit.c
@@ -92,28 +92,7 @@ static int aac_alloc_comm(struct aac_dev *dev, void **commaddr, unsigned long co
 	init->AdapterFibsPhysicalAddress = cpu_to_le32((u32)phys);
 	init->AdapterFibsSize = cpu_to_le32(fibsize);
 	init->AdapterFibAlign = cpu_to_le32(sizeof(struct hw_fib));
-	/* 
-	 * number of 4k pages of host physical memory. The aacraid fw needs
-	 * this number to be less than 4gb worth of pages. num_physpages is in
-	 * system page units. New firmware doesn't have any issues with the
-	 * mapping system, but older Firmware did, and had *troubles* dealing
-	 * with the math overloading past 32 bits, thus we must limit this
-	 * field.
-	 *
-	 * This assumes the memory is mapped zero->n, which isnt
-	 * always true on real computers. It also has some slight problems
-	 * with the GART on x86-64. I've btw never tried DMA from PCI space
-	 * on this platform but don't be surprised if its problematic.
-	 */
-#ifndef CONFIG_GART_IOMMU
-	if ((num_physpages << (PAGE_SHIFT - 12)) <= AAC_MAX_HOSTPHYSMEMPAGES) {
-		init->HostPhysMemPages = 
-			cpu_to_le32(num_physpages << (PAGE_SHIFT-12));
-	} else 
-#endif	
-	{
-		init->HostPhysMemPages = cpu_to_le32(AAC_MAX_HOSTPHYSMEMPAGES);
-	}
+	init->HostPhysMemPages = cpu_to_le32(AAC_MAX_HOSTPHYSMEMPAGES);
 
 	init->InitFlags = 0;
 	if (dev->new_comm_interface) {
-- 
cgit v0.10.2


From d7a1bb0a04ca835bffc0a91e64ab827dfba7d8f5 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Wed, 8 Mar 2006 14:50:12 -0500
Subject: [SCSI] Block I/O while SG reset operation in progress - the midlayer
 patch

The scsi midlayer portion of the patch

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 6a7a60f..6683d59 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1672,7 +1672,9 @@ int
 scsi_reset_provider(struct scsi_device *dev, int flag)
 {
 	struct scsi_cmnd *scmd = scsi_get_command(dev, GFP_KERNEL);
+	struct Scsi_Host *shost = dev->host;
 	struct request req;
+	unsigned long flags;
 	int rtn;
 
 	scmd->request = &req;
@@ -1699,6 +1701,10 @@ scsi_reset_provider(struct scsi_device *dev, int flag)
 	 */
 	scmd->pid			= 0;
 
+	spin_lock_irqsave(shost->host_lock, flags);
+	shost->tmf_in_progress = 1;
+	spin_unlock_irqrestore(shost->host_lock, flags);
+
 	switch (flag) {
 	case SCSI_TRY_RESET_DEVICE:
 		rtn = scsi_try_bus_device_reset(scmd);
@@ -1717,6 +1723,22 @@ scsi_reset_provider(struct scsi_device *dev, int flag)
 		rtn = FAILED;
 	}
 
+	spin_lock_irqsave(shost->host_lock, flags);
+	shost->tmf_in_progress = 0;
+	spin_unlock_irqrestore(shost->host_lock, flags);
+
+	/*
+	 * be sure to wake up anyone who was sleeping or had their queue
+	 * suspended while we performed the TMF.
+	 */
+	SCSI_LOG_ERROR_RECOVERY(3,
+		printk("%s: waking up host to restart after TMF\n",
+		__FUNCTION__));
+
+	wake_up(&shost->host_wait);
+
+	scsi_run_host_queues(shost);
+
 	scsi_next_command(scmd);
 	return rtn;
 }
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index a42efd6..b3dd90f 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -542,6 +542,9 @@ struct Scsi_Host {
 	 */
 	unsigned ordered_tag:1;
 
+	/* task mgmt function in progress */
+	unsigned tmf_in_progress:1;
+
 	/*
 	 * Optional work queue to be utilized by the transport
 	 */
@@ -619,7 +622,8 @@ static inline int scsi_host_in_recovery(struct Scsi_Host *shost)
 {
 	return shost->shost_state == SHOST_RECOVERY ||
 		shost->shost_state == SHOST_CANCEL_RECOVERY ||
-		shost->shost_state == SHOST_DEL_RECOVERY;
+		shost->shost_state == SHOST_DEL_RECOVERY ||
+		shost->tmf_in_progress;
 }
 
 extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *);
-- 
cgit v0.10.2


From 79ac6745e4d95cd583bca744d313a323deb4adc2 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Wed, 8 Mar 2006 14:50:18 -0500
Subject: [SCSI] Block I/O while SG reset operation in progress - lpfc portion

This removes the duplicate functionality which had been added to
the lpfc driver.

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 087c445..f81691f 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -174,7 +174,6 @@ struct lpfc_hba {
 	dma_addr_t slim2p_mapping;
 	uint16_t pci_cfg_value;
 
-	struct semaphore hba_can_block;
 	int32_t hba_state;
 
 #define LPFC_STATE_UNKNOWN        0    /* HBA state is unknown */
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index c8db857..f68ad76 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1451,7 +1451,6 @@ lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid)
 		goto out_put_host;
 
 	host->unique_id = phba->brd_no;
-	init_MUTEX(&phba->hba_can_block);
 	INIT_LIST_HEAD(&phba->ctrspbuflist);
 	INIT_LIST_HEAD(&phba->rnidrspbuflist);
 	INIT_LIST_HEAD(&phba->freebufList);
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 7dc4c2e..aea1ee4 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -41,20 +41,6 @@
 #define LPFC_ABORT_WAIT  2
 
 
-static inline void
-lpfc_block_requests(struct lpfc_hba * phba)
-{
-	down(&phba->hba_can_block);
-	scsi_block_requests(phba->host);
-}
-
-static inline void
-lpfc_unblock_requests(struct lpfc_hba * phba)
-{
-	scsi_unblock_requests(phba->host);
-	up(&phba->hba_can_block);
-}
-
 /*
  * This routine allocates a scsi buffer, which contains all the necessary
  * information needed to initiate a SCSI I/O.  The non-DMAable buffer region
@@ -859,7 +845,6 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 	unsigned int loop_count = 0;
 	int ret = SUCCESS;
 
-	lpfc_block_requests(phba);
 	spin_lock_irq(shost->host_lock);
 
 	lpfc_cmd = (struct lpfc_scsi_buf *)cmnd->host_scribble;
@@ -945,7 +930,6 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
 			cmnd->device->lun, cmnd->serial_number);
 
 	spin_unlock_irq(shost->host_lock);
-	lpfc_unblock_requests(phba);
 
 	return ret;
 }
@@ -963,7 +947,6 @@ lpfc_reset_lun_handler(struct scsi_cmnd *cmnd)
 	int ret = FAILED;
 	int cnt, loopcnt;
 
-	lpfc_block_requests(phba);
 	spin_lock_irq(shost->host_lock);
 	/*
 	 * If target is not in a MAPPED state, delay the reset until
@@ -1065,7 +1048,6 @@ out_free_scsi_buf:
 
 out:
 	spin_unlock_irq(shost->host_lock);
-	lpfc_unblock_requests(phba);
 	return ret;
 }
 
@@ -1080,7 +1062,6 @@ lpfc_reset_bus_handler(struct scsi_cmnd *cmnd)
 	int cnt, loopcnt;
 	struct lpfc_scsi_buf * lpfc_cmd;
 
-	lpfc_block_requests(phba);
 	spin_lock_irq(shost->host_lock);
 
 	lpfc_cmd = lpfc_get_scsi_buf(phba);
@@ -1163,7 +1144,6 @@ lpfc_reset_bus_handler(struct scsi_cmnd *cmnd)
 			phba->brd_no, ret);
 out:
 	spin_unlock_irq(shost->host_lock);
-	lpfc_unblock_requests(phba);
 	return ret;
 }
 
-- 
cgit v0.10.2


From a0785edff7b316cad566bdc369d3d034c4c1a39c Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Thu, 11 May 2006 13:27:09 -0400
Subject: [SCSI] fc transport: resolve scan vs delete deadlocks

In a prior posting to linux-scsi on the fc transport and workq
deadlocks, we noted a second error that did not have a patch:
  http://marc.theaimsgroup.com/?l=linux-scsi&m=114467847711383&w=2
  - There's a deadlock where scsi_remove_target() has to sit behind
    scsi_scan_target() due to contention over the scan_lock().

Subsequently we posted a request for comments about the deadlock:
  http://marc.theaimsgroup.com/?l=linux-scsi&m=114469358829500&w=2

This posting resolves the second error. Here's what we now understand,
and are implementing:

  If the lldd deletes the rport while a scan is active, the sdev's queue
  is blocked which stops the issuing of commands associated with the scan.
  At this point, the scan stalls, and does so with the shost->scan_mutex held.
  If, at this point, if any scan or delete request is made on the host, it
  will stall waiting for the scan_mutex.

  For the FC transport, we queue all delete work to a single workq.
  So, things worked fine when competing with the scan, as long as the
  target blocking the scan was the same target at the top of our delete
  workq, as the delete workq routine always unblocked just prior to
  requesting the delete.  Unfortunately, if the top of our delete workq
  was for a different target, we deadlock.  Additionally, if the target
  blocking scan returned, we were unblocking it in the scan workq routine,
  which really won't execute until the existing stalled scan workq
  completes (e.g. we're re-scheduling it while it is in the midst of its
  execution).

  This patch moves the unblock out of the workq routines and moves it to
  the context that is scheduling the work. This ensures that at some point,
  we will unblock the target that is blocking scan.  Please note, however,
  that the deadlock condition may still occur while it waits for the
  transport to timeout an unblock on a target.  Worst case, this is bounded
  by the transport dev_loss_tmo (default: 30 seconds).

Finally, Michael Reed deserves the credit for the bulk of this patch,
analysis, and it's testing. Thank you for your help.

Note: The request for comments statements about the gross-ness of the
  scan_mutex still stand.

Signed-off-by: Michael Reed <mdr@sgi.com>
Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index f2db7a4..c76e73a 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -1284,7 +1284,9 @@ EXPORT_SYMBOL(fc_release_transport);
  * @work:	Work to queue for execution.
  *
  * Return value:
- * 	0 on success / != 0 for error
+ * 	1 - work queued for execution
+ *	0 - work is already queued
+ *	-EINVAL - work queue doesn't exist
  **/
 static int
 fc_queue_work(struct Scsi_Host *shost, struct work_struct *work)
@@ -1434,8 +1436,6 @@ fc_starget_delete(void *data)
 	struct Scsi_Host *shost = rport_to_shost(rport);
 	unsigned long flags;
 
-	scsi_target_unblock(&rport->dev);
-
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (rport->flags & FC_RPORT_DEVLOSS_PENDING) {
 		spin_unlock_irqrestore(shost->host_lock, flags);
@@ -1707,6 +1707,8 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
 
 				spin_unlock_irqrestore(shost->host_lock, flags);
 
+				scsi_target_unblock(&rport->dev);
+
 				return rport;
 			}
 		}
@@ -1762,9 +1764,10 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
 				/* initiate a scan of the target */
 				rport->flags |= FC_RPORT_SCAN_PENDING;
 				scsi_queue_work(shost, &rport->scan_work);
-			}
-
-			spin_unlock_irqrestore(shost->host_lock, flags);
+				spin_unlock_irqrestore(shost->host_lock, flags);
+				scsi_target_unblock(&rport->dev);
+			} else
+				spin_unlock_irqrestore(shost->host_lock, flags);
 
 			return rport;
 		}
@@ -1938,6 +1941,7 @@ fc_remote_port_rolechg(struct fc_rport  *rport, u32 roles)
 		rport->flags |= FC_RPORT_SCAN_PENDING;
 		scsi_queue_work(shost, &rport->scan_work);
 		spin_unlock_irqrestore(shost->host_lock, flags);
+		scsi_target_unblock(&rport->dev);
 	}
 }
 EXPORT_SYMBOL(fc_remote_port_rolechg);
@@ -1970,8 +1974,9 @@ fc_timeout_deleted_rport(void  *data)
 		dev_printk(KERN_ERR, &rport->dev,
 			"blocked FC remote port time out: no longer"
 			" a FCP target, removing starget\n");
-		fc_queue_work(shost, &rport->stgt_delete_work);
 		spin_unlock_irqrestore(shost->host_lock, flags);
+		scsi_target_unblock(&rport->dev);
+		fc_queue_work(shost, &rport->stgt_delete_work);
 		return;
 	}
 
@@ -2035,17 +2040,15 @@ fc_timeout_deleted_rport(void  *data)
 	 * went away and didn't come back - we'll remove
 	 * all attached scsi devices.
 	 */
-	fc_queue_work(shost, &rport->stgt_delete_work);
-
 	spin_unlock_irqrestore(shost->host_lock, flags);
+
+	scsi_target_unblock(&rport->dev);
+	fc_queue_work(shost, &rport->stgt_delete_work);
 }
 
 /**
  * fc_scsi_scan_rport - called to perform a scsi scan on a remote port.
  *
- * Will unblock the target (in case it went away and has now come back),
- * then invoke a scan.
- *
  * @data:	remote port to be scanned.
  **/
 static void
@@ -2057,7 +2060,6 @@ fc_scsi_scan_rport(void *data)
 
 	if ((rport->port_state == FC_PORTSTATE_ONLINE) &&
 	    (rport->roles & FC_RPORT_ROLE_FCP_TARGET)) {
-		scsi_target_unblock(&rport->dev);
 		scsi_scan_target(&rport->dev, rport->channel,
 			rport->scsi_target_id, SCAN_WILD_CARD, 1);
 	}
-- 
cgit v0.10.2


From 1c9e16e47a22c61d99aabb1c154e5106ddbf3575 Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Tue, 16 May 2006 16:13:36 -0400
Subject: [SCSI] update max sdev block limit

Updated patch to address comments from Pat Mansfield and Michael Reed:
Bumped max to 600 (10mins). Set default dev_loss_tmo to a value other
than the max (30s).

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index a1727a0..117c233 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -117,7 +117,7 @@ extern struct bus_type scsi_bus_type;
  * classes.
  */
 
-#define SCSI_DEVICE_BLOCK_MAX_TIMEOUT	(HZ*60)
+#define SCSI_DEVICE_BLOCK_MAX_TIMEOUT	600	/* units in seconds */
 extern int scsi_internal_device_block(struct scsi_device *sdev);
 extern int scsi_internal_device_unblock(struct scsi_device *sdev);
 
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index c76e73a..df6263d 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -368,7 +368,7 @@ static DECLARE_TRANSPORT_CLASS(fc_rport_class,
  *   should insulate the loss of a remote port.
  *   The maximum will be capped by the value of SCSI_DEVICE_BLOCK_MAX_TIMEOUT.
  */
-static unsigned int fc_dev_loss_tmo = SCSI_DEVICE_BLOCK_MAX_TIMEOUT;
+static unsigned int fc_dev_loss_tmo = 60;		/* seconds */
 
 module_param_named(dev_loss_tmo, fc_dev_loss_tmo, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(dev_loss_tmo,
-- 
cgit v0.10.2


From 3bdad7bd253f17ead00b4af2e82f84e9522c95ac Mon Sep 17 00:00:00 2001
From: James Smart <James.Smart@Emulex.Com>
Date: Mon, 26 Jun 2006 14:19:59 -0400
Subject: [SCSI] fc transport: bug fix: correct references

Original post was incorrect as it didn't realize that we already had
a self-referenc due to device_initialize(), and we were really only
missing the put on our own reference. This was hidden by the other bug
which had the midlayer reusing stargets after they were already free,
which was doing too many puts on our rport.

Updating FC transport for:
- Add put in fc_rport_final_delete(), to release the rport.
  Prior, we were leaving the rport with a reference, thus the shost
  with references, etc. If the driver was unloaded, shosts and rports
  remained, along with work threads, etc
- Fix fc_rport_create failure path - too many put's on parent
- Add commenting to easily track ref taking.

Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index df6263d..b03aa85 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -1476,7 +1476,8 @@ fc_rport_final_delete(void *data)
 	transport_remove_device(dev);
 	device_del(dev);
 	transport_destroy_device(dev);
-	put_device(&shost->shost_gendev);
+	put_device(&shost->shost_gendev);	/* for fc_host->rport list */
+	put_device(dev);			/* for self-reference */
 }
 
 
@@ -1537,13 +1538,13 @@ fc_rport_create(struct Scsi_Host *shost, int channel,
 	else
 		rport->scsi_target_id = -1;
 	list_add_tail(&rport->peers, &fc_host->rports);
-	get_device(&shost->shost_gendev);
+	get_device(&shost->shost_gendev);	/* for fc_host->rport list */
 
 	spin_unlock_irqrestore(shost->host_lock, flags);
 
 	dev = &rport->dev;
-	device_initialize(dev);
-	dev->parent = get_device(&shost->shost_gendev);
+	device_initialize(dev);			/* takes self reference */
+	dev->parent = get_device(&shost->shost_gendev); /* parent reference */
 	dev->release = fc_rport_dev_release;
 	sprintf(dev->bus_id, "rport-%d:%d-%d",
 		shost->host_no, channel, rport->number);
@@ -1567,10 +1568,9 @@ fc_rport_create(struct Scsi_Host *shost, int channel,
 
 delete_rport:
 	transport_destroy_device(dev);
-	put_device(dev->parent);
 	spin_lock_irqsave(shost->host_lock, flags);
 	list_del(&rport->peers);
-	put_device(&shost->shost_gendev);
+	put_device(&shost->shost_gendev);	/* for fc_host->rport list */
 	spin_unlock_irqrestore(shost->host_lock, flags);
 	put_device(dev->parent);
 	kfree(rport);
-- 
cgit v0.10.2


From 7f8a894066b826a4baea49c2a3adbba0a56a192f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@cruncher.tec.linutronix.de>
Date: Wed, 28 Jun 2006 10:11:33 +0200
Subject: [MTD] DOC: Fixup read functions and do a little cleanup

The NAND rework resulted in non ECC based reads. Fix it up and
do a bit of cleanup while at it.

Pointed out by Adrian Bunk.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/drivers/mtd/devices/doc2000.c b/drivers/mtd/devices/doc2000.c
index c54e404..603a795 100644
--- a/drivers/mtd/devices/doc2000.c
+++ b/drivers/mtd/devices/doc2000.c
@@ -55,10 +55,6 @@ static int doc_read(struct mtd_info *mtd, loff_t from, size_t len,
 		    size_t *retlen, u_char *buf);
 static int doc_write(struct mtd_info *mtd, loff_t to, size_t len,
 		     size_t *retlen, const u_char *buf);
-static int doc_read_ecc(struct mtd_info *mtd, loff_t from, size_t len,
-			size_t *retlen, u_char *buf, u_char *eccbuf, struct nand_oobinfo *oobsel);
-static int doc_write_ecc(struct mtd_info *mtd, loff_t to, size_t len,
-			 size_t *retlen, const u_char *buf, u_char *eccbuf, struct nand_oobinfo *oobsel);
 static int doc_read_oob(struct mtd_info *mtd, loff_t ofs,
 			struct mtd_oob_ops *ops);
 static int doc_write_oob(struct mtd_info *mtd, loff_t ofs,
@@ -615,17 +611,10 @@ EXPORT_SYMBOL_GPL(DoC2k_init);
 static int doc_read(struct mtd_info *mtd, loff_t from, size_t len,
 		    size_t * retlen, u_char * buf)
 {
-	/* Just a special case of doc_read_ecc */
-	return doc_read_ecc(mtd, from, len, retlen, buf, NULL, NULL);
-}
-
-static int doc_read_ecc(struct mtd_info *mtd, loff_t from, size_t len,
-			size_t * retlen, u_char * buf, u_char * eccbuf, struct nand_oobinfo *oobsel)
-{
 	struct DiskOnChip *this = mtd->priv;
 	void __iomem *docptr = this->virtadr;
 	struct Nand *mychip;
-	unsigned char syndrome[6];
+	unsigned char syndrome[6], eccbuf[6];
 	volatile char dummy;
 	int i, len256 = 0, ret=0;
 	size_t left = len;
@@ -673,15 +662,9 @@ static int doc_read_ecc(struct mtd_info *mtd, loff_t from, size_t len,
 		DoC_Address(this, ADDR_COLUMN_PAGE, from, CDSN_CTRL_WP,
 			    CDSN_CTRL_ECC_IO);
 
-		if (eccbuf) {
-			/* Prime the ECC engine */
-			WriteDOC(DOC_ECC_RESET, docptr, ECCConf);
-			WriteDOC(DOC_ECC_EN, docptr, ECCConf);
-		} else {
-			/* disable the ECC engine */
-			WriteDOC(DOC_ECC_RESET, docptr, ECCConf);
-			WriteDOC(DOC_ECC_DIS, docptr, ECCConf);
-		}
+		/* Prime the ECC engine */
+		WriteDOC(DOC_ECC_RESET, docptr, ECCConf);
+		WriteDOC(DOC_ECC_EN, docptr, ECCConf);
 
 		/* treat crossing 256-byte sector for 2M x 8bits devices */
 		if (this->page256 && from + len > (from | 0xff) + 1) {
@@ -698,58 +681,59 @@ static int doc_read_ecc(struct mtd_info *mtd, loff_t from, size_t len,
 		/* Let the caller know we completed it */
 		*retlen += len;
 
-		if (eccbuf) {
-			/* Read the ECC data through the DiskOnChip ECC logic */
-			/* Note: this will work even with 2M x 8bit devices as   */
-			/*       they have 8 bytes of OOB per 256 page. mf.      */
-			DoC_ReadBuf(this, eccbuf, 6);
-
-			/* Flush the pipeline */
-			if (DoC_is_Millennium(this)) {
-				dummy = ReadDOC(docptr, ECCConf);
-				dummy = ReadDOC(docptr, ECCConf);
-				i = ReadDOC(docptr, ECCConf);
-			} else {
-				dummy = ReadDOC(docptr, 2k_ECCStatus);
-				dummy = ReadDOC(docptr, 2k_ECCStatus);
-				i = ReadDOC(docptr, 2k_ECCStatus);
-			}
+		/* Read the ECC data through the DiskOnChip ECC logic */
+		/* Note: this will work even with 2M x 8bit devices as   */
+		/*       they have 8 bytes of OOB per 256 page. mf.      */
+		DoC_ReadBuf(this, eccbuf, 6);
 
-			/* Check the ECC Status */
-			if (i & 0x80) {
-				int nb_errors;
-				/* There was an ECC error */
+		/* Flush the pipeline */
+		if (DoC_is_Millennium(this)) {
+			dummy = ReadDOC(docptr, ECCConf);
+			dummy = ReadDOC(docptr, ECCConf);
+			i = ReadDOC(docptr, ECCConf);
+		} else {
+			dummy = ReadDOC(docptr, 2k_ECCStatus);
+			dummy = ReadDOC(docptr, 2k_ECCStatus);
+			i = ReadDOC(docptr, 2k_ECCStatus);
+		}
+
+		/* Check the ECC Status */
+		if (i & 0x80) {
+			int nb_errors;
+			/* There was an ECC error */
 #ifdef ECC_DEBUG
-				printk(KERN_ERR "DiskOnChip ECC Error: Read at %lx\n", (long)from);
+			printk(KERN_ERR "DiskOnChip ECC Error: Read at %lx\n", (long)from);
 #endif
-				/* Read the ECC syndrom through the DiskOnChip ECC logic.
-				   These syndrome will be all ZERO when there is no error */
-				for (i = 0; i < 6; i++) {
-					syndrome[i] =
-					    ReadDOC(docptr, ECCSyndrome0 + i);
-				}
-	                        nb_errors = doc_decode_ecc(buf, syndrome);
+			/* Read the ECC syndrom through the DiskOnChip ECC
+			   logic.  These syndrome will be all ZERO when there
+			   is no error */
+			for (i = 0; i < 6; i++) {
+				syndrome[i] =
+					ReadDOC(docptr, ECCSyndrome0 + i);
+			}
+			nb_errors = doc_decode_ecc(buf, syndrome);
 
 #ifdef ECC_DEBUG
-				printk(KERN_ERR "Errors corrected: %x\n", nb_errors);
+			printk(KERN_ERR "Errors corrected: %x\n", nb_errors);
 #endif
-	                        if (nb_errors < 0) {
-					/* We return error, but have actually done the read. Not that
-					   this can be told to user-space, via sys_read(), but at least
-					   MTD-aware stuff can know about it by checking *retlen */
-					ret = -EIO;
-	                        }
+			if (nb_errors < 0) {
+				/* We return error, but have actually done the
+				   read. Not that this can be told to
+				   user-space, via sys_read(), but at least
+				   MTD-aware stuff can know about it by
+				   checking *retlen */
+				ret = -EIO;
 			}
+		}
 
 #ifdef PSYCHO_DEBUG
-			printk(KERN_DEBUG "ECC DATA at %lxB: %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n",
-				     (long)from, eccbuf[0], eccbuf[1], eccbuf[2],
-				     eccbuf[3], eccbuf[4], eccbuf[5]);
+		printk(KERN_DEBUG "ECC DATA at %lxB: %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n",
+		       (long)from, eccbuf[0], eccbuf[1], eccbuf[2],
+		       eccbuf[3], eccbuf[4], eccbuf[5]);
 #endif
 
-			/* disable the ECC engine */
-			WriteDOC(DOC_ECC_DIS, docptr , ECCConf);
-		}
+		/* disable the ECC engine */
+		WriteDOC(DOC_ECC_DIS, docptr , ECCConf);
 
 		/* according to 11.4.1, we need to wait for the busy line
 	         * drop if we read to the end of the page.  */
@@ -771,17 +755,10 @@ static int doc_read_ecc(struct mtd_info *mtd, loff_t from, size_t len,
 static int doc_write(struct mtd_info *mtd, loff_t to, size_t len,
 		     size_t * retlen, const u_char * buf)
 {
-	char eccbuf[6];
-	return doc_write_ecc(mtd, to, len, retlen, buf, eccbuf, NULL);
-}
-
-static int doc_write_ecc(struct mtd_info *mtd, loff_t to, size_t len,
-			 size_t * retlen, const u_char * buf,
-			 u_char * eccbuf, struct nand_oobinfo *oobsel)
-{
 	struct DiskOnChip *this = mtd->priv;
 	int di; /* Yes, DI is a hangover from when I was disassembling the binary driver */
 	void __iomem *docptr = this->virtadr;
+	unsigned char eccbuf[6];
 	volatile char dummy;
 	int len256 = 0;
 	struct Nand *mychip;
@@ -835,15 +812,9 @@ static int doc_write_ecc(struct mtd_info *mtd, loff_t to, size_t len,
 		DoC_Command(this, NAND_CMD_SEQIN, 0);
 		DoC_Address(this, ADDR_COLUMN_PAGE, to, 0, CDSN_CTRL_ECC_IO);
 
-		if (eccbuf) {
-			/* Prime the ECC engine */
-			WriteDOC(DOC_ECC_RESET, docptr, ECCConf);
-			WriteDOC(DOC_ECC_EN | DOC_ECC_RW, docptr, ECCConf);
-		} else {
-			/* disable the ECC engine */
-			WriteDOC(DOC_ECC_RESET, docptr, ECCConf);
-			WriteDOC(DOC_ECC_DIS, docptr, ECCConf);
-		}
+		/* Prime the ECC engine */
+		WriteDOC(DOC_ECC_RESET, docptr, ECCConf);
+		WriteDOC(DOC_ECC_EN | DOC_ECC_RW, docptr, ECCConf);
 
 		/* treat crossing 256-byte sector for 2M x 8bits devices */
 		if (this->page256 && to + len > (to | 0xff) + 1) {
@@ -873,39 +844,35 @@ static int doc_write_ecc(struct mtd_info *mtd, loff_t to, size_t len,
 
 		DoC_WriteBuf(this, &buf[len256], len - len256);
 
-		if (eccbuf) {
-			WriteDOC(CDSN_CTRL_ECC_IO | CDSN_CTRL_CE, docptr,
-				 CDSNControl);
-
-			if (DoC_is_Millennium(this)) {
-				WriteDOC(0, docptr, NOP);
-				WriteDOC(0, docptr, NOP);
-				WriteDOC(0, docptr, NOP);
-			} else {
-				WriteDOC_(0, docptr, this->ioreg);
-				WriteDOC_(0, docptr, this->ioreg);
-				WriteDOC_(0, docptr, this->ioreg);
-			}
+		WriteDOC(CDSN_CTRL_ECC_IO | CDSN_CTRL_CE, docptr, CDSNControl);
 
-			WriteDOC(CDSN_CTRL_ECC_IO | CDSN_CTRL_FLASH_IO | CDSN_CTRL_CE, docptr,
-				 CDSNControl);
+		if (DoC_is_Millennium(this)) {
+			WriteDOC(0, docptr, NOP);
+			WriteDOC(0, docptr, NOP);
+			WriteDOC(0, docptr, NOP);
+		} else {
+			WriteDOC_(0, docptr, this->ioreg);
+			WriteDOC_(0, docptr, this->ioreg);
+			WriteDOC_(0, docptr, this->ioreg);
+		}
 
-			/* Read the ECC data through the DiskOnChip ECC logic */
-			for (di = 0; di < 6; di++) {
-				eccbuf[di] = ReadDOC(docptr, ECCSyndrome0 + di);
-			}
+		WriteDOC(CDSN_CTRL_ECC_IO | CDSN_CTRL_FLASH_IO | CDSN_CTRL_CE, docptr,
+			 CDSNControl);
 
-			/* Reset the ECC engine */
-			WriteDOC(DOC_ECC_DIS, docptr, ECCConf);
+		/* Read the ECC data through the DiskOnChip ECC logic */
+		for (di = 0; di < 6; di++) {
+			eccbuf[di] = ReadDOC(docptr, ECCSyndrome0 + di);
+		}
+
+		/* Reset the ECC engine */
+		WriteDOC(DOC_ECC_DIS, docptr, ECCConf);
 
 #ifdef PSYCHO_DEBUG
-			printk
-			    ("OOB data at %lx is %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n",
-			     (long) to, eccbuf[0], eccbuf[1], eccbuf[2], eccbuf[3],
-			     eccbuf[4], eccbuf[5]);
+		printk
+			("OOB data at %lx is %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n",
+			 (long) to, eccbuf[0], eccbuf[1], eccbuf[2], eccbuf[3],
+			 eccbuf[4], eccbuf[5]);
 #endif
-		}
-
 		DoC_Command(this, NAND_CMD_PAGEPROG, 0);
 
 		DoC_Command(this, NAND_CMD_STATUS, CDSN_CTRL_WP);
diff --git a/drivers/mtd/devices/doc2001.c b/drivers/mtd/devices/doc2001.c
index 0cf022a..0e2a932 100644
--- a/drivers/mtd/devices/doc2001.c
+++ b/drivers/mtd/devices/doc2001.c
@@ -37,12 +37,6 @@ static int doc_read(struct mtd_info *mtd, loff_t from, size_t len,
 		    size_t *retlen, u_char *buf);
 static int doc_write(struct mtd_info *mtd, loff_t to, size_t len,
 		     size_t *retlen, const u_char *buf);
-static int doc_read_ecc(struct mtd_info *mtd, loff_t from, size_t len,
-			size_t *retlen, u_char *buf, u_char *eccbuf,
-			struct nand_oobinfo *oobsel);
-static int doc_write_ecc(struct mtd_info *mtd, loff_t to, size_t len,
-			 size_t *retlen, const u_char *buf, u_char *eccbuf,
-			 struct nand_oobinfo *oobsel);
 static int doc_read_oob(struct mtd_info *mtd, loff_t ofs,
 			struct mtd_oob_ops *ops);
 static int doc_write_oob(struct mtd_info *mtd, loff_t ofs,
@@ -397,17 +391,9 @@ EXPORT_SYMBOL_GPL(DoCMil_init);
 static int doc_read (struct mtd_info *mtd, loff_t from, size_t len,
 		     size_t *retlen, u_char *buf)
 {
-	/* Just a special case of doc_read_ecc */
-	return doc_read_ecc(mtd, from, len, retlen, buf, NULL, NULL);
-}
-
-static int doc_read_ecc (struct mtd_info *mtd, loff_t from, size_t len,
-			 size_t *retlen, u_char *buf, u_char *eccbuf,
-			 struct nand_oobinfo *oobsel)
-{
 	int i, ret;
 	volatile char dummy;
-	unsigned char syndrome[6];
+	unsigned char syndrome[6], eccbuf[6];
 	struct DiskOnChip *this = mtd->priv;
 	void __iomem *docptr = this->virtadr;
 	struct Nand *mychip = &this->chips[from >> (this->chipshift)];
@@ -437,15 +423,9 @@ static int doc_read_ecc (struct mtd_info *mtd, loff_t from, size_t len,
 	DoC_Address(docptr, 3, from, CDSN_CTRL_WP, 0x00);
 	DoC_WaitReady(docptr);
 
-	if (eccbuf) {
-		/* init the ECC engine, see Reed-Solomon EDC/ECC 11.1 .*/
-		WriteDOC (DOC_ECC_RESET, docptr, ECCConf);
-		WriteDOC (DOC_ECC_EN, docptr, ECCConf);
-	} else {
-		/* disable the ECC engine */
-		WriteDOC (DOC_ECC_RESET, docptr, ECCConf);
-		WriteDOC (DOC_ECC_DIS, docptr, ECCConf);
-	}
+	/* init the ECC engine, see Reed-Solomon EDC/ECC 11.1 .*/
+	WriteDOC (DOC_ECC_RESET, docptr, ECCConf);
+	WriteDOC (DOC_ECC_EN, docptr, ECCConf);
 
 	/* Read the data via the internal pipeline through CDSN IO register,
 	   see Pipelined Read Operations 11.3 */
@@ -465,58 +445,56 @@ static int doc_read_ecc (struct mtd_info *mtd, loff_t from, size_t len,
 	*retlen = len;
         ret = 0;
 
-	if (eccbuf) {
-		/* Read the ECC data from Spare Data Area,
-		   see Reed-Solomon EDC/ECC 11.1 */
-		dummy = ReadDOC(docptr, ReadPipeInit);
+	/* Read the ECC data from Spare Data Area,
+	   see Reed-Solomon EDC/ECC 11.1 */
+	dummy = ReadDOC(docptr, ReadPipeInit);
 #ifndef USE_MEMCPY
-		for (i = 0; i < 5; i++) {
-			/* N.B. you have to increase the source address in this way or the
-			   ECC logic will not work properly */
-			eccbuf[i] = ReadDOC(docptr, Mil_CDSN_IO + i);
-		}
+	for (i = 0; i < 5; i++) {
+		/* N.B. you have to increase the source address in this way or the
+		   ECC logic will not work properly */
+		eccbuf[i] = ReadDOC(docptr, Mil_CDSN_IO + i);
+	}
 #else
-		memcpy_fromio(eccbuf, docptr + DoC_Mil_CDSN_IO, 5);
+	memcpy_fromio(eccbuf, docptr + DoC_Mil_CDSN_IO, 5);
 #endif
-		eccbuf[5] = ReadDOC(docptr, LastDataRead);
+	eccbuf[5] = ReadDOC(docptr, LastDataRead);
 
-		/* Flush the pipeline */
-		dummy = ReadDOC(docptr, ECCConf);
-		dummy = ReadDOC(docptr, ECCConf);
+	/* Flush the pipeline */
+	dummy = ReadDOC(docptr, ECCConf);
+	dummy = ReadDOC(docptr, ECCConf);
 
-		/* Check the ECC Status */
-		if (ReadDOC(docptr, ECCConf) & 0x80) {
-                        int nb_errors;
-			/* There was an ECC error */
+	/* Check the ECC Status */
+	if (ReadDOC(docptr, ECCConf) & 0x80) {
+		int nb_errors;
+		/* There was an ECC error */
 #ifdef ECC_DEBUG
-			printk("DiskOnChip ECC Error: Read at %lx\n", (long)from);
+		printk("DiskOnChip ECC Error: Read at %lx\n", (long)from);
 #endif
-			/* Read the ECC syndrom through the DiskOnChip ECC logic.
-			   These syndrome will be all ZERO when there is no error */
-			for (i = 0; i < 6; i++) {
-				syndrome[i] = ReadDOC(docptr, ECCSyndrome0 + i);
-			}
-                        nb_errors = doc_decode_ecc(buf, syndrome);
+		/* Read the ECC syndrom through the DiskOnChip ECC logic.
+		   These syndrome will be all ZERO when there is no error */
+		for (i = 0; i < 6; i++) {
+			syndrome[i] = ReadDOC(docptr, ECCSyndrome0 + i);
+		}
+		nb_errors = doc_decode_ecc(buf, syndrome);
 #ifdef ECC_DEBUG
-			printk("ECC Errors corrected: %x\n", nb_errors);
+		printk("ECC Errors corrected: %x\n", nb_errors);
 #endif
-                        if (nb_errors < 0) {
-				/* We return error, but have actually done the read. Not that
-				   this can be told to user-space, via sys_read(), but at least
-				   MTD-aware stuff can know about it by checking *retlen */
-				ret = -EIO;
-                        }
+		if (nb_errors < 0) {
+			/* We return error, but have actually done the read. Not that
+			   this can be told to user-space, via sys_read(), but at least
+			   MTD-aware stuff can know about it by checking *retlen */
+			ret = -EIO;
 		}
+	}
 
 #ifdef PSYCHO_DEBUG
-		printk("ECC DATA at %lx: %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n",
-		       (long)from, eccbuf[0], eccbuf[1], eccbuf[2], eccbuf[3],
-		       eccbuf[4], eccbuf[5]);
+	printk("ECC DATA at %lx: %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n",
+	       (long)from, eccbuf[0], eccbuf[1], eccbuf[2], eccbuf[3],
+	       eccbuf[4], eccbuf[5]);
 #endif
 
-		/* disable the ECC engine */
-		WriteDOC(DOC_ECC_DIS, docptr , ECCConf);
-	}
+	/* disable the ECC engine */
+	WriteDOC(DOC_ECC_DIS, docptr , ECCConf);
 
 	return ret;
 }
@@ -524,15 +502,8 @@ static int doc_read_ecc (struct mtd_info *mtd, loff_t from, size_t len,
 static int doc_write (struct mtd_info *mtd, loff_t to, size_t len,
 		      size_t *retlen, const u_char *buf)
 {
-	char eccbuf[6];
-	return doc_write_ecc(mtd, to, len, retlen, buf, eccbuf, NULL);
-}
-
-static int doc_write_ecc (struct mtd_info *mtd, loff_t to, size_t len,
-			  size_t *retlen, const u_char *buf, u_char *eccbuf,
-			 struct nand_oobinfo *oobsel)
-{
 	int i,ret = 0;
+	char eccbuf[6];
 	volatile char dummy;
 	struct DiskOnChip *this = mtd->priv;
 	void __iomem *docptr = this->virtadr;
@@ -573,15 +544,9 @@ static int doc_write_ecc (struct mtd_info *mtd, loff_t to, size_t len,
 	DoC_Address(docptr, 3, to, 0x00, 0x00);
 	DoC_WaitReady(docptr);
 
-	if (eccbuf) {
-		/* init the ECC engine, see Reed-Solomon EDC/ECC 11.1 .*/
-		WriteDOC (DOC_ECC_RESET, docptr, ECCConf);
-		WriteDOC (DOC_ECC_EN | DOC_ECC_RW, docptr, ECCConf);
-	} else {
-		/* disable the ECC engine */
-		WriteDOC (DOC_ECC_RESET, docptr, ECCConf);
-		WriteDOC (DOC_ECC_DIS, docptr, ECCConf);
-	}
+	/* init the ECC engine, see Reed-Solomon EDC/ECC 11.1 .*/
+	WriteDOC (DOC_ECC_RESET, docptr, ECCConf);
+	WriteDOC (DOC_ECC_EN | DOC_ECC_RW, docptr, ECCConf);
 
 	/* Write the data via the internal pipeline through CDSN IO register,
 	   see Pipelined Write Operations 11.2 */
@@ -596,46 +561,44 @@ static int doc_write_ecc (struct mtd_info *mtd, loff_t to, size_t len,
 #endif
 	WriteDOC(0x00, docptr, WritePipeTerm);
 
-	if (eccbuf) {
-		/* Write ECC data to flash, the ECC info is generated by the DiskOnChip ECC logic
-		   see Reed-Solomon EDC/ECC 11.1 */
-		WriteDOC(0, docptr, NOP);
-		WriteDOC(0, docptr, NOP);
-		WriteDOC(0, docptr, NOP);
+	/* Write ECC data to flash, the ECC info is generated by the DiskOnChip ECC logic
+	   see Reed-Solomon EDC/ECC 11.1 */
+	WriteDOC(0, docptr, NOP);
+	WriteDOC(0, docptr, NOP);
+	WriteDOC(0, docptr, NOP);
 
-		/* Read the ECC data through the DiskOnChip ECC logic */
-		for (i = 0; i < 6; i++) {
-			eccbuf[i] = ReadDOC(docptr, ECCSyndrome0 + i);
-		}
+	/* Read the ECC data through the DiskOnChip ECC logic */
+	for (i = 0; i < 6; i++) {
+		eccbuf[i] = ReadDOC(docptr, ECCSyndrome0 + i);
+	}
 
-		/* ignore the ECC engine */
-		WriteDOC(DOC_ECC_DIS, docptr , ECCConf);
+	/* ignore the ECC engine */
+	WriteDOC(DOC_ECC_DIS, docptr , ECCConf);
 
 #ifndef USE_MEMCPY
-		/* Write the ECC data to flash */
-		for (i = 0; i < 6; i++) {
-			/* N.B. you have to increase the source address in this way or the
-			   ECC logic will not work properly */
-			WriteDOC(eccbuf[i], docptr, Mil_CDSN_IO + i);
-		}
+	/* Write the ECC data to flash */
+	for (i = 0; i < 6; i++) {
+		/* N.B. you have to increase the source address in this way or the
+		   ECC logic will not work properly */
+		WriteDOC(eccbuf[i], docptr, Mil_CDSN_IO + i);
+	}
 #else
-		memcpy_toio(docptr + DoC_Mil_CDSN_IO, eccbuf, 6);
+	memcpy_toio(docptr + DoC_Mil_CDSN_IO, eccbuf, 6);
 #endif
 
-		/* write the block status BLOCK_USED (0x5555) at the end of ECC data
-		   FIXME: this is only a hack for programming the IPL area for LinuxBIOS
-		   and should be replace with proper codes in user space utilities */
-		WriteDOC(0x55, docptr, Mil_CDSN_IO);
-		WriteDOC(0x55, docptr, Mil_CDSN_IO + 1);
+	/* write the block status BLOCK_USED (0x5555) at the end of ECC data
+	   FIXME: this is only a hack for programming the IPL area for LinuxBIOS
+	   and should be replace with proper codes in user space utilities */
+	WriteDOC(0x55, docptr, Mil_CDSN_IO);
+	WriteDOC(0x55, docptr, Mil_CDSN_IO + 1);
 
-		WriteDOC(0x00, docptr, WritePipeTerm);
+	WriteDOC(0x00, docptr, WritePipeTerm);
 
 #ifdef PSYCHO_DEBUG
-		printk("OOB data at %lx is %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n",
-		       (long) to, eccbuf[0], eccbuf[1], eccbuf[2], eccbuf[3],
-		       eccbuf[4], eccbuf[5]);
+	printk("OOB data at %lx is %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n",
+	       (long) to, eccbuf[0], eccbuf[1], eccbuf[2], eccbuf[3],
+	       eccbuf[4], eccbuf[5]);
 #endif
-	}
 
 	/* Commit the Page Program command and wait for ready
 	   see Software Requirement 11.4 item 1.*/
diff --git a/drivers/mtd/devices/doc2001plus.c b/drivers/mtd/devices/doc2001plus.c
index 66cb1e5..92dbb47 100644
--- a/drivers/mtd/devices/doc2001plus.c
+++ b/drivers/mtd/devices/doc2001plus.c
@@ -41,12 +41,6 @@ static int doc_read(struct mtd_info *mtd, loff_t from, size_t len,
 		size_t *retlen, u_char *buf);
 static int doc_write(struct mtd_info *mtd, loff_t to, size_t len,
 		size_t *retlen, const u_char *buf);
-static int doc_read_ecc(struct mtd_info *mtd, loff_t from, size_t len,
-		size_t *retlen, u_char *buf, u_char *eccbuf,
-		struct nand_oobinfo *oobsel);
-static int doc_write_ecc(struct mtd_info *mtd, loff_t to, size_t len,
-		size_t *retlen, const u_char *buf, u_char *eccbuf,
-		struct nand_oobinfo *oobsel);
 static int doc_read_oob(struct mtd_info *mtd, loff_t ofs,
 			struct mtd_oob_ops *ops);
 static int doc_write_oob(struct mtd_info *mtd, loff_t ofs,
@@ -595,18 +589,10 @@ static int doc_dumpblk(struct mtd_info *mtd, loff_t from)
 static int doc_read(struct mtd_info *mtd, loff_t from, size_t len,
 		    size_t *retlen, u_char *buf)
 {
-	/* Just a special case of doc_read_ecc */
-	return doc_read_ecc(mtd, from, len, retlen, buf, NULL, NULL);
-}
-
-static int doc_read_ecc(struct mtd_info *mtd, loff_t from, size_t len,
-			size_t *retlen, u_char *buf, u_char *eccbuf,
-			struct nand_oobinfo *oobsel)
-{
 	int ret, i;
 	volatile char dummy;
 	loff_t fofs;
-	unsigned char syndrome[6];
+	unsigned char syndrome[6], eccbuf[6];
 	struct DiskOnChip *this = mtd->priv;
 	void __iomem * docptr = this->virtadr;
 	struct Nand *mychip = &this->chips[from >> (this->chipshift)];
@@ -644,56 +630,51 @@ static int doc_read_ecc(struct mtd_info *mtd, loff_t from, size_t len,
 	WriteDOC(0, docptr, Mplus_FlashControl);
 	DoC_WaitReady(docptr);
 
-	if (eccbuf) {
-		/* init the ECC engine, see Reed-Solomon EDC/ECC 11.1 .*/
-		WriteDOC(DOC_ECC_RESET, docptr, Mplus_ECCConf);
-		WriteDOC(DOC_ECC_EN, docptr, Mplus_ECCConf);
-	} else {
-		/* disable the ECC engine */
-		WriteDOC(DOC_ECC_RESET, docptr, Mplus_ECCConf);
-	}
+	/* init the ECC engine, see Reed-Solomon EDC/ECC 11.1 .*/
+	WriteDOC(DOC_ECC_RESET, docptr, Mplus_ECCConf);
+	WriteDOC(DOC_ECC_EN, docptr, Mplus_ECCConf);
 
 	/* Let the caller know we completed it */
 	*retlen = len;
-        ret = 0;
+	ret = 0;
 
 	ReadDOC(docptr, Mplus_ReadPipeInit);
 	ReadDOC(docptr, Mplus_ReadPipeInit);
 
-	if (eccbuf) {
-		/* Read the data via the internal pipeline through CDSN IO
-		   register, see Pipelined Read Operations 11.3 */
-		MemReadDOC(docptr, buf, len);
+	/* Read the data via the internal pipeline through CDSN IO
+	   register, see Pipelined Read Operations 11.3 */
+	MemReadDOC(docptr, buf, len);
 
-		/* Read the ECC data following raw data */
-		MemReadDOC(docptr, eccbuf, 4);
-		eccbuf[4] = ReadDOC(docptr, Mplus_LastDataRead);
-		eccbuf[5] = ReadDOC(docptr, Mplus_LastDataRead);
+	/* Read the ECC data following raw data */
+	MemReadDOC(docptr, eccbuf, 4);
+	eccbuf[4] = ReadDOC(docptr, Mplus_LastDataRead);
+	eccbuf[5] = ReadDOC(docptr, Mplus_LastDataRead);
 
-		/* Flush the pipeline */
-		dummy = ReadDOC(docptr, Mplus_ECCConf);
-		dummy = ReadDOC(docptr, Mplus_ECCConf);
+	/* Flush the pipeline */
+	dummy = ReadDOC(docptr, Mplus_ECCConf);
+	dummy = ReadDOC(docptr, Mplus_ECCConf);
 
-		/* Check the ECC Status */
-		if (ReadDOC(docptr, Mplus_ECCConf) & 0x80) {
-                        int nb_errors;
-			/* There was an ECC error */
+	/* Check the ECC Status */
+	if (ReadDOC(docptr, Mplus_ECCConf) & 0x80) {
+		int nb_errors;
+		/* There was an ECC error */
 #ifdef ECC_DEBUG
-			printk("DiskOnChip ECC Error: Read at %lx\n", (long)from);
+		printk("DiskOnChip ECC Error: Read at %lx\n", (long)from);
 #endif
-			/* Read the ECC syndrom through the DiskOnChip ECC logic.
-			   These syndrome will be all ZERO when there is no error */
-			for (i = 0; i < 6; i++)
-				syndrome[i] = ReadDOC(docptr, Mplus_ECCSyndrome0 + i);
+		/* Read the ECC syndrom through the DiskOnChip ECC logic.
+		   These syndrome will be all ZERO when there is no error */
+		for (i = 0; i < 6; i++)
+			syndrome[i] = ReadDOC(docptr, Mplus_ECCSyndrome0 + i);
 
-                        nb_errors = doc_decode_ecc(buf, syndrome);
+		nb_errors = doc_decode_ecc(buf, syndrome);
 #ifdef ECC_DEBUG
-			printk("ECC Errors corrected: %x\n", nb_errors);
+		printk("ECC Errors corrected: %x\n", nb_errors);
 #endif
-                        if (nb_errors < 0) {
-				/* We return error, but have actually done the read. Not that
-				   this can be told to user-space, via sys_read(), but at least
-				   MTD-aware stuff can know about it by checking *retlen */
+		if (nb_errors < 0) {
+			/* We return error, but have actually done the
+			   read. Not that this can be told to user-space, via
+			   sys_read(), but at least MTD-aware stuff can know
+			   about it by checking *retlen */
 #ifdef ECC_DEBUG
 			printk("%s(%d): Millennium Plus ECC error (from=0x%x:\n",
 				__FILE__, __LINE__, (int)from);
@@ -707,24 +688,16 @@ static int doc_read_ecc(struct mtd_info *mtd, loff_t from, size_t len,
 				eccbuf[3], eccbuf[4], eccbuf[5]);
 #endif
 				ret = -EIO;
-                        }
 		}
+	}
 
 #ifdef PSYCHO_DEBUG
-		printk("ECC DATA at %lx: %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n",
-		       (long)from, eccbuf[0], eccbuf[1], eccbuf[2], eccbuf[3],
-		       eccbuf[4], eccbuf[5]);
+	printk("ECC DATA at %lx: %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n",
+	       (long)from, eccbuf[0], eccbuf[1], eccbuf[2], eccbuf[3],
+	       eccbuf[4], eccbuf[5]);
 #endif
-
-		/* disable the ECC engine */
-		WriteDOC(DOC_ECC_DIS, docptr , Mplus_ECCConf);
-	} else {
-		/* Read the data via the internal pipeline through CDSN IO
-		   register, see Pipelined Read Operations 11.3 */
-		MemReadDOC(docptr, buf, len-2);
-		buf[len-2] = ReadDOC(docptr, Mplus_LastDataRead);
-		buf[len-1] = ReadDOC(docptr, Mplus_LastDataRead);
-	}
+	/* disable the ECC engine */
+	WriteDOC(DOC_ECC_DIS, docptr , Mplus_ECCConf);
 
 	/* Disable flash internally */
 	WriteDOC(0, docptr, Mplus_FlashSelect);
@@ -735,17 +708,10 @@ static int doc_read_ecc(struct mtd_info *mtd, loff_t from, size_t len,
 static int doc_write(struct mtd_info *mtd, loff_t to, size_t len,
 		     size_t *retlen, const u_char *buf)
 {
-	char eccbuf[6];
-	return doc_write_ecc(mtd, to, len, retlen, buf, eccbuf, NULL);
-}
-
-static int doc_write_ecc(struct mtd_info *mtd, loff_t to, size_t len,
-			 size_t *retlen, const u_char *buf, u_char *eccbuf,
-			 struct nand_oobinfo *oobsel)
-{
 	int i, before, ret = 0;
 	loff_t fto;
 	volatile char dummy;
+	char eccbuf[6];
 	struct DiskOnChip *this = mtd->priv;
 	void __iomem * docptr = this->virtadr;
 	struct Nand *mychip = &this->chips[to >> (this->chipshift)];
@@ -795,46 +761,42 @@ static int doc_write_ecc(struct mtd_info *mtd, loff_t to, size_t len,
 	/* Disable the ECC engine */
 	WriteDOC(DOC_ECC_RESET, docptr, Mplus_ECCConf);
 
-	if (eccbuf) {
-		if (before) {
-			/* Write the block status BLOCK_USED (0x5555) */
-			WriteDOC(0x55, docptr, Mil_CDSN_IO);
-			WriteDOC(0x55, docptr, Mil_CDSN_IO);
-		}
-
-		/* init the ECC engine, see Reed-Solomon EDC/ECC 11.1 .*/
-		WriteDOC(DOC_ECC_EN | DOC_ECC_RW, docptr, Mplus_ECCConf);
+	if (before) {
+		/* Write the block status BLOCK_USED (0x5555) */
+		WriteDOC(0x55, docptr, Mil_CDSN_IO);
+		WriteDOC(0x55, docptr, Mil_CDSN_IO);
 	}
 
+	/* init the ECC engine, see Reed-Solomon EDC/ECC 11.1 .*/
+	WriteDOC(DOC_ECC_EN | DOC_ECC_RW, docptr, Mplus_ECCConf);
+
 	MemWriteDOC(docptr, (unsigned char *) buf, len);
 
-	if (eccbuf) {
-		/* Write ECC data to flash, the ECC info is generated by
-		   the DiskOnChip ECC logic see Reed-Solomon EDC/ECC 11.1 */
-		DoC_Delay(docptr, 3);
+	/* Write ECC data to flash, the ECC info is generated by
+	   the DiskOnChip ECC logic see Reed-Solomon EDC/ECC 11.1 */
+	DoC_Delay(docptr, 3);
 
-		/* Read the ECC data through the DiskOnChip ECC logic */
-		for (i = 0; i < 6; i++)
-			eccbuf[i] = ReadDOC(docptr, Mplus_ECCSyndrome0 + i);
+	/* Read the ECC data through the DiskOnChip ECC logic */
+	for (i = 0; i < 6; i++)
+		eccbuf[i] = ReadDOC(docptr, Mplus_ECCSyndrome0 + i);
 
-		/* disable the ECC engine */
-		WriteDOC(DOC_ECC_DIS, docptr, Mplus_ECCConf);
+	/* disable the ECC engine */
+	WriteDOC(DOC_ECC_DIS, docptr, Mplus_ECCConf);
 
-		/* Write the ECC data to flash */
-		MemWriteDOC(docptr, eccbuf, 6);
+	/* Write the ECC data to flash */
+	MemWriteDOC(docptr, eccbuf, 6);
 
-		if (!before) {
-			/* Write the block status BLOCK_USED (0x5555) */
-			WriteDOC(0x55, docptr, Mil_CDSN_IO+6);
-			WriteDOC(0x55, docptr, Mil_CDSN_IO+7);
-		}
+	if (!before) {
+		/* Write the block status BLOCK_USED (0x5555) */
+		WriteDOC(0x55, docptr, Mil_CDSN_IO+6);
+		WriteDOC(0x55, docptr, Mil_CDSN_IO+7);
+	}
 
 #ifdef PSYCHO_DEBUG
-		printk("OOB data at %lx is %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n",
-		       (long) to, eccbuf[0], eccbuf[1], eccbuf[2], eccbuf[3],
-		       eccbuf[4], eccbuf[5]);
+	printk("OOB data at %lx is %2.2X %2.2X %2.2X %2.2X %2.2X %2.2X\n",
+	       (long) to, eccbuf[0], eccbuf[1], eccbuf[2], eccbuf[3],
+	       eccbuf[4], eccbuf[5]);
 #endif
-	}
 
 	WriteDOC(0x00, docptr, Mplus_WritePipeTerm);
 	WriteDOC(0x00, docptr, Mplus_WritePipeTerm);
-- 
cgit v0.10.2


From a144c5ae0956fb262e6c82624c82b1110a451437 Mon Sep 17 00:00:00 2001
From: Brian King <brking@us.ibm.com>
Date: Tue, 27 Jun 2006 11:10:31 -0500
Subject: [SCSI] scsi: Add allow_restart sysfs class attribute

This is a resend of a patch I generated in response to an email sent
by Ruben Faelens <parasietje@gmail.com>. His original email to
linux-scsi requested a method in which he could spin down a scsi disk
when not in use and have the kernel automatically spin it back up when
an I/O was generated to the disk. The infrastructure to automatically
spin a disk up has been in the scsi error handler for some time now,
but it is not enabled by default. This patch adds an sd sysfs attribute
which allows userspace to enable this behavior.

Signed-off-by: Brian King <brking@us.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index f899ff0..2e96c3d 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -208,6 +208,23 @@ static ssize_t sd_store_cache_type(struct class_device *cdev, const char *buf,
 	return count;
 }
 
+static ssize_t sd_store_allow_restart(struct class_device *cdev, const char *buf,
+				      size_t count)
+{
+	struct scsi_disk *sdkp = to_scsi_disk(cdev);
+	struct scsi_device *sdp = sdkp->device;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (sdp->type != TYPE_DISK)
+		return -EINVAL;
+
+	sdp->allow_restart = simple_strtoul(buf, NULL, 10);
+
+	return count;
+}
+
 static ssize_t sd_show_cache_type(struct class_device *cdev, char *buf)
 {
 	struct scsi_disk *sdkp = to_scsi_disk(cdev);
@@ -223,10 +240,19 @@ static ssize_t sd_show_fua(struct class_device *cdev, char *buf)
 	return snprintf(buf, 20, "%u\n", sdkp->DPOFUA);
 }
 
+static ssize_t sd_show_allow_restart(struct class_device *cdev, char *buf)
+{
+	struct scsi_disk *sdkp = to_scsi_disk(cdev);
+
+	return snprintf(buf, 40, "%d\n", sdkp->device->allow_restart);
+}
+
 static struct class_device_attribute sd_disk_attrs[] = {
 	__ATTR(cache_type, S_IRUGO|S_IWUSR, sd_show_cache_type,
 	       sd_store_cache_type),
 	__ATTR(FUA, S_IRUGO, sd_show_fua, NULL),
+	__ATTR(allow_restart, S_IRUGO|S_IWUSR, sd_show_allow_restart,
+	       sd_store_allow_restart),
 	__ATTR_NULL,
 };
 
-- 
cgit v0.10.2


From 309bd271211caa5a04a8137649cebd7691376351 Mon Sep 17 00:00:00 2001
From: Brian King <brking@us.ibm.com>
Date: Tue, 27 Jun 2006 11:10:43 -0500
Subject: [SCSI] scsi: Device scanning oops for offlined devices (resend)

If a device gets offlined as a result of the Inquiry sent
during scanning, the following oops can occur. After the
disk gets put into the SDEV_OFFLINE state, the error handler
sends back the failed inquiry, which wakes the thread doing
the scan. This starts a race between the scanning thread
freeing the scsi device and the error handler calling
scsi_run_host_queues to restart the host. Since the disk
is in the SDEV_OFFLINE state, scsi_device_get will still
work, which results in __scsi_iterate_devices getting
a reference to the scsi disk when it shouldn't.

The following execution thread causes the oops:

CPU 0 (scan)				CPU 1 (eh)

---------------------------------------------------------
scsi_probe_and_add_lun
                        ....
                                        scsi_eh_offline_sdevs
                                        scsi_eh_flush_done_q
scsi_destroy_sdev
scsi_device_dev_release
                                        scsi_restart_operations
                                         scsi_run_host_queues
                                          __scsi_iterate_devices
                                           get_device
scsi_device_dev_release_usercontext
                                          scsi_run_queue
                                            <---OOPS--->

The patch fixes this by changing the state of the sdev to SDEV_DEL
before doing the final put_device, which should prevent the race
from occurring.

Original oops follows:

Badness in kref_get at lib/kref.c:32
Call Trace:
[C00000002F4476D0] [C00000000000EE20] .show_stack+0x68/0x1b0 (unreliable)
[C00000002F447770] [C00000000037515C] .program_check_exception+0x1cc/0x5a8
[C00000002F447840] [C00000000000446C] program_check_common+0xec/0x100
 Exception: 700 at .kref_get+0x10/0x28
    LR = .kobject_get+0x20/0x3c
[C00000002F447B30] [C00000002F447BC0] 0xc00000002f447bc0 (unreliable)
[C00000002F447BB0] [C000000000254BDC] .get_device+0x20/0x3c
[C00000002F447C30] [D000000000063188] .scsi_device_get+0x34/0xdc [scsi_mod]
[C00000002F447CC0] [D0000000000633EC] .__scsi_iterate_devices+0x50/0xbc [scsi_mod]
[C00000002F447D60] [D00000000006A910] .scsi_run_host_queues+0x34/0x5c [scsi_mod]
[C00000002F447DF0] [D000000000069054] .scsi_error_handler+0xdb4/0xe44 [scsi_mod]
[C00000002F447EE0] [C00000000007B4E0] .kthread+0x128/0x178
[C00000002F447F90] [C000000000025E84] .kernel_thread+0x4c/0x68
Unable to handle kernel paging request for <7>PCI: Enabling device: (0002:41:01.1), cmd 143
data at address 0x000001b8
Faulting instruction address: 0xd0000000000698e4
sym1: <1010-66> rev 0x1 at pci 0002:41:01.1 irq 216
sym1: No NVRAM, ID 7, Fast-80, LVD, parity checking
sym1: SCSI BUS has been reset.
scsi2 : sym-2.2.2
cpu 0x0: Vector: 300 (Data Access) at [c00000002f447a30]
    pc: d0000000000698e4: .scsi_run_queue+0x2c/0x218 [scsi_mod]
    lr: d00000000006a904: .scsi_run_host_queues+0x28/0x5c [scsi_mod]
    sp: c00000002f447cb0
   msr: 9000000000009032
   dar: 1b8
 dsisr: 40000000
  current = 0xc0000000045fecd0
  paca    = 0xc00000000048ee80
    pid   = 1123, comm = scsi_eh_1
enter ? for help
[c00000002f447d60] d00000000006a904 .scsi_run_host_queues+0x28/0x5c [scsi_mod]
[c00000002f447df0] d000000000069054 .scsi_error_handler+0xdb4/0xe44 [scsi_mod]
[c00000002f447ee0] c00000000007b4e0 .kthread+0x128/0x178
[c00000002f447f90] c000000000025e84 .kernel_thread+0x4c/0x68

Signed-off-by: Brian King <brking@us.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 68e0d7d..bf5191f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2037,6 +2037,9 @@ scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
 
 	case SDEV_DEL:
 		switch (oldstate) {
+		case SDEV_CREATED:
+		case SDEV_RUNNING:
+		case SDEV_OFFLINE:
 		case SDEV_CANCEL:
 			break;
 		default:
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 0f7e6f9..4c31799 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -810,6 +810,7 @@ static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags)
 
 static inline void scsi_destroy_sdev(struct scsi_device *sdev)
 {
+	scsi_device_set_state(sdev, SDEV_DEL);
 	if (sdev->host->hostt->slave_destroy)
 		sdev->host->hostt->slave_destroy(sdev);
 	transport_destroy_device(&sdev->sdev_gendev);
-- 
cgit v0.10.2


From 2076eb6ab8339bf09620a0160be3607bbbb61a50 Mon Sep 17 00:00:00 2001
From: Eric Moore <eric.moore@lsil.com>
Date: Tue, 27 Jun 2006 14:39:06 -0600
Subject: [SCSI] fusion : mpi header update

MPI Header Update

Signed-off-by: Eric Moore <Eric.Moore@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/message/fusion/lsi/fc_log.h b/drivers/message/fusion/lsi/fc_log.h
deleted file mode 100644
index dc98d46..0000000
--- a/drivers/message/fusion/lsi/fc_log.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- *  Copyright (c) 2000-2001 LSI Logic Corporation. All rights reserved.
- *
- *  NAME:           fc_log.h
- *  SUMMARY:        MPI IocLogInfo definitions for the SYMFC9xx chips
- *  DESCRIPTION:    Contains the enumerated list of values that may be returned
- *                  in the IOCLogInfo field of a MPI Default Reply Message.
- *
- *  CREATION DATE:  6/02/2000
- *  ID:             $Id: fc_log.h,v 4.6 2001/07/26 14:41:33 sschremm Exp $
- */
-
-
-/*
- * MpiIocLogInfo_t enum
- *
- * These 32 bit values are used in the IOCLogInfo field of the MPI reply
- * messages.
- * The value is 0xabcccccc where
- *          a = The type of log info as per the MPI spec. Since these codes are
- *              all for Fibre Channel this value will always be 2.
- *          b = Specifies a subclass of the firmware where
- *                  0 = FCP Initiator
- *                  1 = FCP Target
- *                  2 = LAN
- *                  3 = MPI Message Layer
- *                  4 = FC Link
- *                  5 = Context Manager
- *                  6 = Invalid Field Offset
- *                  7 = State Change Info
- *                  all others are reserved for future use
- *          c = A specific value within the subclass.
- *
- * NOTE: Any new values should be added to the end of each subclass so that the
- *       codes remain consistent across firmware releases.
- */
-typedef enum _MpiIocLogInfoFc
-{
-    MPI_IOCLOGINFO_FC_INIT_BASE                     = 0x20000000,
-    MPI_IOCLOGINFO_FC_INIT_ERROR_OUT_OF_ORDER_FRAME = 0x20000001, /* received an out of order frame - unsupported */
-    MPI_IOCLOGINFO_FC_INIT_ERROR_BAD_START_OF_FRAME = 0x20000002, /* Bad Rx Frame, bad start of frame primative */
-    MPI_IOCLOGINFO_FC_INIT_ERROR_BAD_END_OF_FRAME   = 0x20000003, /* Bad Rx Frame, bad end of frame primative */
-    MPI_IOCLOGINFO_FC_INIT_ERROR_OVER_RUN           = 0x20000004, /* Bad Rx Frame, overrun */
-    MPI_IOCLOGINFO_FC_INIT_ERROR_RX_OTHER           = 0x20000005, /* Other errors caught by IOC which require retries */
-    MPI_IOCLOGINFO_FC_INIT_ERROR_SUBPROC_DEAD       = 0x20000006, /* Main processor could not initialize sub-processor */
-    MPI_IOCLOGINFO_FC_INIT_ERROR_RX_OVERRUN         = 0x20000007, /* Scatter Gather overrun  */
-    MPI_IOCLOGINFO_FC_INIT_ERROR_RX_BAD_STATUS      = 0x20000008, /* Receiver detected context mismatch via invalid header */
-    MPI_IOCLOGINFO_FC_INIT_ERROR_RX_UNEXPECTED_FRAME= 0x20000009, /* CtxMgr detected unsupported frame type  */
-    MPI_IOCLOGINFO_FC_INIT_ERROR_LINK_FAILURE       = 0x2000000A, /* Link failure occurred  */
-    MPI_IOCLOGINFO_FC_INIT_ERROR_TX_TIMEOUT         = 0x2000000B, /* Transmitter timeout error */
-
-    MPI_IOCLOGINFO_FC_TARGET_BASE                   = 0x21000000,
-    MPI_IOCLOGINFO_FC_TARGET_NO_PDISC               = 0x21000001, /* not sent because we are waiting for a PDISC from the initiator */
-    MPI_IOCLOGINFO_FC_TARGET_NO_LOGIN               = 0x21000002, /* not sent because we are not logged in to the remote node */
-    MPI_IOCLOGINFO_FC_TARGET_DOAR_KILLED_BY_LIP     = 0x21000003, /* Data Out, Auto Response, not sent due to a LIP */
-    MPI_IOCLOGINFO_FC_TARGET_DIAR_KILLED_BY_LIP     = 0x21000004, /* Data In, Auto Response, not sent due to a LIP */
-    MPI_IOCLOGINFO_FC_TARGET_DIAR_MISSING_DATA      = 0x21000005, /* Data In, Auto Response, missing data frames */
-    MPI_IOCLOGINFO_FC_TARGET_DONR_KILLED_BY_LIP     = 0x21000006, /* Data Out, No Response, not sent due to a LIP */
-    MPI_IOCLOGINFO_FC_TARGET_WRSP_KILLED_BY_LIP     = 0x21000007, /* Auto-response after a write not sent due to a LIP */
-    MPI_IOCLOGINFO_FC_TARGET_DINR_KILLED_BY_LIP     = 0x21000008, /* Data In, No Response, not completed due to a LIP */
-    MPI_IOCLOGINFO_FC_TARGET_DINR_MISSING_DATA      = 0x21000009, /* Data In, No Response, missing data frames */
-    MPI_IOCLOGINFO_FC_TARGET_MRSP_KILLED_BY_LIP     = 0x2100000a, /* Manual Response not sent due to a LIP */
-    MPI_IOCLOGINFO_FC_TARGET_NO_CLASS_3             = 0x2100000b, /* not sent because remote node does not support Class 3 */
-    MPI_IOCLOGINFO_FC_TARGET_LOGIN_NOT_VALID        = 0x2100000c, /* not sent because login to remote node not validated */
-    MPI_IOCLOGINFO_FC_TARGET_FROM_OUTBOUND          = 0x2100000e, /* cleared from the outbound queue after a logout */
-    MPI_IOCLOGINFO_FC_TARGET_WAITING_FOR_DATA_IN    = 0x2100000f, /* cleared waiting for data after a logout */
-
-    MPI_IOCLOGINFO_FC_LAN_BASE                      = 0x22000000,
-    MPI_IOCLOGINFO_FC_LAN_TRANS_SGL_MISSING         = 0x22000001, /* Transaction Context Sgl Missing */
-    MPI_IOCLOGINFO_FC_LAN_TRANS_WRONG_PLACE         = 0x22000002, /* Transaction Context found before an EOB */
-    MPI_IOCLOGINFO_FC_LAN_TRANS_RES_BITS_SET        = 0x22000003, /* Transaction Context value has reserved bits set */
-    MPI_IOCLOGINFO_FC_LAN_WRONG_SGL_FLAG            = 0x22000004, /* Invalid SGL Flags */
-
-    MPI_IOCLOGINFO_FC_MSG_BASE                      = 0x23000000,
-
-    MPI_IOCLOGINFO_FC_LINK_BASE                     = 0x24000000,
-    MPI_IOCLOGINFO_FC_LINK_LOOP_INIT_TIMEOUT        = 0x24000001, /* Loop initialization timed out */
-    MPI_IOCLOGINFO_FC_LINK_ALREADY_INITIALIZED      = 0x24000002, /* Another system controller already initialized the loop */
-    MPI_IOCLOGINFO_FC_LINK_LINK_NOT_ESTABLISHED     = 0x24000003, /* Not synchronized to signal or still negotiating (possible cable problem) */
-    MPI_IOCLOGINFO_FC_LINK_CRC_ERROR                = 0x24000004, /* CRC check detected error on received frame */
-
-    MPI_IOCLOGINFO_FC_CTX_BASE                      = 0x25000000,
-
-    MPI_IOCLOGINFO_FC_INVALID_FIELD_BYTE_OFFSET     = 0x26000000, /* The lower 24 bits give the byte offset of the field in the request message that is invalid */
-    MPI_IOCLOGINFO_FC_INVALID_FIELD_MAX_OFFSET      = 0x26ffffff,
-
-    MPI_IOCLOGINFO_FC_STATE_CHANGE                  = 0x27000000  /* The lower 24 bits give additional information concerning state change */
-
-} MpiIocLogInfoFc_t;
diff --git a/drivers/message/fusion/lsi/mpi.h b/drivers/message/fusion/lsi/mpi.h
index 02cdc84..81ad776 100644
--- a/drivers/message/fusion/lsi/mpi.h
+++ b/drivers/message/fusion/lsi/mpi.h
@@ -6,7 +6,7 @@
  *          Title:  MPI Message independent structures and definitions
  *  Creation Date:  July 27, 2000
  *
- *    mpi.h Version:  01.05.10
+ *    mpi.h Version:  01.05.11
  *
  *  Version History
  *  ---------------
@@ -76,6 +76,7 @@
  *                      Added EEDP IOCStatus codes.
  *  08-03-05  01.05.09  Bumped MPI_HEADER_VERSION_UNIT.
  *  08-30-05  01.05.10  Added 2 new IOCStatus codes for Target.
+ *  03-27-06  01.05.11  Bumped MPI_HEADER_VERSION_UNIT.
  *  --------------------------------------------------------------------------
  */
 
@@ -106,7 +107,7 @@
 /* Note: The major versions of 0xe0 through 0xff are reserved */
 
 /* versioning for this MPI header set */
-#define MPI_HEADER_VERSION_UNIT             (0x0C)
+#define MPI_HEADER_VERSION_UNIT             (0x0D)
 #define MPI_HEADER_VERSION_DEV              (0x00)
 #define MPI_HEADER_VERSION_UNIT_MASK        (0xFF00)
 #define MPI_HEADER_VERSION_UNIT_SHIFT       (8)
diff --git a/drivers/message/fusion/lsi/mpi_cnfg.h b/drivers/message/fusion/lsi/mpi_cnfg.h
index b1becec..47e13e3 100644
--- a/drivers/message/fusion/lsi/mpi_cnfg.h
+++ b/drivers/message/fusion/lsi/mpi_cnfg.h
@@ -6,7 +6,7 @@
  *          Title:  MPI Config message, structures, and Pages
  *  Creation Date:  July 27, 2000
  *
- *    mpi_cnfg.h Version:  01.05.11
+ *    mpi_cnfg.h Version:  01.05.12
  *
  *  Version History
  *  ---------------
@@ -266,6 +266,16 @@
  *                      Added postpone SATA Init bit to SAS IO Unit Page 1
  *                      ControlFlags.
  *                      Changed LogEntry format for Log Page 0.
+ *  03-27-06  01.05.12  Added two new Flags defines for Manufacturing Page 4.
+ *                      Added Manufacturing Page 7.
+ *                      Added MPI_IOCPAGE2_CAP_FLAGS_RAID_64_BIT_ADDRESSING.
+ *                      Added IOC Page 6.
+ *                      Added PrevBootDeviceForm field to CONFIG_PAGE_BIOS_2.
+ *                      Added MaxLBAHigh field to RAID Volume Page 0.
+ *                      Added Nvdata version fields to SAS IO Unit Page 0.
+ *                      Added AdditionalControlFlags, MaxTargetPortConnectTime,
+ *                      ReportDeviceMissingDelay, and IODeviceMissingDelay
+ *                      fields to SAS IO Unit Page 1.
  *  --------------------------------------------------------------------------
  */
 
@@ -631,9 +641,11 @@ typedef struct _CONFIG_PAGE_MANUFACTURING_4
 } CONFIG_PAGE_MANUFACTURING_4, MPI_POINTER PTR_CONFIG_PAGE_MANUFACTURING_4,
   ManufacturingPage4_t, MPI_POINTER pManufacturingPage4_t;
 
-#define MPI_MANUFACTURING4_PAGEVERSION                  (0x03)
+#define MPI_MANUFACTURING4_PAGEVERSION                  (0x04)
 
 /* defines for the Flags field */
+#define MPI_MANPAGE4_FORCE_BAD_BLOCK_TABLE              (0x80)
+#define MPI_MANPAGE4_FORCE_OFFLINE_FAILOVER             (0x40)
 #define MPI_MANPAGE4_IME_DISABLE                        (0x20)
 #define MPI_MANPAGE4_IM_DISABLE                         (0x10)
 #define MPI_MANPAGE4_IS_DISABLE                         (0x08)
@@ -668,6 +680,66 @@ typedef struct _CONFIG_PAGE_MANUFACTURING_6
 #define MPI_MANUFACTURING6_PAGEVERSION                  (0x00)
 
 
+typedef struct _MPI_MANPAGE7_CONNECTOR_INFO
+{
+    U32                         Pinout;                 /* 00h */
+    U8                          Connector[16];          /* 04h */
+    U8                          Location;               /* 14h */
+    U8                          Reserved1;              /* 15h */
+    U16                         Slot;                   /* 16h */
+    U32                         Reserved2;              /* 18h */
+} MPI_MANPAGE7_CONNECTOR_INFO, MPI_POINTER PTR_MPI_MANPAGE7_CONNECTOR_INFO,
+  MpiManPage7ConnectorInfo_t, MPI_POINTER pMpiManPage7ConnectorInfo_t;
+
+/* defines for the Pinout field */
+#define MPI_MANPAGE7_PINOUT_SFF_8484_L4                 (0x00080000)
+#define MPI_MANPAGE7_PINOUT_SFF_8484_L3                 (0x00040000)
+#define MPI_MANPAGE7_PINOUT_SFF_8484_L2                 (0x00020000)
+#define MPI_MANPAGE7_PINOUT_SFF_8484_L1                 (0x00010000)
+#define MPI_MANPAGE7_PINOUT_SFF_8470_L4                 (0x00000800)
+#define MPI_MANPAGE7_PINOUT_SFF_8470_L3                 (0x00000400)
+#define MPI_MANPAGE7_PINOUT_SFF_8470_L2                 (0x00000200)
+#define MPI_MANPAGE7_PINOUT_SFF_8470_L1                 (0x00000100)
+#define MPI_MANPAGE7_PINOUT_SFF_8482                    (0x00000002)
+#define MPI_MANPAGE7_PINOUT_CONNECTION_UNKNOWN          (0x00000001)
+
+/* defines for the Location field */
+#define MPI_MANPAGE7_LOCATION_UNKNOWN                   (0x01)
+#define MPI_MANPAGE7_LOCATION_INTERNAL                  (0x02)
+#define MPI_MANPAGE7_LOCATION_EXTERNAL                  (0x04)
+#define MPI_MANPAGE7_LOCATION_SWITCHABLE                (0x08)
+#define MPI_MANPAGE7_LOCATION_AUTO                      (0x10)
+#define MPI_MANPAGE7_LOCATION_NOT_PRESENT               (0x20)
+#define MPI_MANPAGE7_LOCATION_NOT_CONNECTED             (0x80)
+
+/*
+ * Host code (drivers, BIOS, utilities, etc.) should leave this define set to
+ * one and check NumPhys at runtime.
+ */
+#ifndef MPI_MANPAGE7_CONNECTOR_INFO_MAX
+#define MPI_MANPAGE7_CONNECTOR_INFO_MAX   (1)
+#endif
+
+typedef struct _CONFIG_PAGE_MANUFACTURING_7
+{
+    CONFIG_PAGE_HEADER          Header;                 /* 00h */
+    U32                         Reserved1;              /* 04h */
+    U32                         Reserved2;              /* 08h */
+    U32                         Flags;                  /* 0Ch */
+    U8                          EnclosureName[16];      /* 10h */
+    U8                          NumPhys;                /* 20h */
+    U8                          Reserved3;              /* 21h */
+    U16                         Reserved4;              /* 22h */
+    MPI_MANPAGE7_CONNECTOR_INFO ConnectorInfo[MPI_MANPAGE7_CONNECTOR_INFO_MAX]; /* 24h */
+} CONFIG_PAGE_MANUFACTURING_7, MPI_POINTER PTR_CONFIG_PAGE_MANUFACTURING_7,
+  ManufacturingPage7_t, MPI_POINTER pManufacturingPage7_t;
+
+#define MPI_MANUFACTURING7_PAGEVERSION                  (0x00)
+
+/* defines for the Flags field */
+#define MPI_MANPAGE7_FLAG_USE_SLOT_INFO                 (0x00000001)
+
+
 /****************************************************************************
 *   IO Unit Config Pages
 ****************************************************************************/
@@ -867,7 +939,7 @@ typedef struct _CONFIG_PAGE_IOC_2
 } CONFIG_PAGE_IOC_2, MPI_POINTER PTR_CONFIG_PAGE_IOC_2,
   IOCPage2_t, MPI_POINTER pIOCPage2_t;
 
-#define MPI_IOCPAGE2_PAGEVERSION                        (0x03)
+#define MPI_IOCPAGE2_PAGEVERSION                        (0x04)
 
 /* IOC Page 2 Capabilities flags */
 
@@ -878,6 +950,7 @@ typedef struct _CONFIG_PAGE_IOC_2
 #define MPI_IOCPAGE2_CAP_FLAGS_RAID_6_SUPPORT           (0x00000010)
 #define MPI_IOCPAGE2_CAP_FLAGS_RAID_10_SUPPORT          (0x00000020)
 #define MPI_IOCPAGE2_CAP_FLAGS_RAID_50_SUPPORT          (0x00000040)
+#define MPI_IOCPAGE2_CAP_FLAGS_RAID_64_BIT_ADDRESSING   (0x10000000)
 #define MPI_IOCPAGE2_CAP_FLAGS_SES_SUPPORT              (0x20000000)
 #define MPI_IOCPAGE2_CAP_FLAGS_SAFTE_SUPPORT            (0x40000000)
 #define MPI_IOCPAGE2_CAP_FLAGS_CROSS_CHANNEL_SUPPORT    (0x80000000)
@@ -975,6 +1048,44 @@ typedef struct _CONFIG_PAGE_IOC_5
 
 #define MPI_IOCPAGE5_PAGEVERSION                        (0x00)
 
+typedef struct _CONFIG_PAGE_IOC_6
+{
+    CONFIG_PAGE_HEADER          Header;                         /* 00h */
+    U32                         CapabilitiesFlags;              /* 04h */
+    U8                          MaxDrivesIS;                    /* 08h */
+    U8                          MaxDrivesIM;                    /* 09h */
+    U8                          MaxDrivesIME;                   /* 0Ah */
+    U8                          Reserved1;                      /* 0Bh */
+    U8                          MinDrivesIS;                    /* 0Ch */
+    U8                          MinDrivesIM;                    /* 0Dh */
+    U8                          MinDrivesIME;                   /* 0Eh */
+    U8                          Reserved2;                      /* 0Fh */
+    U8                          MaxGlobalHotSpares;             /* 10h */
+    U8                          Reserved3;                      /* 11h */
+    U16                         Reserved4;                      /* 12h */
+    U32                         Reserved5;                      /* 14h */
+    U32                         SupportedStripeSizeMapIS;       /* 18h */
+    U32                         SupportedStripeSizeMapIME;      /* 1Ch */
+    U32                         Reserved6;                      /* 20h */
+    U8                          MetadataSize;                   /* 24h */
+    U8                          Reserved7;                      /* 25h */
+    U16                         Reserved8;                      /* 26h */
+    U16                         MaxBadBlockTableEntries;        /* 28h */
+    U16                         Reserved9;                      /* 2Ah */
+    U16                         IRNvsramUsage;                  /* 2Ch */
+    U16                         Reserved10;                     /* 2Eh */
+    U32                         IRNvsramVersion;                /* 30h */
+    U32                         Reserved11;                     /* 34h */
+    U32                         Reserved12;                     /* 38h */
+} CONFIG_PAGE_IOC_6, MPI_POINTER PTR_CONFIG_PAGE_IOC_6,
+  IOCPage6_t, MPI_POINTER pIOCPage6_t;
+
+#define MPI_IOCPAGE6_PAGEVERSION                        (0x00)
+
+/* IOC Page 6 Capabilities Flags */
+
+#define MPI_IOCPAGE6_CAP_FLAGS_GLOBAL_HOT_SPARE         (0x00000001)
+
 
 /****************************************************************************
 *   BIOS Config Pages
@@ -1218,13 +1329,13 @@ typedef struct _CONFIG_PAGE_BIOS_2
     U32                         Reserved5;              /* 14h */
     U32                         Reserved6;              /* 18h */
     U8                          BootDeviceForm;         /* 1Ch */
-    U8                          Reserved7;              /* 1Dh */
+    U8                          PrevBootDeviceForm;     /* 1Ch */
     U16                         Reserved8;              /* 1Eh */
     MPI_BIOSPAGE2_BOOT_DEVICE   BootDevice;             /* 20h */
 } CONFIG_PAGE_BIOS_2, MPI_POINTER PTR_CONFIG_PAGE_BIOS_2,
   BIOSPage2_t, MPI_POINTER pBIOSPage2_t;
 
-#define MPI_BIOSPAGE2_PAGEVERSION                       (0x01)
+#define MPI_BIOSPAGE2_PAGEVERSION                       (0x02)
 
 #define MPI_BIOSPAGE2_FORM_MASK                         (0x0F)
 #define MPI_BIOSPAGE2_FORM_ADAPTER_ORDER                (0x00)
@@ -2080,7 +2191,7 @@ typedef struct _CONFIG_PAGE_RAID_VOL_0
     RAID_VOL0_STATUS        VolumeStatus;   /* 08h */
     RAID_VOL0_SETTINGS      VolumeSettings; /* 0Ch */
     U32                     MaxLBA;         /* 10h */
-    U32                     Reserved1;      /* 14h */
+    U32                     MaxLBAHigh;     /* 14h */
     U32                     StripeSize;     /* 18h */
     U32                     Reserved2;      /* 1Ch */
     U32                     Reserved3;      /* 20h */
@@ -2092,7 +2203,7 @@ typedef struct _CONFIG_PAGE_RAID_VOL_0
 } CONFIG_PAGE_RAID_VOL_0, MPI_POINTER PTR_CONFIG_PAGE_RAID_VOL_0,
   RaidVolumePage0_t, MPI_POINTER pRaidVolumePage0_t;
 
-#define MPI_RAIDVOLPAGE0_PAGEVERSION                    (0x05)
+#define MPI_RAIDVOLPAGE0_PAGEVERSION                    (0x06)
 
 /* values for RAID Volume Page 0 InactiveStatus field */
 #define MPI_RAIDVOLPAGE0_UNKNOWN_INACTIVE               (0x00)
@@ -2324,7 +2435,8 @@ typedef struct _MPI_SAS_IO_UNIT0_PHY_DATA
 typedef struct _CONFIG_PAGE_SAS_IO_UNIT_0
 {
     CONFIG_EXTENDED_PAGE_HEADER     Header;                             /* 00h */
-    U32                             Reserved1;                          /* 08h */
+    U16                             NvdataVersionDefault;               /* 08h */
+    U16                             NvdataVersionPersistent;            /* 0Ah */
     U8                              NumPhys;                            /* 0Ch */
     U8                              Reserved2;                          /* 0Dh */
     U16                             Reserved3;                          /* 0Eh */
@@ -2332,7 +2444,7 @@ typedef struct _CONFIG_PAGE_SAS_IO_UNIT_0
 } CONFIG_PAGE_SAS_IO_UNIT_0, MPI_POINTER PTR_CONFIG_PAGE_SAS_IO_UNIT_0,
   SasIOUnitPage0_t, MPI_POINTER pSasIOUnitPage0_t;
 
-#define MPI_SASIOUNITPAGE0_PAGEVERSION      (0x03)
+#define MPI_SASIOUNITPAGE0_PAGEVERSION      (0x04)
 
 /* values for SAS IO Unit Page 0 PortFlags */
 #define MPI_SAS_IOUNIT0_PORT_FLAGS_DISCOVERY_IN_PROGRESS    (0x08)
@@ -2373,12 +2485,13 @@ typedef struct _CONFIG_PAGE_SAS_IO_UNIT_0
 
 typedef struct _MPI_SAS_IO_UNIT1_PHY_DATA
 {
-    U8          Port;                   /* 00h */
-    U8          PortFlags;              /* 01h */
-    U8          PhyFlags;               /* 02h */
-    U8          MaxMinLinkRate;         /* 03h */
-    U32         ControllerPhyDeviceInfo;/* 04h */
-    U32         Reserved1;              /* 08h */
+    U8          Port;                       /* 00h */
+    U8          PortFlags;                  /* 01h */
+    U8          PhyFlags;                   /* 02h */
+    U8          MaxMinLinkRate;             /* 03h */
+    U32         ControllerPhyDeviceInfo;    /* 04h */
+    U16         MaxTargetPortConnectTime;   /* 08h */
+    U16         Reserved1;                  /* 0Ah */
 } MPI_SAS_IO_UNIT1_PHY_DATA, MPI_POINTER PTR_MPI_SAS_IO_UNIT1_PHY_DATA,
   SasIOUnit1PhyData, MPI_POINTER pSasIOUnit1PhyData;
 
@@ -2395,15 +2508,17 @@ typedef struct _CONFIG_PAGE_SAS_IO_UNIT_1
     CONFIG_EXTENDED_PAGE_HEADER Header;                             /* 00h */
     U16                         ControlFlags;                       /* 08h */
     U16                         MaxNumSATATargets;                  /* 0Ah */
-    U32                         Reserved1;                          /* 0Ch */
+    U16                         AdditionalControlFlags;             /* 0Ch */
+    U16                         Reserved1;                          /* 0Eh */
     U8                          NumPhys;                            /* 10h */
     U8                          SATAMaxQDepth;                      /* 11h */
-    U16                         Reserved2;                          /* 12h */
+    U8                          ReportDeviceMissingDelay;           /* 12h */
+    U8                          IODeviceMissingDelay;               /* 13h */
     MPI_SAS_IO_UNIT1_PHY_DATA   PhyData[MPI_SAS_IOUNIT1_PHY_MAX];   /* 14h */
 } CONFIG_PAGE_SAS_IO_UNIT_1, MPI_POINTER PTR_CONFIG_PAGE_SAS_IO_UNIT_1,
   SasIOUnitPage1_t, MPI_POINTER pSasIOUnitPage1_t;
 
-#define MPI_SASIOUNITPAGE1_PAGEVERSION      (0x05)
+#define MPI_SASIOUNITPAGE1_PAGEVERSION      (0x06)
 
 /* values for SAS IO Unit Page 1 ControlFlags */
 #define MPI_SAS_IOUNIT1_CONTROL_DEVICE_SELF_TEST            (0x8000)
@@ -2428,6 +2543,13 @@ typedef struct _CONFIG_PAGE_SAS_IO_UNIT_1
 #define MPI_SAS_IOUNIT1_CONTROL_FIRST_LVL_DISC_ONLY         (0x0002)
 #define MPI_SAS_IOUNIT1_CONTROL_CLEAR_AFFILIATION           (0x0001)
 
+/* values for SAS IO Unit Page 1 AdditionalControlFlags */
+#define MPI_SAS_IOUNIT1_ACONTROL_ALLOW_TABLE_TO_TABLE       (0x0001)
+
+/* defines for SAS IO Unit Page 1 ReportDeviceMissingDelay */
+#define MPI_SAS_IOUNIT1_REPORT_MISSING_TIMEOUT_MASK         (0x7F)
+#define MPI_SAS_IOUNIT1_REPORT_MISSING_UNIT_16              (0x80)
+
 /* values for SAS IO Unit Page 1 PortFlags */
 #define MPI_SAS_IOUNIT1_PORT_FLAGS_0_TARGET_IOC_NUM         (0x00)
 #define MPI_SAS_IOUNIT1_PORT_FLAGS_1_TARGET_IOC_NUM         (0x04)
diff --git a/drivers/message/fusion/lsi/mpi_history.txt b/drivers/message/fusion/lsi/mpi_history.txt
index 4a5f8dd..582cfe7 100644
--- a/drivers/message/fusion/lsi/mpi_history.txt
+++ b/drivers/message/fusion/lsi/mpi_history.txt
@@ -6,25 +6,25 @@
  Copyright (c) 2000-2005 LSI Logic Corporation.
 
  ---------------------------------------
- Header Set Release Version:    01.05.12
- Header Set Release Date:       08-30-05
+ Header Set Release Version:    01.05.13
+ Header Set Release Date:       03-27-06
  ---------------------------------------
 
  Filename               Current version     Prior version
  ----------             ---------------     -------------
- mpi.h                  01.05.10            01.05.09
- mpi_ioc.h              01.05.10            01.05.09
- mpi_cnfg.h             01.05.11            01.05.10
- mpi_init.h             01.05.06            01.05.06
- mpi_targ.h             01.05.05            01.05.05
+ mpi.h                  01.05.11            01.05.10
+ mpi_ioc.h              01.05.11            01.05.10
+ mpi_cnfg.h             01.05.12            01.05.11
+ mpi_init.h             01.05.07            01.05.06
+ mpi_targ.h             01.05.06            01.05.05
  mpi_fc.h               01.05.01            01.05.01
  mpi_lan.h              01.05.01            01.05.01
  mpi_raid.h             01.05.02            01.05.02
  mpi_tool.h             01.05.03            01.05.03
  mpi_inb.h              01.05.01            01.05.01
- mpi_sas.h              01.05.02            01.05.01
- mpi_type.h             01.05.02            01.05.01
- mpi_history.txt        01.05.12            01.05.11
+ mpi_sas.h              01.05.03            01.05.02
+ mpi_type.h             01.05.02            01.05.02
+ mpi_history.txt        01.05.13            01.05.12
 
 
  *  Date      Version   Description
@@ -93,6 +93,7 @@ mpi.h
  *                      Added EEDP IOCStatus codes.
  *  08-03-05  01.05.09  Bumped MPI_HEADER_VERSION_UNIT.
  *  08-30-05  01.05.10  Added 2 new IOCStatus codes for Target.
+ *  03-27-06  01.05.11  Bumped MPI_HEADER_VERSION_UNIT.
  *  --------------------------------------------------------------------------
 
 mpi_ioc.h
@@ -170,6 +171,17 @@ mpi_ioc.h
  *                      Added new ReasonCode value for SAS Device Status Change
  *                      event.
  *                      Added new family code for FC949E.
+ *  03-27-06  01.05.11  Added MPI_IOCFACTS_CAPABILITY_TLR.
+ *                      Added additional Reason Codes and more event data fields
+ *                      to EVENT_DATA_SAS_DEVICE_STATUS_CHANGE.
+ *                      Added EVENT_DATA_SAS_BROADCAST_PRIMITIVE structure and
+ *                      new event.
+ *                      Added MPI_EVENT_SAS_SMP_ERROR and event data structure.
+ *                      Added MPI_EVENT_SAS_INIT_DEVICE_STATUS_CHANGE and event
+ *                      data structure.
+ *                      Added MPI_EVENT_SAS_INIT_TABLE_OVERFLOW and event
+ *                      data structure.
+ *                      Added MPI_EXT_IMAGE_TYPE_INITIALIZATION.
  *  --------------------------------------------------------------------------
 
 mpi_cnfg.h
@@ -425,6 +437,16 @@ mpi_cnfg.h
  *                      Added postpone SATA Init bit to SAS IO Unit Page 1
  *                      ControlFlags.
  *                      Changed LogEntry format for Log Page 0.
+ *  03-27-06  01.05.12  Added two new Flags defines for Manufacturing Page 4.
+ *                      Added Manufacturing Page 7.
+ *                      Added MPI_IOCPAGE2_CAP_FLAGS_RAID_64_BIT_ADDRESSING.
+ *                      Added IOC Page 6.
+ *                      Added PrevBootDeviceForm field to CONFIG_PAGE_BIOS_2.
+ *                      Added MaxLBAHigh field to RAID Volume Page 0.
+ *                      Added Nvdata version fields to SAS IO Unit Page 0.
+ *                      Added AdditionalControlFlags, MaxTargetPortConnectTime,
+ *                      ReportDeviceMissingDelay, and IODeviceMissingDelay
+ *                      fields to SAS IO Unit Page 1.
  *  --------------------------------------------------------------------------
 
 mpi_init.h
@@ -467,6 +489,7 @@ mpi_init.h
  *                      Added four new defines for SEP SlotStatus.
  *  08-03-05  01.05.06  Fixed some MPI_SCSIIO32_MSGFLGS_ defines to make them
  *                      unique in the first 32 characters.
+ *  03-27-06  01.05.07  Added Task Management type of Clear ACA.
  *  --------------------------------------------------------------------------
 
 mpi_targ.h
@@ -511,6 +534,7 @@ mpi_targ.h
  *  02-22-05  01.05.03  Changed a comment.
  *  03-11-05  01.05.04  Removed TargetAssistExtended Request.
  *  06-24-05  01.05.05  Added TargetAssistExtended structures and defines.
+ *  03-27-06  01.05.06  Added a comment.
  *  --------------------------------------------------------------------------
 
 mpi_fc.h
@@ -610,6 +634,10 @@ mpi_sas.h
  *  08-30-05  01.05.02  Added DeviceInfo bit for SEP.
  *                      Added PrimFlags and Primitive field to SAS IO Unit
  *                      Control request, and added a new operation code.
+ *  03-27-06  01.05.03  Added Force Full Discovery, Transmit Port Select Signal,
+ *                      and Remove Device operations to SAS IO Unit Control.
+ *                      Added DevHandle field to SAS IO Unit Control request and
+ *                      reply.
  *  --------------------------------------------------------------------------
 
 mpi_type.h
@@ -625,20 +653,20 @@ mpi_type.h
 
 mpi_history.txt         Parts list history
 
-Filename    01.05.12  01.05.11  01.05.10  01.05.09
-----------  --------  --------  --------  --------
-mpi.h       01.05.10  01.05.09  01.05.08  01.05.07
-mpi_ioc.h   01.05.10  01.05.09  01.05.09  01.05.08
-mpi_cnfg.h  01.05.11  01.05.10  01.05.09  01.05.08
-mpi_init.h  01.05.06  01.05.06  01.05.05  01.05.04
-mpi_targ.h  01.05.05  01.05.05  01.05.05  01.05.04
-mpi_fc.h    01.05.01  01.05.01  01.05.01  01.05.01
-mpi_lan.h   01.05.01  01.05.01  01.05.01  01.05.01
-mpi_raid.h  01.05.02  01.05.02  01.05.02  01.05.02
-mpi_tool.h  01.05.03  01.05.03  01.05.03  01.05.03
-mpi_inb.h   01.05.01  01.05.01  01.05.01  01.05.01
-mpi_sas.h   01.05.02  01.05.01  01.05.01  01.05.01
-mpi_type.h  01.05.02  01.05.01  01.05.01  01.05.01
+Filename    01.05.13  01.05.12  01.05.11  01.05.10  01.05.09
+----------  --------  --------  --------  --------  --------
+mpi.h       01.05.11  01.05.10  01.05.09  01.05.08  01.05.07
+mpi_ioc.h   01.05.11  01.05.10  01.05.09  01.05.09  01.05.08
+mpi_cnfg.h  01.05.12  01.05.11  01.05.10  01.05.09  01.05.08
+mpi_init.h  01.05.07  01.05.06  01.05.06  01.05.05  01.05.04
+mpi_targ.h  01.05.06  01.05.05  01.05.05  01.05.05  01.05.04
+mpi_fc.h    01.05.01  01.05.01  01.05.01  01.05.01  01.05.01
+mpi_lan.h   01.05.01  01.05.01  01.05.01  01.05.01  01.05.01
+mpi_raid.h  01.05.02  01.05.02  01.05.02  01.05.02  01.05.02
+mpi_tool.h  01.05.03  01.05.03  01.05.03  01.05.03  01.05.03
+mpi_inb.h   01.05.01  01.05.01  01.05.01  01.05.01  01.05.01
+mpi_sas.h   01.05.03  01.05.02  01.05.01  01.05.01  01.05.01
+mpi_type.h  01.05.02  01.05.02  01.05.01  01.05.01  01.05.01
 
 Filename    01.05.08   01.05.07   01.05.06   01.05.05   01.05.04   01.05.03
 ----------  --------   --------   --------   --------   --------   --------
diff --git a/drivers/message/fusion/lsi/mpi_init.h b/drivers/message/fusion/lsi/mpi_init.h
index 68941f4..c1c6789 100644
--- a/drivers/message/fusion/lsi/mpi_init.h
+++ b/drivers/message/fusion/lsi/mpi_init.h
@@ -6,7 +6,7 @@
  *          Title:  MPI initiator mode messages and structures
  *  Creation Date:  June 8, 2000
  *
- *    mpi_init.h Version:  01.05.06
+ *    mpi_init.h Version:  01.05.07
  *
  *  Version History
  *  ---------------
@@ -52,6 +52,7 @@
  *                      Added four new defines for SEP SlotStatus.
  *  08-03-05  01.05.06  Fixed some MPI_SCSIIO32_MSGFLGS_ defines to make them
  *                      unique in the first 32 characters.
+ *  03-27-06  01.05.07  Added Task Management type of Clear ACA.
  *  --------------------------------------------------------------------------
  */
 
@@ -427,6 +428,7 @@ typedef struct _MSG_SCSI_TASK_MGMT
 #define MPI_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET    (0x05)
 #define MPI_SCSITASKMGMT_TASKTYPE_CLEAR_TASK_SET        (0x06)
 #define MPI_SCSITASKMGMT_TASKTYPE_QUERY_TASK            (0x07)
+#define MPI_SCSITASKMGMT_TASKTYPE_CLEAR_ACA             (0x08)
 
 /* MsgFlags bits */
 #define MPI_SCSITASKMGMT_MSGFLAGS_TARGET_RESET_OPTION   (0x00)
diff --git a/drivers/message/fusion/lsi/mpi_ioc.h b/drivers/message/fusion/lsi/mpi_ioc.h
index 2c5f43f..18ba407 100644
--- a/drivers/message/fusion/lsi/mpi_ioc.h
+++ b/drivers/message/fusion/lsi/mpi_ioc.h
@@ -6,7 +6,7 @@
  *          Title:  MPI IOC, Port, Event, FW Download, and FW Upload messages
  *  Creation Date:  August 11, 2000
  *
- *    mpi_ioc.h Version:  01.05.10
+ *    mpi_ioc.h Version:  01.05.11
  *
  *  Version History
  *  ---------------
@@ -87,6 +87,17 @@
  *                      Added new ReasonCode value for SAS Device Status Change
  *                      event.
  *                      Added new family code for FC949E.
+ *  03-27-06  01.05.11  Added MPI_IOCFACTS_CAPABILITY_TLR.
+ *                      Added additional Reason Codes and more event data fields
+ *                      to EVENT_DATA_SAS_DEVICE_STATUS_CHANGE.
+ *                      Added EVENT_DATA_SAS_BROADCAST_PRIMITIVE structure and
+ *                      new event.
+ *                      Added MPI_EVENT_SAS_SMP_ERROR and event data structure.
+ *                      Added MPI_EVENT_SAS_INIT_DEVICE_STATUS_CHANGE and event
+ *                      data structure.
+ *                      Added MPI_EVENT_SAS_INIT_TABLE_OVERFLOW and event
+ *                      data structure.
+ *                      Added MPI_EXT_IMAGE_TYPE_INITIALIZATION.
  *  --------------------------------------------------------------------------
  */
 
@@ -272,6 +283,7 @@ typedef struct _MSG_IOC_FACTS_REPLY
 #define MPI_IOCFACTS_CAPABILITY_MULTICAST               (0x00000100)
 #define MPI_IOCFACTS_CAPABILITY_SCSIIO32                (0x00000200)
 #define MPI_IOCFACTS_CAPABILITY_NO_SCSIIO16             (0x00000400)
+#define MPI_IOCFACTS_CAPABILITY_TLR                     (0x00000800)
 
 
 /*****************************************************************************
@@ -448,30 +460,34 @@ typedef struct _MSG_EVENT_ACK_REPLY
 
 /* Event */
 
-#define MPI_EVENT_NONE                      (0x00000000)
-#define MPI_EVENT_LOG_DATA                  (0x00000001)
-#define MPI_EVENT_STATE_CHANGE              (0x00000002)
-#define MPI_EVENT_UNIT_ATTENTION            (0x00000003)
-#define MPI_EVENT_IOC_BUS_RESET             (0x00000004)
-#define MPI_EVENT_EXT_BUS_RESET             (0x00000005)
-#define MPI_EVENT_RESCAN                    (0x00000006)
-#define MPI_EVENT_LINK_STATUS_CHANGE        (0x00000007)
-#define MPI_EVENT_LOOP_STATE_CHANGE         (0x00000008)
-#define MPI_EVENT_LOGOUT                    (0x00000009)
-#define MPI_EVENT_EVENT_CHANGE              (0x0000000A)
-#define MPI_EVENT_INTEGRATED_RAID           (0x0000000B)
-#define MPI_EVENT_SCSI_DEVICE_STATUS_CHANGE (0x0000000C)
-#define MPI_EVENT_ON_BUS_TIMER_EXPIRED      (0x0000000D)
-#define MPI_EVENT_QUEUE_FULL                (0x0000000E)
-#define MPI_EVENT_SAS_DEVICE_STATUS_CHANGE  (0x0000000F)
-#define MPI_EVENT_SAS_SES                   (0x00000010)
-#define MPI_EVENT_PERSISTENT_TABLE_FULL     (0x00000011)
-#define MPI_EVENT_SAS_PHY_LINK_STATUS       (0x00000012)
-#define MPI_EVENT_SAS_DISCOVERY_ERROR       (0x00000013)
-#define MPI_EVENT_IR_RESYNC_UPDATE          (0x00000014)
-#define MPI_EVENT_IR2                       (0x00000015)
-#define MPI_EVENT_SAS_DISCOVERY             (0x00000016)
-#define MPI_EVENT_LOG_ENTRY_ADDED           (0x00000021)
+#define MPI_EVENT_NONE                          (0x00000000)
+#define MPI_EVENT_LOG_DATA                      (0x00000001)
+#define MPI_EVENT_STATE_CHANGE                  (0x00000002)
+#define MPI_EVENT_UNIT_ATTENTION                (0x00000003)
+#define MPI_EVENT_IOC_BUS_RESET                 (0x00000004)
+#define MPI_EVENT_EXT_BUS_RESET                 (0x00000005)
+#define MPI_EVENT_RESCAN                        (0x00000006)
+#define MPI_EVENT_LINK_STATUS_CHANGE            (0x00000007)
+#define MPI_EVENT_LOOP_STATE_CHANGE             (0x00000008)
+#define MPI_EVENT_LOGOUT                        (0x00000009)
+#define MPI_EVENT_EVENT_CHANGE                  (0x0000000A)
+#define MPI_EVENT_INTEGRATED_RAID               (0x0000000B)
+#define MPI_EVENT_SCSI_DEVICE_STATUS_CHANGE     (0x0000000C)
+#define MPI_EVENT_ON_BUS_TIMER_EXPIRED          (0x0000000D)
+#define MPI_EVENT_QUEUE_FULL                    (0x0000000E)
+#define MPI_EVENT_SAS_DEVICE_STATUS_CHANGE      (0x0000000F)
+#define MPI_EVENT_SAS_SES                       (0x00000010)
+#define MPI_EVENT_PERSISTENT_TABLE_FULL         (0x00000011)
+#define MPI_EVENT_SAS_PHY_LINK_STATUS           (0x00000012)
+#define MPI_EVENT_SAS_DISCOVERY_ERROR           (0x00000013)
+#define MPI_EVENT_IR_RESYNC_UPDATE              (0x00000014)
+#define MPI_EVENT_IR2                           (0x00000015)
+#define MPI_EVENT_SAS_DISCOVERY                 (0x00000016)
+#define MPI_EVENT_SAS_BROADCAST_PRIMITIVE       (0x00000017)
+#define MPI_EVENT_SAS_INIT_DEVICE_STATUS_CHANGE (0x00000018)
+#define MPI_EVENT_SAS_INIT_TABLE_OVERFLOW       (0x00000019)
+#define MPI_EVENT_SAS_SMP_ERROR                 (0x0000001A)
+#define MPI_EVENT_LOG_ENTRY_ADDED               (0x00000021)
 
 /* AckRequired field values */
 
@@ -558,18 +574,25 @@ typedef struct _EVENT_DATA_SAS_DEVICE_STATUS_CHANGE
     U8                      PhyNum;                     /* 0Eh */
     U8                      Reserved1;                  /* 0Fh */
     U64                     SASAddress;                 /* 10h */
+    U8                      LUN[8];                     /* 18h */
+    U16                     TaskTag;                    /* 20h */
+    U16                     Reserved2;                  /* 22h */
 } EVENT_DATA_SAS_DEVICE_STATUS_CHANGE,
   MPI_POINTER PTR_EVENT_DATA_SAS_DEVICE_STATUS_CHANGE,
   MpiEventDataSasDeviceStatusChange_t,
   MPI_POINTER pMpiEventDataSasDeviceStatusChange_t;
 
 /* MPI SAS Device Status Change Event data ReasonCode values */
-#define MPI_EVENT_SAS_DEV_STAT_RC_ADDED                 (0x03)
-#define MPI_EVENT_SAS_DEV_STAT_RC_NOT_RESPONDING        (0x04)
-#define MPI_EVENT_SAS_DEV_STAT_RC_SMART_DATA            (0x05)
-#define MPI_EVENT_SAS_DEV_STAT_RC_NO_PERSIST_ADDED      (0x06)
-#define MPI_EVENT_SAS_DEV_STAT_RC_UNSUPPORTED           (0x07)
-#define MPI_EVENT_SAS_DEV_STAT_RC_INTERNAL_DEVICE_RESET (0x08)
+#define MPI_EVENT_SAS_DEV_STAT_RC_ADDED                     (0x03)
+#define MPI_EVENT_SAS_DEV_STAT_RC_NOT_RESPONDING            (0x04)
+#define MPI_EVENT_SAS_DEV_STAT_RC_SMART_DATA                (0x05)
+#define MPI_EVENT_SAS_DEV_STAT_RC_NO_PERSIST_ADDED          (0x06)
+#define MPI_EVENT_SAS_DEV_STAT_RC_UNSUPPORTED               (0x07)
+#define MPI_EVENT_SAS_DEV_STAT_RC_INTERNAL_DEVICE_RESET     (0x08)
+#define MPI_EVENT_SAS_DEV_STAT_RC_TASK_ABORT_INTERNAL       (0x09)
+#define MPI_EVENT_SAS_DEV_STAT_RC_ABORT_TASK_SET_INTERNAL   (0x0A)
+#define MPI_EVENT_SAS_DEV_STAT_RC_CLEAR_TASK_SET_INTERNAL   (0x0B)
+#define MPI_EVENT_SAS_DEV_STAT_RC_QUERY_TASK_INTERNAL       (0x0C)
 
 
 /* SCSI Event data for Queue Full event */
@@ -742,6 +765,27 @@ typedef struct _EVENT_DATA_SAS_SES
 } EVENT_DATA_SAS_SES, MPI_POINTER PTR_EVENT_DATA_SAS_SES,
   MpiEventDataSasSes_t, MPI_POINTER pMpiEventDataSasSes_t;
 
+/* SAS Broadcast Primitive Event data */
+
+typedef struct _EVENT_DATA_SAS_BROADCAST_PRIMITIVE
+{
+    U8                      PhyNum;                     /* 00h */
+    U8                      Port;                       /* 01h */
+    U8                      PortWidth;                  /* 02h */
+    U8                      Primitive;                  /* 04h */
+} EVENT_DATA_SAS_BROADCAST_PRIMITIVE,
+  MPI_POINTER PTR_EVENT_DATA_SAS_BROADCAST_PRIMITIVE,
+  MpiEventDataSasBroadcastPrimitive_t,
+  MPI_POINTER pMpiEventDataSasBroadcastPrimitive_t;
+
+#define MPI_EVENT_PRIMITIVE_CHANGE              (0x01)
+#define MPI_EVENT_PRIMITIVE_EXPANDER            (0x03)
+#define MPI_EVENT_PRIMITIVE_RESERVED2           (0x04)
+#define MPI_EVENT_PRIMITIVE_RESERVED3           (0x05)
+#define MPI_EVENT_PRIMITIVE_RESERVED4           (0x06)
+#define MPI_EVENT_PRIMITIVE_CHANGE0_RESERVED    (0x07)
+#define MPI_EVENT_PRIMITIVE_CHANGE1_RESERVED    (0x08)
+
 /* SAS Phy Link Status Event data */
 
 typedef struct _EVENT_DATA_SAS_PHY_LINK_STATUS
@@ -804,6 +848,53 @@ typedef struct _EVENT_DATA_DISCOVERY_ERROR
 #define MPI_EVENT_DSCVRY_ERR_DS_MULTPL_PATHS                (0x00000800)
 #define MPI_EVENT_DSCVRY_ERR_DS_MAX_SATA_TARGETS            (0x00001000)
 
+/* SAS SMP Error Event data */
+
+typedef struct _EVENT_DATA_SAS_SMP_ERROR
+{
+    U8                      Status;                     /* 00h */
+    U8                      Port;                       /* 01h */
+    U8                      SMPFunctionResult;          /* 02h */
+    U8                      Reserved1;                  /* 03h */
+    U64                     SASAddress;                 /* 04h */
+} EVENT_DATA_SAS_SMP_ERROR, MPI_POINTER PTR_EVENT_DATA_SAS_SMP_ERROR,
+  MpiEventDataSasSmpError_t, MPI_POINTER pMpiEventDataSasSmpError_t;
+
+/* defines for the Status field of the SAS SMP Error event */
+#define MPI_EVENT_SAS_SMP_FUNCTION_RESULT_VALID         (0x00)
+#define MPI_EVENT_SAS_SMP_CRC_ERROR                     (0x01)
+#define MPI_EVENT_SAS_SMP_TIMEOUT                       (0x02)
+#define MPI_EVENT_SAS_SMP_NO_DESTINATION                (0x03)
+#define MPI_EVENT_SAS_SMP_BAD_DESTINATION               (0x04)
+
+/* SAS Initiator Device Status Change Event data */
+
+typedef struct _EVENT_DATA_SAS_INIT_DEV_STATUS_CHANGE
+{
+    U8                      ReasonCode;                 /* 00h */
+    U8                      Port;                       /* 01h */
+    U16                     DevHandle;                  /* 02h */
+    U64                     SASAddress;                 /* 04h */
+} EVENT_DATA_SAS_INIT_DEV_STATUS_CHANGE,
+  MPI_POINTER PTR_EVENT_DATA_SAS_INIT_DEV_STATUS_CHANGE,
+  MpiEventDataSasInitDevStatusChange_t,
+  MPI_POINTER pMpiEventDataSasInitDevStatusChange_t;
+
+/* defines for the ReasonCode field of the SAS Initiator Device Status Change event */
+#define MPI_EVENT_SAS_INIT_RC_ADDED                 (0x01)
+
+/* SAS Initiator Device Table Overflow Event data */
+
+typedef struct _EVENT_DATA_SAS_INIT_TABLE_OVERFLOW
+{
+    U8                      MaxInit;                    /* 00h */
+    U8                      CurrentInit;                /* 01h */
+    U16                     Reserved1;                  /* 02h */
+} EVENT_DATA_SAS_INIT_TABLE_OVERFLOW,
+  MPI_POINTER PTR_EVENT_DATA_SAS_INIT_TABLE_OVERFLOW,
+  MpiEventDataSasInitTableOverflow_t,
+  MPI_POINTER pMpiEventDataSasInitTableOverflow_t;
+
 
 /*****************************************************************************
 *
@@ -1013,5 +1104,6 @@ typedef struct _MPI_EXT_IMAGE_HEADER
 #define MPI_EXT_IMAGE_TYPE_FW                   (0x01)
 #define MPI_EXT_IMAGE_TYPE_NVDATA               (0x03)
 #define MPI_EXT_IMAGE_TYPE_BOOTLOADER           (0x04)
+#define MPI_EXT_IMAGE_TYPE_INITIALIZATION       (0x05)
 
 #endif
diff --git a/drivers/message/fusion/lsi/mpi_log_sas.h b/drivers/message/fusion/lsi/mpi_log_sas.h
index a9c14ad..871ebc0 100644
--- a/drivers/message/fusion/lsi/mpi_log_sas.h
+++ b/drivers/message/fusion/lsi/mpi_log_sas.h
@@ -13,6 +13,8 @@
 #ifndef IOPI_IOCLOGINFO_H_INCLUDED
 #define IOPI_IOCLOGINFO_H_INCLUDED
 
+#define SAS_LOGINFO_NEXUS_LOSS		0x31170000
+#define SAS_LOGINFO_MASK			0xFFFF0000
 
 /****************************************************************************/
 /*  IOC LOGINFO defines, 0x00000000 - 0x0FFFFFFF                            */
@@ -51,6 +53,9 @@
 #define IOP_LOGINFO_CODE_CONFIG_INVALID_PAGE_DNM        (0x00030500) /* Device Not Mapped */
 #define IOP_LOGINFO_CODE_CONFIG_INVALID_PAGE_PERSIST    (0x00030600) /* Persistent Page not found */
 #define IOP_LOGINFO_CODE_CONFIG_INVALID_PAGE_DEFAULT    (0x00030700) /* Default Page not found */
+
+#define IOP_LOGINFO_CODE_DIAG_MSG_ERROR                 (0x00040000) /* Error handling diag msg - or'd with diag status */
+
 #define IOP_LOGINFO_CODE_TASK_TERMINATED                (0x00050000)
 
 #define IOP_LOGINFO_CODE_ENCL_MGMT_READ_ACTION_ERR0R    (0x00060001) /* Read Action not supported for SEP msg */
@@ -103,6 +108,7 @@
 #define PL_LOGINFO_CODE_IO_EXECUTED                         (0x00140000)
 #define PL_LOGINFO_CODE_PERS_RESV_OUT_NOT_AFFIL_OWNER       (0x00150000)
 #define PL_LOGINFO_CODE_OPEN_TXDMA_ABORT                    (0x00160000)
+#define PL_LOGINFO_CODE_IO_DEVICE_MISSING_DELAY_RETRY       (0x00170000)
 #define PL_LOGINFO_SUB_CODE_OPEN_FAILURE                    (0x00000100)
 #define PL_LOGINFO_SUB_CODE_OPEN_FAILURE_NO_DEST_TIMEOUT    (0x00000101)
 #define PL_LOGINFO_SUB_CODE_OPEN_FAILURE_ORR_TIMEOUT        (0x0000011A) /* Open Reject (Retry) Timeout */
@@ -165,11 +171,81 @@
 /****************************************************************************/
 /* IR LOGINFO_CODE defines, valid if IOC_LOGINFO_ORIGINATOR = IR            */
 /****************************************************************************/
-#define IR_LOGINFO_CODE_UNUSED1                         (0x00010000)
-#define IR_LOGINFO_CODE_UNUSED2                         (0x00020000)
+#define IR_LOGINFO_RAID_ACTION_ERROR                           (0x00010000)
+#define IR_LOGINFO_CODE_UNUSED2                                (0x00020000)
+
+/* Amount of information passed down for Create Volume is too large */
+#define IR_LOGINFO_VOLUME_CREATE_INVALID_LENGTH                (0x00010001)
+/* Creation of duplicate volume attempted (Bus/Target ID checked) */
+#define IR_LOGINFO_VOLUME_CREATE_DUPLICATE                     (0x00010002)
+/* Creation failed due to maximum number of supported volumes exceeded */
+#define IR_LOGINFO_VOLUME_CREATE_NO_SLOTS                      (0x00010003)
+/* Creation failed due to DMA error in trying to read from host */
+#define IR_LOGINFO_VOLUME_CREATE_DMA_ERROR                     (0x00010004)
+/* Creation failed due to invalid volume type passed down */
+#define IR_LOGINFO_VOLUME_CREATE_INVALID_VOLUME_TYPE           (0x00010005)
+/* Creation failed due to error reading MFG Page 4 */
+#define IR_LOGINFO_VOLUME_MFG_PAGE4_ERROR                      (0x00010006)
+/* Creation failed when trying to create internal structures */
+#define IR_LOGINFO_VOLUME_INTERNAL_CONFIG_STRUCTURE_ERROR      (0x00010007)
+
+/* Activation failed due to trying to activate an already active volume */
+#define IR_LOGINFO_VOLUME_ACTIVATING_AN_ACTIVE_VOLUME          (0x00010010)
+/* Activation failed due to trying to active unsupported volume type  */
+#define IR_LOGINFO_VOLUME_ACTIVATING_INVALID_VOLUME_TYPE       (0x00010011)
+/* Activation failed due to trying to active too many volumes  */
+#define IR_LOGINFO_VOLUME_ACTIVATING_TOO_MANY_VOLUMES          (0x00010012)
+/* Activation failed due to Volume ID in use already */
+#define IR_LOGINFO_VOLUME_ACTIVATING_VOLUME_ID_IN_USE          (0x00010013)
+/* Activation failed call to activateVolume returned failure */
+#define IR_LOGINFO_VOLUME_ACTIVATE_VOLUME_FAILED               (0x00010014)
+/* Activation failed trying to import the volume */
+#define IR_LOGINFO_VOLUME_ACTIVATING_IMPORT_VOLUME_FAILED      (0x00010015)
+
+/* Phys Disk failed, too many phys disks */
+#define IR_LOGINFO_PHYSDISK_CREATE_TOO_MANY_DISKS              (0x00010020)
+/* Amount of information passed down for Create Pnysdisk is too large */
+#define IR_LOGINFO_PHYSDISK_CREATE_INVALID_LENGTH              (0x00010021)
+/* Creation failed due to DMA error in trying to read from host */
+#define IR_LOGINFO_PHYSDISK_CREATE_DMA_ERROR                   (0x00010022)
+/* Creation failed due to invalid Bus TargetID passed down */
+#define IR_LOGINFO_PHYSDISK_CREATE_BUS_TID_INVALID             (0x00010023)
+/* Creation failed due to error in creating RAID Phys Disk Config Page */
+#define IR_LOGINFO_PHYSDISK_CREATE_CONFIG_PAGE_ERROR           (0x00010024)
+
+
+/* Compatibility Error : IR Disabled */
+#define IR_LOGINFO_COMPAT_ERROR_RAID_DISABLED                  (0x00010030)
+/* Compatibility Error : Inquiry Comand failed */
+#define IR_LOGINFO_COMPAT_ERROR_INQUIRY_FAILED                 (0x00010031)
+/* Compatibility Error : Device not direct access device */
+#define IR_LOGINFO_COMPAT_ERROR_NOT_DIRECT_ACCESS              (0x00010032)
+/* Compatibility Error : Removable device found */
+#define IR_LOGINFO_COMPAT_ERROR_REMOVABLE_FOUND                (0x00010033)
+/* Compatibility Error : Device SCSI Version not 2 or higher */
+#define IR_LOGINFO_COMPAT_ERROR_NEED_SCSI_2_OR_HIGHER          (0x00010034)
+/* Compatibility Error : SATA device, 48 BIT LBA not supported */
+#define IR_LOGINFO_COMPAT_ERROR_SATA_48BIT_LBA_NOT_SUPPORTED   (0x00010035)
+/* Compatibility Error : Device does not have 512 byte block sizes */
+#define IR_LOGINFO_COMPAT_ERROR_DEVICE_NOT_512_BYTE_BLOCK      (0x00010036)
+/* Compatibility Error : Volume Type check failed */
+#define IR_LOGINFO_COMPAT_ERROR_VOLUME_TYPE_CHECK_FAILED       (0x00010037)
+/* Compatibility Error : Volume Type is unsupported by FW */
+#define IR_LOGINFO_COMPAT_ERROR_UNSUPPORTED_VOLUME_TYPE        (0x00010038)
+/* Compatibility Error : Disk drive too small for use in volume */
+#define IR_LOGINFO_COMPAT_ERROR_DISK_TOO_SMALL                 (0x00010039)
+/* Compatibility Error : Phys disk for Create Volume not found */
+#define IR_LOGINFO_COMPAT_ERROR_PHYS_DISK_NOT_FOUND            (0x0001003A)
+/* Compatibility Error : membership count error, too many or too few disks for volume type */
+#define IR_LOGINFO_COMPAT_ERROR_MEMBERSHIP_COUNT               (0x0001003B)
+/* Compatibility Error : Disk stripe sizes must be 64KB */
+#define IR_LOGINFO_COMPAT_ERROR_NON_64K_STRIPE_SIZE            (0x0001003C)
+/* Compatibility Error : IME size limited to < 2TB */
+#define IR_LOGINFO_COMPAT_ERROR_IME_VOL_NOT_CURRENTLY_SUPPORTED (0x0001003D)
+
 
 /****************************************************************************/
-/* Defines for convienence                                                  */
+/* Defines for convenience                                                  */
 /****************************************************************************/
 #define IOC_LOGINFO_PREFIX_IOP                          ((MPI_IOCLOGINFO_TYPE_SAS << MPI_IOCLOGINFO_TYPE_SHIFT) | IOC_LOGINFO_ORIGINATOR_IOP)
 #define IOC_LOGINFO_PREFIX_PL                           ((MPI_IOCLOGINFO_TYPE_SAS << MPI_IOCLOGINFO_TYPE_SHIFT) | IOC_LOGINFO_ORIGINATOR_PL)
diff --git a/drivers/message/fusion/lsi/mpi_sas.h b/drivers/message/fusion/lsi/mpi_sas.h
index 7051486..50b8f0a 100644
--- a/drivers/message/fusion/lsi/mpi_sas.h
+++ b/drivers/message/fusion/lsi/mpi_sas.h
@@ -6,7 +6,7 @@
  *          Title:  MPI Serial Attached SCSI structures and definitions
  *  Creation Date:  August 19, 2004
  *
- *    mpi_sas.h Version:  01.05.02
+ *    mpi_sas.h Version:  01.05.03
  *
  *  Version History
  *  ---------------
@@ -17,6 +17,10 @@
  *  08-30-05  01.05.02  Added DeviceInfo bit for SEP.
  *                      Added PrimFlags and Primitive field to SAS IO Unit
  *                      Control request, and added a new operation code.
+ *  03-27-06  01.05.03  Added Force Full Discovery, Transmit Port Select Signal,
+ *                      and Remove Device operations to SAS IO Unit Control.
+ *                      Added DevHandle field to SAS IO Unit Control request and
+ *                      reply.
  *  --------------------------------------------------------------------------
  */
 
@@ -209,7 +213,7 @@ typedef struct _MSG_SAS_IOUNIT_CONTROL_REQUEST
     U8                      Reserved1;          /* 01h */
     U8                      ChainOffset;        /* 02h */
     U8                      Function;           /* 03h */
-    U16                     Reserved2;          /* 04h */
+    U16                     DevHandle;          /* 04h */
     U8                      Reserved3;          /* 06h */
     U8                      MsgFlags;           /* 07h */
     U32                     MsgContext;         /* 08h */
@@ -231,6 +235,9 @@ typedef struct _MSG_SAS_IOUNIT_CONTROL_REQUEST
 #define MPI_SAS_OP_PHY_CLEAR_ERROR_LOG          (0x08)
 #define MPI_SAS_OP_MAP_CURRENT                  (0x09)
 #define MPI_SAS_OP_SEND_PRIMITIVE               (0x0A)
+#define MPI_SAS_OP_FORCE_FULL_DISCOVERY         (0x0B)
+#define MPI_SAS_OP_TRANSMIT_PORT_SELECT_SIGNAL  (0x0C)
+#define MPI_SAS_OP_TRANSMIT_REMOVE_DEVICE       (0x0D)
 
 /* values for the PrimFlags field */
 #define MPI_SAS_PRIMFLAGS_SINGLE                (0x08)
@@ -245,7 +252,7 @@ typedef struct _MSG_SAS_IOUNIT_CONTROL_REPLY
     U8                      Reserved1;          /* 01h */
     U8                      MsgLength;          /* 02h */
     U8                      Function;           /* 03h */
-    U16                     Reserved2;          /* 04h */
+    U16                     DevHandle;          /* 04h */
     U8                      Reserved3;          /* 06h */
     U8                      MsgFlags;           /* 07h */
     U32                     MsgContext;         /* 08h */
diff --git a/drivers/message/fusion/lsi/mpi_targ.h b/drivers/message/fusion/lsi/mpi_targ.h
index 3f46285..20b6673 100644
--- a/drivers/message/fusion/lsi/mpi_targ.h
+++ b/drivers/message/fusion/lsi/mpi_targ.h
@@ -6,7 +6,7 @@
  *          Title:  MPI Target mode messages and structures
  *  Creation Date:  June 22, 2000
  *
- *    mpi_targ.h Version:  01.05.05
+ *    mpi_targ.h Version:  01.05.06
  *
  *  Version History
  *  ---------------
@@ -54,6 +54,7 @@
  *  02-22-05  01.05.03  Changed a comment.
  *  03-11-05  01.05.04  Removed TargetAssistExtended Request.
  *  06-24-05  01.05.05  Added TargetAssistExtended structures and defines.
+ *  03-27-06  01.05.06  Added a comment.
  *  --------------------------------------------------------------------------
  */
 
@@ -351,7 +352,7 @@ typedef struct _MSG_TARGET_ASSIST_REQUEST
 #define TARGET_ASSIST_FLAGS_CONFIRMED               (0x08)
 #define TARGET_ASSIST_FLAGS_REPOST_CMD_BUFFER       (0x80)
 
-
+/* Standard Target Mode Reply message */
 typedef struct _MSG_TARGET_ERROR_REPLY
 {
     U16                     Reserved;                   /* 00h */
-- 
cgit v0.10.2


From 65c92b09acf0218b64f1c7ba4fdabeb8b732c876 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Wed, 28 Jun 2006 12:22:50 -0400
Subject: [SCSI] scsi_transport_sas: introduce a sas_port entity

this patch introduces a port object, separates out ports and phys,
with ports becoming the primary objects of the tree.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsi_sas_internal.h b/drivers/scsi/scsi_sas_internal.h
index d76e6e3..e1edab4 100644
--- a/drivers/scsi/scsi_sas_internal.h
+++ b/drivers/scsi/scsi_sas_internal.h
@@ -2,7 +2,8 @@
 #define _SCSI_SAS_INTERNAL_H
 
 #define SAS_HOST_ATTRS		0
-#define SAS_PORT_ATTRS		17
+#define SAS_PHY_ATTRS		17
+#define SAS_PORT_ATTRS		1
 #define SAS_RPORT_ATTRS		7
 #define SAS_END_DEV_ATTRS	3
 #define SAS_EXPANDER_ATTRS	7
@@ -13,12 +14,14 @@ struct sas_internal {
 	struct sas_domain_function_template *dft;
 
 	struct class_device_attribute private_host_attrs[SAS_HOST_ATTRS];
-	struct class_device_attribute private_phy_attrs[SAS_PORT_ATTRS];
+	struct class_device_attribute private_phy_attrs[SAS_PHY_ATTRS];
+	struct class_device_attribute private_port_attrs[SAS_PORT_ATTRS];
 	struct class_device_attribute private_rphy_attrs[SAS_RPORT_ATTRS];
 	struct class_device_attribute private_end_dev_attrs[SAS_END_DEV_ATTRS];
 	struct class_device_attribute private_expander_attrs[SAS_EXPANDER_ATTRS];
 
 	struct transport_container phy_attr_cont;
+	struct transport_container port_attr_cont;
 	struct transport_container rphy_attr_cont;
 	struct transport_container end_dev_attr_cont;
 	struct transport_container expander_attr_cont;
@@ -28,7 +31,8 @@ struct sas_internal {
 	 * needed by scsi_sysfs.c
 	 */
 	struct class_device_attribute *host_attrs[SAS_HOST_ATTRS + 1];
-	struct class_device_attribute *phy_attrs[SAS_PORT_ATTRS + 1];
+	struct class_device_attribute *phy_attrs[SAS_PHY_ATTRS + 1];
+	struct class_device_attribute *port_attrs[SAS_PORT_ATTRS + 1];
 	struct class_device_attribute *rphy_attrs[SAS_RPORT_ATTRS + 1];
 	struct class_device_attribute *end_dev_attrs[SAS_END_DEV_ATTRS + 1];
 	struct class_device_attribute *expander_attrs[SAS_EXPANDER_ATTRS + 1];
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 1fe6b2d..dd07562 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -174,12 +174,29 @@ static int sas_host_match(struct attribute_container *cont,
 
 static int do_sas_phy_delete(struct device *dev, void *data)
 {
-	if (scsi_is_sas_phy(dev))
+	int pass = (int)(unsigned long)data;
+
+	if (pass == 0 && scsi_is_sas_port(dev))
+		sas_port_delete(dev_to_sas_port(dev));
+	else if (pass == 1 && scsi_is_sas_phy(dev))
 		sas_phy_delete(dev_to_phy(dev));
 	return 0;
 }
 
 /**
+ * sas_remove_children  --  tear down a devices SAS data structures
+ * @dev:	device belonging to the sas object
+ *
+ * Removes all SAS PHYs and remote PHYs for a given object
+ */
+void sas_remove_children(struct device *dev)
+{
+	device_for_each_child(dev, (void *)0, do_sas_phy_delete);
+	device_for_each_child(dev, (void *)1, do_sas_phy_delete);
+}
+EXPORT_SYMBOL(sas_remove_children);
+
+/**
  * sas_remove_host  --  tear down a Scsi_Host's SAS data structures
  * @shost:	Scsi Host that is torn down
  *
@@ -188,13 +205,13 @@ static int do_sas_phy_delete(struct device *dev, void *data)
  */
 void sas_remove_host(struct Scsi_Host *shost)
 {
-	device_for_each_child(&shost->shost_gendev, NULL, do_sas_phy_delete);
+	sas_remove_children(&shost->shost_gendev);
 }
 EXPORT_SYMBOL(sas_remove_host);
 
 
 /*
- * SAS Port attributes
+ * SAS Phy attributes
  */
 
 #define sas_phy_show_simple(field, name, format_string, cast)		\
@@ -310,7 +327,7 @@ sas_phy_protocol_attr(identify.target_port_protocols,
 sas_phy_simple_attr(identify.sas_address, sas_address, "0x%016llx\n",
 		unsigned long long);
 sas_phy_simple_attr(identify.phy_identifier, phy_identifier, "%d\n", u8);
-sas_phy_simple_attr(port_identifier, port_identifier, "%d\n", u8);
+//sas_phy_simple_attr(port_identifier, port_identifier, "%d\n", u8);
 sas_phy_linkspeed_attr(negotiated_linkrate);
 sas_phy_linkspeed_attr(minimum_linkrate_hw);
 sas_phy_linkspeed_attr(minimum_linkrate);
@@ -378,9 +395,10 @@ struct sas_phy *sas_phy_alloc(struct device *parent, int number)
 	device_initialize(&phy->dev);
 	phy->dev.parent = get_device(parent);
 	phy->dev.release = sas_phy_release;
+	INIT_LIST_HEAD(&phy->port_siblings);
 	if (scsi_is_sas_expander_device(parent)) {
 		struct sas_rphy *rphy = dev_to_rphy(parent);
-		sprintf(phy->dev.bus_id, "phy-%d-%d:%d", shost->host_no,
+		sprintf(phy->dev.bus_id, "phy-%d:%d:%d", shost->host_no,
 			rphy->scsi_target_id, number);
 	} else
 		sprintf(phy->dev.bus_id, "phy-%d:%d", shost->host_no, number);
@@ -440,8 +458,8 @@ sas_phy_delete(struct sas_phy *phy)
 {
 	struct device *dev = &phy->dev;
 
-	if (phy->rphy)
-		sas_rphy_delete(phy->rphy);
+	/* this happens if the phy is still part of a port when deleted */
+	BUG_ON(!list_empty(&phy->port_siblings));
 
 	transport_remove_device(dev);
 	device_del(dev);
@@ -464,6 +482,258 @@ int scsi_is_sas_phy(const struct device *dev)
 EXPORT_SYMBOL(scsi_is_sas_phy);
 
 /*
+ * SAS Port attributes
+ */
+#define sas_port_show_simple(field, name, format_string, cast)		\
+static ssize_t								\
+show_sas_port_##name(struct class_device *cdev, char *buf)		\
+{									\
+	struct sas_port *port = transport_class_to_sas_port(cdev);	\
+									\
+	return snprintf(buf, 20, format_string, cast port->field);	\
+}
+
+#define sas_port_simple_attr(field, name, format_string, type)		\
+	sas_port_show_simple(field, name, format_string, (type))	\
+static CLASS_DEVICE_ATTR(name, S_IRUGO, show_sas_port_##name, NULL)
+
+sas_port_simple_attr(num_phys, num_phys, "%d\n", int);
+
+static DECLARE_TRANSPORT_CLASS(sas_port_class,
+			       "sas_port", NULL, NULL, NULL);
+
+static int sas_port_match(struct attribute_container *cont, struct device *dev)
+{
+	struct Scsi_Host *shost;
+	struct sas_internal *i;
+
+	if (!scsi_is_sas_port(dev))
+		return 0;
+	shost = dev_to_shost(dev->parent);
+
+	if (!shost->transportt)
+		return 0;
+	if (shost->transportt->host_attrs.ac.class !=
+			&sas_host_class.class)
+		return 0;
+
+	i = to_sas_internal(shost->transportt);
+	return &i->port_attr_cont.ac == cont;
+}
+
+
+static void sas_port_release(struct device *dev)
+{
+	struct sas_port *port = dev_to_sas_port(dev);
+
+	BUG_ON(!list_empty(&port->phy_list));
+
+	put_device(dev->parent);
+	kfree(port);
+}
+
+static void sas_port_create_link(struct sas_port *port,
+				 struct sas_phy *phy)
+{
+	sysfs_create_link(&port->dev.kobj, &phy->dev.kobj, phy->dev.bus_id);
+	sysfs_create_link(&phy->dev.kobj, &port->dev.kobj, "port");
+}
+
+static void sas_port_delete_link(struct sas_port *port,
+				 struct sas_phy *phy)
+{
+	sysfs_remove_link(&port->dev.kobj, phy->dev.bus_id);
+	sysfs_remove_link(&phy->dev.kobj, "port");
+}
+
+/** sas_port_alloc - allocate and initialize a SAS port structure
+ *
+ * @parent:	parent device
+ * @port_id:	port number
+ *
+ * Allocates a SAS port structure.  It will be added to the device tree
+ * below the device specified by @parent which must be either a Scsi_Host
+ * or a sas_expander_device.
+ *
+ * Returns %NULL on error
+ */
+struct sas_port *sas_port_alloc(struct device *parent, int port_id)
+{
+	struct Scsi_Host *shost = dev_to_shost(parent);
+	struct sas_port *port;
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return NULL;
+
+	port->port_identifier = port_id;
+
+	device_initialize(&port->dev);
+
+	port->dev.parent = get_device(parent);
+	port->dev.release = sas_port_release;
+
+	mutex_init(&port->phy_list_mutex);
+	INIT_LIST_HEAD(&port->phy_list);
+
+	if (scsi_is_sas_expander_device(parent)) {
+		struct sas_rphy *rphy = dev_to_rphy(parent);
+		sprintf(port->dev.bus_id, "port-%d:%d:%d", shost->host_no,
+			rphy->scsi_target_id, port->port_identifier);
+	} else
+		sprintf(port->dev.bus_id, "port-%d:%d", shost->host_no,
+			port->port_identifier);
+
+	transport_setup_device(&port->dev);
+
+	return port;
+}
+EXPORT_SYMBOL(sas_port_alloc);
+
+/**
+ * sas_port_add - add a SAS port to the device hierarchy
+ *
+ * @port:	port to be added
+ *
+ * publishes a port to the rest of the system
+ */
+int sas_port_add(struct sas_port *port)
+{
+	int error;
+
+	/* No phys should be added until this is made visible */
+	BUG_ON(!list_empty(&port->phy_list));
+
+	error = device_add(&port->dev);
+
+	if (error)
+		return error;
+
+	transport_add_device(&port->dev);
+	transport_configure_device(&port->dev);
+
+	return 0;
+}
+EXPORT_SYMBOL(sas_port_add);
+
+/**
+ * sas_port_free  --  free a SAS PORT
+ * @port:	SAS PORT to free
+ *
+ * Frees the specified SAS PORT.
+ *
+ * Note:
+ *   This function must only be called on a PORT that has not
+ *   sucessfully been added using sas_port_add().
+ */
+void sas_port_free(struct sas_port *port)
+{
+	transport_destroy_device(&port->dev);
+	put_device(&port->dev);
+}
+EXPORT_SYMBOL(sas_port_free);
+
+/**
+ * sas_port_delete  --  remove SAS PORT
+ * @port:	SAS PORT to remove
+ *
+ * Removes the specified SAS PORT.  If the SAS PORT has an
+ * associated phys, unlink them from the port as well.
+ */
+void sas_port_delete(struct sas_port *port)
+{
+	struct device *dev = &port->dev;
+	struct sas_phy *phy, *tmp_phy;
+
+	if (port->rphy) {
+		sas_rphy_delete(port->rphy);
+		port->rphy = NULL;
+	}
+
+	mutex_lock(&port->phy_list_mutex);
+	list_for_each_entry_safe(phy, tmp_phy, &port->phy_list,
+				 port_siblings) {
+		sas_port_delete_link(port, phy);
+		list_del_init(&phy->port_siblings);
+	}
+	mutex_unlock(&port->phy_list_mutex);
+
+	transport_remove_device(dev);
+	device_del(dev);
+	transport_destroy_device(dev);
+	put_device(dev);
+}
+EXPORT_SYMBOL(sas_port_delete);
+
+/**
+ * scsi_is_sas_port --  check if a struct device represents a SAS port
+ * @dev:	device to check
+ *
+ * Returns:
+ *	%1 if the device represents a SAS Port, %0 else
+ */
+int scsi_is_sas_port(const struct device *dev)
+{
+	return dev->release == sas_port_release;
+}
+EXPORT_SYMBOL(scsi_is_sas_port);
+
+/**
+ * sas_port_add_phy - add another phy to a port to form a wide port
+ * @port:	port to add the phy to
+ * @phy:	phy to add
+ *
+ * When a port is initially created, it is empty (has no phys).  All
+ * ports must have at least one phy to operated, and all wide ports
+ * must have at least two.  The current code makes no difference
+ * between ports and wide ports, but the only object that can be
+ * connected to a remote device is a port, so ports must be formed on
+ * all devices with phys if they're connected to anything.
+ */
+void sas_port_add_phy(struct sas_port *port, struct sas_phy *phy)
+{
+	mutex_lock(&port->phy_list_mutex);
+	if (unlikely(!list_empty(&phy->port_siblings))) {
+		/* make sure we're already on this port */
+		struct sas_phy *tmp;
+
+		list_for_each_entry(tmp, &port->phy_list, port_siblings)
+			if (tmp == phy)
+				break;
+		/* If this trips, you added a phy that was already
+		 * part of a different port */
+		if (unlikely(tmp != phy)) {
+			dev_printk(KERN_ERR, &port->dev, "trying to add phy %s fails: it's already part of another port\n", phy->dev.bus_id);
+			BUG();
+		}
+	} else {
+		sas_port_create_link(port, phy);
+		list_add_tail(&phy->port_siblings, &port->phy_list);
+		port->num_phys++;
+	}
+	mutex_unlock(&port->phy_list_mutex);
+}
+EXPORT_SYMBOL(sas_port_add_phy);
+
+/**
+ * sas_port_delete_phy - remove a phy from a port or wide port
+ * @port:	port to remove the phy from
+ * @phy:	phy to remove
+ *
+ * This operation is used for tearing down ports again.  It must be
+ * done to every port or wide port before calling sas_port_delete.
+ */
+void sas_port_delete_phy(struct sas_port *port, struct sas_phy *phy)
+{
+	mutex_lock(&port->phy_list_mutex);
+	sas_port_delete_link(port, phy);
+	list_del_init(&phy->port_siblings);
+	port->num_phys--;
+	mutex_unlock(&port->phy_list_mutex);
+}
+EXPORT_SYMBOL(sas_port_delete_phy);
+
+/*
  * SAS remote PHY attributes.
  */
 
@@ -767,7 +1037,7 @@ static void sas_rphy_initialize(struct sas_rphy *rphy)
  * Returns:
  *	SAS PHY allocated or %NULL if the allocation failed.
  */
-struct sas_rphy *sas_end_device_alloc(struct sas_phy *parent)
+struct sas_rphy *sas_end_device_alloc(struct sas_port *parent)
 {
 	struct Scsi_Host *shost = dev_to_shost(&parent->dev);
 	struct sas_end_device *rdev;
@@ -780,8 +1050,13 @@ struct sas_rphy *sas_end_device_alloc(struct sas_phy *parent)
 	device_initialize(&rdev->rphy.dev);
 	rdev->rphy.dev.parent = get_device(&parent->dev);
 	rdev->rphy.dev.release = sas_end_device_release;
-	sprintf(rdev->rphy.dev.bus_id, "end_device-%d:%d-%d",
-		shost->host_no, parent->port_identifier, parent->number);
+	if (scsi_is_sas_expander_device(parent->dev.parent)) {
+		struct sas_rphy *rphy = dev_to_rphy(parent->dev.parent);
+		sprintf(rdev->rphy.dev.bus_id, "end_device-%d:%d:%d",
+			shost->host_no, rphy->scsi_target_id, parent->port_identifier);
+	} else
+		sprintf(rdev->rphy.dev.bus_id, "end_device-%d:%d",
+			shost->host_no, parent->port_identifier);
 	rdev->rphy.identify.device_type = SAS_END_DEVICE;
 	sas_rphy_initialize(&rdev->rphy);
 	transport_setup_device(&rdev->rphy.dev);
@@ -798,7 +1073,7 @@ EXPORT_SYMBOL(sas_end_device_alloc);
  * Returns:
  *	SAS PHY allocated or %NULL if the allocation failed.
  */
-struct sas_rphy *sas_expander_alloc(struct sas_phy *parent,
+struct sas_rphy *sas_expander_alloc(struct sas_port *parent,
 				    enum sas_device_type type)
 {
 	struct Scsi_Host *shost = dev_to_shost(&parent->dev);
@@ -837,7 +1112,7 @@ EXPORT_SYMBOL(sas_expander_alloc);
  */
 int sas_rphy_add(struct sas_rphy *rphy)
 {
-	struct sas_phy *parent = dev_to_phy(rphy->dev.parent);
+	struct sas_port *parent = dev_to_sas_port(rphy->dev.parent);
 	struct Scsi_Host *shost = dev_to_shost(parent->dev.parent);
 	struct sas_host_attrs *sas_host = to_sas_host_attrs(shost);
 	struct sas_identify *identify = &rphy->identify;
@@ -910,7 +1185,7 @@ void
 sas_rphy_delete(struct sas_rphy *rphy)
 {
 	struct device *dev = &rphy->dev;
-	struct sas_phy *parent = dev_to_phy(dev->parent);
+	struct sas_port *parent = dev_to_sas_port(dev->parent);
 	struct Scsi_Host *shost = dev_to_shost(parent->dev.parent);
 	struct sas_host_attrs *sas_host = to_sas_host_attrs(shost);
 
@@ -920,7 +1195,7 @@ sas_rphy_delete(struct sas_rphy *rphy)
 		break;
 	case SAS_EDGE_EXPANDER_DEVICE:
 	case SAS_FANOUT_EXPANDER_DEVICE:
-		device_for_each_child(dev, NULL, do_sas_phy_delete);
+		sas_remove_children(dev);
 		break;
 	default:
 		break;
@@ -967,7 +1242,7 @@ static int sas_user_scan(struct Scsi_Host *shost, uint channel,
 
 	mutex_lock(&sas_host->lock);
 	list_for_each_entry(rphy, &sas_host->rphy_list, list) {
-		struct sas_phy *parent = dev_to_phy(rphy->dev.parent);
+		struct sas_port *parent = dev_to_sas_port(rphy->dev.parent);
 
 		if (rphy->identify.device_type != SAS_END_DEVICE ||
 		    rphy->scsi_target_id == -1)
@@ -1003,16 +1278,19 @@ static int sas_user_scan(struct Scsi_Host *shost, uint channel,
 #define SETUP_OPTIONAL_RPORT_ATTRIBUTE(field, func)			\
 	SETUP_TEMPLATE(rphy_attrs, field, S_IRUGO, i->f->func)
 
-#define SETUP_PORT_ATTRIBUTE(field)					\
+#define SETUP_PHY_ATTRIBUTE(field)					\
 	SETUP_TEMPLATE(phy_attrs, field, S_IRUGO, 1)
 
-#define SETUP_OPTIONAL_PORT_ATTRIBUTE(field, func)			\
+#define SETUP_PORT_ATTRIBUTE(field)					\
+	SETUP_TEMPLATE(port_attrs, field, S_IRUGO, 1)
+
+#define SETUP_OPTIONAL_PHY_ATTRIBUTE(field, func)			\
 	SETUP_TEMPLATE(phy_attrs, field, S_IRUGO, i->f->func)
 
-#define SETUP_PORT_ATTRIBUTE_WRONLY(field)				\
+#define SETUP_PHY_ATTRIBUTE_WRONLY(field)				\
 	SETUP_TEMPLATE(phy_attrs, field, S_IWUGO, 1)
 
-#define SETUP_OPTIONAL_PORT_ATTRIBUTE_WRONLY(field, func)		\
+#define SETUP_OPTIONAL_PHY_ATTRIBUTE_WRONLY(field, func)		\
 	SETUP_TEMPLATE(phy_attrs, field, S_IWUGO, i->f->func)
 
 #define SETUP_END_DEV_ATTRIBUTE(field)					\
@@ -1048,6 +1326,11 @@ sas_attach_transport(struct sas_function_template *ft)
 	i->phy_attr_cont.ac.match = sas_phy_match;
 	transport_container_register(&i->phy_attr_cont);
 
+	i->port_attr_cont.ac.class = &sas_port_class.class;
+	i->port_attr_cont.ac.attrs = &i->port_attrs[0];
+	i->port_attr_cont.ac.match = sas_port_match;
+	transport_container_register(&i->port_attr_cont);
+
 	i->rphy_attr_cont.ac.class = &sas_rphy_class.class;
 	i->rphy_attr_cont.ac.attrs = &i->rphy_attrs[0];
 	i->rphy_attr_cont.ac.match = sas_rphy_match;
@@ -1066,30 +1349,35 @@ sas_attach_transport(struct sas_function_template *ft)
 	i->f = ft;
 
 	count = 0;
+	SETUP_PORT_ATTRIBUTE(num_phys);
 	i->host_attrs[count] = NULL;
 
 	count = 0;
-	SETUP_PORT_ATTRIBUTE(initiator_port_protocols);
-	SETUP_PORT_ATTRIBUTE(target_port_protocols);
-	SETUP_PORT_ATTRIBUTE(device_type);
-	SETUP_PORT_ATTRIBUTE(sas_address);
-	SETUP_PORT_ATTRIBUTE(phy_identifier);
-	SETUP_PORT_ATTRIBUTE(port_identifier);
-	SETUP_PORT_ATTRIBUTE(negotiated_linkrate);
-	SETUP_PORT_ATTRIBUTE(minimum_linkrate_hw);
-	SETUP_PORT_ATTRIBUTE(minimum_linkrate);
-	SETUP_PORT_ATTRIBUTE(maximum_linkrate_hw);
-	SETUP_PORT_ATTRIBUTE(maximum_linkrate);
-
-	SETUP_PORT_ATTRIBUTE(invalid_dword_count);
-	SETUP_PORT_ATTRIBUTE(running_disparity_error_count);
-	SETUP_PORT_ATTRIBUTE(loss_of_dword_sync_count);
-	SETUP_PORT_ATTRIBUTE(phy_reset_problem_count);
-	SETUP_OPTIONAL_PORT_ATTRIBUTE_WRONLY(link_reset, phy_reset);
-	SETUP_OPTIONAL_PORT_ATTRIBUTE_WRONLY(hard_reset, phy_reset);
+	SETUP_PHY_ATTRIBUTE(initiator_port_protocols);
+	SETUP_PHY_ATTRIBUTE(target_port_protocols);
+	SETUP_PHY_ATTRIBUTE(device_type);
+	SETUP_PHY_ATTRIBUTE(sas_address);
+	SETUP_PHY_ATTRIBUTE(phy_identifier);
+	//SETUP_PHY_ATTRIBUTE(port_identifier);
+	SETUP_PHY_ATTRIBUTE(negotiated_linkrate);
+	SETUP_PHY_ATTRIBUTE(minimum_linkrate_hw);
+	SETUP_PHY_ATTRIBUTE(minimum_linkrate);
+	SETUP_PHY_ATTRIBUTE(maximum_linkrate_hw);
+	SETUP_PHY_ATTRIBUTE(maximum_linkrate);
+
+	SETUP_PHY_ATTRIBUTE(invalid_dword_count);
+	SETUP_PHY_ATTRIBUTE(running_disparity_error_count);
+	SETUP_PHY_ATTRIBUTE(loss_of_dword_sync_count);
+	SETUP_PHY_ATTRIBUTE(phy_reset_problem_count);
+	SETUP_OPTIONAL_PHY_ATTRIBUTE_WRONLY(link_reset, phy_reset);
+	SETUP_OPTIONAL_PHY_ATTRIBUTE_WRONLY(hard_reset, phy_reset);
 	i->phy_attrs[count] = NULL;
 
 	count = 0;
+	SETUP_PORT_ATTRIBUTE(num_phys);
+	i->port_attrs[count] = NULL;
+
+	count = 0;
 	SETUP_RPORT_ATTRIBUTE(rphy_initiator_port_protocols);
 	SETUP_RPORT_ATTRIBUTE(rphy_target_port_protocols);
 	SETUP_RPORT_ATTRIBUTE(rphy_device_type);
@@ -1131,6 +1419,7 @@ void sas_release_transport(struct scsi_transport_template *t)
 
 	transport_container_unregister(&i->t.host_attrs);
 	transport_container_unregister(&i->phy_attr_cont);
+	transport_container_unregister(&i->port_attr_cont);
 	transport_container_unregister(&i->rphy_attr_cont);
 	transport_container_unregister(&i->end_dev_attr_cont);
 	transport_container_unregister(&i->expander_attr_cont);
@@ -1149,9 +1438,12 @@ static __init int sas_transport_init(void)
 	error = transport_class_register(&sas_phy_class);
 	if (error)
 		goto out_unregister_transport;
-	error = transport_class_register(&sas_rphy_class);
+	error = transport_class_register(&sas_port_class);
 	if (error)
 		goto out_unregister_phy;
+	error = transport_class_register(&sas_rphy_class);
+	if (error)
+		goto out_unregister_port;
 	error = transport_class_register(&sas_end_dev_class);
 	if (error)
 		goto out_unregister_rphy;
@@ -1165,6 +1457,8 @@ static __init int sas_transport_init(void)
 	transport_class_unregister(&sas_end_dev_class);
  out_unregister_rphy:
 	transport_class_unregister(&sas_rphy_class);
+ out_unregister_port:
+	transport_class_unregister(&sas_port_class);
  out_unregister_phy:
 	transport_class_unregister(&sas_phy_class);
  out_unregister_transport:
@@ -1178,6 +1472,7 @@ static void __exit sas_transport_exit(void)
 {
 	transport_class_unregister(&sas_host_class);
 	transport_class_unregister(&sas_phy_class);
+	transport_class_unregister(&sas_port_class);
 	transport_class_unregister(&sas_rphy_class);
 	transport_class_unregister(&sas_end_dev_class);
 	transport_class_unregister(&sas_expander_class);
diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h
index 93cfb4b..e3c503c 100644
--- a/include/scsi/scsi_transport_sas.h
+++ b/include/scsi/scsi_transport_sas.h
@@ -3,6 +3,7 @@
 
 #include <linux/transport_class.h>
 #include <linux/types.h>
+#include <linux/mutex.h>
 
 struct scsi_transport_template;
 struct sas_rphy;
@@ -55,7 +56,6 @@ struct sas_phy {
 	enum sas_linkrate	minimum_linkrate;
 	enum sas_linkrate	maximum_linkrate_hw;
 	enum sas_linkrate	maximum_linkrate;
-	u8			port_identifier;
 
 	/* internal state */
 	unsigned int		local_attached : 1;
@@ -66,8 +66,8 @@ struct sas_phy {
 	u32			loss_of_dword_sync_count;
 	u32			phy_reset_problem_count;
 
-	/* the other end of the link */
-	struct sas_rphy		*rphy;
+	/* for the list of phys belonging to a port */
+	struct list_head	port_siblings;
 };
 
 #define dev_to_phy(d) \
@@ -124,6 +124,24 @@ struct sas_expander_device {
 #define rphy_to_expander_device(r) \
 	container_of((r), struct sas_expander_device, rphy)
 
+struct sas_port {
+	struct device		dev;
+
+	u8			port_identifier;
+	int			num_phys;
+
+	/* the other end of the link */
+	struct sas_rphy		*rphy;
+
+	struct mutex		phy_list_mutex;
+	struct list_head	phy_list;
+};
+
+#define dev_to_sas_port(d) \
+	container_of((d), struct sas_port, dev)
+#define transport_class_to_sas_port(cdev) \
+	dev_to_sas_port((cdev)->dev)
+
 /* The functions by which the transport class and the driver communicate */
 struct sas_function_template {
 	int (*get_linkerrors)(struct sas_phy *);
@@ -133,6 +151,7 @@ struct sas_function_template {
 };
 
 
+void sas_remove_children(struct device *);
 extern void sas_remove_host(struct Scsi_Host *);
 
 extern struct sas_phy *sas_phy_alloc(struct device *, int);
@@ -141,13 +160,21 @@ extern int sas_phy_add(struct sas_phy *);
 extern void sas_phy_delete(struct sas_phy *);
 extern int scsi_is_sas_phy(const struct device *);
 
-extern struct sas_rphy *sas_end_device_alloc(struct sas_phy *);
-extern struct sas_rphy *sas_expander_alloc(struct sas_phy *, enum sas_device_type);
+extern struct sas_rphy *sas_end_device_alloc(struct sas_port *);
+extern struct sas_rphy *sas_expander_alloc(struct sas_port *, enum sas_device_type);
 void sas_rphy_free(struct sas_rphy *);
 extern int sas_rphy_add(struct sas_rphy *);
 extern void sas_rphy_delete(struct sas_rphy *);
 extern int scsi_is_sas_rphy(const struct device *);
 
+struct sas_port *sas_port_alloc(struct device *, int);
+int sas_port_add(struct sas_port *);
+void sas_port_free(struct sas_port *);
+void sas_port_delete(struct sas_port *);
+void sas_port_add_phy(struct sas_port *, struct sas_phy *);
+void sas_port_delete_phy(struct sas_port *, struct sas_phy *);
+int scsi_is_sas_port(const struct device *);
+
 extern struct scsi_transport_template *
 sas_attach_transport(struct sas_function_template *);
 extern void sas_release_transport(struct scsi_transport_template *);
-- 
cgit v0.10.2


From 547f9a218436ea35baf9a52e981753e44d9cff1f Mon Sep 17 00:00:00 2001
From: Eric Moore <eric.moore@lsil.com>
Date: Tue, 27 Jun 2006 14:42:12 -0600
Subject: [SCSI] mptsas: wide port support

* Wide port support added - using James Bottomley's new SAS wide port API.
(There is a known problem in sas transport layer reported yesterday to
James. The Kobject dev.bus_ids for end devices are not unique across
expanders. I have added a work around in this patch, where I asigning
an unique port identifier for every port within the host - this solves
the problem, but I expect a fix from James in the sas transport).

* Adding target_alloc and target_destroy entry points, and moving code over
from the slave entry points.

* The renaming of some mptscsih_xxx functions declared in mptsas.c,
to mptsas_xxx.

* Target Reset moved from slave_destroy to hotplug work thread
handling (with regard to device removal). Also inhibit IO to end device
while device is being broken down . Talked to James Smart about this
at Linux Expo (with questions of how the fc transport handles this).

* Cleaning up the kzalloc's, and kfree's

Signed-off-by: Eric Moore <Eric.Moore@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/message/fusion/Makefile b/drivers/message/fusion/Makefile
index 51740b3..b114236 100644
--- a/drivers/message/fusion/Makefile
+++ b/drivers/message/fusion/Makefile
@@ -33,6 +33,11 @@
 #  For mptfc:
 #CFLAGS_mptfc.o += -DMPT_DEBUG_FC
 
+#  For mptsas:
+#CFLAGS_mptsas.o += -DMPT_DEBUG_SAS
+#CFLAGS_mptsas.o += -DMPT_DEBUG_SAS_WIDE
+
+
 #=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-} LSI_LOGIC
 
 obj-$(CONFIG_FUSION_SPI)	+= mptbase.o mptscsih.o mptspi.o
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index 4720f9a..ddef586 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -76,8 +76,8 @@
 #define COPYRIGHT	"Copyright (c) 1999-2005 " MODULEAUTHOR
 #endif
 
-#define MPT_LINUX_VERSION_COMMON	"3.03.10"
-#define MPT_LINUX_PACKAGE_NAME		"@(#)mptlinux-3.03.10"
+#define MPT_LINUX_VERSION_COMMON	"3.04.00"
+#define MPT_LINUX_PACKAGE_NAME		"@(#)mptlinux-3.04.00"
 #define WHAT_MAGIC_STRING		"@" "(" "#" ")"
 
 #define show_mptmod_ver(s,ver)  \
@@ -342,6 +342,7 @@ typedef struct _VirtTarget {
 	u8			 negoFlags;	/* bit field, see above */
 	u8			 raidVolume;	/* set, if RAID Volume */
 	u8			 type;		/* byte 0 of Inquiry data */
+	u8			 deleted;	/* target in process of being removed */
 	u32			 num_luns;
 	u32			 luns[8];		/* Max LUNs is 256 */
 } VirtTarget;
@@ -633,7 +634,7 @@ typedef struct _MPT_ADAPTER
 	int			 sas_index; /* index refrencing */
 	MPT_SAS_MGMT		 sas_mgmt;
 	int			 num_ports;
-	struct work_struct	 mptscsih_persistTask;
+	struct work_struct	 sas_persist_task;
 
 	struct work_struct	 fc_setup_reset_work;
 	struct list_head	 fc_rports;
@@ -642,6 +643,7 @@ typedef struct _MPT_ADAPTER
 	struct work_struct	 fc_rescan_work;
 	char			 fc_rescan_work_q_name[KOBJ_NAME_LEN];
 	struct workqueue_struct *fc_rescan_work_q;
+	u8		port_serial_number;
 } MPT_ADAPTER;
 
 /*
@@ -893,6 +895,13 @@ typedef struct _mpt_sge {
 #define DBG_DUMP_REQUEST_FRAME_HDR(mfp)
 #endif
 
+// debug sas wide ports
+#ifdef MPT_DEBUG_SAS_WIDE
+#define dsaswideprintk(x) printk x
+#else
+#define dsaswideprintk(x)
+#endif
+
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index af6ec55..0877023 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -50,11 +50,14 @@
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/workqueue.h>
+#include <linux/delay.h>	/* for mdelay */
 
+#include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_transport_sas.h>
+#include <scsi/scsi_dbg.h>
 
 #include "mptbase.h"
 #include "mptscsih.h"
@@ -137,23 +140,37 @@ struct mptsas_devinfo {
 	u32	device_info;	/* bitfield detailed info about this device */
 };
 
+/*
+ * Specific details on ports, wide/narrow
+ */
+struct mptsas_portinfo_details{
+	u8	port_id; 	/* port number provided to transport */
+	u16	num_phys;	/* number of phys belong to this port */
+	u64	phy_bitmask; 	/* TODO, extend support for 255 phys */
+	struct sas_rphy *rphy;	/* transport layer rphy object */
+	struct sas_port *port;	/* transport layer port object */
+	struct scsi_target *starget;
+	struct mptsas_portinfo *port_info;
+};
+
 struct mptsas_phyinfo {
 	u8	phy_id; 		/* phy index */
-	u8	port_id; 		/* port number this phy is part of */
+	u8	port_id; 		/* firmware port identifier */
 	u8	negotiated_link_rate;	/* nego'd link rate for this phy */
 	u8	hw_link_rate; 		/* hardware max/min phys link rate */
 	u8	programmed_link_rate;	/* programmed max/min phy link rate */
+	u8	sas_port_add_phy;	/* flag to request sas_port_add_phy*/
 	struct mptsas_devinfo identify;	/* point to phy device info */
 	struct mptsas_devinfo attached;	/* point to attached device info */
-	struct sas_phy *phy;
-	struct sas_rphy *rphy;
-	struct scsi_target *starget;
+	struct sas_phy *phy;		/* transport layer phy object */
+	struct mptsas_portinfo *portinfo;
+	struct mptsas_portinfo_details * port_details;
 };
 
 struct mptsas_portinfo {
 	struct list_head list;
 	u16		handle;		/* unique id to address this */
-	u8		num_phys;	/* number of phys */
+	u16		num_phys;	/* number of phys */
 	struct mptsas_phyinfo *phy_info;
 };
 
@@ -169,7 +186,7 @@ struct mptsas_enclosure {
 	u8	sep_channel;		/* SEP channel logical channel id */
 };
 
-#ifdef SASDEBUG
+#ifdef MPT_DEBUG_SAS
 static void mptsas_print_phy_data(MPI_SAS_IO_UNIT0_PHY_DATA *phy_data)
 {
 	printk("---- IO UNIT PAGE 0 ------------\n");
@@ -305,7 +322,7 @@ mptsas_find_portinfo_by_handle(MPT_ADAPTER *ioc, u16 handle)
 static inline int
 mptsas_is_end_device(struct mptsas_devinfo * attached)
 {
-	if ((attached->handle) &&
+	if ((attached->sas_address) &&
 	    (attached->device_info &
 	    MPI_SAS_DEVICE_INFO_END_DEVICE) &&
 	    ((attached->device_info &
@@ -319,6 +336,253 @@ mptsas_is_end_device(struct mptsas_devinfo * attached)
 		return 0;
 }
 
+/* no mutex */
+void
+mptsas_port_delete(struct mptsas_portinfo_details * port_details)
+{
+	struct mptsas_portinfo *port_info;
+	struct mptsas_phyinfo *phy_info;
+	u8	i;
+
+	if (!port_details)
+		return;
+
+	port_info = port_details->port_info;
+	phy_info = port_info->phy_info;
+
+	dsaswideprintk((KERN_DEBUG "%s: [%p]: port=%02d num_phys=%02d "
+	    	"bitmask=0x%016llX\n",
+		__FUNCTION__, port_details, port_details->port_id,
+		port_details->num_phys, port_details->phy_bitmask));
+
+	for (i = 0; i < port_info->num_phys; i++, phy_info++) {
+		if(phy_info->port_details != port_details)
+			continue;
+		memset(&phy_info->attached, 0, sizeof(struct mptsas_devinfo));
+		phy_info->port_details = NULL;
+	}
+	kfree(port_details);
+}
+
+static inline struct sas_rphy *
+mptsas_get_rphy(struct mptsas_phyinfo *phy_info)
+{
+	if (phy_info->port_details)
+		return phy_info->port_details->rphy;
+	else
+		return NULL;
+}
+
+static inline void
+mptsas_set_rphy(struct mptsas_phyinfo *phy_info, struct sas_rphy *rphy)
+{
+	if (phy_info->port_details) {
+		phy_info->port_details->rphy = rphy;
+		dsaswideprintk((KERN_DEBUG "sas_rphy_add: rphy=%p\n", rphy));
+	}
+
+#ifdef MPT_DEBUG_SAS_WIDE
+	if (rphy) {
+		dev_printk(KERN_DEBUG, &rphy->dev, "add:");
+		printk("rphy=%p release=%p\n",
+			rphy, rphy->dev.release);
+	}
+#endif
+}
+
+static inline struct sas_port *
+mptsas_get_port(struct mptsas_phyinfo *phy_info)
+{
+	if (phy_info->port_details)
+		return phy_info->port_details->port;
+	else
+		return NULL;
+}
+
+static inline void
+mptsas_set_port(struct mptsas_phyinfo *phy_info, struct sas_port *port)
+{
+	if (phy_info->port_details)
+		phy_info->port_details->port = port;
+
+#ifdef MPT_DEBUG_SAS_WIDE
+	if (port) {
+		dev_printk(KERN_DEBUG, &port->dev, "add: ");
+		printk("port=%p release=%p\n",
+			port, port->dev.release);
+	}
+#endif
+}
+
+static inline struct scsi_target *
+mptsas_get_starget(struct mptsas_phyinfo *phy_info)
+{
+	if (phy_info->port_details)
+		return phy_info->port_details->starget;
+	else
+		return NULL;
+}
+
+static inline void
+mptsas_set_starget(struct mptsas_phyinfo *phy_info, struct scsi_target *
+starget)
+{
+	if (phy_info->port_details)
+		phy_info->port_details->starget = starget;
+}
+
+
+/*
+ * mptsas_setup_wide_ports
+ *
+ * Updates for new and existing narrow/wide port configuration
+ * in the sas_topology
+ */
+void
+mptsas_setup_wide_ports(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info)
+{
+	struct mptsas_portinfo_details * port_details;
+	struct mptsas_phyinfo *phy_info, *phy_info_cmp;
+	u64	sas_address;
+	int	i, j;
+
+	mutex_lock(&ioc->sas_topology_mutex);
+
+	phy_info = port_info->phy_info;
+	for (i = 0 ; i < port_info->num_phys ; i++, phy_info++) {
+		if (phy_info->attached.handle)
+			continue;
+		port_details = phy_info->port_details;
+		if (!port_details)
+			continue;
+		if (port_details->num_phys < 2)
+			continue;
+		/*
+		 * Removing a phy from a port, letting the last
+		 * phy be removed by firmware events.
+		 */
+		dsaswideprintk((KERN_DEBUG
+			"%s: [%p]: port=%d deleting phy = %d\n",
+			__FUNCTION__, port_details,
+			port_details->port_id, i));
+		port_details->num_phys--;
+		port_details->phy_bitmask &= ~ (1 << phy_info->phy_id);
+		memset(&phy_info->attached, 0, sizeof(struct mptsas_devinfo));
+		sas_port_delete_phy(port_details->port, phy_info->phy);
+		phy_info->port_details = NULL;
+	}
+
+	/*
+	 * Populate and refresh the tree
+	 */
+	phy_info = port_info->phy_info;
+	for (i = 0 ; i < port_info->num_phys ; i++, phy_info++) {
+		sas_address = phy_info->attached.sas_address;
+		dsaswideprintk((KERN_DEBUG "phy_id=%d sas_address=0x%018llX\n",
+			i, sas_address));
+		if (!sas_address)
+			continue;
+		port_details = phy_info->port_details;
+		/*
+		 * Forming a port
+		 */
+		if (!port_details) {
+			port_details = kzalloc(sizeof(*port_details),
+				GFP_KERNEL);
+			if (!port_details)
+				goto out;
+			port_details->num_phys = 1;
+			port_details->port_info = port_info;
+			port_details->port_id = ioc->port_serial_number++;
+			if (phy_info->phy_id < 64 )
+				port_details->phy_bitmask |=
+				    (1 << phy_info->phy_id);
+			phy_info->sas_port_add_phy=1;
+			dsaswideprintk((KERN_DEBUG "\t\tForming port\n\t\t"
+				"phy_id=%d sas_address=0x%018llX\n",
+				i, sas_address));
+			phy_info->port_details = port_details;
+		}
+
+		if (i == port_info->num_phys - 1)
+			continue;
+		phy_info_cmp = &port_info->phy_info[i + 1];
+		for (j = i + 1 ; j < port_info->num_phys ; j++,
+		    phy_info_cmp++) {
+			if (!phy_info_cmp->attached.sas_address)
+				continue;
+			if (sas_address != phy_info_cmp->attached.sas_address)
+				continue;
+			if (phy_info_cmp->port_details == port_details )
+				continue;
+			dsaswideprintk((KERN_DEBUG
+				"\t\tphy_id=%d sas_address=0x%018llX\n",
+				j, phy_info_cmp->attached.sas_address));
+			if (phy_info_cmp->port_details) {
+				port_details->rphy =
+				    mptsas_get_rphy(phy_info_cmp);
+				port_details->port =
+				    mptsas_get_port(phy_info_cmp);
+				port_details->starget =
+				    mptsas_get_starget(phy_info_cmp);
+				port_details->port_id =
+					phy_info_cmp->port_details->port_id;
+				port_details->num_phys =
+					phy_info_cmp->port_details->num_phys;
+//				port_info->port_serial_number--;
+				ioc->port_serial_number--;
+				if (!phy_info_cmp->port_details->num_phys)
+					kfree(phy_info_cmp->port_details);
+			} else
+				phy_info_cmp->sas_port_add_phy=1;
+			/*
+			 * Adding a phy to a port
+			 */
+			phy_info_cmp->port_details = port_details;
+			if (phy_info_cmp->phy_id < 64 )
+				port_details->phy_bitmask |=
+				(1 << phy_info_cmp->phy_id);
+			port_details->num_phys++;
+		}
+	}
+
+ out:
+
+#ifdef MPT_DEBUG_SAS_WIDE
+	for (i = 0; i < port_info->num_phys; i++) {
+		port_details = port_info->phy_info[i].port_details;
+		if (!port_details)
+			continue;
+		dsaswideprintk((KERN_DEBUG
+			"%s: [%p]: phy_id=%02d port_id=%02d num_phys=%02d "
+		    	"bitmask=0x%016llX\n",
+			__FUNCTION__,
+			port_details, i, port_details->port_id,
+			port_details->num_phys, port_details->phy_bitmask));
+		dsaswideprintk((KERN_DEBUG"\t\tport = %p rphy=%p\n",
+			port_details->port, port_details->rphy));
+	}
+	dsaswideprintk((KERN_DEBUG"\n"));
+#endif
+	mutex_unlock(&ioc->sas_topology_mutex);
+}
+
+static void
+mptsas_target_reset(MPT_ADAPTER *ioc, VirtTarget * vtarget)
+{
+	MPT_SCSI_HOST		*hd = (MPT_SCSI_HOST *)ioc->sh->hostdata;
+
+	if (mptscsih_TMHandler(hd,
+	     MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET,
+	     vtarget->bus_id, vtarget->target_id, 0, 0, 5) < 0) {
+		hd->tmPending = 0;
+		hd->tmState = TM_STATE_NONE;
+		printk(MYIOC_s_WARN_FMT
+	       "Error processing TaskMgmt id=%d TARGET_RESET\n",
+			ioc->name, vtarget->target_id);
+	}
+}
+
 static int
 mptsas_sas_enclosure_pg0(MPT_ADAPTER *ioc, struct mptsas_enclosure *enclosure,
 		u32 form, u32 form_specific)
@@ -400,11 +664,105 @@ mptsas_slave_configure(struct scsi_device *sdev)
 	return mptscsih_slave_configure(sdev);
 }
 
-/*
- * This is pretty ugly.  We will be able to seriously clean it up
- * once the DV code in mptscsih goes away and we can properly
- * implement ->target_alloc.
- */
+static int
+mptsas_target_alloc(struct scsi_target *starget)
+{
+	struct Scsi_Host *host = dev_to_shost(&starget->dev);
+	MPT_SCSI_HOST		*hd = (MPT_SCSI_HOST *)host->hostdata;
+	VirtTarget		*vtarget;
+	u32			target_id;
+	u32			channel;
+	struct sas_rphy		*rphy;
+	struct mptsas_portinfo	*p;
+	int 			 i;
+
+	vtarget = kzalloc(sizeof(VirtTarget), GFP_KERNEL);
+	if (!vtarget)
+		return -ENOMEM;
+
+	vtarget->starget = starget;
+	vtarget->ioc_id = hd->ioc->id;
+	vtarget->tflags = MPT_TARGET_FLAGS_Q_YES|MPT_TARGET_FLAGS_VALID_INQUIRY;
+
+	target_id = starget->id;
+	channel = 0;
+
+	hd->Targets[target_id] = vtarget;
+
+	/*
+	 * RAID volumes placed beyond the last expected port.
+	 */
+	if (starget->channel == hd->ioc->num_ports)
+		goto out;
+
+	rphy = dev_to_rphy(starget->dev.parent);
+	mutex_lock(&hd->ioc->sas_topology_mutex);
+	list_for_each_entry(p, &hd->ioc->sas_topology, list) {
+		for (i = 0; i < p->num_phys; i++) {
+			if (p->phy_info[i].attached.sas_address !=
+					rphy->identify.sas_address)
+				continue;
+			target_id = p->phy_info[i].attached.id;
+			channel = p->phy_info[i].attached.channel;
+			mptsas_set_starget(&p->phy_info[i], starget);
+
+			/*
+			 * Exposing hidden raid components
+			 */
+			if (mptscsih_is_phys_disk(hd->ioc, target_id)) {
+				target_id = mptscsih_raid_id_to_num(hd,
+						target_id);
+				vtarget->tflags |=
+				    MPT_TARGET_FLAGS_RAID_COMPONENT;
+			}
+			mutex_unlock(&hd->ioc->sas_topology_mutex);
+			goto out;
+		}
+	}
+	mutex_unlock(&hd->ioc->sas_topology_mutex);
+
+	kfree(vtarget);
+	return -ENXIO;
+
+ out:
+	vtarget->target_id = target_id;
+	vtarget->bus_id = channel;
+	starget->hostdata = vtarget;
+	return 0;
+}
+
+static void
+mptsas_target_destroy(struct scsi_target *starget)
+{
+	struct Scsi_Host *host = dev_to_shost(&starget->dev);
+	MPT_SCSI_HOST		*hd = (MPT_SCSI_HOST *)host->hostdata;
+	struct sas_rphy		*rphy;
+	struct mptsas_portinfo	*p;
+	int 			 i;
+
+	if (!starget->hostdata)
+		return;
+
+	if (starget->channel == hd->ioc->num_ports)
+		goto out;
+
+	rphy = dev_to_rphy(starget->dev.parent);
+	list_for_each_entry(p, &hd->ioc->sas_topology, list) {
+		for (i = 0; i < p->num_phys; i++) {
+			if (p->phy_info[i].attached.sas_address !=
+					rphy->identify.sas_address)
+				continue;
+			mptsas_set_starget(&p->phy_info[i], NULL);
+			goto out;
+		}
+	}
+
+ out:
+	kfree(starget->hostdata);
+	starget->hostdata = NULL;
+}
+
+
 static int
 mptsas_slave_alloc(struct scsi_device *sdev)
 {
@@ -412,61 +770,41 @@ mptsas_slave_alloc(struct scsi_device *sdev)
 	MPT_SCSI_HOST		*hd = (MPT_SCSI_HOST *)host->hostdata;
 	struct sas_rphy		*rphy;
 	struct mptsas_portinfo	*p;
-	VirtTarget		*vtarget;
 	VirtDevice		*vdev;
 	struct scsi_target 	*starget;
-	u32			target_id;
-	int i;
+	int 			i;
 
 	vdev = kzalloc(sizeof(VirtDevice), GFP_KERNEL);
 	if (!vdev) {
-		printk(MYIOC_s_ERR_FMT "slave_alloc kmalloc(%zd) FAILED!\n",
+		printk(MYIOC_s_ERR_FMT "slave_alloc kzalloc(%zd) FAILED!\n",
 				hd->ioc->name, sizeof(VirtDevice));
 		return -ENOMEM;
 	}
-	sdev->hostdata = vdev;
 	starget = scsi_target(sdev);
-	vtarget = starget->hostdata;
-	vtarget->ioc_id = hd->ioc->id;
-	vdev->vtarget = vtarget;
-	if (vtarget->num_luns == 0) {
-		vtarget->tflags = MPT_TARGET_FLAGS_Q_YES|MPT_TARGET_FLAGS_VALID_INQUIRY;
-		hd->Targets[sdev->id] = vtarget;
-	}
+	vdev->vtarget = starget->hostdata;
 
 	/*
-	  RAID volumes placed beyond the last expected port.
-	*/
-	if (sdev->channel == hd->ioc->num_ports) {
-		target_id = sdev->id;
-		vtarget->bus_id = 0;
-		vdev->lun = 0;
+	 * RAID volumes placed beyond the last expected port.
+	 */
+	if (sdev->channel == hd->ioc->num_ports)
 		goto out;
-	}
 
 	rphy = dev_to_rphy(sdev->sdev_target->dev.parent);
 	mutex_lock(&hd->ioc->sas_topology_mutex);
 	list_for_each_entry(p, &hd->ioc->sas_topology, list) {
 		for (i = 0; i < p->num_phys; i++) {
-			if (p->phy_info[i].attached.sas_address ==
-					rphy->identify.sas_address) {
-				target_id = p->phy_info[i].attached.id;
-				vtarget->bus_id = p->phy_info[i].attached.channel;
-				vdev->lun = sdev->lun;
-				p->phy_info[i].starget = sdev->sdev_target;
-				/*
-				 * Exposing hidden disk (RAID)
-				 */
-				if (mptscsih_is_phys_disk(hd->ioc, target_id)) {
-					target_id = mptscsih_raid_id_to_num(hd,
-							target_id);
-					vdev->vtarget->tflags |=
-					    MPT_TARGET_FLAGS_RAID_COMPONENT;
-					sdev->no_uld_attach = 1;
-				}
-				mutex_unlock(&hd->ioc->sas_topology_mutex);
-				goto out;
-			}
+			if (p->phy_info[i].attached.sas_address !=
+					rphy->identify.sas_address)
+				continue;
+			vdev->lun = sdev->lun;
+			/*
+			 * Exposing hidden raid components
+			 */
+			if (mptscsih_is_phys_disk(hd->ioc,
+					p->phy_info[i].attached.id))
+				sdev->no_uld_attach = 1;
+			mutex_unlock(&hd->ioc->sas_topology_mutex);
+			goto out;
 		}
 	}
 	mutex_unlock(&hd->ioc->sas_topology_mutex);
@@ -475,57 +813,39 @@ mptsas_slave_alloc(struct scsi_device *sdev)
 	return -ENXIO;
 
  out:
-	vtarget->target_id = target_id;
-	vtarget->num_luns++;
+	vdev->vtarget->num_luns++;
+	sdev->hostdata = vdev;
 	return 0;
 }
 
-static void
-mptsas_slave_destroy(struct scsi_device *sdev)
+static int
+mptsas_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 {
-	struct Scsi_Host *host = sdev->host;
-	MPT_SCSI_HOST *hd = (MPT_SCSI_HOST *)host->hostdata;
-	VirtDevice *vdev;
+	VirtDevice	*vdev = SCpnt->device->hostdata;
 
-	/*
-	 * Issue target reset to flush firmware outstanding commands.
-	 */
-	vdev = sdev->hostdata;
-	if (vdev->configured_lun){
-		if (mptscsih_TMHandler(hd,
-		     MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET,
-		     vdev->vtarget->bus_id,
-		     vdev->vtarget->target_id,
-		     0, 0, 5 /* 5 second timeout */)
-		     < 0){
-
-			/* The TM request failed!
-			 * Fatal error case.
-			 */
-			printk(MYIOC_s_WARN_FMT
-		       "Error processing TaskMgmt id=%d TARGET_RESET\n",
-				hd->ioc->name,
-				vdev->vtarget->target_id);
-
-			hd->tmPending = 0;
-			hd->tmState = TM_STATE_NONE;
-		}
+//	scsi_print_command(SCpnt);
+	if (vdev->vtarget->deleted) {
+		SCpnt->result = DID_NO_CONNECT << 16;
+		done(SCpnt);
+		return 0;
 	}
-	mptscsih_slave_destroy(sdev);
+
+	return mptscsih_qcmd(SCpnt,done);
 }
 
+
 static struct scsi_host_template mptsas_driver_template = {
 	.module				= THIS_MODULE,
 	.proc_name			= "mptsas",
 	.proc_info			= mptscsih_proc_info,
 	.name				= "MPT SPI Host",
 	.info				= mptscsih_info,
-	.queuecommand			= mptscsih_qcmd,
-	.target_alloc			= mptscsih_target_alloc,
+	.queuecommand			= mptsas_qcmd,
+	.target_alloc			= mptsas_target_alloc,
 	.slave_alloc			= mptsas_slave_alloc,
 	.slave_configure		= mptsas_slave_configure,
-	.target_destroy			= mptscsih_target_destroy,
-	.slave_destroy			= mptsas_slave_destroy,
+	.target_destroy			= mptsas_target_destroy,
+	.slave_destroy			= mptscsih_slave_destroy,
 	.change_queue_depth 		= mptscsih_change_queue_depth,
 	.eh_abort_handler		= mptscsih_abort,
 	.eh_device_reset_handler	= mptscsih_dev_reset,
@@ -795,7 +1115,7 @@ mptsas_sas_io_unit_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info)
 
 	port_info->num_phys = buffer->NumPhys;
 	port_info->phy_info = kcalloc(port_info->num_phys,
-		sizeof(struct mptsas_phyinfo),GFP_KERNEL);
+		sizeof(*port_info->phy_info),GFP_KERNEL);
 	if (!port_info->phy_info) {
 		error = -ENOMEM;
 		goto out_free_consistent;
@@ -811,6 +1131,7 @@ mptsas_sas_io_unit_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info)
 		    buffer->PhyData[i].Port;
 		port_info->phy_info[i].negotiated_link_rate =
 		    buffer->PhyData[i].NegotiatedLinkRate;
+		port_info->phy_info[i].portinfo = port_info;
 	}
 
  out_free_consistent:
@@ -968,7 +1289,7 @@ mptsas_sas_expander_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info,
 	CONFIGPARMS cfg;
 	SasExpanderPage0_t *buffer;
 	dma_addr_t dma_handle;
-	int error;
+	int i, error;
 
 	hdr.PageVersion = MPI_SASEXPANDER0_PAGEVERSION;
 	hdr.ExtPageLength = 0;
@@ -1013,12 +1334,15 @@ mptsas_sas_expander_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info,
 	port_info->num_phys = buffer->NumPhys;
 	port_info->handle = le16_to_cpu(buffer->DevHandle);
 	port_info->phy_info = kcalloc(port_info->num_phys,
-		sizeof(struct mptsas_phyinfo),GFP_KERNEL);
+		sizeof(*port_info->phy_info),GFP_KERNEL);
 	if (!port_info->phy_info) {
 		error = -ENOMEM;
 		goto out_free_consistent;
 	}
 
+	for (i = 0; i < port_info->num_phys; i++)
+		port_info->phy_info[i].portinfo = port_info;
+
  out_free_consistent:
 	pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
 			    buffer, dma_handle);
@@ -1161,19 +1485,23 @@ static int mptsas_probe_one_phy(struct device *dev,
 {
 	MPT_ADAPTER *ioc;
 	struct sas_phy *phy;
-	int error;
+	struct sas_port *port;
+	int error = 0;
 
-	if (!dev)
-		return -ENODEV;
+	if (!dev) {
+		error = -ENODEV;
+		goto out;
+	}
 
 	if (!phy_info->phy) {
 		phy = sas_phy_alloc(dev, index);
-		if (!phy)
-			return -ENOMEM;
+		if (!phy) {
+			error = -ENOMEM;
+			goto out;
+		}
 	} else
 		phy = phy_info->phy;
 
-	phy->port_identifier = phy_info->port_id;
 	mptsas_parse_device_info(&phy->identify, &phy_info->identify);
 
 	/*
@@ -1265,19 +1593,50 @@ static int mptsas_probe_one_phy(struct device *dev,
 		error = sas_phy_add(phy);
 		if (error) {
 			sas_phy_free(phy);
-			return error;
+			goto out;
 		}
 		phy_info->phy = phy;
 	}
 
-	if ((phy_info->attached.handle) &&
-	    (!phy_info->rphy)) {
+	if (!phy_info->attached.handle ||
+			!phy_info->port_details)
+		goto out;
+
+	port = mptsas_get_port(phy_info);
+	ioc = phy_to_ioc(phy_info->phy);
+
+	if (phy_info->sas_port_add_phy) {
+
+		if (!port) {
+			port = sas_port_alloc(dev,
+			    phy_info->port_details->port_id);
+			dsaswideprintk((KERN_DEBUG
+			    "sas_port_alloc: port=%p dev=%p port_id=%d\n",
+			    port, dev, phy_info->port_details->port_id));
+			if (!port) {
+				error = -ENOMEM;
+				goto out;
+			}
+			error = sas_port_add(port);
+			if (error) {
+				dfailprintk((MYIOC_s_ERR_FMT
+					"%s: exit at line=%d\n", ioc->name,
+					__FUNCTION__, __LINE__));
+				goto out;
+			}
+			mptsas_set_port(phy_info, port);
+		}
+		dsaswideprintk((KERN_DEBUG "sas_port_add_phy: phy_id=%d\n",
+		    phy_info->phy_id));
+		sas_port_add_phy(port, phy_info->phy);
+		phy_info->sas_port_add_phy = 0;
+	}
+
+	if (!mptsas_get_rphy(phy_info) && port && !port->rphy) {
 
 		struct sas_rphy *rphy;
 		struct sas_identify identify;
 
-		ioc = phy_to_ioc(phy_info->phy);
-
 		/*
 		 * Let the hotplug_work thread handle processing
 		 * the adding/removing of devices that occur
@@ -1285,36 +1644,42 @@ static int mptsas_probe_one_phy(struct device *dev,
 		 */
 		if (ioc->sas_discovery_runtime &&
 			mptsas_is_end_device(&phy_info->attached))
-			return 0;
+				goto out;
 
 		mptsas_parse_device_info(&identify, &phy_info->attached);
 		switch (identify.device_type) {
 		case SAS_END_DEVICE:
-			rphy = sas_end_device_alloc(phy);
+			rphy = sas_end_device_alloc(port);
 			break;
 		case SAS_EDGE_EXPANDER_DEVICE:
 		case SAS_FANOUT_EXPANDER_DEVICE:
-			rphy = sas_expander_alloc(phy, identify.device_type);
+			rphy = sas_expander_alloc(port, identify.device_type);
 			break;
 		default:
 			rphy = NULL;
 			break;
 		}
-		if (!rphy)
-			return 0; /* non-fatal: an rphy can be added later */
+		if (!rphy) {
+			dfailprintk((MYIOC_s_ERR_FMT
+				"%s: exit at line=%d\n", ioc->name,
+				__FUNCTION__, __LINE__));
+			goto out;
+		}
 
 		rphy->identify = identify;
-
 		error = sas_rphy_add(rphy);
 		if (error) {
+			dfailprintk((MYIOC_s_ERR_FMT
+				"%s: exit at line=%d\n", ioc->name,
+				__FUNCTION__, __LINE__));
 			sas_rphy_free(rphy);
-			return error;
+			goto out;
 		}
-
-		phy_info->rphy = rphy;
+		mptsas_set_rphy(phy_info, rphy);
 	}
 
-	return 0;
+ out:
+	return error;
 }
 
 static int
@@ -1342,8 +1707,7 @@ mptsas_probe_hba_phys(MPT_ADAPTER *ioc)
 		for (i = 0; i < hba->num_phys; i++)
 			port_info->phy_info[i].negotiated_link_rate =
 				hba->phy_info[i].negotiated_link_rate;
-		if (hba->phy_info)
-			kfree(hba->phy_info);
+		kfree(hba->phy_info);
 		kfree(hba);
 		hba = NULL;
 	}
@@ -1362,24 +1726,24 @@ mptsas_probe_hba_phys(MPT_ADAPTER *ioc)
 		    port_info->phy_info[i].phy_id;
 		handle = port_info->phy_info[i].identify.handle;
 
-		if (port_info->phy_info[i].attached.handle) {
+		if (port_info->phy_info[i].attached.handle)
 			mptsas_sas_device_pg0(ioc,
 				&port_info->phy_info[i].attached,
 				(MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
 				 MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
 				port_info->phy_info[i].attached.handle);
-		}
+	}
 
+	mptsas_setup_wide_ports(ioc, port_info);
+
+	for (i = 0; i < port_info->num_phys; i++, ioc->sas_index++)
 		mptsas_probe_one_phy(&ioc->sh->shost_gendev,
 		    &port_info->phy_info[i], ioc->sas_index, 1);
-		ioc->sas_index++;
-	}
 
 	return 0;
 
  out_free_port_info:
-	if (hba)
-		kfree(hba);
+	kfree(hba);
  out:
 	return error;
 }
@@ -1388,6 +1752,8 @@ static int
 mptsas_probe_expander_phys(MPT_ADAPTER *ioc, u32 *handle)
 {
 	struct mptsas_portinfo *port_info, *p, *ex;
+	struct device *parent;
+	struct sas_rphy *rphy;
 	int error = -ENOMEM, i, j;
 
 	ex = kzalloc(sizeof(*port_info), GFP_KERNEL);
@@ -1409,16 +1775,13 @@ mptsas_probe_expander_phys(MPT_ADAPTER *ioc, u32 *handle)
 		list_add_tail(&port_info->list, &ioc->sas_topology);
 	} else {
 		port_info->handle = ex->handle;
-		if (ex->phy_info)
-			kfree(ex->phy_info);
+		kfree(ex->phy_info);
 		kfree(ex);
 		ex = NULL;
 	}
 	mutex_unlock(&ioc->sas_topology_mutex);
 
 	for (i = 0; i < port_info->num_phys; i++) {
-		struct device *parent;
-
 		mptsas_sas_expander_pg1(ioc, &port_info->phy_info[i],
 			(MPI_SAS_EXPAND_PGAD_FORM_HANDLE_PHY_NUM <<
 			 MPI_SAS_EXPAND_PGAD_FORM_SHIFT), (i << 16) + *handle);
@@ -1442,34 +1805,34 @@ mptsas_probe_expander_phys(MPT_ADAPTER *ioc, u32 *handle)
 			port_info->phy_info[i].attached.phy_id =
 			    port_info->phy_info[i].phy_id;
 		}
+	}
 
-		/*
-		 * If we find a parent port handle this expander is
-		 * attached to another expander, else it hangs of the
-		 * HBA phys.
-		 */
-		parent = &ioc->sh->shost_gendev;
+	parent = &ioc->sh->shost_gendev;
+	for (i = 0; i < port_info->num_phys; i++) {
 		mutex_lock(&ioc->sas_topology_mutex);
 		list_for_each_entry(p, &ioc->sas_topology, list) {
 			for (j = 0; j < p->num_phys; j++) {
-				if (port_info->phy_info[i].identify.handle ==
+				if (port_info->phy_info[i].identify.handle !=
 						p->phy_info[j].attached.handle)
-					parent = &p->phy_info[j].rphy->dev;
+					continue;
+				rphy = mptsas_get_rphy(&p->phy_info[j]);
+				parent = &rphy->dev;
 			}
 		}
 		mutex_unlock(&ioc->sas_topology_mutex);
+	}
+
+	mptsas_setup_wide_ports(ioc, port_info);
 
+	for (i = 0; i < port_info->num_phys; i++, ioc->sas_index++)
 		mptsas_probe_one_phy(parent, &port_info->phy_info[i],
 		    ioc->sas_index, 0);
-		ioc->sas_index++;
-	}
 
 	return 0;
 
  out_free_port_info:
 	if (ex) {
-		if (ex->phy_info)
-			kfree(ex->phy_info);
+		kfree(ex->phy_info);
 		kfree(ex);
 	}
  out:
@@ -1488,7 +1851,12 @@ mptsas_delete_expander_phys(MPT_ADAPTER *ioc)
 {
 	struct mptsas_portinfo buffer;
 	struct mptsas_portinfo *port_info, *n, *parent;
+	struct mptsas_phyinfo *phy_info;
+	struct scsi_target * starget;
+	VirtTarget * vtarget;
+	struct sas_port * port;
 	int i;
+	u64	expander_sas_address;
 
 	mutex_lock(&ioc->sas_topology_mutex);
 	list_for_each_entry_safe(port_info, n, &ioc->sas_topology, list) {
@@ -1503,6 +1871,25 @@ mptsas_delete_expander_phys(MPT_ADAPTER *ioc)
 		     MPI_SAS_EXPAND_PGAD_FORM_SHIFT), port_info->handle)) {
 
 			/*
+			 * Issue target reset to all child end devices
+			 * then mark them deleted to prevent further
+			 * IO going to them.
+			 */
+			phy_info = port_info->phy_info;
+			for (i = 0; i < port_info->num_phys; i++, phy_info++) {
+				starget = mptsas_get_starget(phy_info);
+				if (!starget)
+					continue;
+				vtarget = starget->hostdata;
+				if(vtarget->deleted)
+					continue;
+				vtarget->deleted = 1;
+				mptsas_target_reset(ioc, vtarget);
+				sas_port_delete(mptsas_get_port(phy_info));
+				mptsas_port_delete(phy_info->port_details);
+			}
+
+			/*
 			 * Obtain the port_info instance to the parent port
 			 */
 			parent = mptsas_find_portinfo_by_handle(ioc,
@@ -1511,34 +1898,43 @@ mptsas_delete_expander_phys(MPT_ADAPTER *ioc)
 			if (!parent)
 				goto next_port;
 
+			expander_sas_address =
+				port_info->phy_info[0].identify.sas_address;
+
 			/*
 			 * Delete rphys in the parent that point
 			 * to this expander.  The transport layer will
 			 * cleanup all the children.
 			 */
-			for (i = 0; i < parent->num_phys; i++) {
-				if ((!parent->phy_info[i].rphy) ||
-				    (parent->phy_info[i].attached.sas_address !=
-				   port_info->phy_info[i].identify.sas_address))
+			phy_info = parent->phy_info;
+			for (i = 0; i < parent->num_phys; i++, phy_info++) {
+				port = mptsas_get_port(phy_info);
+				if (!port)
+					continue;
+				if (phy_info->attached.sas_address !=
+					expander_sas_address)
 					continue;
-				sas_rphy_delete(parent->phy_info[i].rphy);
-				memset(&parent->phy_info[i].attached, 0,
-				    sizeof(struct mptsas_devinfo));
-				parent->phy_info[i].rphy = NULL;
-				parent->phy_info[i].starget = NULL;
+#ifdef MPT_DEBUG_SAS_WIDE
+				dev_printk(KERN_DEBUG, &port->dev, "delete\n");
+#endif
+				sas_port_delete(port);
+				mptsas_port_delete(phy_info->port_details);
 			}
  next_port:
+
+			phy_info = port_info->phy_info;
+			for (i = 0; i < port_info->num_phys; i++, phy_info++)
+				mptsas_port_delete(phy_info->port_details);
+
 			list_del(&port_info->list);
-			if (port_info->phy_info)
-				kfree(port_info->phy_info);
+			kfree(port_info->phy_info);
 			kfree(port_info);
 		}
 		/*
 		* Free this memory allocated from inside
 		* mptsas_sas_expander_pg0
 		*/
-		if (buffer.phy_info)
-			kfree(buffer.phy_info);
+		kfree(buffer.phy_info);
 	}
 	mutex_unlock(&ioc->sas_topology_mutex);
 }
@@ -1574,60 +1970,59 @@ mptsas_scan_sas_topology(MPT_ADAPTER *ioc)
 /*
  * Work queue thread to handle Runtime discovery
  * Mere purpose is the hot add/delete of expanders
+ *(Mutex UNLOCKED)
  */
 static void
-mptscsih_discovery_work(void * arg)
+__mptsas_discovery_work(MPT_ADAPTER *ioc)
 {
-	struct mptsas_discovery_event *ev = arg;
-	MPT_ADAPTER *ioc = ev->ioc;
 	u32 handle = 0xFFFF;
 
-	mutex_lock(&ioc->sas_discovery_mutex);
 	ioc->sas_discovery_runtime=1;
 	mptsas_delete_expander_phys(ioc);
 	mptsas_probe_hba_phys(ioc);
 	while (!mptsas_probe_expander_phys(ioc, &handle))
 		;
-	kfree(ev);
 	ioc->sas_discovery_runtime=0;
+}
+
+/*
+ * Work queue thread to handle Runtime discovery
+ * Mere purpose is the hot add/delete of expanders
+ *(Mutex LOCKED)
+ */
+static void
+mptsas_discovery_work(void * arg)
+{
+	struct mptsas_discovery_event *ev = arg;
+	MPT_ADAPTER *ioc = ev->ioc;
+
+	mutex_lock(&ioc->sas_discovery_mutex);
+	__mptsas_discovery_work(ioc);
 	mutex_unlock(&ioc->sas_discovery_mutex);
+	kfree(ev);
 }
 
 static struct mptsas_phyinfo *
-mptsas_find_phyinfo_by_parent(MPT_ADAPTER *ioc, u16 parent_handle, u8 phy_id)
+mptsas_find_phyinfo_by_sas_address(MPT_ADAPTER *ioc, u64 sas_address)
 {
 	struct mptsas_portinfo *port_info;
-	struct mptsas_devinfo device_info;
 	struct mptsas_phyinfo *phy_info = NULL;
-	int i, error;
-
-	/*
-	 * Retrieve the parent sas_address
-	 */
-	error = mptsas_sas_device_pg0(ioc, &device_info,
-		(MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
-		 MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
-		parent_handle);
-	if (error)
-		return NULL;
+	int i;
 
-	/*
-	 * The phy_info structures are never deallocated during lifetime of
-	 * a host, so the code below is safe without additional refcounting.
-	 */
 	mutex_lock(&ioc->sas_topology_mutex);
 	list_for_each_entry(port_info, &ioc->sas_topology, list) {
 		for (i = 0; i < port_info->num_phys; i++) {
-			if (port_info->phy_info[i].identify.sas_address ==
-			    device_info.sas_address &&
-			    port_info->phy_info[i].phy_id == phy_id) {
-				phy_info = &port_info->phy_info[i];
-				break;
-			}
+			if (port_info->phy_info[i].attached.sas_address
+			    != sas_address)
+				continue;
+			if (!mptsas_is_end_device(
+				&port_info->phy_info[i].attached))
+				continue;
+			phy_info = &port_info->phy_info[i];
+			break;
 		}
 	}
 	mutex_unlock(&ioc->sas_topology_mutex);
-
 	return phy_info;
 }
 
@@ -1638,21 +2033,19 @@ mptsas_find_phyinfo_by_target(MPT_ADAPTER *ioc, u32 id)
 	struct mptsas_phyinfo *phy_info = NULL;
 	int i;
 
-	/*
-	 * The phy_info structures are never deallocated during lifetime of
-	 * a host, so the code below is safe without additional refcounting.
-	 */
 	mutex_lock(&ioc->sas_topology_mutex);
 	list_for_each_entry(port_info, &ioc->sas_topology, list) {
-		for (i = 0; i < port_info->num_phys; i++)
-			if (mptsas_is_end_device(&port_info->phy_info[i].attached))
-				if (port_info->phy_info[i].attached.id == id) {
-					phy_info = &port_info->phy_info[i];
-					break;
-				}
+		for (i = 0; i < port_info->num_phys; i++) {
+			if (port_info->phy_info[i].attached.id != id)
+				continue;
+			if (!mptsas_is_end_device(
+				&port_info->phy_info[i].attached))
+				continue;
+			phy_info = &port_info->phy_info[i];
+			break;
+		}
 	}
 	mutex_unlock(&ioc->sas_topology_mutex);
-
 	return phy_info;
 }
 
@@ -1660,7 +2053,7 @@ mptsas_find_phyinfo_by_target(MPT_ADAPTER *ioc, u32 id)
  * Work queue thread to clear the persitency table
  */
 static void
-mptscsih_sas_persist_clear_table(void * arg)
+mptsas_persist_clear_table(void * arg)
 {
 	MPT_ADAPTER *ioc = (MPT_ADAPTER *)arg;
 
@@ -1681,7 +2074,6 @@ mptsas_reprobe_target(struct scsi_target *starget, int uld_attach)
 			mptsas_reprobe_lun);
 }
 
-
 /*
  * Work queue thread to handle SAS hotplug events
  */
@@ -1692,14 +2084,17 @@ mptsas_hotplug_work(void *arg)
 	MPT_ADAPTER *ioc = ev->ioc;
 	struct mptsas_phyinfo *phy_info;
 	struct sas_rphy *rphy;
+	struct sas_port *port;
 	struct scsi_device *sdev;
+	struct scsi_target * starget;
 	struct sas_identify identify;
 	char *ds = NULL;
 	struct mptsas_devinfo sas_device;
 	VirtTarget *vtarget;
+	VirtDevice *vdevice;
 
-	mutex_lock(&ioc->sas_discovery_mutex);
 
+	mutex_lock(&ioc->sas_discovery_mutex);
 	switch (ev->event_type) {
 	case MPTSAS_DEL_DEVICE:
 
@@ -1708,24 +2103,50 @@ mptsas_hotplug_work(void *arg)
 		/*
 		 * Sanity checks, for non-existing phys and remote rphys.
 		 */
-		if (!phy_info)
+		if (!phy_info || !phy_info->port_details) {
+			dfailprintk((MYIOC_s_ERR_FMT
+				"%s: exit at line=%d\n", ioc->name,
+			       	__FUNCTION__, __LINE__));
 			break;
-		if (!phy_info->rphy)
+		}
+		rphy = mptsas_get_rphy(phy_info);
+		if (!rphy) {
+			dfailprintk((MYIOC_s_ERR_FMT
+				"%s: exit at line=%d\n", ioc->name,
+			       	__FUNCTION__, __LINE__));
+			break;
+		}
+		port = mptsas_get_port(phy_info);
+		if (!port) {
+			dfailprintk((MYIOC_s_ERR_FMT
+				"%s: exit at line=%d\n", ioc->name,
+			       	__FUNCTION__, __LINE__));
 			break;
-		if (phy_info->starget) {
-			vtarget = phy_info->starget->hostdata;
+		}
+
+		starget = mptsas_get_starget(phy_info);
+		if (starget) {
+			vtarget = starget->hostdata;
 
-			if (!vtarget)
+			if (!vtarget) {
+				dfailprintk((MYIOC_s_ERR_FMT
+					"%s: exit at line=%d\n", ioc->name,
+					__FUNCTION__, __LINE__));
 				break;
+			}
+
 			/*
 			 * Handling  RAID components
 			 */
 			if (ev->phys_disk_num_valid) {
 				vtarget->target_id = ev->phys_disk_num;
 				vtarget->tflags |= MPT_TARGET_FLAGS_RAID_COMPONENT;
-				mptsas_reprobe_target(vtarget->starget, 1);
+				mptsas_reprobe_target(starget, 1);
 				break;
 			}
+
+			vtarget->deleted = 1;
+			mptsas_target_reset(ioc, vtarget);
 		}
 
 		if (phy_info->attached.device_info & MPI_SAS_DEVICE_INFO_SSP_TARGET)
@@ -1739,10 +2160,11 @@ mptsas_hotplug_work(void *arg)
 		       "removing %s device, channel %d, id %d, phy %d\n",
 		       ioc->name, ds, ev->channel, ev->id, phy_info->phy_id);
 
-		sas_rphy_delete(phy_info->rphy);
-		memset(&phy_info->attached, 0, sizeof(struct mptsas_devinfo));
-		phy_info->rphy = NULL;
-		phy_info->starget = NULL;
+#ifdef MPT_DEBUG_SAS_WIDE
+		dev_printk(KERN_DEBUG, &port->dev, "delete\n");
+#endif
+		sas_port_delete(port);
+		mptsas_port_delete(phy_info->port_details);
 		break;
 	case MPTSAS_ADD_DEVICE:
 
@@ -1754,59 +2176,60 @@ mptsas_hotplug_work(void *arg)
 		 */
 		if (mptsas_sas_device_pg0(ioc, &sas_device,
 		    (MPI_SAS_DEVICE_PGAD_FORM_BUS_TARGET_ID <<
-		     MPI_SAS_DEVICE_PGAD_FORM_SHIFT), ev->id))
+		     MPI_SAS_DEVICE_PGAD_FORM_SHIFT), ev->id)) {
+				dfailprintk((MYIOC_s_ERR_FMT
+					"%s: exit at line=%d\n", ioc->name,
+					__FUNCTION__, __LINE__));
 			break;
+		}
 
-		phy_info = mptsas_find_phyinfo_by_parent(ioc,
-				sas_device.handle_parent, sas_device.phy_id);
+		ssleep(2);
+		__mptsas_discovery_work(ioc);
 
-		if (!phy_info) {
-			u32 handle = 0xFFFF;
+		phy_info = mptsas_find_phyinfo_by_sas_address(ioc,
+				sas_device.sas_address);
 
-			/*
-			* Its possible when an expander has been hot added
-			* containing attached devices, the sas firmware
-			* may send a RC_ADDED event prior to the
-			* DISCOVERY STOP event. If that occurs, our
-			* view of the topology in the driver in respect to this
-			* expander might of not been setup, and we hit this
-			* condition.
-			* Therefore, this code kicks off discovery to
-			* refresh the data.
-			* Then again, we check whether the parent phy has
-			* been created.
-			*/
-			ioc->sas_discovery_runtime=1;
-			mptsas_delete_expander_phys(ioc);
-			mptsas_probe_hba_phys(ioc);
-			while (!mptsas_probe_expander_phys(ioc, &handle))
-				;
-			ioc->sas_discovery_runtime=0;
-
-			phy_info = mptsas_find_phyinfo_by_parent(ioc,
-				sas_device.handle_parent, sas_device.phy_id);
-			if (!phy_info)
-				break;
+		if (!phy_info || !phy_info->port_details) {
+			dfailprintk((MYIOC_s_ERR_FMT
+				"%s: exit at line=%d\n", ioc->name,
+			       	__FUNCTION__, __LINE__));
+			break;
 		}
 
-		if (phy_info->starget) {
-			vtarget = phy_info->starget->hostdata;
+		starget = mptsas_get_starget(phy_info);
+		if (starget) {
+			vtarget = starget->hostdata;
 
-			if (!vtarget)
+			if (!vtarget) {
+				dfailprintk((MYIOC_s_ERR_FMT
+					"%s: exit at line=%d\n", ioc->name,
+				       	__FUNCTION__, __LINE__));
 				break;
+			}
 			/*
 			 * Handling  RAID components
 			 */
 			if (vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT) {
 				vtarget->tflags &= ~MPT_TARGET_FLAGS_RAID_COMPONENT;
 				vtarget->target_id = ev->id;
-				mptsas_reprobe_target(phy_info->starget, 0);
+				mptsas_reprobe_target(starget, 0);
 			}
 			break;
 		}
 
-		if (phy_info->rphy)
+		if (mptsas_get_rphy(phy_info)) {
+			dfailprintk((MYIOC_s_ERR_FMT
+				"%s: exit at line=%d\n", ioc->name,
+			       	__FUNCTION__, __LINE__));
 			break;
+		}
+		port = mptsas_get_port(phy_info);
+		if (!port) {
+			dfailprintk((MYIOC_s_ERR_FMT
+				"%s: exit at line=%d\n", ioc->name,
+			       	__FUNCTION__, __LINE__));
+			break;
+		}
 
 		memcpy(&phy_info->attached, &sas_device,
 		    sizeof(struct mptsas_devinfo));
@@ -1823,28 +2246,23 @@ mptsas_hotplug_work(void *arg)
 		       ioc->name, ds, ev->channel, ev->id, ev->phy_id);
 
 		mptsas_parse_device_info(&identify, &phy_info->attached);
-		switch (identify.device_type) {
-		case SAS_END_DEVICE:
-			rphy = sas_end_device_alloc(phy_info->phy);
-			break;
-		case SAS_EDGE_EXPANDER_DEVICE:
-		case SAS_FANOUT_EXPANDER_DEVICE:
-			rphy = sas_expander_alloc(phy_info->phy, identify.device_type);
-			break;
-		default:
-			rphy = NULL;
-			break;
-		}
-		if (!rphy)
+		rphy = sas_end_device_alloc(port);
+		if (!rphy) {
+			dfailprintk((MYIOC_s_ERR_FMT
+				"%s: exit at line=%d\n", ioc->name,
+			       	__FUNCTION__, __LINE__));
 			break; /* non-fatal: an rphy can be added later */
+		}
 
 		rphy->identify = identify;
 		if (sas_rphy_add(rphy)) {
+			dfailprintk((MYIOC_s_ERR_FMT
+				"%s: exit at line=%d\n", ioc->name,
+			       	__FUNCTION__, __LINE__));
 			sas_rphy_free(rphy);
 			break;
 		}
-
-		phy_info->rphy = rphy;
+		mptsas_set_rphy(phy_info, rphy);
 		break;
 	case MPTSAS_ADD_RAID:
 		sdev = scsi_device_lookup(
@@ -1876,6 +2294,9 @@ mptsas_hotplug_work(void *arg)
 		printk(MYIOC_s_INFO_FMT
 		       "removing raid volume, channel %d, id %d\n",
 		       ioc->name, ioc->num_ports, ev->id);
+		vdevice = sdev->hostdata;
+		vdevice->vtarget->deleted = 1;
+		mptsas_target_reset(ioc, vdevice->vtarget);
 		scsi_remove_device(sdev);
 		scsi_device_put(sdev);
 		mpt_findImVolumes(ioc);
@@ -1885,12 +2306,13 @@ mptsas_hotplug_work(void *arg)
 		break;
 	}
 
-	kfree(ev);
 	mutex_unlock(&ioc->sas_discovery_mutex);
+	kfree(ev);
+
 }
 
 static void
-mptscsih_send_sas_event(MPT_ADAPTER *ioc,
+mptsas_send_sas_event(MPT_ADAPTER *ioc,
 		EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *sas_event_data)
 {
 	struct mptsas_hotplug_event *ev;
@@ -1906,7 +2328,7 @@ mptscsih_send_sas_event(MPT_ADAPTER *ioc,
 	switch (sas_event_data->ReasonCode) {
 	case MPI_EVENT_SAS_DEV_STAT_RC_ADDED:
 	case MPI_EVENT_SAS_DEV_STAT_RC_NOT_RESPONDING:
-		ev = kmalloc(sizeof(*ev), GFP_ATOMIC);
+		ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
 		if (!ev) {
 			printk(KERN_WARNING "mptsas: lost hotplug event\n");
 			break;
@@ -1936,10 +2358,9 @@ mptscsih_send_sas_event(MPT_ADAPTER *ioc,
 	/*
 	 * Persistent table is full.
 	 */
-		INIT_WORK(&ioc->mptscsih_persistTask,
-		    mptscsih_sas_persist_clear_table,
-		    (void *)ioc);
-		schedule_work(&ioc->mptscsih_persistTask);
+		INIT_WORK(&ioc->sas_persist_task,
+		    mptsas_persist_clear_table, (void *)ioc);
+		schedule_work(&ioc->sas_persist_task);
 		break;
 	case MPI_EVENT_SAS_DEV_STAT_RC_SMART_DATA:
 	/* TODO */
@@ -1951,7 +2372,7 @@ mptscsih_send_sas_event(MPT_ADAPTER *ioc,
 }
 
 static void
-mptscsih_send_raid_event(MPT_ADAPTER *ioc,
+mptsas_send_raid_event(MPT_ADAPTER *ioc,
 		EVENT_DATA_RAID *raid_event_data)
 {
 	struct mptsas_hotplug_event *ev;
@@ -1961,13 +2382,12 @@ mptscsih_send_raid_event(MPT_ADAPTER *ioc,
 	if (ioc->bus_type != SAS)
 		return;
 
-	ev = kmalloc(sizeof(*ev), GFP_ATOMIC);
+	ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
 	if (!ev) {
 		printk(KERN_WARNING "mptsas: lost hotplug event\n");
 		return;
 	}
 
-	memset(ev,0,sizeof(struct mptsas_hotplug_event));
 	INIT_WORK(&ev->work, mptsas_hotplug_work, ev);
 	ev->ioc = ioc;
 	ev->id = raid_event_data->VolumeID;
@@ -2029,7 +2449,7 @@ mptscsih_send_raid_event(MPT_ADAPTER *ioc,
 }
 
 static void
-mptscsih_send_discovery(MPT_ADAPTER *ioc,
+mptsas_send_discovery_event(MPT_ADAPTER *ioc,
 	EVENT_DATA_SAS_DISCOVERY *discovery_data)
 {
 	struct mptsas_discovery_event *ev;
@@ -2044,11 +2464,10 @@ mptscsih_send_discovery(MPT_ADAPTER *ioc,
 	if (discovery_data->DiscoveryStatus)
 		return;
 
-	ev = kmalloc(sizeof(*ev), GFP_ATOMIC);
+	ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
 	if (!ev)
 		return;
-	memset(ev,0,sizeof(struct mptsas_discovery_event));
-	INIT_WORK(&ev->work, mptscsih_discovery_work, ev);
+	INIT_WORK(&ev->work, mptsas_discovery_work, ev);
 	ev->ioc = ioc;
 	schedule_work(&ev->work);
 };
@@ -2076,21 +2495,21 @@ mptsas_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *reply)
 
 	switch (event) {
 	case MPI_EVENT_SAS_DEVICE_STATUS_CHANGE:
-		mptscsih_send_sas_event(ioc,
+		mptsas_send_sas_event(ioc,
 			(EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *)reply->Data);
 		break;
 	case MPI_EVENT_INTEGRATED_RAID:
-		mptscsih_send_raid_event(ioc,
+		mptsas_send_raid_event(ioc,
 			(EVENT_DATA_RAID *)reply->Data);
 		break;
 	case MPI_EVENT_PERSISTENT_TABLE_FULL:
-		INIT_WORK(&ioc->mptscsih_persistTask,
-		    mptscsih_sas_persist_clear_table,
+		INIT_WORK(&ioc->sas_persist_task,
+		    mptsas_persist_clear_table,
 		    (void *)ioc);
-		schedule_work(&ioc->mptscsih_persistTask);
+		schedule_work(&ioc->sas_persist_task);
 		break;
 	 case MPI_EVENT_SAS_DISCOVERY:
-		mptscsih_send_discovery(ioc,
+		mptsas_send_discovery_event(ioc,
 			(EVENT_DATA_SAS_DISCOVERY *)reply->Data);
 		break;
 	default:
@@ -2309,7 +2728,7 @@ mptsas_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	return 0;
 
-out_mptsas_probe:
+ out_mptsas_probe:
 
 	mptscsih_remove(pdev);
 	return error;
@@ -2319,6 +2738,7 @@ static void __devexit mptsas_remove(struct pci_dev *pdev)
 {
 	MPT_ADAPTER *ioc = pci_get_drvdata(pdev);
 	struct mptsas_portinfo *p, *n;
+	int i;
 
 	ioc->sas_discovery_ignore_events=1;
 	sas_remove_host(ioc->sh);
@@ -2326,8 +2746,9 @@ static void __devexit mptsas_remove(struct pci_dev *pdev)
 	mutex_lock(&ioc->sas_topology_mutex);
 	list_for_each_entry_safe(p, n, &ioc->sas_topology, list) {
 		list_del(&p->list);
-		if (p->phy_info)
-			kfree(p->phy_info);
+		for (i = 0 ; i < p->num_phys ; i++)
+			mptsas_port_delete(p->phy_info[i].port_details);
+		kfree(p->phy_info);
 		kfree(p);
 	}
 	mutex_unlock(&ioc->sas_topology_mutex);
-- 
cgit v0.10.2


From 87cf89866790a373edcf88c12b64d6d38560acdd Mon Sep 17 00:00:00 2001
From: Eric Moore <eric.moore@lsil.com>
Date: Tue, 27 Jun 2006 16:09:26 -0600
Subject: [SCSI] mptsas: Adding 1078 ROC support

* Adding 1078 ROC (Raid On Chip) Support - New host adapter

* Moving all PCI Vendor/Device ids to using internal defines; a request
from Christoph/James B. some time ago for when the next chip was added.

* Removing SAS 1066/1066E Vendor/Device IDs, as there are no plans to
manufacture that controller.

Signed-off-by: Eric Moore <Eric.Moore@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 12dd8d4..8ac77ca 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -1220,31 +1220,25 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 	port = psize = 0;
 	for (ii=0; ii < DEVICE_COUNT_RESOURCE; ii++) {
 		if (pci_resource_flags(pdev, ii) & PCI_BASE_ADDRESS_SPACE_IO) {
+			if (psize)
+				continue;
 			/* Get I/O space! */
 			port = pci_resource_start(pdev, ii);
 			psize = pci_resource_len(pdev,ii);
 		} else {
+			if (msize)
+				continue;
 			/* Get memmap */
 			mem_phys = pci_resource_start(pdev, ii);
 			msize = pci_resource_len(pdev,ii);
-			break;
 		}
 	}
 	ioc->mem_size = msize;
 
-	if (ii == DEVICE_COUNT_RESOURCE) {
-		printk(KERN_ERR MYNAM ": ERROR - MPT adapter has no memory regions defined!\n");
-		kfree(ioc);
-		return -EINVAL;
-	}
-
-	dinitprintk((KERN_INFO MYNAM ": MPT adapter @ %lx, msize=%dd bytes\n", mem_phys, msize));
-	dinitprintk((KERN_INFO MYNAM ": (port i/o @ %lx, psize=%dd bytes)\n", port, psize));
-
 	mem = NULL;
 	/* Get logical ptr for PciMem0 space */
 	/*mem = ioremap(mem_phys, msize);*/
-	mem = ioremap(mem_phys, 0x100);
+	mem = ioremap(mem_phys, msize);
 	if (mem == NULL) {
 		printk(KERN_ERR MYNAM ": ERROR - Unable to map adapter memory!\n");
 		kfree(ioc);
@@ -1344,11 +1338,6 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 		ioc->bus_type = SAS;
 		ioc->errata_flag_1064 = 1;
 	}
-	else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1066) {
-		ioc->prod_name = "LSISAS1066";
-		ioc->bus_type = SAS;
-		ioc->errata_flag_1064 = 1;
-	}
 	else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1068) {
 		ioc->prod_name = "LSISAS1068";
 		ioc->bus_type = SAS;
@@ -1358,14 +1347,14 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 		ioc->prod_name = "LSISAS1064E";
 		ioc->bus_type = SAS;
 	}
-	else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1066E) {
-		ioc->prod_name = "LSISAS1066E";
-		ioc->bus_type = SAS;
-	}
 	else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1068E) {
 		ioc->prod_name = "LSISAS1068E";
 		ioc->bus_type = SAS;
 	}
+	else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1078) {
+		ioc->prod_name = "LSISAS1078";
+		ioc->bus_type = SAS;
+	}
 
 	if (ioc->errata_flag_1064)
 		pci_disable_io_access(pdev);
@@ -3185,6 +3174,37 @@ mpt_diag_reset(MPT_ADAPTER *ioc, int ignore, int sleepFlag)
 	u32 diag1val = 0;
 #endif
 
+	if (ioc->pcidev->device == MPI_MANUFACTPAGE_DEVID_SAS1078) {
+		drsprintk((MYIOC_s_WARN_FMT "%s: Doorbell=%p; 1078 reset "
+			"address=%p\n",  ioc->name, __FUNCTION__,
+			&ioc->chip->Doorbell, &ioc->chip->Reset_1078));
+		CHIPREG_WRITE32(&ioc->chip->Reset_1078, 0x07);
+		if (sleepFlag == CAN_SLEEP)
+			msleep(1);
+		else
+			mdelay(1);
+
+		for (count = 0; count < 60; count ++) {
+			doorbell = CHIPREG_READ32(&ioc->chip->Doorbell);
+			doorbell &= MPI_IOC_STATE_MASK;
+
+			drsprintk((MYIOC_s_INFO_FMT
+				"looking for READY STATE: doorbell=%x"
+			        " count=%d\n",
+				ioc->name, doorbell, count));
+			if (doorbell == MPI_IOC_STATE_READY) {
+				return 0;
+			}
+
+			/* wait 1 sec */
+			if (sleepFlag == CAN_SLEEP)
+				msleep(1000);
+			else
+				mdelay(1000);
+		}
+		return -1;
+	}
+
 	/* Clear any existing interrupts */
 	CHIPREG_WRITE32(&ioc->chip->IntStatus, 0);
 
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index ddef586..7bf0855 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -308,7 +308,8 @@ typedef struct _SYSIF_REGS
 	u32	HostIndex;	/* 50     Host Index register        */
 	u32	Reserved4[15];	/* 54-8F                             */
 	u32	Fubar;		/* 90     For Fubar usage            */
-	u32	Reserved5[27];	/* 94-FF                             */
+	u32     Reserved5[1050];/* 94-10F8                           */
+	u32     Reset_1078;     /* 10FC   Reset 1078                 */
 } SYSIF_REGS;
 
 /*
diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c
index 74714e5..d0141cc 100644
--- a/drivers/message/fusion/mptfc.c
+++ b/drivers/message/fusion/mptfc.c
@@ -132,21 +132,21 @@ static struct scsi_host_template mptfc_driver_template = {
  */
 
 static struct pci_device_id mptfc_pci_table[] = {
-	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_FC909,
+	{ PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVICEID_FC909,
 		PCI_ANY_ID, PCI_ANY_ID },
-	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_FC919,
+	{ PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVICEID_FC919,
 		PCI_ANY_ID, PCI_ANY_ID },
-	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_FC929,
+	{ PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVICEID_FC929,
 		PCI_ANY_ID, PCI_ANY_ID },
-	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_FC919X,
+	{ PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVICEID_FC919X,
 		PCI_ANY_ID, PCI_ANY_ID },
-	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_FC929X,
+	{ PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVICEID_FC929X,
 		PCI_ANY_ID, PCI_ANY_ID },
-	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_FC939X,
+	{ PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVICEID_FC939X,
 		PCI_ANY_ID, PCI_ANY_ID },
-	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_FC949X,
+	{ PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVICEID_FC949X,
 		PCI_ANY_ID, PCI_ANY_ID },
-	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_FC949ES,
+	{ PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVICEID_FC949E,
 		PCI_ANY_ID, PCI_ANY_ID },
 	{0}	/* Terminating entry */
 };
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 0877023..bc36f5f 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -2757,17 +2757,15 @@ static void __devexit mptsas_remove(struct pci_dev *pdev)
 }
 
 static struct pci_device_id mptsas_pci_table[] = {
-	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1064,
+	{ PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_SAS1064,
 		PCI_ANY_ID, PCI_ANY_ID },
-	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1066,
+	{ PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_SAS1068,
 		PCI_ANY_ID, PCI_ANY_ID },
-	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1068,
+	{ PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_SAS1064E,
 		PCI_ANY_ID, PCI_ANY_ID },
-	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1064E,
+	{ PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_SAS1068E,
 		PCI_ANY_ID, PCI_ANY_ID },
-	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1066E,
-		PCI_ANY_ID, PCI_ANY_ID },
-	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1068E,
+	{ PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_SAS1078,
 		PCI_ANY_ID, PCI_ANY_ID },
 	{0}	/* Terminating entry */
 };
diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index 3201de0..0a1ff76 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c
@@ -775,9 +775,9 @@ static struct spi_function_template mptspi_transport_functions = {
  */
 
 static struct pci_device_id mptspi_pci_table[] = {
-	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_53C1030,
+	{ PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_53C1030,
 		PCI_ANY_ID, PCI_ANY_ID },
-	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_1030_53C1035,
+	{ PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_53C1035,
 		PCI_ANY_ID, PCI_ANY_ID },
 	{0}	/* Terminating entry */
 };
-- 
cgit v0.10.2


From dc6a78f1af10d28fb8c395034ae1e099b85c05b0 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Tue, 27 Jun 2006 22:01:28 -0700
Subject: [SCSI] atp870u: reduce huge stack usage

The atp870u driver is the largest stack eater reported by checkstack
(on x86_864, allmodconfig).  This converts the offending function
to kmalloc+kfree struct atp_unit instead of allocating it on the stack.
Was:
0x0000164c atp870u_probe [atp870u]:			3176
Now:
0x0000164c atp870u_probe [atp870u]:			408

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/atp870u.c b/drivers/scsi/atp870u.c
index 3ee4d4d..69e6e98 100644
--- a/drivers/scsi/atp870u.c
+++ b/drivers/scsi/atp870u.c
@@ -2625,29 +2625,32 @@ static int atp870u_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	unsigned int base_io, tmport, error,n;
 	unsigned char host_id;
 	struct Scsi_Host *shpnt = NULL;
-	struct atp_unit atp_dev, *p;
+	struct atp_unit *atpdev, *p;
 	unsigned char setupdata[2][16];
 	int count = 0;
-	
+
+	atpdev = kzalloc(sizeof(*atpdev), GFP_KERNEL);
+	if (!atpdev)
+		return -ENOMEM;
+
 	if (pci_enable_device(pdev))
-		return -EIO;
+		goto err_eio;
 
         if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK)) {
                 printk(KERN_INFO "atp870u: use 32bit DMA mask.\n");
         } else {
                 printk(KERN_ERR "atp870u: DMA mask required but not available.\n");
-                return -EIO;
+		goto err_eio;
         }
 
-	memset(&atp_dev, 0, sizeof atp_dev);
 	/*
 	 * It's probably easier to weed out some revisions like
 	 * this than via the PCI device table
 	 */
 	if (ent->device == PCI_DEVICE_ID_ARTOP_AEC7610) {
-		error = pci_read_config_byte(pdev, PCI_CLASS_REVISION, &atp_dev.chip_ver);
-		if (atp_dev.chip_ver < 2)
-			return -EIO;
+		error = pci_read_config_byte(pdev, PCI_CLASS_REVISION, &atpdev->chip_ver);
+		if (atpdev->chip_ver < 2)
+			goto err_eio;
 	}
 
 	switch (ent->device) {
@@ -2656,15 +2659,15 @@ static int atp870u_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	case ATP880_DEVID1:	
 	case ATP880_DEVID2:	
 	case ATP885_DEVID:	
-		atp_dev.chip_ver = 0x04;
+		atpdev->chip_ver = 0x04;
 	default:
 		break;
 	}
 	base_io = pci_resource_start(pdev, 0);
 	base_io &= 0xfffffff8;
-	
+
 	if ((ent->device == ATP880_DEVID1)||(ent->device == ATP880_DEVID2)) {
-		error = pci_read_config_byte(pdev, PCI_CLASS_REVISION, &atp_dev.chip_ver);
+		error = pci_read_config_byte(pdev, PCI_CLASS_REVISION, &atpdev->chip_ver);
 		pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 0x80);//JCC082803
 
 		host_id = inb(base_io + 0x39);
@@ -2672,17 +2675,17 @@ static int atp870u_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 		printk(KERN_INFO "   ACARD AEC-67160 PCI Ultra3 LVD Host Adapter: %d"
 			"    IO:%x, IRQ:%d.\n", count, base_io, pdev->irq);
-		atp_dev.ioport[0] = base_io + 0x40;
-		atp_dev.pciport[0] = base_io + 0x28;
-		atp_dev.dev_id = ent->device;
-		atp_dev.host_id[0] = host_id;
+		atpdev->ioport[0] = base_io + 0x40;
+		atpdev->pciport[0] = base_io + 0x28;
+		atpdev->dev_id = ent->device;
+		atpdev->host_id[0] = host_id;
 
 		tmport = base_io + 0x22;
-		atp_dev.scam_on = inb(tmport);
+		atpdev->scam_on = inb(tmport);
 		tmport += 0x13;
-		atp_dev.global_map[0] = inb(tmport);
+		atpdev->global_map[0] = inb(tmport);
 		tmport += 0x07;
-		atp_dev.ultra_map[0] = inw(tmport);
+		atpdev->ultra_map[0] = inw(tmport);
 
 		n = 0x3f09;
 next_fblk_880:
@@ -2695,57 +2698,57 @@ next_fblk_880:
 		if (inb(base_io + 0x30) == 0xff)
 			goto flash_ok_880;
 
-		atp_dev.sp[0][m++] = inb(base_io + 0x30);
-		atp_dev.sp[0][m++] = inb(base_io + 0x31);
-		atp_dev.sp[0][m++] = inb(base_io + 0x32);
-		atp_dev.sp[0][m++] = inb(base_io + 0x33);
+		atpdev->sp[0][m++] = inb(base_io + 0x30);
+		atpdev->sp[0][m++] = inb(base_io + 0x31);
+		atpdev->sp[0][m++] = inb(base_io + 0x32);
+		atpdev->sp[0][m++] = inb(base_io + 0x33);
 		outw(n, base_io + 0x34);
 		n += 0x0002;
-		atp_dev.sp[0][m++] = inb(base_io + 0x30);
-		atp_dev.sp[0][m++] = inb(base_io + 0x31);
-		atp_dev.sp[0][m++] = inb(base_io + 0x32);
-		atp_dev.sp[0][m++] = inb(base_io + 0x33);
+		atpdev->sp[0][m++] = inb(base_io + 0x30);
+		atpdev->sp[0][m++] = inb(base_io + 0x31);
+		atpdev->sp[0][m++] = inb(base_io + 0x32);
+		atpdev->sp[0][m++] = inb(base_io + 0x33);
 		outw(n, base_io + 0x34);
 		n += 0x0002;
-		atp_dev.sp[0][m++] = inb(base_io + 0x30);
-		atp_dev.sp[0][m++] = inb(base_io + 0x31);
-		atp_dev.sp[0][m++] = inb(base_io + 0x32);
-		atp_dev.sp[0][m++] = inb(base_io + 0x33);
+		atpdev->sp[0][m++] = inb(base_io + 0x30);
+		atpdev->sp[0][m++] = inb(base_io + 0x31);
+		atpdev->sp[0][m++] = inb(base_io + 0x32);
+		atpdev->sp[0][m++] = inb(base_io + 0x33);
 		outw(n, base_io + 0x34);
 		n += 0x0002;
-		atp_dev.sp[0][m++] = inb(base_io + 0x30);
-		atp_dev.sp[0][m++] = inb(base_io + 0x31);
-		atp_dev.sp[0][m++] = inb(base_io + 0x32);
-		atp_dev.sp[0][m++] = inb(base_io + 0x33);
+		atpdev->sp[0][m++] = inb(base_io + 0x30);
+		atpdev->sp[0][m++] = inb(base_io + 0x31);
+		atpdev->sp[0][m++] = inb(base_io + 0x32);
+		atpdev->sp[0][m++] = inb(base_io + 0x33);
 		n += 0x0018;
 		goto next_fblk_880;
 flash_ok_880:
 		outw(0, base_io + 0x34);
-		atp_dev.ultra_map[0] = 0;
-		atp_dev.async[0] = 0;
+		atpdev->ultra_map[0] = 0;
+		atpdev->async[0] = 0;
 		for (k = 0; k < 16; k++) {
 			n = 1;
 			n = n << k;
-			if (atp_dev.sp[0][k] > 1) {
-				atp_dev.ultra_map[0] |= n;
+			if (atpdev->sp[0][k] > 1) {
+				atpdev->ultra_map[0] |= n;
 			} else {
-				if (atp_dev.sp[0][k] == 0)
-					atp_dev.async[0] |= n;
+				if (atpdev->sp[0][k] == 0)
+					atpdev->async[0] |= n;
  			}
 	 	}
-		atp_dev.async[0] = ~(atp_dev.async[0]);
-		outb(atp_dev.global_map[0], base_io + 0x35);
+		atpdev->async[0] = ~(atpdev->async[0]);
+		outb(atpdev->global_map[0], base_io + 0x35);
  
 		shpnt = scsi_host_alloc(&atp870u_template, sizeof(struct atp_unit));
 		if (!shpnt)
-			return -ENOMEM;
+			goto err_nomem;
 
 		p = (struct atp_unit *)&shpnt->hostdata;
 
-		atp_dev.host = shpnt;
-		atp_dev.pdev = pdev;
+		atpdev->host = shpnt;
+		atpdev->pdev = pdev;
 		pci_set_drvdata(pdev, p);
-		memcpy(p, &atp_dev, sizeof atp_dev);
+		memcpy(p, atpdev, sizeof(*atpdev));
 		if (atp870u_init_tables(shpnt) < 0) {
 			printk(KERN_ERR "Unable to allocate tables for Acard controller\n");
 			goto unregister;
@@ -2798,24 +2801,24 @@ flash_ok_880:
 			printk(KERN_INFO "   ACARD AEC-67162 PCI Ultra3 LVD Host Adapter:  IO:%x, IRQ:%d.\n"
 			       , base_io, pdev->irq);
         	
-		atp_dev.pdev = pdev;	
-		atp_dev.dev_id  = ent->device;
-		atp_dev.baseport = base_io;
-		atp_dev.ioport[0] = base_io + 0x80;
-		atp_dev.ioport[1] = base_io + 0xc0;
-		atp_dev.pciport[0] = base_io + 0x40;
-		atp_dev.pciport[1] = base_io + 0x50;
+		atpdev->pdev = pdev;
+		atpdev->dev_id  = ent->device;
+		atpdev->baseport = base_io;
+		atpdev->ioport[0] = base_io + 0x80;
+		atpdev->ioport[1] = base_io + 0xc0;
+		atpdev->pciport[0] = base_io + 0x40;
+		atpdev->pciport[1] = base_io + 0x50;
 				
 		shpnt = scsi_host_alloc(&atp870u_template, sizeof(struct atp_unit));
 		if (!shpnt)
-			return -ENOMEM;
+			goto err_nomem;
         	
 		p = (struct atp_unit *)&shpnt->hostdata;
         	
-		atp_dev.host = shpnt;
-		atp_dev.pdev = pdev;
+		atpdev->host = shpnt;
+		atpdev->pdev = pdev;
 		pci_set_drvdata(pdev, p);
-		memcpy(p, &atp_dev, sizeof(struct atp_unit));
+		memcpy(p, atpdev, sizeof(struct atp_unit));
 		if (atp870u_init_tables(shpnt) < 0)
 			goto unregister;
 			
@@ -2974,33 +2977,33 @@ flash_ok_885:
 		printk(KERN_INFO "   ACARD AEC-671X PCI Ultra/W SCSI-2/3 Host Adapter: %d "
 			"IO:%x, IRQ:%d.\n", count, base_io, pdev->irq);
 
-		atp_dev.ioport[0] = base_io;
-		atp_dev.pciport[0] = base_io + 0x20;
-		atp_dev.dev_id = ent->device;
+		atpdev->ioport[0] = base_io;
+		atpdev->pciport[0] = base_io + 0x20;
+		atpdev->dev_id = ent->device;
 		host_id &= 0x07;
-		atp_dev.host_id[0] = host_id;
+		atpdev->host_id[0] = host_id;
 		tmport = base_io + 0x22;
-		atp_dev.scam_on = inb(tmport);
+		atpdev->scam_on = inb(tmport);
 		tmport += 0x0b;
-		atp_dev.global_map[0] = inb(tmport++);
-		atp_dev.ultra_map[0] = inw(tmport);
+		atpdev->global_map[0] = inb(tmport++);
+		atpdev->ultra_map[0] = inw(tmport);
 
-		if (atp_dev.ultra_map[0] == 0) {
-			atp_dev.scam_on = 0x00;
-			atp_dev.global_map[0] = 0x20;
-			atp_dev.ultra_map[0] = 0xffff;
+		if (atpdev->ultra_map[0] == 0) {
+			atpdev->scam_on = 0x00;
+			atpdev->global_map[0] = 0x20;
+			atpdev->ultra_map[0] = 0xffff;
 		}
 
 		shpnt = scsi_host_alloc(&atp870u_template, sizeof(struct atp_unit));
 		if (!shpnt)
-			return -ENOMEM;
+			goto err_nomem;
 
 		p = (struct atp_unit *)&shpnt->hostdata;
 		
-		atp_dev.host = shpnt;
-		atp_dev.pdev = pdev;
+		atpdev->host = shpnt;
+		atpdev->pdev = pdev;
 		pci_set_drvdata(pdev, p);
-		memcpy(p, &atp_dev, sizeof atp_dev);
+		memcpy(p, atpdev, sizeof(*atpdev));
 		if (atp870u_init_tables(shpnt) < 0)
 			goto unregister;
 
@@ -3010,7 +3013,7 @@ flash_ok_885:
 		}
 
 		spin_lock_irqsave(shpnt->host_lock, flags);
-		if (atp_dev.chip_ver > 0x07) {	/* check if atp876 chip then enable terminator */
+		if (atpdev->chip_ver > 0x07) {	/* check if atp876 chip then enable terminator */
 			tmport = base_io + 0x3e;
 			outb(0x00, tmport);
 		}
@@ -3044,7 +3047,7 @@ flash_ok_885:
 		outb((inb(tmport) & 0xef), tmport);
 		tmport++;
 		outb((inb(tmport) | 0x20), tmport);
-		if (atp_dev.chip_ver == 4)
+		if (atpdev->chip_ver == 4)
 			shpnt->max_id = 16;
 		else		
 			shpnt->max_id = 8;
@@ -3093,6 +3096,12 @@ unregister:
 	printk("atp870u_prob:unregister\n");
 	scsi_host_put(shpnt);
 	return -1;		
+err_eio:
+	kfree(atpdev);
+	return -EIO;
+err_nomem:
+	kfree(atpdev);
+	return -ENOMEM;
 }
 
 /* The abort command does not leave the device in a clean state where
-- 
cgit v0.10.2


From 844d3b427ef1a4f96e54866747bdb6c6cbca4c6a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Wed, 28 Jun 2006 21:48:27 -0700
Subject: MTD: fix all kernel-doc warnings

Fix all kernel-doc warnings in MTD headers and source files:
- add some missing struct fields;
- correct some function parameter names;
- use kernel-doc format for function doc. headers;
- nand_ecc.c contains only exported interfaces, no internal ones;

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl
index 6e463d0..32f3856 100644
--- a/Documentation/DocBook/mtdnand.tmpl
+++ b/Documentation/DocBook/mtdnand.tmpl
@@ -1295,7 +1295,9 @@ in this page</entry>
      </para>
 !Idrivers/mtd/nand/nand_base.c
 !Idrivers/mtd/nand/nand_bbt.c
-!Idrivers/mtd/nand/nand_ecc.c
+<!-- No internal functions for kernel-doc:
+X!Idrivers/mtd/nand/nand_ecc.c
+-->
   </chapter>
 
   <chapter id="credits">
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 80a7665..62b8613 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -155,7 +155,7 @@ static u16 nand_read_word(struct mtd_info *mtd)
 /**
  * nand_select_chip - [DEFAULT] control CE line
  * @mtd:	MTD device structure
- * @chip:	chipnumber to select, -1 for deselect
+ * @chipnr:	chipnumber to select, -1 for deselect
  *
  * Default select function for 1 chip devices.
  */
@@ -542,7 +542,6 @@ static void nand_command(struct mtd_info *mtd, unsigned int command,
  * Send command to NAND device. This is the version for the new large page
  * devices We dont have the separate regions as we have in the small page
  * devices.  We must emulate NAND_CMD_READOOB to keep the code compatible.
- *
  */
 static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
 			    int column, int page_addr)
@@ -656,7 +655,7 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
 
 /**
  * nand_get_device - [GENERIC] Get chip for selected access
- * @this:	the nand chip descriptor
+ * @chip:	the nand chip descriptor
  * @mtd:	MTD device structure
  * @new_state:	the state which is requested
  *
@@ -696,13 +695,12 @@ nand_get_device(struct nand_chip *chip, struct mtd_info *mtd, int new_state)
 /**
  * nand_wait - [DEFAULT]  wait until the command is done
  * @mtd:	MTD device structure
- * @this:	NAND chip structure
+ * @chip:	NAND chip structure
  *
  * Wait for command done. This applies to erase and program only
  * Erase can take up to 400ms and program up to 20ms according to
  * general NAND and SmartMedia specs
- *
-*/
+ */
 static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip)
 {
 
@@ -896,6 +894,7 @@ static int nand_read_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
 /**
  * nand_transfer_oob - [Internal] Transfer oob to client buffer
  * @chip:	nand chip structure
+ * @oob:	oob destination address
  * @ops:	oob ops structure
  */
 static uint8_t *nand_transfer_oob(struct nand_chip *chip, uint8_t *oob,
@@ -946,6 +945,7 @@ static uint8_t *nand_transfer_oob(struct nand_chip *chip, uint8_t *oob,
  *
  * @mtd:	MTD device structure
  * @from:	offset to read from
+ * @ops:	oob ops structure
  *
  * Internal function. Called with chip held.
  */
@@ -1760,7 +1760,7 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to,
 /**
  * nand_write_oob - [MTD Interface] NAND write data and/or out-of-band
  * @mtd:	MTD device structure
- * @from:	offset to read from
+ * @to:		offset to write to
  * @ops:	oob operation description structure
  */
 static int nand_write_oob(struct mtd_info *mtd, loff_t to,
@@ -2055,7 +2055,7 @@ static void nand_sync(struct mtd_info *mtd)
 /**
  * nand_block_isbad - [MTD Interface] Check if block at offset is bad
  * @mtd:	MTD device structure
- * @ofs:	offset relative to mtd start
+ * @offs:	offset relative to mtd start
  */
 static int nand_block_isbad(struct mtd_info *mtd, loff_t offs)
 {
diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c
index 2a163e4..dd438ca 100644
--- a/drivers/mtd/nand/nand_ecc.c
+++ b/drivers/mtd/nand/nand_ecc.c
@@ -65,8 +65,7 @@ static const u_char nand_ecc_precalc_table[] = {
 };
 
 /**
- * nand_calculate_ecc - [NAND Interface] Calculate 3 byte ECC code
- *			for 256 byte block
+ * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256-byte block
  * @mtd:	MTD block structure
  * @dat:	raw data
  * @ecc_code:	buffer for ECC
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 6655927..2266f03 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -202,7 +202,7 @@ typedef enum {
 struct nand_chip;
 
 /**
- * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independend devices
+ * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independent devices
  * @lock:               protection lock
  * @active:		the mtd device which holds the controller currently
  * @wq:			wait queue to sleep on if a NAND operation is in progress
@@ -223,12 +223,15 @@ struct nand_hw_control {
  * @total:	total number of ecc bytes per page
  * @prepad:	padding information for syndrome based ecc generators
  * @postpad:	padding information for syndrome based ecc generators
+ * @layout:	ECC layout control struct pointer
  * @hwctl:	function to control hardware ecc generator. Must only
  *		be provided if an hardware ECC is available
  * @calculate:	function for ecc calculation or readback from ecc hardware
  * @correct:	function for ecc correction, matching to ecc generator (sw/hw)
  * @read_page:	function to read a page according to the ecc generator requirements
  * @write_page:	function to write a page according to the ecc generator requirements
+ * @read_oob:	function to read chip OOB data
+ * @write_oob:	function to write chip OOB data
  */
 struct nand_ecc_ctrl {
 	nand_ecc_modes_t	mode;
@@ -300,11 +303,15 @@ struct nand_buffers {
  * @cmdfunc:		[REPLACEABLE] hardwarespecific function for writing commands to the chip
  * @waitfunc:		[REPLACEABLE] hardwarespecific function for wait on ready
  * @ecc:		[BOARDSPECIFIC] ecc control ctructure
+ * @buffers:		buffer structure for read/write
+ * @hwcontrol:		platform-specific hardware control structure
+ * @ops:		oob operation operands
  * @erase_cmd:		[INTERN] erase command write function, selectable due to AND support
  * @scan_bbt:		[REPLACEABLE] function to scan bad block table
  * @chip_delay:		[BOARDSPECIFIC] chip dependent delay for transfering data from array to read regs (tR)
  * @wq:			[INTERN] wait queue to sleep on if a NAND operation is in progress
  * @state:		[INTERN] the current state of the NAND device
+ * @oob_poi:		poison value buffer
  * @page_shift:		[INTERN] number of address bits in a page (column address bits)
  * @phys_erase_shift:	[INTERN] number of address bits in a physical eraseblock
  * @bbt_erase_shift:	[INTERN] number of address bits in a bbt entry
@@ -521,7 +528,7 @@ extern int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len,
  * struct platform_nand_chip - chip level device structure
  *
  * @nr_chips:		max. number of chips to scan for
- * @chip_offs:		chip number offset
+ * @chip_offset:	chip number offset
  * @nr_partitions:	number of partitions pointed to by partitions (or zero)
  * @partitions:		mtd partition list
  * @chip_delay:		R/B delay value in us
@@ -546,7 +553,7 @@ struct platform_nand_chip {
  * @hwcontrol:		platform specific hardware control structure
  * @dev_ready:		platform specific function to read ready/busy pin
  * @select_chip:	platform specific chip select function
- * @priv_data:		private data to transport driver specific settings
+ * @priv:		private data to transport driver specific settings
  *
  * All fields are optional and depend on the hardware driver requirements
  */
-- 
cgit v0.10.2


From ea9b6dcc152f09c207117ab121d4fa03d2db282a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Wed, 28 Jun 2006 21:48:38 -0700
Subject: MTD: kernel-doc fixes + additions

Fix some kernel-doc typos/spellos.
Use kernel-doc syntax in places where it was almost used.
Correct/add struct, struct field, and function param names where needed.

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl
index 32f3856..630159c 100644
--- a/Documentation/DocBook/mtdnand.tmpl
+++ b/Documentation/DocBook/mtdnand.tmpl
@@ -109,7 +109,7 @@
 		for most of the implementations. These functions can be replaced by the
 		board driver if neccecary. Those functions are called via pointers in the
 		NAND chip description structure. The board driver can set the functions which
-		should be replaced by board dependend functions before calling nand_scan().
+		should be replaced by board dependent functions before calling nand_scan().
 		If the function pointer is NULL on entry to nand_scan() then the pointer
 		is set to the default function which is suitable for the detected chip type.
 		</para></listitem>
@@ -133,7 +133,7 @@
 	  	[REPLACEABLE]</para><para>
 		Replaceable members hold hardware related functions which can be 
 		provided by the board driver. The board driver can set the functions which
-		should be replaced by board dependend functions before calling nand_scan().
+		should be replaced by board dependent functions before calling nand_scan().
 		If the function pointer is NULL on entry to nand_scan() then the pointer
 		is set to the default function which is suitable for the detected chip type.
 		</para></listitem>
@@ -156,9 +156,8 @@
      	<title>Basic board driver</title>
 	<para>
 		For most boards it will be sufficient to provide just the
-		basic functions and fill out some really board dependend
+		basic functions and fill out some really board dependent
 		members in the nand chip description structure.
-		See drivers/mtd/nand/skeleton for reference.
 	</para>
 	<sect1>
 		<title>Basic defines</title>
diff --git a/include/linux/mtd/bbm.h b/include/linux/mtd/bbm.h
index 7a7fbe8..1221b7c 100644
--- a/include/linux/mtd/bbm.h
+++ b/include/linux/mtd/bbm.h
@@ -19,21 +19,21 @@
 
 /**
  * struct nand_bbt_descr - bad block table descriptor
- * @param options	options for this descriptor
- * @param pages		the page(s) where we find the bbt, used with
+ * @options:		options for this descriptor
+ * @pages:		the page(s) where we find the bbt, used with
  * 			option BBT_ABSPAGE when bbt is searched,
  * 			then we store the found bbts pages here.
  *			Its an array and supports up to 8 chips now
- * @param offs		offset of the pattern in the oob area of the page
- * @param veroffs	offset of the bbt version counter in the oob are of the page
- * @param version	version read from the bbt page during scan
- * @param len		length of the pattern, if 0 no pattern check is performed
- * @param maxblocks	maximum number of blocks to search for a bbt. This number of
- *			blocks is reserved at the end of the device
+ * @offs:		offset of the pattern in the oob area of the page
+ * @veroffs:		offset of the bbt version counter in the oob area of the page
+ * @version:		version read from the bbt page during scan
+ * @len:		length of the pattern, if 0 no pattern check is performed
+ * @maxblocks:		maximum number of blocks to search for a bbt. This
+ *			number of blocks is reserved at the end of the device
  *			where the tables are written.
- * @param reserved_block_code	if non-0, this pattern denotes a reserved
+ * @reserved_block_code: if non-0, this pattern denotes a reserved
  *			(rather than bad) block in the stored bbt
- * @param pattern	pattern to identify bad block table or factory marked
+ * @pattern:		pattern to identify bad block table or factory marked
  *			good / bad blocks, can be NULL, if len = 0
  *
  * Descriptor for the bad block table marker and the descriptor for the
@@ -93,12 +93,15 @@ struct nand_bbt_descr {
 #define ONENAND_BADBLOCK_POS	0
 
 /**
- * struct bbt_info - [GENERIC] Bad Block Table data structure
- * @param bbt_erase_shift	[INTERN] number of address bits in a bbt entry
- * @param badblockpos		[INTERN] position of the bad block marker in the oob area
- * @param bbt			[INTERN] bad block table pointer
- * @param badblock_pattern	[REPLACEABLE] bad block scan pattern used for initial bad block scan
- * @param priv			[OPTIONAL] pointer to private bbm date
+ * struct bbm_info - [GENERIC] Bad Block Table data structure
+ * @bbt_erase_shift:	[INTERN] number of address bits in a bbt entry
+ * @badblockpos:	[INTERN] position of the bad block marker in the oob area
+ * @options:		options for this descriptor
+ * @bbt:		[INTERN] bad block table pointer
+ * @isbad_bbt:		function to determine if a block is bad
+ * @badblock_pattern:	[REPLACEABLE] bad block scan pattern used for
+ *			initial bad block scan
+ * @priv:		[OPTIONAL] pointer to private bbm date
  */
 struct bbm_info {
 	int bbt_erase_shift;
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 9b7a2b5..94a443d 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -77,11 +77,11 @@ typedef enum {
  *
  * @len:	number of bytes to write/read. When a data buffer is given
  *		(datbuf != NULL) this is the number of data bytes. When
- +		no data buffer is available this is the number of oob bytes.
+ *		no data buffer is available this is the number of oob bytes.
  *
  * @retlen:	number of bytes written/read. When a data buffer is given
  *		(datbuf != NULL) this is the number of data bytes. When
- +		no data buffer is available this is the number of oob bytes.
+ *		no data buffer is available this is the number of oob bytes.
  *
  * @ooblen:	number of oob bytes per page
  * @ooboffs:	offset of oob data in the oob area (only relevant when
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 2266f03..0b4cd2f 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -407,7 +407,6 @@ struct nand_chip {
 
 /**
  * struct nand_flash_dev - NAND Flash Device ID Structure
- *
  * @name:	Identify the device type
  * @id:		device ID code
  * @pagesize:	Pagesize in bytes. Either 256 or 512 or 0
@@ -526,7 +525,6 @@ extern int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len,
 
 /**
  * struct platform_nand_chip - chip level device structure
- *
  * @nr_chips:		max. number of chips to scan for
  * @chip_offset:	chip number offset
  * @nr_partitions:	number of partitions pointed to by partitions (or zero)
@@ -549,7 +547,6 @@ struct platform_nand_chip {
 
 /**
  * struct platform_nand_ctrl - controller level device structure
- *
  * @hwcontrol:		platform specific hardware control structure
  * @dev_ready:		platform specific function to read ready/busy pin
  * @select_chip:	platform specific chip select function
diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index 9ce9a48..1f49721 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -23,7 +23,7 @@ extern int onenand_scan(struct mtd_info *mtd, int max_chips);
 /* Free resources held by the OneNAND device */
 extern void onenand_release(struct mtd_info *mtd);
 
-/**
+/*
  * onenand_state_t - chip states
  * Enumeration for OneNAND flash chip state
  */
@@ -42,9 +42,9 @@ typedef enum {
 
 /**
  * struct onenand_bufferram - OneNAND BufferRAM Data
- * @param block		block address in BufferRAM
- * @param page		page address in BufferRAM
- * @param valid		valid flag
+ * @block:		block address in BufferRAM
+ * @page:		page address in BufferRAM
+ * @valid:		valid flag
  */
 struct onenand_bufferram {
 	int block;
@@ -54,32 +54,43 @@ struct onenand_bufferram {
 
 /**
  * struct onenand_chip - OneNAND Private Flash Chip Data
- * @param base		[BOARDSPECIFIC] address to access OneNAND
- * @param chipsize	[INTERN] the size of one chip for multichip arrays
- * @param device_id	[INTERN] device ID
- * @param verstion_id	[INTERN] version ID
- * @param options	[BOARDSPECIFIC] various chip options. They can partly be set to inform onenand_scan about
- * @param erase_shift	[INTERN] number of address bits in a block
- * @param page_shift	[INTERN] number of address bits in a page
- * @param ppb_shift	[INTERN] number of address bits in a pages per block
- * @param page_mask	[INTERN] a page per block mask
- * @param bufferam_index	[INTERN] BufferRAM index
- * @param bufferam	[INTERN] BufferRAM info
- * @param readw		[REPLACEABLE] hardware specific function for read short
- * @param writew	[REPLACEABLE] hardware specific function for write short
- * @param command	[REPLACEABLE] hardware specific function for writing commands to the chip
- * @param wait		[REPLACEABLE] hardware specific function for wait on ready
- * @param read_bufferram	[REPLACEABLE] hardware specific function for BufferRAM Area
- * @param write_bufferram	[REPLACEABLE] hardware specific function for BufferRAM Area
- * @param read_word	[REPLACEABLE] hardware specific function for read register of OneNAND
- * @param write_word	[REPLACEABLE] hardware specific function for write register of OneNAND
- * @param scan_bbt	[REPLACEALBE] hardware specific function for scaning Bad block Table
- * @param chip_lock	[INTERN] spinlock used to protect access to this structure and the chip
- * @param wq		[INTERN] wait queue to sleep on if a OneNAND operation is in progress
- * @param state		[INTERN] the current state of the OneNAND device
- * @param ecclayout	[REPLACEABLE] the default ecc placement scheme
- * @param bbm		[REPLACEABLE] pointer to Bad Block Management
- * @param priv		[OPTIONAL] pointer to private chip date
+ * @base:		[BOARDSPECIFIC] address to access OneNAND
+ * @chipsize:		[INTERN] the size of one chip for multichip arrays
+ * @device_id:		[INTERN] device ID
+ * @density_mask:	chip density, used for DDP devices
+ * @verstion_id:	[INTERN] version ID
+ * @options:		[BOARDSPECIFIC] various chip options. They can
+ *			partly be set to inform onenand_scan about
+ * @erase_shift:	[INTERN] number of address bits in a block
+ * @page_shift:		[INTERN] number of address bits in a page
+ * @ppb_shift:		[INTERN] number of address bits in a pages per block
+ * @page_mask:		[INTERN] a page per block mask
+ * @bufferram_index:	[INTERN] BufferRAM index
+ * @bufferram:		[INTERN] BufferRAM info
+ * @readw:		[REPLACEABLE] hardware specific function for read short
+ * @writew:		[REPLACEABLE] hardware specific function for write short
+ * @command:		[REPLACEABLE] hardware specific function for writing
+ *			commands to the chip
+ * @wait:		[REPLACEABLE] hardware specific function for wait on ready
+ * @read_bufferram:	[REPLACEABLE] hardware specific function for BufferRAM Area
+ * @write_bufferram:	[REPLACEABLE] hardware specific function for BufferRAM Area
+ * @read_word:		[REPLACEABLE] hardware specific function for read
+ *			register of OneNAND
+ * @write_word:		[REPLACEABLE] hardware specific function for write
+ *			register of OneNAND
+ * @mmcontrol:		sync burst read function
+ * @block_markbad:	function to mark a block as bad
+ * @scan_bbt:		[REPLACEALBE] hardware specific function for scanning
+ *			Bad block Table
+ * @chip_lock:		[INTERN] spinlock used to protect access to this
+ *			structure and the chip
+ * @wq:			[INTERN] wait queue to sleep on if a OneNAND
+ *			operation is in progress
+ * @state:		[INTERN] the current state of the OneNAND device
+ * @page_buf:		data buffer
+ * @ecclayout:		[REPLACEABLE] the default ecc placement scheme
+ * @bbm:		[REPLACEABLE] pointer to Bad Block Management
+ * @priv:		[OPTIONAL] pointer to private chip date
  */
 struct onenand_chip {
 	void __iomem		*base;
@@ -147,9 +158,9 @@ struct onenand_chip {
 #define ONENAND_MFR_SAMSUNG	0xec
 
 /**
- * struct nand_manufacturers - NAND Flash Manufacturer ID Structure
- * @param name:		Manufacturer name
- * @param id:		manufacturer ID code of device.
+ * struct onenand_manufacturers - NAND Flash Manufacturer ID Structure
+ * @name:	Manufacturer name
+ * @id:		manufacturer ID code of device.
 */
 struct onenand_manufacturers {
         int id;
diff --git a/include/mtd/mtd-abi.h b/include/mtd/mtd-abi.h
index 31329fc..1da3f7f 100644
--- a/include/mtd/mtd-abi.h
+++ b/include/mtd/mtd-abi.h
@@ -133,7 +133,7 @@ struct nand_ecclayout {
 };
 
 /**
- * struct mtd_ecc_stats - error correction status
+ * struct mtd_ecc_stats - error correction stats
  *
  * @corrected:	number of corrected bits
  * @failed:	number of uncorrectable errors
-- 
cgit v0.10.2


From c6e8c6ccf96e9249805d0e9828b994f4c926ad51 Mon Sep 17 00:00:00 2001
From: KaiGai Kohei <kaigai@ak.jp.nec.com>
Date: Thu, 29 Jun 2006 15:33:02 +0100
Subject: [JFFS2][XATTR] Fix xd->refcnt race condition

When xd->refcnt is checked whether this xdatum should be released
or not, atomic_dec_and_lock() is used to ensure holding the
c->erase_completion_lock.

This fix change a specification of delete_xattr_datum().
Previously, it's only called when xd->refcnt equals zero.
(calling it with positive xd->refcnt cause a BUG())
If you applied this patch, the function checks whether
xd->refcnt is zero or not under the spinlock if necessary.
Then, it marks xd DEAD flahs and links with xattr_dead_list
or releases it immediately when xd->refcnt become zero.

Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 18e66db..25bc1ae 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -50,9 +50,10 @@
  *   is used to write xdatum to medium. xd->version will be incremented.
  * create_xattr_datum(c, xprefix, xname, xvalue, xsize)
  *   is used to create new xdatum and write to medium.
- * delete_xattr_datum(c, xd)
- *   is used to delete a xdatum. It marks xd JFFS2_XFLAGS_DEAD, and allows
- *   GC to reclaim those physical nodes.
+ * unrefer_xattr_datum(c, xd)
+ *   is used to delete a xdatum. When nobody refers this xdatum, JFFS2_XFLAGS_DEAD
+ *   is set on xd->flags and chained xattr_dead_list or release it immediately.
+ *   In the first case, the garbage collector release it later.
  * -------------------------------------------------- */
 static uint32_t xattr_datum_hashkey(int xprefix, const char *xname, const char *xvalue, int xsize)
 {
@@ -394,22 +395,24 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c,
 	return xd;
 }
 
-static void delete_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
+static void unrefer_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
 {
 	/* must be called under down_write(xattr_sem) */
-	BUG_ON(atomic_read(&xd->refcnt));
+	if (atomic_dec_and_lock(&xd->refcnt, &c->erase_completion_lock)) {
+		uint32_t xid = xd->xid, version = xd->version;
 
-	unload_xattr_datum(c, xd);
-	xd->flags |= JFFS2_XFLAGS_DEAD;
-	spin_lock(&c->erase_completion_lock);
-	if (xd->node == (void *)xd) {
-		BUG_ON(!(xd->flags & JFFS2_XFLAGS_INVALID));
-		jffs2_free_xattr_datum(xd);
-	} else {
-		list_add(&xd->xindex, &c->xattr_dead_list);
+		unload_xattr_datum(c, xd);
+		xd->flags |= JFFS2_XFLAGS_DEAD;
+		if (xd->node == (void *)xd) {
+			BUG_ON(!(xd->flags & JFFS2_XFLAGS_INVALID));
+			jffs2_free_xattr_datum(xd);
+		} else {
+			list_add(&xd->xindex, &c->xattr_dead_list);
+		}
+		spin_unlock(&c->erase_completion_lock);
+
+		dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xid, version);
 	}
-	spin_unlock(&c->erase_completion_lock);
-	dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xd->xid, xd->version);
 }
 
 /* -------- xref related functions ------------------
@@ -580,8 +583,7 @@ static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *re
 	dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) was removed.\n",
 		  ref->ino, ref->xid, ref->xseqno);
 
-	if (atomic_dec_and_test(&xd->refcnt))
-		delete_xattr_datum(c, xd);
+	unrefer_xattr_datum(c, xd);
 }
 
 void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
@@ -1119,8 +1121,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
 					ref->next = c->xref_dead_list;
 					c->xref_dead_list = ref;
 					spin_unlock(&c->erase_completion_lock);
-					if (atomic_dec_and_test(&xd->refcnt))
-						delete_xattr_datum(c, xd);
+					unrefer_xattr_datum(c, xd);
 				} else {
 					ref->ic = ic;
 					ref->xd = xd;
@@ -1156,8 +1157,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
 	down_write(&c->xattr_sem);
 	if (rc) {
 		JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, request);
-		if (atomic_dec_and_test(&xd->refcnt))
-			delete_xattr_datum(c, xd);
+		unrefer_xattr_datum(c, xd);
 		up_write(&c->xattr_sem);
 		return rc;
 	}
@@ -1170,8 +1170,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
 			ic->xref = ref;
 		}
 		rc = PTR_ERR(newref);
-		if (atomic_dec_and_test(&xd->refcnt))
-			delete_xattr_datum(c, xd);
+		unrefer_xattr_datum(c, xd);
 	} else if (ref) {
 		delete_xattr_ref(c, ref);
 	}
-- 
cgit v0.10.2


From 332959cb521af6bb0281c7aae797f1f91dcb4c42 Mon Sep 17 00:00:00 2001
From: Martin Habets <errandir_news@mph.eclipse.co.uk>
Date: Wed, 28 Jun 2006 15:40:36 +0100
Subject: [SCSI] st: remove unused st_buffer.in_use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I noticed that in_use in st_buffer is not used. The patch below
against 2.6.17-rc3 removes it, assuming there is no future use for it.
It was tested in a sparc SS20 with a DLT4000.

Signed-off-by: Martin Habets <errandir_news@mph.eclipse.co.uk>
Acked-by: Kai M�kisara <kai.makisara@kolumbus.fi>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index b5218fc..756ceb9 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -3599,7 +3599,6 @@ static struct st_buffer *
 	tb->use_sg = max_sg;
 	tb->frp = (struct st_buf_fragment *)(&(tb->sg[0]) + max_sg);
 
-	tb->in_use = 1;
 	tb->dma = need_dma;
 	tb->buffer_size = got;
 
diff --git a/drivers/scsi/st.h b/drivers/scsi/st.h
index 4112090..05a5cae 100644
--- a/drivers/scsi/st.h
+++ b/drivers/scsi/st.h
@@ -31,7 +31,6 @@ struct st_request {
 
 /* The tape buffer descriptor. */
 struct st_buffer {
-	unsigned char in_use;
 	unsigned char dma;	/* DMA-able buffer */
 	unsigned char do_dio;   /* direct i/o set up? */
 	int buffer_size;
-- 
cgit v0.10.2


From 01cb225dad8da2e717356fab03240e2f4a8d01bf Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 28 Jun 2006 12:00:22 -0500
Subject: [SCSI] iscsi: add target discvery event to transport class

Patch from david.somayajulu@qlogic.com:

Add target discovery event. We may have a setup where the iscsi traffic
is on a different netowrk than the other network traffic. In this case
we will want to do discovery though the iscsi card. This patch adds
a event to the transport class that can be used by hw iscsi cards that
support this.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 5569fdc..99e76d4 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -978,6 +978,21 @@ iscsi_if_transport_ep(struct iscsi_transport *transport,
 }
 
 static int
+iscsi_tgt_dscvr(struct iscsi_transport *transport,
+		struct iscsi_uevent *ev)
+{
+	struct sockaddr *dst_addr;
+
+	if (!transport->tgt_dscvr)
+		return -EINVAL;
+
+	dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev));
+	return transport->tgt_dscvr(ev->u.tgt_dscvr.type,
+				    ev->u.tgt_dscvr.host_no,
+				    ev->u.tgt_dscvr.enable, dst_addr);
+}
+
+static int
 iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	int err = 0;
@@ -1065,6 +1080,9 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT:
 		err = iscsi_if_transport_ep(transport, ev, nlh->nlmsg_type);
 		break;
+	case ISCSI_UEVENT_TGT_DSCVR:
+		err = iscsi_tgt_dscvr(transport, ev);
+		break;
 	default:
 		err = -EINVAL;
 		break;
diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h
index 253797c..8813f0f 100644
--- a/include/scsi/iscsi_if.h
+++ b/include/scsi/iscsi_if.h
@@ -47,12 +47,20 @@ enum iscsi_uevent_e {
 	ISCSI_UEVENT_TRANSPORT_EP_POLL		= UEVENT_BASE + 13,
 	ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT	= UEVENT_BASE + 14,
 
+	ISCSI_UEVENT_TGT_DSCVR		= UEVENT_BASE + 15,
+
 	/* up events */
 	ISCSI_KEVENT_RECV_PDU		= KEVENT_BASE + 1,
 	ISCSI_KEVENT_CONN_ERROR		= KEVENT_BASE + 2,
 	ISCSI_KEVENT_IF_ERROR		= KEVENT_BASE + 3,
 };
 
+enum iscsi_tgt_dscvr {
+	ISCSI_TGT_DSCVR_SEND_TARGETS	= 1,
+	ISCSI_TGT_DSCVR_ISNS		= 2,
+	ISCSI_TGT_DSCVR_SLP		= 3,
+};
+
 struct iscsi_uevent {
 	uint32_t type; /* k/u events type */
 	uint32_t iferror; /* carries interface or resource errors */
@@ -116,6 +124,17 @@ struct iscsi_uevent {
 		struct msg_transport_disconnect {
 			uint64_t	ep_handle;
 		} ep_disconnect;
+		struct msg_tgt_dscvr {
+			enum iscsi_tgt_dscvr	type;
+			uint32_t	host_no;
+			/*
+ 			 * enable = 1 to establish a new connection
+			 * with the server. enable = 0 to disconnect
+			 * from the server. Used primarily to switch
+			 * from one iSNS server to another.
+			 */
+			uint32_t	enable;
+		} tgt_dscvr;
 	} u;
 	union {
 		/* messages k -> u */
@@ -141,6 +160,24 @@ struct iscsi_uevent {
 		struct msg_transport_connect_ret {
 			uint64_t	handle;
 		} ep_connect_ret;
+		struct msg_tgt_dscvr_ret {
+			/*
+			 * session/connection pair used to reference
+			 * the connection to server
+			 */
+			uint32_t	sid;
+			uint32_t	cid;
+			union {
+				struct isns {
+					/* port # for conn to iSNS server */
+					uint16_t isns_port;
+					/* listening port to receive SCNs */
+					uint16_t scn_port;
+					/* listening port to receive ESIs */
+					uint16_t esi_port;
+				} isns_attrib;
+			} u;
+		} tgt_dscvr_ret;
 	} r;
 } __attribute__ ((aligned (sizeof(uint64_t))));
 
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index b684426..b95151a 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -127,6 +127,8 @@ struct iscsi_transport {
 			   uint64_t *ep_handle);
 	int (*ep_poll) (uint64_t ep_handle, int timeout_ms);
 	void (*ep_disconnect) (uint64_t ep_handle);
+	int (*tgt_dscvr) (enum iscsi_tgt_dscvr type, uint32_t host_no,
+			  uint32_t enable, struct sockaddr *dst_addr);
 };
 
 /*
-- 
cgit v0.10.2


From a54a52caad4bd6166cb7fa64e4e93031fa2fda5d Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 28 Jun 2006 12:00:23 -0500
Subject: [SCSI] iscsi: fixup set/get param functions

Reduce duplication in the software iscsi_transport modules by
adding a libiscsi function to handle the common grunt work.

This also has the drivers return specifc -EXXX values for different
errors so userspace can finally handle them in a sane way.

Also just pass the sysfs buffers to the drivers so HW iscsi can
get/set its string values, like targetname, and initiatorname.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 2673a11..7c76a98 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1697,6 +1697,185 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session,
 }
 EXPORT_SYMBOL_GPL(iscsi_conn_bind);
 
+
+int iscsi_set_param(struct iscsi_cls_conn *cls_conn,
+		    enum iscsi_param param, char *buf, int buflen)
+{
+	struct iscsi_conn *conn = cls_conn->dd_data;
+	struct iscsi_session *session = conn->session;
+	uint32_t value;
+
+	switch(param) {
+	case ISCSI_PARAM_MAX_RECV_DLENGTH:
+		sscanf(buf, "%d", &conn->max_recv_dlength);
+		break;
+	case ISCSI_PARAM_MAX_XMIT_DLENGTH:
+		sscanf(buf, "%d", &conn->max_xmit_dlength);
+		break;
+	case ISCSI_PARAM_HDRDGST_EN:
+		sscanf(buf, "%d", &conn->hdrdgst_en);
+		break;
+	case ISCSI_PARAM_DATADGST_EN:
+		sscanf(buf, "%d", &conn->datadgst_en);
+		break;
+	case ISCSI_PARAM_INITIAL_R2T_EN:
+		sscanf(buf, "%d", &session->initial_r2t_en);
+		break;
+	case ISCSI_PARAM_MAX_R2T:
+		sscanf(buf, "%d", &session->max_r2t);
+		break;
+	case ISCSI_PARAM_IMM_DATA_EN:
+		sscanf(buf, "%d", &session->imm_data_en);
+		break;
+	case ISCSI_PARAM_FIRST_BURST:
+		sscanf(buf, "%d", &session->first_burst);
+		break;
+	case ISCSI_PARAM_MAX_BURST:
+		sscanf(buf, "%d", &session->max_burst);
+		break;
+	case ISCSI_PARAM_PDU_INORDER_EN:
+		sscanf(buf, "%d", &session->pdu_inorder_en);
+		break;
+	case ISCSI_PARAM_DATASEQ_INORDER_EN:
+		sscanf(buf, "%d", &session->dataseq_inorder_en);
+		break;
+	case ISCSI_PARAM_ERL:
+		sscanf(buf, "%d", &session->erl);
+		break;
+	case ISCSI_PARAM_IFMARKER_EN:
+		sscanf(buf, "%d", &value);
+		BUG_ON(value);
+		break;
+	case ISCSI_PARAM_OFMARKER_EN:
+		sscanf(buf, "%d", &value);
+		BUG_ON(value);
+		break;
+	case ISCSI_PARAM_EXP_STATSN:
+		sscanf(buf, "%u", &conn->exp_statsn);
+		break;
+	case ISCSI_PARAM_TARGET_NAME:
+		/* this should not change between logins */
+		if (session->targetname)
+			break;
+
+		session->targetname = kstrdup(buf, GFP_KERNEL);
+		if (!session->targetname)
+			return -ENOMEM;
+		break;
+	case ISCSI_PARAM_TPGT:
+		sscanf(buf, "%d", &session->tpgt);
+		break;
+	case ISCSI_PARAM_PERSISTENT_PORT:
+		sscanf(buf, "%d", &conn->persistent_port);
+		break;
+	case ISCSI_PARAM_PERSISTENT_ADDRESS:
+		/*
+		 * this is the address returned in discovery so it should
+		 * not change between logins.
+		 */
+		if (conn->persistent_address)
+			break;
+
+		conn->persistent_address = kstrdup(buf, GFP_KERNEL);
+		if (!conn->persistent_address)
+			return -ENOMEM;
+		break;
+	default:
+		return -ENOSYS;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iscsi_set_param);
+
+int iscsi_session_get_param(struct iscsi_cls_session *cls_session,
+			    enum iscsi_param param, char *buf)
+{
+	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
+	struct iscsi_session *session = iscsi_hostdata(shost->hostdata);
+	int len;
+
+	switch(param) {
+	case ISCSI_PARAM_INITIAL_R2T_EN:
+		len = sprintf(buf, "%d\n", session->initial_r2t_en);
+		break;
+	case ISCSI_PARAM_MAX_R2T:
+		len = sprintf(buf, "%hu\n", session->max_r2t);
+		break;
+	case ISCSI_PARAM_IMM_DATA_EN:
+		len = sprintf(buf, "%d\n", session->imm_data_en);
+		break;
+	case ISCSI_PARAM_FIRST_BURST:
+		len = sprintf(buf, "%u\n", session->first_burst);
+		break;
+	case ISCSI_PARAM_MAX_BURST:
+		len = sprintf(buf, "%u\n", session->max_burst);
+		break;
+	case ISCSI_PARAM_PDU_INORDER_EN:
+		len = sprintf(buf, "%d\n", session->pdu_inorder_en);
+		break;
+	case ISCSI_PARAM_DATASEQ_INORDER_EN:
+		len = sprintf(buf, "%d\n", session->dataseq_inorder_en);
+		break;
+	case ISCSI_PARAM_ERL:
+		len = sprintf(buf, "%d\n", session->erl);
+		break;
+	case ISCSI_PARAM_TARGET_NAME:
+		len = sprintf(buf, "%s\n", session->targetname);
+		break;
+	case ISCSI_PARAM_TPGT:
+		len = sprintf(buf, "%d\n", session->tpgt);
+		break;
+	default:
+		return -ENOSYS;
+	}
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(iscsi_session_get_param);
+
+int iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn,
+			 enum iscsi_param param, char *buf)
+{
+	struct iscsi_conn *conn = cls_conn->dd_data;
+	int len;
+
+	switch(param) {
+	case ISCSI_PARAM_MAX_RECV_DLENGTH:
+		len = sprintf(buf, "%u\n", conn->max_recv_dlength);
+		break;
+	case ISCSI_PARAM_MAX_XMIT_DLENGTH:
+		len = sprintf(buf, "%u\n", conn->max_xmit_dlength);
+		break;
+	case ISCSI_PARAM_HDRDGST_EN:
+		len = sprintf(buf, "%d\n", conn->hdrdgst_en);
+		break;
+	case ISCSI_PARAM_DATADGST_EN:
+		len = sprintf(buf, "%d\n", conn->datadgst_en);
+		break;
+	case ISCSI_PARAM_IFMARKER_EN:
+		len = sprintf(buf, "%d\n", conn->ifmarker_en);
+		break;
+	case ISCSI_PARAM_OFMARKER_EN:
+		len = sprintf(buf, "%d\n", conn->ofmarker_en);
+		break;
+	case ISCSI_PARAM_EXP_STATSN:
+		len = sprintf(buf, "%u\n", conn->exp_statsn);
+		break;
+	case ISCSI_PARAM_PERSISTENT_PORT:
+		len = sprintf(buf, "%d\n", conn->persistent_port);
+		break;
+	case ISCSI_PARAM_PERSISTENT_ADDRESS:
+		len = sprintf(buf, "%s\n", conn->persistent_address);
+		break;
+	default:
+		return -ENOSYS;
+	}
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(iscsi_conn_get_param);
+
 MODULE_AUTHOR("Mike Christie");
 MODULE_DESCRIPTION("iSCSI library functions");
 MODULE_LICENSE("GPL");
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 99e76d4..147c854 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -233,7 +233,6 @@ static void iscsi_session_release(struct device *dev)
 
 	shost = iscsi_session_to_shost(session);
 	scsi_host_put(shost);
-	kfree(session->targetname);
 	kfree(session);
 	module_put(transport->owner);
 }
@@ -388,7 +387,6 @@ static void iscsi_conn_release(struct device *dev)
 	struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev);
 	struct device *parent = conn->dev.parent;
 
-	kfree(conn->persistent_address);
 	kfree(conn);
 	put_device(parent);
 }
@@ -877,23 +875,13 @@ iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev
 	return 0;
 }
 
-static void
-iscsi_copy_param(struct iscsi_uevent *ev, uint32_t *value, char *data)
-{
-	if (ev->u.set_param.len != sizeof(uint32_t))
-		BUG();
-	memcpy(value, data, min_t(uint32_t, sizeof(uint32_t),
-		ev->u.set_param.len));
-}
-
 static int
 iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
 {
 	char *data = (char*)ev + sizeof(*ev);
 	struct iscsi_cls_conn *conn;
 	struct iscsi_cls_session *session;
-	int err = 0;
-	uint32_t value = 0;
+	int err = 0, value = 0;
 
 	session = iscsi_session_lookup(ev->u.set_param.sid);
 	conn = iscsi_conn_lookup(ev->u.set_param.sid, ev->u.set_param.cid);
@@ -902,42 +890,13 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
 
 	switch (ev->u.set_param.param) {
 	case ISCSI_PARAM_SESS_RECOVERY_TMO:
-		iscsi_copy_param(ev, &value, data);
+		sscanf(data, "%d", &value);
 		if (value != 0)
 			session->recovery_tmo = value;
 		break;
-	case ISCSI_PARAM_TARGET_NAME:
-		/* this should not change between logins */
-		if (session->targetname)
-			return 0;
-
-		session->targetname = kstrdup(data, GFP_KERNEL);
-		if (!session->targetname)
-			return -ENOMEM;
-		break;
-	case ISCSI_PARAM_TPGT:
-		iscsi_copy_param(ev, &value, data);
-		session->tpgt = value;
-		break;
-	case ISCSI_PARAM_PERSISTENT_PORT:
-		iscsi_copy_param(ev, &value, data);
-		conn->persistent_port = value;
-		break;
-	case ISCSI_PARAM_PERSISTENT_ADDRESS:
-		/*
-		 * this is the address returned in discovery so it should
-		 * not change between logins.
-		 */
-		if (conn->persistent_address)
-			return 0;
-
-		conn->persistent_address = kstrdup(data, GFP_KERNEL);
-		if (!conn->persistent_address)
-			return -ENOMEM;
-		break;
 	default:
-		iscsi_copy_param(ev, &value, data);
-		err = transport->set_param(conn, ev->u.set_param.param, value);
+		err = transport->set_param(conn, ev->u.set_param.param,
+					   data, ev->u.set_param.len);
 	}
 
 	return err;
@@ -1165,49 +1124,31 @@ struct class_device_attribute class_device_attr_##_prefix##_##_name =	\
 /*
  * iSCSI connection attrs
  */
-#define iscsi_conn_int_attr_show(param, format)				\
+#define iscsi_conn_attr_show(param)					\
 static ssize_t								\
-show_conn_int_param_##param(struct class_device *cdev, char *buf)	\
+show_conn_param_##param(struct class_device *cdev, char *buf)		\
 {									\
-	uint32_t value = 0;						\
 	struct iscsi_cls_conn *conn = iscsi_cdev_to_conn(cdev);		\
 	struct iscsi_transport *t = conn->transport;			\
-									\
-	t->get_conn_param(conn, param, &value);				\
-	return snprintf(buf, 20, format"\n", value);			\
+	return t->get_conn_param(conn, param, buf);			\
 }
 
-#define iscsi_conn_int_attr(field, param, format)			\
-	iscsi_conn_int_attr_show(param, format)				\
-static ISCSI_CLASS_ATTR(conn, field, S_IRUGO, show_conn_int_param_##param, \
+#define iscsi_conn_attr(field, param)					\
+	iscsi_conn_attr_show(param)					\
+static ISCSI_CLASS_ATTR(conn, field, S_IRUGO, show_conn_param_##param,	\
 			NULL);
 
-iscsi_conn_int_attr(max_recv_dlength, ISCSI_PARAM_MAX_RECV_DLENGTH, "%u");
-iscsi_conn_int_attr(max_xmit_dlength, ISCSI_PARAM_MAX_XMIT_DLENGTH, "%u");
-iscsi_conn_int_attr(header_digest, ISCSI_PARAM_HDRDGST_EN, "%d");
-iscsi_conn_int_attr(data_digest, ISCSI_PARAM_DATADGST_EN, "%d");
-iscsi_conn_int_attr(ifmarker, ISCSI_PARAM_IFMARKER_EN, "%d");
-iscsi_conn_int_attr(ofmarker, ISCSI_PARAM_OFMARKER_EN, "%d");
-iscsi_conn_int_attr(persistent_port, ISCSI_PARAM_PERSISTENT_PORT, "%d");
-iscsi_conn_int_attr(port, ISCSI_PARAM_CONN_PORT, "%d");
-iscsi_conn_int_attr(exp_statsn, ISCSI_PARAM_EXP_STATSN, "%u");
-
-#define iscsi_conn_str_attr_show(param)					\
-static ssize_t								\
-show_conn_str_param_##param(struct class_device *cdev, char *buf)	\
-{									\
-	struct iscsi_cls_conn *conn = iscsi_cdev_to_conn(cdev);		\
-	struct iscsi_transport *t = conn->transport;			\
-	return t->get_conn_str_param(conn, param, buf);			\
-}
-
-#define iscsi_conn_str_attr(field, param)				\
-	iscsi_conn_str_attr_show(param)					\
-static ISCSI_CLASS_ATTR(conn, field, S_IRUGO, show_conn_str_param_##param, \
-			NULL);
-
-iscsi_conn_str_attr(persistent_address, ISCSI_PARAM_PERSISTENT_ADDRESS);
-iscsi_conn_str_attr(address, ISCSI_PARAM_CONN_ADDRESS);
+iscsi_conn_attr(max_recv_dlength, ISCSI_PARAM_MAX_RECV_DLENGTH);
+iscsi_conn_attr(max_xmit_dlength, ISCSI_PARAM_MAX_XMIT_DLENGTH);
+iscsi_conn_attr(header_digest, ISCSI_PARAM_HDRDGST_EN);
+iscsi_conn_attr(data_digest, ISCSI_PARAM_DATADGST_EN);
+iscsi_conn_attr(ifmarker, ISCSI_PARAM_IFMARKER_EN);
+iscsi_conn_attr(ofmarker, ISCSI_PARAM_OFMARKER_EN);
+iscsi_conn_attr(persistent_port, ISCSI_PARAM_PERSISTENT_PORT);
+iscsi_conn_attr(port, ISCSI_PARAM_CONN_PORT);
+iscsi_conn_attr(exp_statsn, ISCSI_PARAM_EXP_STATSN);
+iscsi_conn_attr(persistent_address, ISCSI_PARAM_PERSISTENT_ADDRESS);
+iscsi_conn_attr(address, ISCSI_PARAM_CONN_ADDRESS);
 
 #define iscsi_cdev_to_session(_cdev) \
 	iscsi_dev_to_session(_cdev->dev)
@@ -1215,61 +1156,36 @@ iscsi_conn_str_attr(address, ISCSI_PARAM_CONN_ADDRESS);
 /*
  * iSCSI session attrs
  */
-#define iscsi_session_int_attr_show(param, format)			\
-static ssize_t								\
-show_session_int_param_##param(struct class_device *cdev, char *buf)	\
-{									\
-	uint32_t value = 0;						\
-	struct iscsi_cls_session *session = iscsi_cdev_to_session(cdev);	\
-	struct iscsi_transport *t = session->transport;			\
-									\
-	t->get_session_param(session, param, &value);			\
-	return snprintf(buf, 20, format"\n", value);			\
-}
-
-#define iscsi_session_int_attr(field, param, format)			\
-	iscsi_session_int_attr_show(param, format)			\
-static ISCSI_CLASS_ATTR(sess, field, S_IRUGO, show_session_int_param_##param, \
-			NULL);
-
-iscsi_session_int_attr(initial_r2t, ISCSI_PARAM_INITIAL_R2T_EN, "%d");
-iscsi_session_int_attr(max_outstanding_r2t, ISCSI_PARAM_MAX_R2T, "%hu");
-iscsi_session_int_attr(immediate_data, ISCSI_PARAM_IMM_DATA_EN, "%d");
-iscsi_session_int_attr(first_burst_len, ISCSI_PARAM_FIRST_BURST, "%u");
-iscsi_session_int_attr(max_burst_len, ISCSI_PARAM_MAX_BURST, "%u");
-iscsi_session_int_attr(data_pdu_in_order, ISCSI_PARAM_PDU_INORDER_EN, "%d");
-iscsi_session_int_attr(data_seq_in_order, ISCSI_PARAM_DATASEQ_INORDER_EN, "%d");
-iscsi_session_int_attr(erl, ISCSI_PARAM_ERL, "%d");
-iscsi_session_int_attr(tpgt, ISCSI_PARAM_TPGT, "%d");
-
-#define iscsi_session_str_attr_show(param)				\
+#define iscsi_session_attr_show(param)					\
 static ssize_t								\
-show_session_str_param_##param(struct class_device *cdev, char *buf)	\
+show_session_param_##param(struct class_device *cdev, char *buf)	\
 {									\
 	struct iscsi_cls_session *session = iscsi_cdev_to_session(cdev); \
 	struct iscsi_transport *t = session->transport;			\
-	return t->get_session_str_param(session, param, buf);		\
+	return t->get_session_param(session, param, buf);		\
 }
 
-#define iscsi_session_str_attr(field, param)				\
-	iscsi_session_str_attr_show(param)				\
-static ISCSI_CLASS_ATTR(sess, field, S_IRUGO, show_session_str_param_##param, \
+#define iscsi_session_attr(field, param)				\
+	iscsi_session_attr_show(param)					\
+static ISCSI_CLASS_ATTR(sess, field, S_IRUGO, show_session_param_##param, \
 			NULL);
 
-iscsi_session_str_attr(targetname, ISCSI_PARAM_TARGET_NAME);
+iscsi_session_attr(targetname, ISCSI_PARAM_TARGET_NAME);
+iscsi_session_attr(initial_r2t, ISCSI_PARAM_INITIAL_R2T_EN);
+iscsi_session_attr(max_outstanding_r2t, ISCSI_PARAM_MAX_R2T);
+iscsi_session_attr(immediate_data, ISCSI_PARAM_IMM_DATA_EN);
+iscsi_session_attr(first_burst_len, ISCSI_PARAM_FIRST_BURST);
+iscsi_session_attr(max_burst_len, ISCSI_PARAM_MAX_BURST);
+iscsi_session_attr(data_pdu_in_order, ISCSI_PARAM_PDU_INORDER_EN);
+iscsi_session_attr(data_seq_in_order, ISCSI_PARAM_DATASEQ_INORDER_EN);
+iscsi_session_attr(erl, ISCSI_PARAM_ERL);
+iscsi_session_attr(tpgt, ISCSI_PARAM_TPGT);
 
-/*
- * Private session and conn attrs. userspace uses several iscsi values
- * to identify each session between reboots. Some of these values may not
- * be present in the iscsi_transport/LLD driver becuase userspace handles
- * login (and failback for login redirect) so for these type of drivers
- * the class manages the attrs and values for the iscsi_transport/LLD
- */
 #define iscsi_priv_session_attr_show(field, format)			\
 static ssize_t								\
-show_priv_session_##field(struct class_device *cdev, char *buf)	\
+show_priv_session_##field(struct class_device *cdev, char *buf)		\
 {									\
-	struct iscsi_cls_session *session = iscsi_cdev_to_session(cdev); \
+	struct iscsi_cls_session *session = iscsi_cdev_to_session(cdev);\
 	return sprintf(buf, format"\n", session->field);		\
 }
 
@@ -1277,31 +1193,15 @@ show_priv_session_##field(struct class_device *cdev, char *buf)	\
 	iscsi_priv_session_attr_show(field, format)			\
 static ISCSI_CLASS_ATTR(priv_sess, field, S_IRUGO, show_priv_session_##field, \
 			NULL)
-iscsi_priv_session_attr(targetname, "%s");
-iscsi_priv_session_attr(tpgt, "%d");
 iscsi_priv_session_attr(recovery_tmo, "%d");
 
-#define iscsi_priv_conn_attr_show(field, format)			\
-static ssize_t								\
-show_priv_conn_##field(struct class_device *cdev, char *buf)		\
-{									\
-	struct iscsi_cls_conn *conn = iscsi_cdev_to_conn(cdev);		\
-	return sprintf(buf, format"\n", conn->field);			\
-}
-
-#define iscsi_priv_conn_attr(field, format)				\
-	iscsi_priv_conn_attr_show(field, format)			\
-static ISCSI_CLASS_ATTR(priv_conn, field, S_IRUGO, show_priv_conn_##field, \
-			NULL)
-iscsi_priv_conn_attr(persistent_address, "%s");
-iscsi_priv_conn_attr(persistent_port, "%d");
-
 #define SETUP_PRIV_SESSION_RD_ATTR(field)				\
 do {									\
 	priv->session_attrs[count] = &class_device_attr_priv_sess_##field; \
 	count++;							\
 } while (0)
 
+
 #define SETUP_SESSION_RD_ATTR(field, param_flag)			\
 do {									\
 	if (tt->param_mask & param_flag) {				\
@@ -1310,12 +1210,6 @@ do {									\
 	}								\
 } while (0)
 
-#define SETUP_PRIV_CONN_RD_ATTR(field)					\
-do {									\
-	priv->conn_attrs[count] = &class_device_attr_priv_conn_##field; \
-	count++;							\
-} while (0)
-
 #define SETUP_CONN_RD_ATTR(field, param_flag)				\
 do {									\
 	if (tt->param_mask & param_flag) {				\
@@ -1442,16 +1336,8 @@ iscsi_register_transport(struct iscsi_transport *tt)
 	SETUP_CONN_RD_ATTR(address, ISCSI_CONN_ADDRESS);
 	SETUP_CONN_RD_ATTR(port, ISCSI_CONN_PORT);
 	SETUP_CONN_RD_ATTR(exp_statsn, ISCSI_EXP_STATSN);
-
-	if (tt->param_mask & ISCSI_PERSISTENT_ADDRESS)
-		SETUP_CONN_RD_ATTR(persistent_address, ISCSI_PERSISTENT_ADDRESS);
-	else
-		SETUP_PRIV_CONN_RD_ATTR(persistent_address);
-
-	if (tt->param_mask & ISCSI_PERSISTENT_PORT)
-		SETUP_CONN_RD_ATTR(persistent_port, ISCSI_PERSISTENT_PORT);
-	else
-		SETUP_PRIV_CONN_RD_ATTR(persistent_port);
+	SETUP_CONN_RD_ATTR(persistent_address, ISCSI_PERSISTENT_ADDRESS);
+	SETUP_CONN_RD_ATTR(persistent_port, ISCSI_PERSISTENT_PORT);
 
 	BUG_ON(count > ISCSI_CONN_ATTRS);
 	priv->conn_attrs[count] = NULL;
@@ -1471,18 +1357,10 @@ iscsi_register_transport(struct iscsi_transport *tt)
 	SETUP_SESSION_RD_ATTR(data_pdu_in_order, ISCSI_PDU_INORDER_EN);
 	SETUP_SESSION_RD_ATTR(data_seq_in_order, ISCSI_DATASEQ_INORDER_EN);
 	SETUP_SESSION_RD_ATTR(erl, ISCSI_ERL);
+	SETUP_SESSION_RD_ATTR(targetname, ISCSI_TARGET_NAME);
+	SETUP_SESSION_RD_ATTR(tpgt, ISCSI_TPGT);
 	SETUP_PRIV_SESSION_RD_ATTR(recovery_tmo);
 
-	if (tt->param_mask & ISCSI_TARGET_NAME)
-		SETUP_SESSION_RD_ATTR(targetname, ISCSI_TARGET_NAME);
-	else
-		SETUP_PRIV_SESSION_RD_ATTR(targetname);
-
-	if (tt->param_mask & ISCSI_TPGT)
-		SETUP_SESSION_RD_ATTR(tpgt, ISCSI_TPGT);
-	else
-		SETUP_PRIV_SESSION_RD_ATTR(tpgt);
-
 	BUG_ON(count > ISCSI_SESSION_ATTRS);
 	priv->session_attrs[count] = NULL;
 
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index cbf7e58..ba27608 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -157,6 +157,11 @@ struct iscsi_conn {
 	int			max_xmit_dlength; /* target_max_recv_dsl */
 	int			hdrdgst_en;
 	int			datadgst_en;
+	int			ifmarker_en;
+	int			ofmarker_en;
+	/* values userspace uses to id a conn */
+	int			persistent_port;
+	char			*persistent_address;
 
 	/* MIB-statistics */
 	uint64_t		txdata_octets;
@@ -196,8 +201,8 @@ struct iscsi_session {
 	int			pdu_inorder_en;
 	int			dataseq_inorder_en;
 	int			erl;
-	int			ifmarker_en;
-	int			ofmarker_en;
+	int			tpgt;
+	char			*targetname;
 
 	/* control data */
 	struct iscsi_transport	*tt;
@@ -240,6 +245,10 @@ iscsi_session_setup(struct iscsi_transport *, struct scsi_transport_template *,
 extern void iscsi_session_teardown(struct iscsi_cls_session *);
 extern struct iscsi_session *class_to_transport_session(struct iscsi_cls_session *);
 extern void iscsi_session_recovery_timedout(struct iscsi_cls_session *);
+extern int iscsi_set_param(struct iscsi_cls_conn *cls_conn,
+			   enum iscsi_param param, char *buf, int buflen);
+extern int iscsi_session_get_param(struct iscsi_cls_session *cls_session,
+				   enum iscsi_param param, char *buf);
 
 #define session_to_cls(_sess) \
 	hostdata_session(_sess->host->hostdata)
@@ -255,6 +264,8 @@ extern void iscsi_conn_stop(struct iscsi_cls_conn *, int);
 extern int iscsi_conn_bind(struct iscsi_cls_session *, struct iscsi_cls_conn *,
 			   int);
 extern void iscsi_conn_failure(struct iscsi_conn *conn, enum iscsi_err err);
+extern int iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn,
+				enum iscsi_param param, char *buf);
 
 /*
  * pdu and task processing
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index b95151a..0539705 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -34,6 +34,7 @@ struct iscsi_cls_conn;
 struct iscsi_conn;
 struct iscsi_cmd_task;
 struct iscsi_mgmt_task;
+struct sockaddr;
 
 /**
  * struct iscsi_transport - iSCSI Transport template
@@ -46,7 +47,12 @@ struct iscsi_mgmt_task;
  * @bind_conn:		associate this connection with existing iSCSI session
  *			and specified transport descriptor
  * @destroy_conn:	destroy inactive iSCSI connection
- * @set_param:		set iSCSI Data-Path operational parameter
+ * @set_param:		set iSCSI parameter. Return 0 on success, -ENODATA
+ *			when param is not supported, and a -Exx value on other
+ *			error.
+ * @get_param		get iSCSI parameter. Must return number of bytes
+ *			copied to buffer on success, -ENODATA when param
+ *			is not supported, and a -Exx value on other error
  * @start_conn:		set connection to be operational
  * @stop_conn:		suspend/recover/terminate connection
  * @send_pdu:		send iSCSI PDU, Login, Logout, NOP-Out, Reject, Text.
@@ -97,15 +103,11 @@ struct iscsi_transport {
 	void (*stop_conn) (struct iscsi_cls_conn *conn, int flag);
 	void (*destroy_conn) (struct iscsi_cls_conn *conn);
 	int (*set_param) (struct iscsi_cls_conn *conn, enum iscsi_param param,
-			  uint32_t value);
+			  char *buf, int buflen);
 	int (*get_conn_param) (struct iscsi_cls_conn *conn,
-			       enum iscsi_param param, uint32_t *value);
+			       enum iscsi_param param, char *buf);
 	int (*get_session_param) (struct iscsi_cls_session *session,
-				  enum iscsi_param param, uint32_t *value);
-	int (*get_conn_str_param) (struct iscsi_cls_conn *conn,
-				   enum iscsi_param param, char *buf);
-	int (*get_session_str_param) (struct iscsi_cls_session *session,
-				      enum iscsi_param param, char *buf);
+				  enum iscsi_param param, char *buf);
 	int (*send_pdu) (struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
 			 char *data, uint32_t data_size);
 	void (*get_stats) (struct iscsi_cls_conn *conn,
@@ -157,13 +159,6 @@ struct iscsi_cls_conn {
 	struct iscsi_transport *transport;
 	uint32_t cid;			/* connection id */
 
-	/* portal/group values we got during discovery */
-	char *persistent_address;
-	int persistent_port;
-	/* portal/group values we are currently using */
-	char *address;
-	int port;
-
 	int active;			/* must be accessed with the connlock */
 	struct device dev;		/* sysfs transport/container device */
 	struct mempool_zone *z_error;
@@ -187,10 +182,6 @@ struct iscsi_cls_session {
 	struct list_head host_list;
 	struct iscsi_transport *transport;
 
-	/* iSCSI values used as unique id by userspace. */
-	char *targetname;
-	int tpgt;
-
 	/* recovery fields */
 	int recovery_tmo;
 	struct work_struct recovery_work;
-- 
cgit v0.10.2


From 358ff019b89aa530ab6c0dd139d8089c932b103f Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 28 Jun 2006 12:00:25 -0500
Subject: [SCSI] iscsi: convert iser to new set/get param fns

Convert iser to libiscsi get/set param functions.
Fix bugs in it returning old error return values and
have it expose exp_statsn.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 4c3f2de..d277fdf 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -438,159 +438,50 @@ iscsi_iser_session_create(struct iscsi_transport *iscsit,
 }
 
 static int
-iscsi_iser_conn_set_param(struct iscsi_cls_conn *cls_conn,
-			  enum iscsi_param param, uint32_t value)
+iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn,
+		     enum iscsi_param param, char *buf, int buflen)
 {
-	struct iscsi_conn *conn = cls_conn->dd_data;
-	struct iscsi_session *session = conn->session;
-
-	spin_lock_bh(&session->lock);
-	if (conn->c_stage != ISCSI_CONN_INITIAL_STAGE &&
-	    conn->stop_stage != STOP_CONN_RECOVER) {
-		printk(KERN_ERR "iscsi_iser: can not change parameter [%d]\n",
-		       param);
-		spin_unlock_bh(&session->lock);
-		return 0;
-	}
-	spin_unlock_bh(&session->lock);
+	int value;
 
 	switch (param) {
 	case ISCSI_PARAM_MAX_RECV_DLENGTH:
 		/* TBD */
 		break;
-	case ISCSI_PARAM_MAX_XMIT_DLENGTH:
-		conn->max_xmit_dlength =  value;
-		break;
 	case ISCSI_PARAM_HDRDGST_EN:
+		sscanf(buf, "%d", &value);
 		if (value) {
 			printk(KERN_ERR "DataDigest wasn't negotiated to None");
 			return -EPROTO;
 		}
 		break;
 	case ISCSI_PARAM_DATADGST_EN:
+		sscanf(buf, "%d", &value);
 		if (value) {
 			printk(KERN_ERR "DataDigest wasn't negotiated to None");
 			return -EPROTO;
 		}
 		break;
-	case ISCSI_PARAM_INITIAL_R2T_EN:
-		session->initial_r2t_en = value;
-		break;
-	case ISCSI_PARAM_IMM_DATA_EN:
-		session->imm_data_en = value;
-		break;
-	case ISCSI_PARAM_FIRST_BURST:
-		session->first_burst = value;
-		break;
-	case ISCSI_PARAM_MAX_BURST:
-		session->max_burst = value;
-		break;
-	case ISCSI_PARAM_PDU_INORDER_EN:
-		session->pdu_inorder_en = value;
-		break;
-	case ISCSI_PARAM_DATASEQ_INORDER_EN:
-		session->dataseq_inorder_en = value;
-		break;
-	case ISCSI_PARAM_ERL:
-		session->erl = value;
-		break;
 	case ISCSI_PARAM_IFMARKER_EN:
+		sscanf(buf, "%d", &value);
 		if (value) {
 			printk(KERN_ERR "IFMarker wasn't negotiated to No");
 			return -EPROTO;
 		}
 		break;
 	case ISCSI_PARAM_OFMARKER_EN:
+		sscanf(buf, "%d", &value);
 		if (value) {
 			printk(KERN_ERR "OFMarker wasn't negotiated to No");
 			return -EPROTO;
 		}
 		break;
 	default:
-		break;
-	}
-
-	return 0;
-}
-
-static int
-iscsi_iser_session_get_param(struct iscsi_cls_session *cls_session,
-			     enum iscsi_param param, uint32_t *value)
-{
-	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
-	struct iscsi_session *session = iscsi_hostdata(shost->hostdata);
-
-	switch (param) {
-	case ISCSI_PARAM_INITIAL_R2T_EN:
-		*value = session->initial_r2t_en;
-		break;
-	case ISCSI_PARAM_MAX_R2T:
-		*value = session->max_r2t;
-		break;
-	case ISCSI_PARAM_IMM_DATA_EN:
-		*value = session->imm_data_en;
-		break;
-	case ISCSI_PARAM_FIRST_BURST:
-		*value = session->first_burst;
-		break;
-	case ISCSI_PARAM_MAX_BURST:
-		*value = session->max_burst;
-		break;
-	case ISCSI_PARAM_PDU_INORDER_EN:
-		*value = session->pdu_inorder_en;
-		break;
-	case ISCSI_PARAM_DATASEQ_INORDER_EN:
-		*value = session->dataseq_inorder_en;
-		break;
-	case ISCSI_PARAM_ERL:
-		*value = session->erl;
-		break;
-	case ISCSI_PARAM_IFMARKER_EN:
-		*value = 0;
-		break;
-	case ISCSI_PARAM_OFMARKER_EN:
-		*value = 0;
-		break;
-	default:
-		return ISCSI_ERR_PARAM_NOT_FOUND;
-	}
-
-	return 0;
-}
-
-static int
-iscsi_iser_conn_get_param(struct iscsi_cls_conn *cls_conn,
-			  enum iscsi_param param, uint32_t *value)
-{
-	struct iscsi_conn *conn = cls_conn->dd_data;
-
-	switch(param) {
-	case ISCSI_PARAM_MAX_RECV_DLENGTH:
-		*value = conn->max_recv_dlength;
-		break;
-	case ISCSI_PARAM_MAX_XMIT_DLENGTH:
-		*value = conn->max_xmit_dlength;
-		break;
-	case ISCSI_PARAM_HDRDGST_EN:
-		*value = 0;
-		break;
-	case ISCSI_PARAM_DATADGST_EN:
-		*value = 0;
-		break;
-	/*case ISCSI_PARAM_TARGET_RECV_DLENGTH:
-		*value = conn->target_recv_dlength;
-		break;
-	case ISCSI_PARAM_INITIATOR_RECV_DLENGTH:
-		*value = conn->initiator_recv_dlength;
-		break;*/
-	default:
-		return ISCSI_ERR_PARAM_NOT_FOUND;
+		return iscsi_set_param(cls_conn, param, buf, buflen);
 	}
 
 	return 0;
 }
 
-
 static void
 iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
 {
@@ -702,7 +593,12 @@ static struct iscsi_transport iscsi_iser_transport = {
 				  ISCSI_FIRST_BURST |
 				  ISCSI_MAX_BURST |
 				  ISCSI_PDU_INORDER_EN |
-				  ISCSI_DATASEQ_INORDER_EN,
+				  ISCSI_DATASEQ_INORDER_EN |
+				  ISCSI_EXP_STATSN |
+				  ISCSI_PERSISTENT_PORT |
+				  ISCSI_PERSISTENT_ADDRESS |
+				  ISCSI_TARGET_NAME |
+				  ISCSI_TPGT,
 	.host_template          = &iscsi_iser_sht,
 	.conndata_size		= sizeof(struct iscsi_conn),
 	.max_lun                = ISCSI_ISER_MAX_LUN,
@@ -714,9 +610,9 @@ static struct iscsi_transport iscsi_iser_transport = {
 	.create_conn            = iscsi_iser_conn_create,
 	.bind_conn              = iscsi_iser_conn_bind,
 	.destroy_conn           = iscsi_iser_conn_destroy,
-	.set_param              = iscsi_iser_conn_set_param,
-	.get_conn_param		= iscsi_iser_conn_get_param,
-	.get_session_param	= iscsi_iser_session_get_param,
+	.set_param              = iscsi_iser_set_param,
+	.get_conn_param		= iscsi_conn_get_param,
+	.get_session_param	= iscsi_session_get_param,
 	.start_conn             = iscsi_iser_conn_start,
 	.stop_conn              = iscsi_conn_stop,
 	/* these are called as part of conn recovery */
-- 
cgit v0.10.2


From 5c75b7fcf0c0e3921391fd93f5fa58ec9a6c428f Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 28 Jun 2006 12:00:26 -0500
Subject: [SCSI] iscsi: convert iscsi_tcp to new set/get param fns

Convert iscsi_tcp to new lib functions.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index b4743a9..848fb2a 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -2130,19 +2130,21 @@ iscsi_r2tpool_free(struct iscsi_session *session)
 
 static int
 iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
-		     uint32_t value)
+		     char *buf, int buflen)
 {
 	struct iscsi_conn *conn = cls_conn->dd_data;
 	struct iscsi_session *session = conn->session;
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+	int value;
 
 	switch(param) {
 	case ISCSI_PARAM_MAX_RECV_DLENGTH: {
 		char *saveptr = tcp_conn->data;
 		gfp_t flags = GFP_KERNEL;
 
+		sscanf(buf, "%d", &value);
 		if (tcp_conn->data_size >= value) {
-			conn->max_recv_dlength = value;
+			iscsi_set_param(cls_conn, param, buf, buflen);
 			break;
 		}
 
@@ -2165,15 +2167,12 @@ iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
 		else
 			free_pages((unsigned long)saveptr,
 				   get_order(tcp_conn->data_size));
-		conn->max_recv_dlength = value;
+		iscsi_set_param(cls_conn, param, buf, buflen);
 		tcp_conn->data_size = value;
-		}
-		break;
-	case ISCSI_PARAM_MAX_XMIT_DLENGTH:
-		conn->max_xmit_dlength =  value;
 		break;
+		}
 	case ISCSI_PARAM_HDRDGST_EN:
-		conn->hdrdgst_en = value;
+		iscsi_set_param(cls_conn, param, buf, buflen);
 		tcp_conn->hdr_size = sizeof(struct iscsi_hdr);
 		if (conn->hdrdgst_en) {
 			tcp_conn->hdr_size += sizeof(__u32);
@@ -2197,7 +2196,7 @@ iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
 		}
 		break;
 	case ISCSI_PARAM_DATADGST_EN:
-		conn->datadgst_en = value;
+		iscsi_set_param(cls_conn, param, buf, buflen);
 		if (conn->datadgst_en) {
 			if (!tcp_conn->data_tx_tfm)
 				tcp_conn->data_tx_tfm =
@@ -2220,121 +2219,36 @@ iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
 		tcp_conn->sendpage = conn->datadgst_en ?
 			sock_no_sendpage : tcp_conn->sock->ops->sendpage;
 		break;
-	case ISCSI_PARAM_INITIAL_R2T_EN:
-		session->initial_r2t_en = value;
-		break;
 	case ISCSI_PARAM_MAX_R2T:
+		sscanf(buf, "%d", &value);
 		if (session->max_r2t == roundup_pow_of_two(value))
 			break;
 		iscsi_r2tpool_free(session);
-		session->max_r2t = value;
+		iscsi_set_param(cls_conn, param, buf, buflen);
 		if (session->max_r2t & (session->max_r2t - 1))
 			session->max_r2t = roundup_pow_of_two(session->max_r2t);
 		if (iscsi_r2tpool_alloc(session))
 			return -ENOMEM;
 		break;
-	case ISCSI_PARAM_IMM_DATA_EN:
-		session->imm_data_en = value;
-		break;
-	case ISCSI_PARAM_FIRST_BURST:
-		session->first_burst = value;
-		break;
-	case ISCSI_PARAM_MAX_BURST:
-		session->max_burst = value;
-		break;
-	case ISCSI_PARAM_PDU_INORDER_EN:
-		session->pdu_inorder_en = value;
-		break;
-	case ISCSI_PARAM_DATASEQ_INORDER_EN:
-		session->dataseq_inorder_en = value;
-		break;
-	case ISCSI_PARAM_ERL:
-		session->erl = value;
-		break;
-	case ISCSI_PARAM_IFMARKER_EN:
-		BUG_ON(value);
-		session->ifmarker_en = value;
-		break;
-	case ISCSI_PARAM_OFMARKER_EN:
-		BUG_ON(value);
-		session->ofmarker_en = value;
-		break;
-	case ISCSI_PARAM_EXP_STATSN:
-		conn->exp_statsn = value;
-		break;
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-static int
-iscsi_session_get_param(struct iscsi_cls_session *cls_session,
-			enum iscsi_param param, uint32_t *value)
-{
-	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
-	struct iscsi_session *session = iscsi_hostdata(shost->hostdata);
-
-	switch(param) {
-	case ISCSI_PARAM_INITIAL_R2T_EN:
-		*value = session->initial_r2t_en;
-		break;
-	case ISCSI_PARAM_MAX_R2T:
-		*value = session->max_r2t;
-		break;
-	case ISCSI_PARAM_IMM_DATA_EN:
-		*value = session->imm_data_en;
-		break;
-	case ISCSI_PARAM_FIRST_BURST:
-		*value = session->first_burst;
-		break;
-	case ISCSI_PARAM_MAX_BURST:
-		*value = session->max_burst;
-		break;
-	case ISCSI_PARAM_PDU_INORDER_EN:
-		*value = session->pdu_inorder_en;
-		break;
-	case ISCSI_PARAM_DATASEQ_INORDER_EN:
-		*value = session->dataseq_inorder_en;
-		break;
-	case ISCSI_PARAM_ERL:
-		*value = session->erl;
-		break;
-	case ISCSI_PARAM_IFMARKER_EN:
-		*value = session->ifmarker_en;
-		break;
-	case ISCSI_PARAM_OFMARKER_EN:
-		*value = session->ofmarker_en;
-		break;
 	default:
-		return -EINVAL;
+		return iscsi_set_param(cls_conn, param, buf, buflen);
 	}
 
 	return 0;
 }
 
 static int
-iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn,
-		     enum iscsi_param param, uint32_t *value)
+iscsi_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
+			 enum iscsi_param param, char *buf)
 {
 	struct iscsi_conn *conn = cls_conn->dd_data;
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	struct inet_sock *inet;
+	struct ipv6_pinfo *np;
+	struct sock *sk;
+	int len;
 
 	switch(param) {
-	case ISCSI_PARAM_MAX_RECV_DLENGTH:
-		*value = conn->max_recv_dlength;
-		break;
-	case ISCSI_PARAM_MAX_XMIT_DLENGTH:
-		*value = conn->max_xmit_dlength;
-		break;
-	case ISCSI_PARAM_HDRDGST_EN:
-		*value = conn->hdrdgst_en;
-		break;
-	case ISCSI_PARAM_DATADGST_EN:
-		*value = conn->datadgst_en;
-		break;
 	case ISCSI_PARAM_CONN_PORT:
 		mutex_lock(&conn->xmitmutex);
 		if (!tcp_conn->sock) {
@@ -2343,30 +2257,9 @@ iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn,
 		}
 
 		inet = inet_sk(tcp_conn->sock->sk);
-		*value = be16_to_cpu(inet->dport);
+		len = sprintf(buf, "%hu\n", be16_to_cpu(inet->dport));
 		mutex_unlock(&conn->xmitmutex);
-	case ISCSI_PARAM_EXP_STATSN:
-		*value = conn->exp_statsn;
 		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int
-iscsi_conn_get_str_param(struct iscsi_cls_conn *cls_conn,
-			 enum iscsi_param param, char *buf)
-{
-	struct iscsi_conn *conn = cls_conn->dd_data;
-	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-	struct sock *sk;
-	struct inet_sock *inet;
-	struct ipv6_pinfo *np;
-	int len = 0;
-
-	switch (param) {
 	case ISCSI_PARAM_CONN_ADDRESS:
 		mutex_lock(&conn->xmitmutex);
 		if (!tcp_conn->sock) {
@@ -2388,7 +2281,7 @@ iscsi_conn_get_str_param(struct iscsi_cls_conn *cls_conn,
 		mutex_unlock(&conn->xmitmutex);
 		break;
 	default:
-		return -EINVAL;
+		return iscsi_conn_get_param(cls_conn, param, buf);
 	}
 
 	return len;
@@ -2501,7 +2394,11 @@ static struct iscsi_transport iscsi_tcp_transport = {
 				  ISCSI_ERL |
 				  ISCSI_CONN_PORT |
 				  ISCSI_CONN_ADDRESS |
-				  ISCSI_EXP_STATSN,
+				  ISCSI_EXP_STATSN |
+				  ISCSI_PERSISTENT_PORT |
+				  ISCSI_PERSISTENT_ADDRESS |
+				  ISCSI_TARGET_NAME |
+				  ISCSI_TPGT,
 	.host_template		= &iscsi_sht,
 	.conndata_size		= sizeof(struct iscsi_conn),
 	.max_conn		= 1,
@@ -2514,8 +2411,7 @@ static struct iscsi_transport iscsi_tcp_transport = {
 	.bind_conn		= iscsi_tcp_conn_bind,
 	.destroy_conn		= iscsi_tcp_conn_destroy,
 	.set_param		= iscsi_conn_set_param,
-	.get_conn_param		= iscsi_conn_get_param,
-	.get_conn_str_param	= iscsi_conn_get_str_param,
+	.get_conn_param		= iscsi_tcp_conn_get_param,
 	.get_session_param	= iscsi_session_get_param,
 	.start_conn		= iscsi_conn_start,
 	.stop_conn		= iscsi_conn_stop,
-- 
cgit v0.10.2


From f53a88da18e3c04c3ade07bc5eff520ee4259c3e Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 28 Jun 2006 12:00:27 -0500
Subject: [SCSI] iscsi: fix session refcouting

iscsi_tcp and iser cannot be rmmod from the kernel when sessions
are running because session removal is driven from userspace. For
those modules we get a module reference when a session is
created then drop it when the session is removed.

For qla4xxx, they can jsut remove the sessions from the pci remove
function like normal HW drivers, so this patch moves the module
reference from the transport class functions shared by all
drivers to the libiscsi functions only used be software iscsi
modules.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 7c76a98..7e6e031 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1287,13 +1287,18 @@ iscsi_session_setup(struct iscsi_transport *iscsit,
 	if (scsi_add_host(shost, NULL))
 		goto add_host_fail;
 
+	if (!try_module_get(iscsit->owner))
+		goto cls_session_fail;
+
 	cls_session = iscsi_create_session(shost, iscsit, 0);
 	if (!cls_session)
-		goto cls_session_fail;
+		goto module_put;
 	*(unsigned long*)shost->hostdata = (unsigned long)cls_session;
 
 	return cls_session;
 
+module_put:
+	module_put(iscsit->owner);
 cls_session_fail:
 	scsi_remove_host(shost);
 add_host_fail:
@@ -1325,6 +1330,7 @@ void iscsi_session_teardown(struct iscsi_cls_session *cls_session)
 
 	iscsi_destroy_session(cls_session);
 	scsi_host_put(shost);
+	module_put(cls_session->transport->owner);
 }
 EXPORT_SYMBOL_GPL(iscsi_session_teardown);
 
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 147c854..8717ff5 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -228,13 +228,11 @@ static struct iscsi_cls_conn *iscsi_conn_lookup(uint32_t sid, uint32_t cid)
 static void iscsi_session_release(struct device *dev)
 {
 	struct iscsi_cls_session *session = iscsi_dev_to_session(dev);
-	struct iscsi_transport *transport = session->transport;
 	struct Scsi_Host *shost;
 
 	shost = iscsi_session_to_shost(session);
 	scsi_host_put(shost);
 	kfree(session);
-	module_put(transport->owner);
 }
 
 static int iscsi_is_session_dev(const struct device *dev)
@@ -305,13 +303,11 @@ iscsi_create_session(struct Scsi_Host *shost,
 	struct iscsi_cls_session *session;
 	int err;
 
-	if (!try_module_get(transport->owner))
-		return NULL;
-
 	session = kzalloc(sizeof(*session) + transport->sessiondata_size,
 			  GFP_KERNEL);
 	if (!session)
-		goto module_put;
+		return NULL;
+
 	session->transport = transport;
 	session->recovery_tmo = 120;
 	INIT_WORK(&session->recovery_work, session_recovery_timedout, session);
@@ -349,8 +345,6 @@ iscsi_create_session(struct Scsi_Host *shost,
 
 free_session:
 	kfree(session);
-module_put:
-	module_put(transport->owner);
 	return NULL;
 }
 
-- 
cgit v0.10.2


From e6f3b63f50b4bb9fdc9025e0c3994acd265ad3a2 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 28 Jun 2006 12:00:29 -0500
Subject: [SCSI] iscsi: rm channel usage from iscsi

I do not remember what I was thinking when we added the channel
as a argument to the session create function. It was probably
due to too much cut and paste work from the FC transport class.

The channel is meaningless for iscsi drivers so this patch drops
its usage everywhere in the iscsi related code.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 7e6e031..499e79f 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1290,7 +1290,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit,
 	if (!try_module_get(iscsit->owner))
 		goto cls_session_fail;
 
-	cls_session = iscsi_create_session(shost, iscsit, 0);
+	cls_session = iscsi_create_session(shost, iscsit);
 	if (!cls_session)
 		goto module_put;
 	*(unsigned long*)shost->hostdata = (unsigned long)cls_session;
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 8717ff5..c0ec502 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -248,10 +248,9 @@ static int iscsi_user_scan(struct Scsi_Host *shost, uint channel,
 
 	mutex_lock(&ihost->mutex);
 	list_for_each_entry(session, &ihost->sessions, host_list) {
-		if ((channel == SCAN_WILD_CARD ||
-		     channel == session->channel) &&
+		if ((channel == SCAN_WILD_CARD || channel == 0) &&
 		    (id == SCAN_WILD_CARD || id == session->target_id))
-			scsi_scan_target(&session->dev, session->channel,
+			scsi_scan_target(&session->dev, 0,
 					 session->target_id, lun, 1);
 	}
 	mutex_unlock(&ihost->mutex);
@@ -297,7 +296,7 @@ EXPORT_SYMBOL_GPL(iscsi_block_session);
  **/
 struct iscsi_cls_session *
 iscsi_create_session(struct Scsi_Host *shost,
-		     struct iscsi_transport *transport, int channel)
+		     struct iscsi_transport *transport)
 {
 	struct iscsi_host *ihost;
 	struct iscsi_cls_session *session;
@@ -322,7 +321,6 @@ iscsi_create_session(struct Scsi_Host *shost,
 	ihost = shost->shost_data;
 
 	session->sid = iscsi_session_nr++;
-	session->channel = channel;
 	session->target_id = ihost->next_target_id++;
 
 	snprintf(session->dev.bus_id, BUS_ID_SIZE, "session%u",
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 0539705..2e3cb37 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -187,7 +187,6 @@ struct iscsi_cls_session {
 	struct work_struct recovery_work;
 
 	int target_id;
-	int channel;
 
 	int sid;				/* session id */
 	void *dd_data;				/* LLD private data */
@@ -210,7 +209,7 @@ struct iscsi_host {
  * session and connection functions that can be used by HW iSCSI LLDs
  */
 extern struct iscsi_cls_session *iscsi_create_session(struct Scsi_Host *shost,
-				struct iscsi_transport *t, int channel);
+						struct iscsi_transport *t);
 extern int iscsi_destroy_session(struct iscsi_cls_session *session);
 extern struct iscsi_cls_conn *iscsi_create_conn(struct iscsi_cls_session *sess,
 					    uint32_t cid);
-- 
cgit v0.10.2


From 8434aa8b6fe5af27a33b8aa830c24e3680356c83 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 28 Jun 2006 12:00:30 -0500
Subject: [SCSI] iscsi: break up session creation into two stages

qla4xxx is initialized in two steps like other HW drivers.
It allocates the host, sets up the HW, then adds the host.
For iscsi part of HW setup is setting up persistent iscsi
sessions. At that time, the interupts are off and the driver
is not completely set up so we just want to allocate them.
We do not want to add them to sysfs and expose them to userspace
because userspace could try to do lots of fun things with them
like scanning and at that time the driver is not ready.

So this patch breakes up the session creation like other
functions that use the driver model in two the alloc
and add parts. When the driver is ready, it can then add
the sessions and userspace can begin using them.

This also fixes a bug in the addition error patch where
we forgot to do a get on the session.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index c0ec502..f39da0c 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -287,20 +287,11 @@ void iscsi_block_session(struct iscsi_cls_session *session)
 }
 EXPORT_SYMBOL_GPL(iscsi_block_session);
 
-/**
- * iscsi_create_session - create iscsi class session
- * @shost: scsi host
- * @transport: iscsi transport
- *
- * This can be called from a LLD or iscsi_transport.
- **/
 struct iscsi_cls_session *
-iscsi_create_session(struct Scsi_Host *shost,
-		     struct iscsi_transport *transport)
+iscsi_alloc_session(struct Scsi_Host *shost,
+		    struct iscsi_transport *transport)
 {
-	struct iscsi_host *ihost;
 	struct iscsi_cls_session *session;
-	int err;
 
 	session = kzalloc(sizeof(*session) + transport->sessiondata_size,
 			  GFP_KERNEL);
@@ -313,8 +304,20 @@ iscsi_create_session(struct Scsi_Host *shost,
 	INIT_LIST_HEAD(&session->host_list);
 	INIT_LIST_HEAD(&session->sess_list);
 
+	session->dev.parent = &shost->shost_gendev;
+	session->dev.release = iscsi_session_release;
+	device_initialize(&session->dev);
 	if (transport->sessiondata_size)
 		session->dd_data = &session[1];
+	return session;
+}
+EXPORT_SYMBOL_GPL(iscsi_alloc_session);
+
+int iscsi_add_session(struct iscsi_cls_session *session)
+{
+	struct Scsi_Host *shost = iscsi_session_to_shost(session);
+	struct iscsi_host *ihost;
+	int err;
 
 	/* this is released in the dev's release function */
 	scsi_host_get(shost);
@@ -325,37 +328,51 @@ iscsi_create_session(struct Scsi_Host *shost,
 
 	snprintf(session->dev.bus_id, BUS_ID_SIZE, "session%u",
 		 session->sid);
-	session->dev.parent = &shost->shost_gendev;
-	session->dev.release = iscsi_session_release;
-	err = device_register(&session->dev);
+	err = device_add(&session->dev);
 	if (err) {
 		dev_printk(KERN_ERR, &session->dev, "iscsi: could not "
 			   "register session's dev\n");
-		goto free_session;
+		goto release_host;
 	}
 	transport_register_device(&session->dev);
 
 	mutex_lock(&ihost->mutex);
 	list_add(&session->host_list, &ihost->sessions);
 	mutex_unlock(&ihost->mutex);
+	return 0;
 
-	return session;
-
-free_session:
-	kfree(session);
-	return NULL;
+release_host:
+	scsi_host_put(shost);
+	return err;
 }
-
-EXPORT_SYMBOL_GPL(iscsi_create_session);
+EXPORT_SYMBOL_GPL(iscsi_add_session);
 
 /**
- * iscsi_destroy_session - destroy iscsi session
- * @session: iscsi_session
+ * iscsi_create_session - create iscsi class session
+ * @shost: scsi host
+ * @transport: iscsi transport
  *
- * Can be called by a LLD or iscsi_transport. There must not be
- * any running connections.
+ * This can be called from a LLD or iscsi_transport.
  **/
-int iscsi_destroy_session(struct iscsi_cls_session *session)
+struct iscsi_cls_session *
+iscsi_create_session(struct Scsi_Host *shost,
+		     struct iscsi_transport *transport)
+{
+	struct iscsi_cls_session *session;
+
+	session = iscsi_alloc_session(shost, transport);
+	if (!session)
+		return NULL;
+
+	if (iscsi_add_session(session)) {
+		iscsi_free_session(session);
+		return NULL;
+	}
+	return session;
+}
+EXPORT_SYMBOL_GPL(iscsi_create_session);
+
+void iscsi_remove_session(struct iscsi_cls_session *session)
 {
 	struct Scsi_Host *shost = iscsi_session_to_shost(session);
 	struct iscsi_host *ihost = shost->shost_data;
@@ -367,11 +384,33 @@ int iscsi_destroy_session(struct iscsi_cls_session *session)
 	list_del(&session->host_list);
 	mutex_unlock(&ihost->mutex);
 
+	scsi_remove_target(&session->dev);
+
 	transport_unregister_device(&session->dev);
-	device_unregister(&session->dev);
-	return 0;
+	device_del(&session->dev);
 }
+EXPORT_SYMBOL_GPL(iscsi_remove_session);
 
+void iscsi_free_session(struct iscsi_cls_session *session)
+{
+	put_device(&session->dev);
+}
+
+EXPORT_SYMBOL_GPL(iscsi_free_session);
+
+/**
+ * iscsi_destroy_session - destroy iscsi session
+ * @session: iscsi_session
+ *
+ * Can be called by a LLD or iscsi_transport. There must not be
+ * any running connections.
+ **/
+int iscsi_destroy_session(struct iscsi_cls_session *session)
+{
+	iscsi_remove_session(session);
+	iscsi_free_session(session);
+	return 0;
+}
 EXPORT_SYMBOL_GPL(iscsi_destroy_session);
 
 static void iscsi_conn_release(struct device *dev)
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 2e3cb37..53493d5 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -199,6 +199,9 @@ struct iscsi_cls_session {
 #define iscsi_session_to_shost(_session) \
 	dev_to_shost(_session->dev.parent)
 
+#define starget_to_session(_stgt) \
+	iscsi_dev_to_session(_stgt->dev.parent)
+
 struct iscsi_host {
 	int next_target_id;
 	struct list_head sessions;
@@ -208,8 +211,13 @@ struct iscsi_host {
 /*
  * session and connection functions that can be used by HW iSCSI LLDs
  */
+extern struct iscsi_cls_session *iscsi_alloc_session(struct Scsi_Host *shost,
+					struct iscsi_transport *transport);
+extern int iscsi_add_session(struct iscsi_cls_session *session);
 extern struct iscsi_cls_session *iscsi_create_session(struct Scsi_Host *shost,
 						struct iscsi_transport *t);
+extern void iscsi_remove_session(struct iscsi_cls_session *session);
+extern void iscsi_free_session(struct iscsi_cls_session *session);
 extern int iscsi_destroy_session(struct iscsi_cls_session *session);
 extern struct iscsi_cls_conn *iscsi_create_conn(struct iscsi_cls_session *sess,
 					    uint32_t cid);
-- 
cgit v0.10.2


From 6a8a0d3621745279a131d95f0204dc9ddac60d55 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 28 Jun 2006 12:00:31 -0500
Subject: [SCSI] iscsi: pass target nr to session creation

So the drivers do not use the channel numbers, but some do
use the target numbers. We were just adding some goofy
variable that just increases for the target nr. This is useless
for software iscsi because it is always zero. And for qla4xxx
the target nr is actually the index of the target/session
in its FW or FLASH tables. We needed to expose this to userspace
so apps could access those numbers so this patch just adds the
target nr to the iscsi session creation functions. This way
when qla4xxx's Hw thinks a session is at target nr 4
in its hw, it is exposed as that number in sysfs.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 499e79f..7e6e031 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1290,7 +1290,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit,
 	if (!try_module_get(iscsit->owner))
 		goto cls_session_fail;
 
-	cls_session = iscsi_create_session(shost, iscsit);
+	cls_session = iscsi_create_session(shost, iscsit, 0);
 	if (!cls_session)
 		goto module_put;
 	*(unsigned long*)shost->hostdata = (unsigned long)cls_session;
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index f39da0c..7963c05 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -304,6 +304,8 @@ iscsi_alloc_session(struct Scsi_Host *shost,
 	INIT_LIST_HEAD(&session->host_list);
 	INIT_LIST_HEAD(&session->sess_list);
 
+	/* this is released in the dev's release function */
+	scsi_host_get(shost);
 	session->dev.parent = &shost->shost_gendev;
 	session->dev.release = iscsi_session_release;
 	device_initialize(&session->dev);
@@ -313,18 +315,15 @@ iscsi_alloc_session(struct Scsi_Host *shost,
 }
 EXPORT_SYMBOL_GPL(iscsi_alloc_session);
 
-int iscsi_add_session(struct iscsi_cls_session *session)
+int iscsi_add_session(struct iscsi_cls_session *session, unsigned int target_id)
 {
 	struct Scsi_Host *shost = iscsi_session_to_shost(session);
 	struct iscsi_host *ihost;
 	int err;
 
-	/* this is released in the dev's release function */
-	scsi_host_get(shost);
 	ihost = shost->shost_data;
-
 	session->sid = iscsi_session_nr++;
-	session->target_id = ihost->next_target_id++;
+	session->target_id = target_id;
 
 	snprintf(session->dev.bus_id, BUS_ID_SIZE, "session%u",
 		 session->sid);
@@ -356,7 +355,8 @@ EXPORT_SYMBOL_GPL(iscsi_add_session);
  **/
 struct iscsi_cls_session *
 iscsi_create_session(struct Scsi_Host *shost,
-		     struct iscsi_transport *transport)
+		     struct iscsi_transport *transport,
+		     unsigned int target_id)
 {
 	struct iscsi_cls_session *session;
 
@@ -364,7 +364,7 @@ iscsi_create_session(struct Scsi_Host *shost,
 	if (!session)
 		return NULL;
 
-	if (iscsi_add_session(session)) {
+	if (iscsi_add_session(session, target_id)) {
 		iscsi_free_session(session);
 		return NULL;
 	}
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 53493d5..f7b0db5 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -203,7 +203,6 @@ struct iscsi_cls_session {
 	iscsi_dev_to_session(_stgt->dev.parent)
 
 struct iscsi_host {
-	int next_target_id;
 	struct list_head sessions;
 	struct mutex mutex;
 };
@@ -213,9 +212,11 @@ struct iscsi_host {
  */
 extern struct iscsi_cls_session *iscsi_alloc_session(struct Scsi_Host *shost,
 					struct iscsi_transport *transport);
-extern int iscsi_add_session(struct iscsi_cls_session *session);
+extern int iscsi_add_session(struct iscsi_cls_session *session,
+			     unsigned int target_id);
 extern struct iscsi_cls_session *iscsi_create_session(struct Scsi_Host *shost,
-						struct iscsi_transport *t);
+						struct iscsi_transport *t,
+						unsigned int target_id);
 extern void iscsi_remove_session(struct iscsi_cls_session *session);
 extern void iscsi_free_session(struct iscsi_cls_session *session);
 extern int iscsi_destroy_session(struct iscsi_cls_session *session);
-- 
cgit v0.10.2


From 53cb8a1f45e06a2627a6d89b151cccb95fa45cbf Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Wed, 28 Jun 2006 12:00:32 -0500
Subject: [SCSI] iscsi: add async notification of session events

This patch adds or modifies the transport class functions
used to notify userspace of session state events.

We modify the session addition up event and add a destruction event
to notify userspace of session creation, relogin and destruction.

And we modify the conn error event to be sent by broadcast
since multiple listeners may want to listen for it.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 7963c05..7b9e8fa 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -413,11 +413,59 @@ int iscsi_destroy_session(struct iscsi_cls_session *session)
 }
 EXPORT_SYMBOL_GPL(iscsi_destroy_session);
 
+static void mempool_zone_destroy(struct mempool_zone *zp)
+{
+	mempool_destroy(zp->pool);
+	kfree(zp);
+}
+
+static void*
+mempool_zone_alloc_skb(gfp_t gfp_mask, void *pool_data)
+{
+	struct mempool_zone *zone = pool_data;
+
+	return alloc_skb(zone->size, gfp_mask);
+}
+
+static void
+mempool_zone_free_skb(void *element, void *pool_data)
+{
+	kfree_skb(element);
+}
+
+static struct mempool_zone *
+mempool_zone_init(unsigned max, unsigned size, unsigned hiwat)
+{
+	struct mempool_zone *zp;
+
+	zp = kzalloc(sizeof(*zp), GFP_KERNEL);
+	if (!zp)
+		return NULL;
+
+	zp->size = size;
+	zp->hiwat = hiwat;
+	INIT_LIST_HEAD(&zp->freequeue);
+	spin_lock_init(&zp->freelock);
+	atomic_set(&zp->allocated, 0);
+
+	zp->pool = mempool_create(max, mempool_zone_alloc_skb,
+				  mempool_zone_free_skb, zp);
+	if (!zp->pool) {
+		kfree(zp);
+		return NULL;
+	}
+
+	return zp;
+}
+
 static void iscsi_conn_release(struct device *dev)
 {
 	struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev);
 	struct device *parent = conn->dev.parent;
 
+	mempool_zone_destroy(conn->z_pdu);
+	mempool_zone_destroy(conn->z_error);
+
 	kfree(conn);
 	put_device(parent);
 }
@@ -427,6 +475,31 @@ static int iscsi_is_conn_dev(const struct device *dev)
 	return dev->release == iscsi_conn_release;
 }
 
+static int iscsi_create_event_pools(struct iscsi_cls_conn *conn)
+{
+	conn->z_pdu = mempool_zone_init(Z_MAX_PDU,
+			NLMSG_SPACE(sizeof(struct iscsi_uevent) +
+				    sizeof(struct iscsi_hdr) +
+				    DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH),
+			Z_HIWAT_PDU);
+	if (!conn->z_pdu) {
+		dev_printk(KERN_ERR, &conn->dev, "iscsi: can not allocate "
+			   "pdu zone for new conn\n");
+		return -ENOMEM;
+	}
+
+	conn->z_error = mempool_zone_init(Z_MAX_ERROR,
+			NLMSG_SPACE(sizeof(struct iscsi_uevent)),
+			Z_HIWAT_ERROR);
+	if (!conn->z_error) {
+		dev_printk(KERN_ERR, &conn->dev, "iscsi: can not allocate "
+			   "error zone for new conn\n");
+		mempool_zone_destroy(conn->z_pdu);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
 /**
  * iscsi_create_conn - create iscsi class connection
  * @session: iscsi cls session
@@ -459,9 +532,12 @@ iscsi_create_conn(struct iscsi_cls_session *session, uint32_t cid)
 	conn->transport = transport;
 	conn->cid = cid;
 
+	if (iscsi_create_event_pools(conn))
+		goto free_conn;
+
 	/* this is released in the dev's release function */
 	if (!get_device(&session->dev))
-		goto free_conn;
+		goto free_conn_pools;
 
 	snprintf(conn->dev.bus_id, BUS_ID_SIZE, "connection%d:%u",
 		 session->sid, cid);
@@ -478,6 +554,8 @@ iscsi_create_conn(struct iscsi_cls_session *session, uint32_t cid)
 
 release_parent_ref:
 	put_device(&session->dev);
+free_conn_pools:
+
 free_conn:
 	kfree(conn);
 	return NULL;
@@ -525,20 +603,6 @@ static inline struct list_head *skb_to_lh(struct sk_buff *skb)
 	return (struct list_head *)&skb->cb;
 }
 
-static void*
-mempool_zone_alloc_skb(gfp_t gfp_mask, void *pool_data)
-{
-	struct mempool_zone *zone = pool_data;
-
-	return alloc_skb(zone->size, gfp_mask);
-}
-
-static void
-mempool_zone_free_skb(void *element, void *pool_data)
-{
-	kfree_skb(element);
-}
-
 static void
 mempool_zone_complete(struct mempool_zone *zone)
 {
@@ -558,37 +622,6 @@ mempool_zone_complete(struct mempool_zone *zone)
 	spin_unlock_irqrestore(&zone->freelock, flags);
 }
 
-static struct mempool_zone *
-mempool_zone_init(unsigned max, unsigned size, unsigned hiwat)
-{
-	struct mempool_zone *zp;
-
-	zp = kzalloc(sizeof(*zp), GFP_KERNEL);
-	if (!zp)
-		return NULL;
-
-	zp->size = size;
-	zp->hiwat = hiwat;
-	INIT_LIST_HEAD(&zp->freequeue);
-	spin_lock_init(&zp->freelock);
-	atomic_set(&zp->allocated, 0);
-
-	zp->pool = mempool_create(max, mempool_zone_alloc_skb,
-				  mempool_zone_free_skb, zp);
-	if (!zp->pool) {
-		kfree(zp);
-		return NULL;
-	}
-
-	return zp;
-}
-
-static void mempool_zone_destroy(struct mempool_zone *zp)
-{
-	mempool_destroy(zp->pool);
-	kfree(zp);
-}
-
 static struct sk_buff*
 mempool_zone_get_skb(struct mempool_zone *zone)
 {
@@ -601,6 +634,27 @@ mempool_zone_get_skb(struct mempool_zone *zone)
 }
 
 static int
+iscsi_broadcast_skb(struct mempool_zone *zone, struct sk_buff *skb)
+{
+	unsigned long flags;
+	int rc;
+
+	skb_get(skb);
+	rc = netlink_broadcast(nls, skb, 0, 1, GFP_KERNEL);
+	if (rc < 0) {
+		mempool_free(skb, zone->pool);
+		printk(KERN_ERR "iscsi: can not broadcast skb (%d)\n", rc);
+		return rc;
+	}
+
+	spin_lock_irqsave(&zone->freelock, flags);
+	INIT_LIST_HEAD(skb_to_lh(skb));
+	list_add(skb_to_lh(skb), &zone->freequeue);
+	spin_unlock_irqrestore(&zone->freelock, flags);
+	return 0;
+}
+
+static int
 iscsi_unicast_skb(struct mempool_zone *zone, struct sk_buff *skb, int pid)
 {
 	unsigned long flags;
@@ -695,7 +749,7 @@ void iscsi_conn_error(struct iscsi_cls_conn *conn, enum iscsi_err error)
 	ev->r.connerror.cid = conn->cid;
 	ev->r.connerror.sid = iscsi_conn_get_sid(conn);
 
-	iscsi_unicast_skb(conn->z_error, skb, priv->daemon_pid);
+	iscsi_broadcast_skb(conn->z_error, skb);
 
 	dev_printk(KERN_INFO, &conn->dev, "iscsi: detected conn error (%d)\n",
 		   error);
@@ -796,6 +850,131 @@ iscsi_if_get_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh)
 	return err;
 }
 
+/**
+ * iscsi_if_destroy_session_done - send session destr. completion event
+ * @conn: last connection for session
+ *
+ * This is called by HW iscsi LLDs to notify userpsace that its HW has
+ * removed a session.
+ **/
+int iscsi_if_destroy_session_done(struct iscsi_cls_conn *conn)
+{
+	struct iscsi_internal *priv;
+	struct iscsi_cls_session *session;
+	struct Scsi_Host *shost;
+	struct iscsi_uevent *ev;
+	struct sk_buff  *skb;
+	struct nlmsghdr *nlh;
+	unsigned long flags;
+	int rc, len = NLMSG_SPACE(sizeof(*ev));
+
+	priv = iscsi_if_transport_lookup(conn->transport);
+	if (!priv)
+		return -EINVAL;
+
+	session = iscsi_dev_to_session(conn->dev.parent);
+	shost = iscsi_session_to_shost(session);
+
+	mempool_zone_complete(conn->z_pdu);
+
+	skb = mempool_zone_get_skb(conn->z_pdu);
+	if (!skb) {
+		dev_printk(KERN_ERR, &conn->dev, "Cannot notify userspace of "
+			  "session creation event\n");
+		return -ENOMEM;
+	}
+
+	nlh = __nlmsg_put(skb, priv->daemon_pid, 0, 0, (len - sizeof(*nlh)), 0);
+	ev = NLMSG_DATA(nlh);
+	ev->transport_handle = iscsi_handle(conn->transport);
+	ev->type = ISCSI_KEVENT_DESTROY_SESSION;
+	ev->r.d_session.host_no = shost->host_no;
+	ev->r.d_session.sid = session->sid;
+
+	/*
+	 * this will occur if the daemon is not up, so we just warn
+	 * the user and when the daemon is restarted it will handle it
+	 */
+	rc = iscsi_broadcast_skb(conn->z_pdu, skb);
+	if (rc < 0)
+		dev_printk(KERN_ERR, &conn->dev, "Cannot notify userspace of "
+			  "session destruction event. Check iscsi daemon\n");
+
+	spin_lock_irqsave(&sesslock, flags);
+	list_del(&session->sess_list);
+	spin_unlock_irqrestore(&sesslock, flags);
+
+	spin_lock_irqsave(&connlock, flags);
+	conn->active = 0;
+	list_del(&conn->conn_list);
+	spin_unlock_irqrestore(&connlock, flags);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(iscsi_if_destroy_session_done);
+
+/**
+ * iscsi_if_create_session_done - send session creation completion event
+ * @conn: leading connection for session
+ *
+ * This is called by HW iscsi LLDs to notify userpsace that its HW has
+ * created a session or a existing session is back in the logged in state.
+ **/
+int iscsi_if_create_session_done(struct iscsi_cls_conn *conn)
+{
+	struct iscsi_internal *priv;
+	struct iscsi_cls_session *session;
+	struct Scsi_Host *shost;
+	struct iscsi_uevent *ev;
+	struct sk_buff  *skb;
+	struct nlmsghdr *nlh;
+	unsigned long flags;
+	int rc, len = NLMSG_SPACE(sizeof(*ev));
+
+	priv = iscsi_if_transport_lookup(conn->transport);
+	if (!priv)
+		return -EINVAL;
+
+	session = iscsi_dev_to_session(conn->dev.parent);
+	shost = iscsi_session_to_shost(session);
+
+	mempool_zone_complete(conn->z_pdu);
+
+	skb = mempool_zone_get_skb(conn->z_pdu);
+	if (!skb) {
+		dev_printk(KERN_ERR, &conn->dev, "Cannot notify userspace of "
+			  "session creation event\n");
+		return -ENOMEM;
+	}
+
+	nlh = __nlmsg_put(skb, priv->daemon_pid, 0, 0, (len - sizeof(*nlh)), 0);
+	ev = NLMSG_DATA(nlh);
+	ev->transport_handle = iscsi_handle(conn->transport);
+	ev->type = ISCSI_UEVENT_CREATE_SESSION;
+	ev->r.c_session_ret.host_no = shost->host_no;
+	ev->r.c_session_ret.sid = session->sid;
+
+	/*
+	 * this will occur if the daemon is not up, so we just warn
+	 * the user and when the daemon is restarted it will handle it
+	 */
+	rc = iscsi_broadcast_skb(conn->z_pdu, skb);
+	if (rc < 0)
+		dev_printk(KERN_ERR, &conn->dev, "Cannot notify userspace of "
+			  "session creation event. Check iscsi daemon\n");
+
+	spin_lock_irqsave(&sesslock, flags);
+	list_add(&session->sess_list, &sesslist);
+	spin_unlock_irqrestore(&sesslock, flags);
+
+	spin_lock_irqsave(&connlock, flags);
+	list_add(&conn->conn_list, &connlist);
+	conn->active = 1;
+	spin_unlock_irqrestore(&connlock, flags);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(iscsi_if_create_session_done);
+
 static int
 iscsi_if_create_session(struct iscsi_internal *priv, struct iscsi_uevent *ev)
 {
@@ -841,26 +1020,6 @@ iscsi_if_create_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev)
 		return -ENOMEM;
 	}
 
-	conn->z_pdu = mempool_zone_init(Z_MAX_PDU,
-			NLMSG_SPACE(sizeof(struct iscsi_uevent) +
-				    sizeof(struct iscsi_hdr) +
-				    DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH),
-			Z_HIWAT_PDU);
-	if (!conn->z_pdu) {
-		dev_printk(KERN_ERR, &conn->dev, "iscsi: can not allocate "
-			   "pdu zone for new conn\n");
-		goto destroy_conn;
-	}
-
-	conn->z_error = mempool_zone_init(Z_MAX_ERROR,
-			NLMSG_SPACE(sizeof(struct iscsi_uevent)),
-			Z_HIWAT_ERROR);
-	if (!conn->z_error) {
-		dev_printk(KERN_ERR, &conn->dev, "iscsi: can not allocate "
-			   "error zone for new conn\n");
-		goto free_pdu_pool;
-	}
-
 	ev->r.c_conn_ret.sid = session->sid;
 	ev->r.c_conn_ret.cid = conn->cid;
 
@@ -870,13 +1029,6 @@ iscsi_if_create_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev)
 	spin_unlock_irqrestore(&connlock, flags);
 
 	return 0;
-
-free_pdu_pool:
-	mempool_zone_destroy(conn->z_pdu);
-destroy_conn:
-	if (transport->destroy_conn)
-		transport->destroy_conn(conn->dd_data);
-	return -ENOMEM;
 }
 
 static int
@@ -884,7 +1036,6 @@ iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev
 {
 	unsigned long flags;
 	struct iscsi_cls_conn *conn;
-	struct mempool_zone *z_error, *z_pdu;
 
 	conn = iscsi_conn_lookup(ev->u.d_conn.sid, ev->u.d_conn.cid);
 	if (!conn)
@@ -894,15 +1045,8 @@ iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev
 	list_del(&conn->conn_list);
 	spin_unlock_irqrestore(&connlock, flags);
 
-	z_pdu = conn->z_pdu;
-	z_error = conn->z_error;
-
 	if (transport->destroy_conn)
 		transport->destroy_conn(conn);
-
-	mempool_zone_destroy(z_pdu);
-	mempool_zone_destroy(z_error);
-
 	return 0;
 }
 
@@ -1331,6 +1475,7 @@ iscsi_register_transport(struct iscsi_transport *tt)
 	if (!priv)
 		return NULL;
 	INIT_LIST_HEAD(&priv->list);
+	priv->daemon_pid = -1;
 	priv->iscsi_transport = tt;
 	priv->t.user_scan = iscsi_user_scan;
 
diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h
index 8813f0f..55ebf03 100644
--- a/include/scsi/iscsi_if.h
+++ b/include/scsi/iscsi_if.h
@@ -53,6 +53,7 @@ enum iscsi_uevent_e {
 	ISCSI_KEVENT_RECV_PDU		= KEVENT_BASE + 1,
 	ISCSI_KEVENT_CONN_ERROR		= KEVENT_BASE + 2,
 	ISCSI_KEVENT_IF_ERROR		= KEVENT_BASE + 3,
+	ISCSI_KEVENT_DESTROY_SESSION	= KEVENT_BASE + 4,
 };
 
 enum iscsi_tgt_dscvr {
@@ -157,27 +158,13 @@ struct iscsi_uevent {
 			uint32_t	cid;
 			uint32_t	error; /* enum iscsi_err */
 		} connerror;
+		struct msg_session_destroyed {
+			uint32_t	host_no;
+			uint32_t	sid;
+		} d_session;
 		struct msg_transport_connect_ret {
 			uint64_t	handle;
 		} ep_connect_ret;
-		struct msg_tgt_dscvr_ret {
-			/*
-			 * session/connection pair used to reference
-			 * the connection to server
-			 */
-			uint32_t	sid;
-			uint32_t	cid;
-			union {
-				struct isns {
-					/* port # for conn to iSNS server */
-					uint16_t isns_port;
-					/* listening port to receive SCNs */
-					uint16_t scn_port;
-					/* listening port to receive ESIs */
-					uint16_t esi_port;
-				} isns_attrib;
-			} u;
-		} tgt_dscvr_ret;
 	} r;
 } __attribute__ ((aligned (sizeof(uint64_t))));
 
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index f7b0db5..5a3df1d 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -214,6 +214,8 @@ extern struct iscsi_cls_session *iscsi_alloc_session(struct Scsi_Host *shost,
 					struct iscsi_transport *transport);
 extern int iscsi_add_session(struct iscsi_cls_session *session,
 			     unsigned int target_id);
+extern int iscsi_if_create_session_done(struct iscsi_cls_conn *conn);
+extern int iscsi_if_destroy_session_done(struct iscsi_cls_conn *conn);
 extern struct iscsi_cls_session *iscsi_create_session(struct Scsi_Host *shost,
 						struct iscsi_transport *t,
 						unsigned int target_id);
@@ -226,4 +228,5 @@ extern int iscsi_destroy_conn(struct iscsi_cls_conn *conn);
 extern void iscsi_unblock_session(struct iscsi_cls_session *session);
 extern void iscsi_block_session(struct iscsi_cls_session *session);
 
+
 #endif
-- 
cgit v0.10.2


From 0f13fc09db68de92585558984bff1c51b87db72f Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Thu, 29 Jun 2006 13:02:11 -0400
Subject: [SCSI] 53c700: fix breakage caused by the autosense update

A bit of a brown paper bag issue.  The previous patch to remove the soon
to be ripped out fields that were used in autosense actually broke the
driver.  This patch fixes it and has been tested (honestly).

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index bff0479..24dfd54 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -174,6 +174,7 @@ STATIC int NCR_700_bus_reset(struct scsi_cmnd * SCpnt);
 STATIC int NCR_700_host_reset(struct scsi_cmnd * SCpnt);
 STATIC void NCR_700_chip_setup(struct Scsi_Host *host);
 STATIC void NCR_700_chip_reset(struct Scsi_Host *host);
+STATIC int NCR_700_slave_alloc(struct scsi_device *SDpnt);
 STATIC int NCR_700_slave_configure(struct scsi_device *SDpnt);
 STATIC void NCR_700_slave_destroy(struct scsi_device *SDpnt);
 static int NCR_700_change_queue_depth(struct scsi_device *SDpnt, int depth);
@@ -183,10 +184,6 @@ STATIC struct device_attribute *NCR_700_dev_attrs[];
 
 STATIC struct scsi_transport_template *NCR_700_transport_template = NULL;
 
-struct NCR_700_sense {
-	unsigned char cmnd[MAX_COMMAND_SIZE];
-};
-
 static char *NCR_700_phase[] = {
 	"",
 	"after selection",
@@ -334,6 +331,7 @@ NCR_700_detect(struct scsi_host_template *tpnt,
 	tpnt->use_clustering = ENABLE_CLUSTERING;
 	tpnt->slave_configure = NCR_700_slave_configure;
 	tpnt->slave_destroy = NCR_700_slave_destroy;
+	tpnt->slave_alloc = NCR_700_slave_alloc;
 	tpnt->change_queue_depth = NCR_700_change_queue_depth;
 	tpnt->change_queue_type = NCR_700_change_queue_type;
 
@@ -612,9 +610,10 @@ NCR_700_scsi_done(struct NCR_700_Host_Parameters *hostdata,
 		struct NCR_700_command_slot *slot = 
 			(struct NCR_700_command_slot *)SCp->host_scribble;
 		
-		NCR_700_unmap(hostdata, SCp, slot);
+		dma_unmap_single(hostdata->dev, slot->pCmd,
+				 sizeof(SCp->cmnd), DMA_TO_DEVICE);
 		if (slot->flags == NCR_700_FLAG_AUTOSENSE) {
-			struct NCR_700_sense *sense = SCp->device->hostdata;
+			char *cmnd = NCR_700_get_sense_cmnd(SCp->device);
 #ifdef NCR_700_DEBUG
 			printk(" ORIGINAL CMD %p RETURNED %d, new return is %d sense is\n",
 			       SCp, SCp->cmnd[7], result);
@@ -625,10 +624,9 @@ NCR_700_scsi_done(struct NCR_700_Host_Parameters *hostdata,
 			/* restore the old result if the request sense was
 			 * successful */
 			if(result == 0)
-				result = sense->cmnd[7];
+				result = cmnd[7];
 		} else
-			dma_unmap_single(hostdata->dev, slot->pCmd,
-					 sizeof(SCp->cmnd), DMA_TO_DEVICE);
+			NCR_700_unmap(hostdata, SCp, slot);
 
 		free_slot(slot, hostdata);
 #ifdef NCR_700_DEBUG
@@ -970,14 +968,15 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp,
 		   status_byte(hostdata->status[0]) == COMMAND_TERMINATED) {
 			struct NCR_700_command_slot *slot =
 				(struct NCR_700_command_slot *)SCp->host_scribble;
-			if(SCp->cmnd[0] == REQUEST_SENSE) {
+			if(slot->flags == NCR_700_FLAG_AUTOSENSE) {
 				/* OOPS: bad device, returning another
 				 * contingent allegiance condition */
 				scmd_printk(KERN_ERR, SCp,
 					"broken device is looping in contingent allegiance: ignoring\n");
 				NCR_700_scsi_done(hostdata, SCp, hostdata->status[0]);
 			} else {
-				struct NCR_700_sense *sense = SCp->device->hostdata;
+				char *cmnd =
+					NCR_700_get_sense_cmnd(SCp->device);
 #ifdef NCR_DEBUG
 				scsi_print_command(SCp);
 				printk("  cmd %p has status %d, requesting sense\n",
@@ -995,21 +994,21 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp,
 						 sizeof(SCp->cmnd),
 						 DMA_TO_DEVICE);
 
-				sense->cmnd[0] = REQUEST_SENSE;
-				sense->cmnd[1] = (SCp->device->lun & 0x7) << 5;
-				sense->cmnd[2] = 0;
-				sense->cmnd[3] = 0;
-				sense->cmnd[4] = sizeof(SCp->sense_buffer);
-				sense->cmnd[5] = 0;
+				cmnd[0] = REQUEST_SENSE;
+				cmnd[1] = (SCp->device->lun & 0x7) << 5;
+				cmnd[2] = 0;
+				cmnd[3] = 0;
+				cmnd[4] = sizeof(SCp->sense_buffer);
+				cmnd[5] = 0;
 				/* Here's a quiet hack: the
 				 * REQUEST_SENSE command is six bytes,
 				 * so store a flag indicating that
 				 * this was an internal sense request
 				 * and the original status at the end
 				 * of the command */
-				sense->cmnd[6] = NCR_700_INTERNAL_SENSE_MAGIC;
-				sense->cmnd[7] = hostdata->status[0];
-				slot->pCmd = dma_map_single(hostdata->dev, sense->cmnd, sizeof(sense->cmnd), DMA_TO_DEVICE);
+				cmnd[6] = NCR_700_INTERNAL_SENSE_MAGIC;
+				cmnd[7] = hostdata->status[0];
+				slot->pCmd = dma_map_single(hostdata->dev, cmnd, MAX_COMMAND_SIZE, DMA_TO_DEVICE);
 				slot->dma_handle = dma_map_single(hostdata->dev, SCp->sense_buffer, sizeof(SCp->sense_buffer), DMA_FROM_DEVICE);
 				slot->SG[0].ins = bS_to_host(SCRIPT_MOVE_DATA_IN | sizeof(SCp->sense_buffer));
 				slot->SG[0].pAddr = bS_to_host(slot->dma_handle);
@@ -1531,7 +1530,7 @@ NCR_700_intr(int irq, void *dev_id, struct pt_regs *regs)
 
 			/* clear all the negotiated parameters */
 			__shost_for_each_device(SDp, host)
-				SDp->hostdata = NULL;
+				NCR_700_clear_flag(SDp, ~0);
 			
 			/* clear all the slots and their pending commands */
 			for(i = 0; i < NCR_700_COMMAND_SLOTS_PER_HOST; i++) {
@@ -2036,7 +2035,17 @@ NCR_700_set_offset(struct scsi_target *STp, int offset)
 	spi_flags(STp) |= NCR_700_DEV_PRINT_SYNC_NEGOTIATION;
 }
 
+STATIC int
+NCR_700_slave_alloc(struct scsi_device *SDp)
+{
+	SDp->hostdata = kzalloc(sizeof(struct NCR_700_Device_Parameters),
+				GFP_KERNEL);
 
+	if (!SDp->hostdata)
+		return -ENOMEM;
+
+	return 0;
+}
 
 STATIC int
 NCR_700_slave_configure(struct scsi_device *SDp)
@@ -2044,11 +2053,6 @@ NCR_700_slave_configure(struct scsi_device *SDp)
 	struct NCR_700_Host_Parameters *hostdata = 
 		(struct NCR_700_Host_Parameters *)SDp->host->hostdata[0];
 
-	SDp->hostdata = kmalloc(sizeof(struct NCR_700_sense), GFP_KERNEL);
-
-	if (!SDp->hostdata)
-		return -ENOMEM;
-
 	/* to do here: allocate memory; build a queue_full list */
 	if(SDp->tagged_supported) {
 		scsi_set_tag_type(SDp, MSG_ORDERED_TAG);
diff --git a/drivers/scsi/53c700.h b/drivers/scsi/53c700.h
index 7f22a06..97ebe71 100644
--- a/drivers/scsi/53c700.h
+++ b/drivers/scsi/53c700.h
@@ -12,7 +12,7 @@
 #include <asm/io.h>
 
 #include <scsi/scsi_device.h>
-
+#include <scsi/scsi_cmnd.h>
 
 /* Turn on for general debugging---too verbose for normal use */
 #undef	NCR_700_DEBUG
@@ -76,11 +76,16 @@ struct NCR_700_SG_List {
 	#define	SCRIPT_RETURN			0x90080000
 };
 
-/* We use device->hostdata to store negotiated parameters.  This is
- * supposed to be a pointer to a device private area, but we cannot
- * really use it as such since it will never be freed, so just use the
- * 32 bits to cram the information.  The SYNC negotiation sequence looks
- * like:
+struct NCR_700_Device_Parameters {
+	/* space for creating a request sense command. Really, except
+	 * for the annoying SCSI-2 requirement for LUN information in
+	 * cmnd[1], this could be in static storage */
+	unsigned char cmnd[MAX_COMMAND_SIZE];
+	__u8	depth;
+};
+
+
+/* The SYNC negotiation sequence looks like:
  * 
  * If DEV_NEGOTIATED_SYNC not set, tack and SDTR message on to the
  * initial identify for the device and set DEV_BEGIN_SYNC_NEGOTATION
@@ -98,19 +103,26 @@ struct NCR_700_SG_List {
 #define NCR_700_DEV_BEGIN_SYNC_NEGOTIATION	(1<<17)
 #define NCR_700_DEV_PRINT_SYNC_NEGOTIATION (1<<19)
 
+static inline char *NCR_700_get_sense_cmnd(struct scsi_device *SDp)
+{
+	struct NCR_700_Device_Parameters *hostdata = SDp->hostdata;
+
+	return hostdata->cmnd;
+}
+
 static inline void
 NCR_700_set_depth(struct scsi_device *SDp, __u8 depth)
 {
-	long l = (long)SDp->hostdata;
+	struct NCR_700_Device_Parameters *hostdata = SDp->hostdata;
 
-	l &= 0xffff00ff;
-	l |= 0xff00 & (depth << 8);
-	SDp->hostdata = (void *)l;
+	hostdata->depth = depth;
 }
 static inline __u8
 NCR_700_get_depth(struct scsi_device *SDp)
 {
-	return ((((unsigned long)SDp->hostdata) & 0xff00)>>8);
+	struct NCR_700_Device_Parameters *hostdata = SDp->hostdata;
+
+	return hostdata->depth;
 }
 static inline int
 NCR_700_is_flag_set(struct scsi_device *SDp, __u32 flag)
-- 
cgit v0.10.2


From 27a288677de33c50af980e55abec5643db4cd0b8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@cruncher.tec.linutronix.de>
Date: Thu, 29 Jun 2006 21:25:10 +0200
Subject: [MTD] NAND: Fix broken sharpsl driver

Remove the not longer supported NO_VIRTBLOCKS flag and remove
an unused variable while at it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/drivers/mtd/nand/sharpsl.c b/drivers/mtd/nand/sharpsl.c
index 2174365..fbeedc3 100644
--- a/drivers/mtd/nand/sharpsl.c
+++ b/drivers/mtd/nand/sharpsl.c
@@ -237,11 +237,6 @@ static int __init sharpsl_nand_init(void)
 		}
 	}
 
-	if (machine_is_husky() || machine_is_borzoi() || machine_is_akita()) {
-		/* Need to use small eraseblock size for backward compatibility */
-		sharpsl_mtd->flags |= MTD_NO_VIRTBLOCKS;
-	}
-
 	add_mtd_partitions(sharpsl_mtd, sharpsl_partition_info, nr_partitions);
 
 	/* Return happy */
@@ -255,8 +250,6 @@ module_init(sharpsl_nand_init);
  */
 static void __exit sharpsl_nand_cleanup(void)
 {
-	struct nand_chip *this = (struct nand_chip *)&sharpsl_mtd[1];
-
 	/* Release resources, unregister device */
 	nand_release(sharpsl_mtd);
 
-- 
cgit v0.10.2


From 257a5bdeb0441789d8e34e1b3e92b26d0f51bbf0 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Thu, 29 Jun 2006 22:40:06 +0100
Subject: Remove export of include/linux/isdn/tpam.h

Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/include/linux/isdn/Kbuild b/include/linux/isdn/Kbuild
index c1727c8..991cdb2 100644
--- a/include/linux/isdn/Kbuild
+++ b/include/linux/isdn/Kbuild
@@ -1 +1 @@
-header-y += capicmd.h tpam.h
+header-y += capicmd.h
-- 
cgit v0.10.2


From c05d52c748da10a3f27f6e638875514153776b15 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 22 Jun 2006 12:03:35 +0200
Subject: fs/jffs2/: make 2 functions static

This patch makes two needlessly global functions static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c
index 8310c95..33f2910 100644
--- a/fs/jffs2/malloc.c
+++ b/fs/jffs2/malloc.c
@@ -190,7 +190,7 @@ void jffs2_free_tmp_dnode_info(struct jffs2_tmp_dnode_info *x)
 	kmem_cache_free(tmp_dnode_info_slab, x);
 }
 
-struct jffs2_raw_node_ref *jffs2_alloc_refblock(void)
+static struct jffs2_raw_node_ref *jffs2_alloc_refblock(void)
 {
 	struct jffs2_raw_node_ref *ret;
 
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index b16c60b..be6d4a4 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -427,8 +427,6 @@ char *jffs2_getlink(struct jffs2_sb_info *c, struct jffs2_inode_info *f);
 /* scan.c */
 int jffs2_scan_medium(struct jffs2_sb_info *c);
 void jffs2_rotate_lists(struct jffs2_sb_info *c);
-int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf,
-				uint32_t ofs, uint32_t len);
 struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uint32_t ino);
 int jffs2_scan_classify_jeb(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb);
 int jffs2_scan_dirty_space(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t size);
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 2bfdc33..e241346 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -274,8 +274,8 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
 	return ret;
 }
 
-int jffs2_fill_scan_buf (struct jffs2_sb_info *c, void *buf,
-				uint32_t ofs, uint32_t len)
+static int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf,
+			       uint32_t ofs, uint32_t len)
 {
 	int ret;
 	size_t retlen;
-- 
cgit v0.10.2


From ccb2fe209dac9ff67f6351e783e610073afaaeaf Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed, 28 Jun 2006 13:49:52 -0700
Subject: [CPUFREQ] Remove slowdown from ondemand sampling path.

Remove slowdown from ondemand sampling path. This reduces the code path length
in dbs_check_cpu() by half. slowdown was not used by ondemand by default.
If there are any user level tools that were using this tunable, they
may report error now.

Signed-off-by: Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 693e540..a2add11e 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -56,16 +56,14 @@ static unsigned int def_sampling_rate;
 #define MIN_SAMPLING_RATE			(def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
 #define MAX_SAMPLING_RATE			(500 * def_sampling_rate)
 #define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER	(1000)
-#define DEF_SAMPLING_DOWN_FACTOR		(1)
-#define MAX_SAMPLING_DOWN_FACTOR		(10)
 #define TRANSITION_LATENCY_LIMIT		(10 * 1000)
 
 static void do_dbs_timer(void *data);
 
 struct cpu_dbs_info_s {
+	cputime64_t prev_cpu_idle;
+	cputime64_t prev_cpu_wall;
 	struct cpufreq_policy *cur_policy;
-	unsigned int prev_cpu_idle_up;
-	unsigned int prev_cpu_idle_down;
 	unsigned int enable;
 };
 static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
@@ -87,24 +85,26 @@ static struct workqueue_struct *dbs_workq;
 
 struct dbs_tuners {
 	unsigned int sampling_rate;
-	unsigned int sampling_down_factor;
 	unsigned int up_threshold;
 	unsigned int ignore_nice;
 };
 
 static struct dbs_tuners dbs_tuners_ins = {
 	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
-	.sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR,
 	.ignore_nice = 0,
 };
 
-static inline unsigned int get_cpu_idle_time(unsigned int cpu)
+static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
 {
-	return	kstat_cpu(cpu).cpustat.idle +
-		kstat_cpu(cpu).cpustat.iowait +
-		( dbs_tuners_ins.ignore_nice ?
-		  kstat_cpu(cpu).cpustat.nice :
-		  0);
+	cputime64_t retval;
+
+	retval = cputime64_add(kstat_cpu(cpu).cpustat.idle,
+			kstat_cpu(cpu).cpustat.iowait);
+
+	if (dbs_tuners_ins.ignore_nice)
+		retval = cputime64_add(retval, kstat_cpu(cpu).cpustat.nice);
+
+	return retval;
 }
 
 /************************** sysfs interface ************************/
@@ -133,29 +133,9 @@ static ssize_t show_##file_name						\
 	return sprintf(buf, "%u\n", dbs_tuners_ins.object);		\
 }
 show_one(sampling_rate, sampling_rate);
-show_one(sampling_down_factor, sampling_down_factor);
 show_one(up_threshold, up_threshold);
 show_one(ignore_nice_load, ignore_nice);
 
-static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused,
-		const char *buf, size_t count)
-{
-	unsigned int input;
-	int ret;
-	ret = sscanf (buf, "%u", &input);
-	if (ret != 1 )
-		return -EINVAL;
-
-	if (input > MAX_SAMPLING_DOWN_FACTOR || input < 1)
-		return -EINVAL;
-
-	mutex_lock(&dbs_mutex);
-	dbs_tuners_ins.sampling_down_factor = input;
-	mutex_unlock(&dbs_mutex);
-
-	return count;
-}
-
 static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
 		const char *buf, size_t count)
 {
@@ -217,12 +197,12 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
 	}
 	dbs_tuners_ins.ignore_nice = input;
 
-	/* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */
+	/* we need to re-evaluate prev_cpu_idle */
 	for_each_online_cpu(j) {
-		struct cpu_dbs_info_s *j_dbs_info;
-		j_dbs_info = &per_cpu(cpu_dbs_info, j);
-		j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
-		j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up;
+		struct cpu_dbs_info_s *dbs_info;
+		dbs_info = &per_cpu(cpu_dbs_info, j);
+		dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
+		dbs_info->prev_cpu_wall = get_jiffies_64();
 	}
 	mutex_unlock(&dbs_mutex);
 
@@ -234,7 +214,6 @@ static struct freq_attr _name = \
 __ATTR(_name, 0644, show_##_name, store_##_name)
 
 define_one_rw(sampling_rate);
-define_one_rw(sampling_down_factor);
 define_one_rw(up_threshold);
 define_one_rw(ignore_nice_load);
 
@@ -242,7 +221,6 @@ static struct attribute * dbs_attributes[] = {
 	&sampling_rate_max.attr,
 	&sampling_rate_min.attr,
 	&sampling_rate.attr,
-	&sampling_down_factor.attr,
 	&up_threshold.attr,
 	&ignore_nice_load.attr,
 	NULL
@@ -257,11 +235,10 @@ static struct attribute_group dbs_attr_group = {
 
 static void dbs_check_cpu(int cpu)
 {
-	unsigned int idle_ticks, up_idle_ticks, total_ticks;
-	unsigned int freq_next;
-	unsigned int freq_down_sampling_rate;
-	static int down_skip[NR_CPUS];
+	unsigned int idle_ticks, total_ticks;
+	unsigned int load;
 	struct cpu_dbs_info_s *this_dbs_info;
+	cputime64_t cur_jiffies;
 
 	struct cpufreq_policy *policy;
 	unsigned int j;
@@ -271,10 +248,14 @@ static void dbs_check_cpu(int cpu)
 		return;
 
 	policy = this_dbs_info->cur_policy;
+	cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
+	total_ticks = (unsigned int) cputime64_sub(cur_jiffies,
+			this_dbs_info->prev_cpu_wall);
+	this_dbs_info->prev_cpu_wall = cur_jiffies;
 	/*
 	 * Every sampling_rate, we check, if current idle time is less
 	 * than 20% (default), then we try to increase frequency
-	 * Every sampling_rate*sampling_down_factor, we look for a the lowest
+	 * Every sampling_rate, we look for a the lowest
 	 * frequency which can sustain the load while keeping idle time over
 	 * 30%. If such a frequency exist, we try to decrease to this frequency.
 	 *
@@ -283,36 +264,26 @@ static void dbs_check_cpu(int cpu)
 	 * 5% (default) of current frequency
 	 */
 
-	/* Check for frequency increase */
+	/* Get Idle Time */
 	idle_ticks = UINT_MAX;
 	for_each_cpu_mask(j, policy->cpus) {
-		unsigned int tmp_idle_ticks, total_idle_ticks;
+		cputime64_t total_idle_ticks;
+		unsigned int tmp_idle_ticks;
 		struct cpu_dbs_info_s *j_dbs_info;
 
 		j_dbs_info = &per_cpu(cpu_dbs_info, j);
 		total_idle_ticks = get_cpu_idle_time(j);
-		tmp_idle_ticks = total_idle_ticks -
-			j_dbs_info->prev_cpu_idle_up;
-		j_dbs_info->prev_cpu_idle_up = total_idle_ticks;
+		tmp_idle_ticks = (unsigned int) cputime64_sub(total_idle_ticks,
+				j_dbs_info->prev_cpu_idle);
+		j_dbs_info->prev_cpu_idle = total_idle_ticks;
 
 		if (tmp_idle_ticks < idle_ticks)
 			idle_ticks = tmp_idle_ticks;
 	}
+	load = (100 * (total_ticks - idle_ticks)) / total_ticks;
 
-	/* Scale idle ticks by 100 and compare with up and down ticks */
-	idle_ticks *= 100;
-	up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) *
-			usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
-
-	if (idle_ticks < up_idle_ticks) {
-		down_skip[cpu] = 0;
-		for_each_cpu_mask(j, policy->cpus) {
-			struct cpu_dbs_info_s *j_dbs_info;
-
-			j_dbs_info = &per_cpu(cpu_dbs_info, j);
-			j_dbs_info->prev_cpu_idle_down =
-					j_dbs_info->prev_cpu_idle_up;
-		}
+	/* Check for frequency increase */
+	if (load > dbs_tuners_ins.up_threshold) {
 		/* if we are already at full speed then break out early */
 		if (policy->cur == policy->max)
 			return;
@@ -323,50 +294,22 @@ static void dbs_check_cpu(int cpu)
 	}
 
 	/* Check for frequency decrease */
-	down_skip[cpu]++;
-	if (down_skip[cpu] < dbs_tuners_ins.sampling_down_factor)
-		return;
-
-	idle_ticks = UINT_MAX;
-	for_each_cpu_mask(j, policy->cpus) {
-		unsigned int tmp_idle_ticks, total_idle_ticks;
-		struct cpu_dbs_info_s *j_dbs_info;
-
-		j_dbs_info = &per_cpu(cpu_dbs_info, j);
-		/* Check for frequency decrease */
-		total_idle_ticks = j_dbs_info->prev_cpu_idle_up;
-		tmp_idle_ticks = total_idle_ticks -
-			j_dbs_info->prev_cpu_idle_down;
-		j_dbs_info->prev_cpu_idle_down = total_idle_ticks;
-
-		if (tmp_idle_ticks < idle_ticks)
-			idle_ticks = tmp_idle_ticks;
-	}
-
-	down_skip[cpu] = 0;
 	/* if we cannot reduce the frequency anymore, break out early */
 	if (policy->cur == policy->min)
 		return;
 
-	/* Compute how many ticks there are between two measurements */
-	freq_down_sampling_rate = dbs_tuners_ins.sampling_rate *
-		dbs_tuners_ins.sampling_down_factor;
-	total_ticks = usecs_to_jiffies(freq_down_sampling_rate);
-
 	/*
 	 * The optimal frequency is the frequency that is the lowest that
 	 * can support the current CPU usage without triggering the up
 	 * policy. To be safe, we focus 10 points under the threshold.
 	 */
-	freq_next = ((total_ticks - idle_ticks) * 100) / total_ticks;
-	freq_next = (freq_next * policy->cur) /
+	if (load < (dbs_tuners_ins.up_threshold - 10)) {
+		unsigned int freq_next;
+		freq_next = (policy->cur * load) /
 			(dbs_tuners_ins.up_threshold - 10);
 
-	if (freq_next < policy->min)
-		freq_next = policy->min;
-
-	if (freq_next <= ((policy->cur * 95) / 100))
 		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
+	}
 }
 
 static void do_dbs_timer(void *data)
@@ -432,9 +375,8 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 			j_dbs_info = &per_cpu(cpu_dbs_info, j);
 			j_dbs_info->cur_policy = policy;
 
-			j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j);
-			j_dbs_info->prev_cpu_idle_down
-				= j_dbs_info->prev_cpu_idle_up;
+			j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
+			j_dbs_info->prev_cpu_wall = get_jiffies_64();
 		}
 		this_dbs_info->enable = 1;
 		sysfs_create_group(&policy->kobj, &dbs_attr_group);
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 6f17856..09204e4 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -24,7 +24,9 @@ typedef u64 cputime64_t;
 
 #define cputime64_zero (0ULL)
 #define cputime64_add(__a, __b)		((__a) + (__b))
+#define cputime64_sub(__a, __b)		((__a) - (__b))
 #define cputime64_to_jiffies64(__ct)	(__ct)
+#define jiffies64_to_cputime64(__jif)	(__jif)
 #define cputime_to_cputime64(__ct)	((u64) __ct)
 
 
-- 
cgit v0.10.2


From 7a6bc1cdd506cf81f856f0fef4e56a2ba0c5a26d Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed, 28 Jun 2006 13:50:33 -0700
Subject: [CPUFREQ] Add queue_delayed_work_on() interface for workqueues.

Add queue_delayed_work_on() interface for workqueues.

Signed-off-by: Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 957c21c..9bca353 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -63,6 +63,8 @@ extern void destroy_workqueue(struct workqueue_struct *wq);
 
 extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work));
 extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct work_struct *work, unsigned long delay));
+extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
+	struct work_struct *work, unsigned long delay);
 extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq));
 
 extern int FASTCALL(schedule_work(struct work_struct *work));
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 59f0b42..8fbef70 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -148,6 +148,27 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,
 	return ret;
 }
 
+int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
+			struct work_struct *work, unsigned long delay)
+{
+	int ret = 0;
+	struct timer_list *timer = &work->timer;
+
+	if (!test_and_set_bit(0, &work->pending)) {
+		BUG_ON(timer_pending(timer));
+		BUG_ON(!list_empty(&work->entry));
+
+		/* This stores wq for the moment, for the timer_fn */
+		work->wq_data = wq;
+		timer->expires = jiffies + delay;
+		timer->data = (unsigned long)work;
+		timer->function = delayed_work_timer_fn;
+		add_timer_on(timer, cpu);
+		ret = 1;
+	}
+	return ret;
+}
+
 static void run_workqueue(struct cpu_workqueue_struct *cwq)
 {
 	unsigned long flags;
@@ -411,21 +432,7 @@ int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay
 int schedule_delayed_work_on(int cpu,
 			struct work_struct *work, unsigned long delay)
 {
-	int ret = 0;
-	struct timer_list *timer = &work->timer;
-
-	if (!test_and_set_bit(0, &work->pending)) {
-		BUG_ON(timer_pending(timer));
-		BUG_ON(!list_empty(&work->entry));
-		/* This stores keventd_wq for the moment, for the timer_fn */
-		work->wq_data = keventd_wq;
-		timer->expires = jiffies + delay;
-		timer->data = (unsigned long)work;
-		timer->function = delayed_work_timer_fn;
-		add_timer_on(timer, cpu);
-		ret = 1;
-	}
-	return ret;
+	return queue_delayed_work_on(cpu, keventd_wq, work, delay);
 }
 
 /**
@@ -622,6 +629,7 @@ void init_workqueues(void)
 EXPORT_SYMBOL_GPL(__create_workqueue);
 EXPORT_SYMBOL_GPL(queue_work);
 EXPORT_SYMBOL_GPL(queue_delayed_work);
+EXPORT_SYMBOL_GPL(queue_delayed_work_on);
 EXPORT_SYMBOL_GPL(flush_workqueue);
 EXPORT_SYMBOL_GPL(destroy_workqueue);
 
-- 
cgit v0.10.2


From 2f8a835c705794f71726eb12c06fb0f24fe07ed3 Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed, 28 Jun 2006 13:51:19 -0700
Subject: [CPUFREQ] Make ondemand sampling per CPU and remove the mutex usage
 in sampling path.

Make ondemand sampling per CPU and remove the mutex usage in sampling path.

Signed-off-by: Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index a2add11e..18b016e 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -64,6 +64,7 @@ struct cpu_dbs_info_s {
 	cputime64_t prev_cpu_idle;
 	cputime64_t prev_cpu_wall;
 	struct cpufreq_policy *cur_policy;
+ 	struct work_struct work;
 	unsigned int enable;
 };
 static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
@@ -81,7 +82,7 @@ static unsigned int dbs_enable;	/* number of CPUs using this policy */
 static DEFINE_MUTEX (dbs_mutex);
 static DECLARE_WORK	(dbs_work, do_dbs_timer, NULL);
 
-static struct workqueue_struct *dbs_workq;
+static struct workqueue_struct	*kondemand_wq;
 
 struct dbs_tuners {
 	unsigned int sampling_rate;
@@ -233,17 +234,15 @@ static struct attribute_group dbs_attr_group = {
 
 /************************** sysfs end ************************/
 
-static void dbs_check_cpu(int cpu)
+static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 {
 	unsigned int idle_ticks, total_ticks;
 	unsigned int load;
-	struct cpu_dbs_info_s *this_dbs_info;
 	cputime64_t cur_jiffies;
 
 	struct cpufreq_policy *policy;
 	unsigned int j;
 
-	this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
 	if (!this_dbs_info->enable)
 		return;
 
@@ -314,35 +313,29 @@ static void dbs_check_cpu(int cpu)
 
 static void do_dbs_timer(void *data)
 {
-	int i;
-	lock_cpu_hotplug();
-	mutex_lock(&dbs_mutex);
-	for_each_online_cpu(i)
-		dbs_check_cpu(i);
-	queue_delayed_work(dbs_workq, &dbs_work,
-			   usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
-	mutex_unlock(&dbs_mutex);
-	unlock_cpu_hotplug();
+	unsigned int cpu = smp_processor_id();
+	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
+
+	dbs_check_cpu(dbs_info);
+	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
+			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
 }
 
-static inline void dbs_timer_init(void)
+static inline void dbs_timer_init(unsigned int cpu)
 {
-	INIT_WORK(&dbs_work, do_dbs_timer, NULL);
-	if (!dbs_workq)
-		dbs_workq = create_singlethread_workqueue("ondemand");
-	if (!dbs_workq) {
-		printk(KERN_ERR "ondemand: Cannot initialize kernel thread\n");
-		return;
-	}
-	queue_delayed_work(dbs_workq, &dbs_work,
-			   usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
+	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
+
+	INIT_WORK(&dbs_info->work, do_dbs_timer, 0);
+	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
+			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
 	return;
 }
 
-static inline void dbs_timer_exit(void)
+static inline void dbs_timer_exit(unsigned int cpu)
 {
-	if (dbs_workq)
-		cancel_rearming_delayed_workqueue(dbs_workq, &dbs_work);
+	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
+
+	cancel_rearming_delayed_workqueue(kondemand_wq, &dbs_info->work);
 }
 
 static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
@@ -370,6 +363,16 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 			break;
 
 		mutex_lock(&dbs_mutex);
+		dbs_enable++;
+		if (dbs_enable == 1) {
+			kondemand_wq = create_workqueue("kondemand");
+			if (!kondemand_wq) {
+				printk(KERN_ERR "Creation of kondemand failed\n");
+				dbs_enable--;
+				mutex_unlock(&dbs_mutex);
+				return -ENOSPC;
+			}
+		}
 		for_each_cpu_mask(j, policy->cpus) {
 			struct cpu_dbs_info_s *j_dbs_info;
 			j_dbs_info = &per_cpu(cpu_dbs_info, j);
@@ -380,7 +383,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 		}
 		this_dbs_info->enable = 1;
 		sysfs_create_group(&policy->kobj, &dbs_attr_group);
-		dbs_enable++;
 		/*
 		 * Start the timerschedule work, when this governor
 		 * is used for first time
@@ -399,23 +401,20 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 				def_sampling_rate = MIN_STAT_SAMPLING_RATE;
 
 			dbs_tuners_ins.sampling_rate = def_sampling_rate;
-			dbs_timer_init();
 		}
+		dbs_timer_init(policy->cpu);
 
 		mutex_unlock(&dbs_mutex);
 		break;
 
 	case CPUFREQ_GOV_STOP:
 		mutex_lock(&dbs_mutex);
+		dbs_timer_exit(policy->cpu);
 		this_dbs_info->enable = 0;
 		sysfs_remove_group(&policy->kobj, &dbs_attr_group);
 		dbs_enable--;
-		/*
-		 * Stop the timerschedule work, when this governor
-		 * is used for first time
-		 */
 		if (dbs_enable == 0)
-			dbs_timer_exit();
+			destroy_workqueue(kondemand_wq);
 
 		mutex_unlock(&dbs_mutex);
 
@@ -452,13 +451,6 @@ static int __init cpufreq_gov_dbs_init(void)
 
 static void __exit cpufreq_gov_dbs_exit(void)
 {
-	/* Make sure that the scheduled work is indeed not running.
-	   Assumes the timer has been cancelled first. */
-	if (dbs_workq) {
-		flush_workqueue(dbs_workq);
-		destroy_workqueue(dbs_workq);
-	}
-
 	cpufreq_unregister_governor(&cpufreq_gov_dbs);
 }
 
-- 
cgit v0.10.2


From ffac80e925e54d84f6ea580231aa46d0ef051756 Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Wed, 28 Jun 2006 13:52:18 -0700
Subject: [CPUFREQ] Misc cleanups in ondemand.

Misc cleanups in ondemand. Should have zero functional impact.
Also adding Alexey as author.

Signed-off-by: Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 18b016e..8729992 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -12,22 +12,11 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/smp.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/ctype.h>
 #include <linux/cpufreq.h>
-#include <linux/sysctl.h>
-#include <linux/types.h>
-#include <linux/fs.h>
-#include <linux/sysfs.h>
 #include <linux/cpu.h>
-#include <linux/sched.h>
-#include <linux/kmod.h>
-#include <linux/workqueue.h>
 #include <linux/jiffies.h>
 #include <linux/kernel_stat.h>
-#include <linux/percpu.h>
 #include <linux/mutex.h>
 
 /*
@@ -79,8 +68,7 @@ static unsigned int dbs_enable;	/* number of CPUs using this policy */
  * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock
  * is recursive for the same process. -Venki
  */
-static DEFINE_MUTEX (dbs_mutex);
-static DECLARE_WORK	(dbs_work, do_dbs_timer, NULL);
+static DEFINE_MUTEX(dbs_mutex);
 
 static struct workqueue_struct	*kondemand_wq;
 
@@ -142,7 +130,7 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
 {
 	unsigned int input;
 	int ret;
-	ret = sscanf (buf, "%u", &input);
+	ret = sscanf(buf, "%u", &input);
 
 	mutex_lock(&dbs_mutex);
 	if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) {
@@ -161,7 +149,7 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused,
 {
 	unsigned int input;
 	int ret;
-	ret = sscanf (buf, "%u", &input);
+	ret = sscanf(buf, "%u", &input);
 
 	mutex_lock(&dbs_mutex);
 	if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
@@ -184,7 +172,7 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
 
 	unsigned int j;
 
-	ret = sscanf (buf, "%u", &input);
+	ret = sscanf(buf, "%u", &input);
 	if ( ret != 1 )
 		return -EINVAL;
 
@@ -349,8 +337,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 
 	switch (event) {
 	case CPUFREQ_GOV_START:
-		if ((!cpu_online(cpu)) ||
-		    (!policy->cur))
+		if ((!cpu_online(cpu)) || (!policy->cur))
 			return -EINVAL;
 
 		if (policy->cpuinfo.transition_latency >
@@ -424,13 +411,13 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 		lock_cpu_hotplug();
 		mutex_lock(&dbs_mutex);
 		if (policy->max < this_dbs_info->cur_policy->cur)
-			__cpufreq_driver_target(
-					this_dbs_info->cur_policy,
-					policy->max, CPUFREQ_RELATION_H);
+			__cpufreq_driver_target(this_dbs_info->cur_policy,
+			                        policy->max,
+			                        CPUFREQ_RELATION_H);
 		else if (policy->min > this_dbs_info->cur_policy->cur)
-			__cpufreq_driver_target(
-					this_dbs_info->cur_policy,
-					policy->min, CPUFREQ_RELATION_L);
+			__cpufreq_driver_target(this_dbs_info->cur_policy,
+			                        policy->min,
+			                        CPUFREQ_RELATION_L);
 		mutex_unlock(&dbs_mutex);
 		unlock_cpu_hotplug();
 		break;
@@ -439,9 +426,9 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
 }
 
 static struct cpufreq_governor cpufreq_gov_dbs = {
-	.name		= "ondemand",
-	.governor	= cpufreq_governor_dbs,
-	.owner		= THIS_MODULE,
+	.name = "ondemand",
+	.governor = cpufreq_governor_dbs,
+	.owner = THIS_MODULE,
 };
 
 static int __init cpufreq_gov_dbs_init(void)
@@ -455,10 +442,11 @@ static void __exit cpufreq_gov_dbs_exit(void)
 }
 
 
-MODULE_AUTHOR ("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
-MODULE_DESCRIPTION ("'cpufreq_ondemand' - A dynamic cpufreq governor for "
-		"Low Latency Frequency Transition capable processors");
-MODULE_LICENSE ("GPL");
+MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
+MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
+MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
+                   "Low Latency Frequency Transition capable processors");
+MODULE_LICENSE("GPL");
 
 module_init(cpufreq_gov_dbs_init);
 module_exit(cpufreq_gov_dbs_exit);
-- 
cgit v0.10.2


From ae90dd5dbee461652b90d9f7d292ba47dc3dc4b8 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Fri, 30 Jun 2006 01:40:45 -0400
Subject: Move workqueue exports to where the functions are defined.

Signed-off-by: Dave Jones <davej@redhat.com>

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8fbef70..7f1c30c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -114,6 +114,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
 	put_cpu();
 	return ret;
 }
+EXPORT_SYMBOL_GPL(queue_work);
 
 static void delayed_work_timer_fn(unsigned long __data)
 {
@@ -147,6 +148,7 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,
 	}
 	return ret;
 }
+EXPORT_SYMBOL_GPL(queue_delayed_work);
 
 int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 			struct work_struct *work, unsigned long delay)
@@ -168,6 +170,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 	}
 	return ret;
 }
+EXPORT_SYMBOL_GPL(queue_delayed_work_on);
 
 static void run_workqueue(struct cpu_workqueue_struct *cwq)
 {
@@ -302,6 +305,7 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
 		unlock_cpu_hotplug();
 	}
 }
+EXPORT_SYMBOL_GPL(flush_workqueue);
 
 static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
 						   int cpu)
@@ -379,6 +383,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
 	}
 	return wq;
 }
+EXPORT_SYMBOL_GPL(__create_workqueue);
 
 static void cleanup_workqueue_thread(struct workqueue_struct *wq, int cpu)
 {
@@ -416,6 +421,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 	free_percpu(wq->cpu_wq);
 	kfree(wq);
 }
+EXPORT_SYMBOL_GPL(destroy_workqueue);
 
 static struct workqueue_struct *keventd_wq;
 
@@ -423,17 +429,20 @@ int fastcall schedule_work(struct work_struct *work)
 {
 	return queue_work(keventd_wq, work);
 }
+EXPORT_SYMBOL(schedule_work);
 
 int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay)
 {
 	return queue_delayed_work(keventd_wq, work, delay);
 }
+EXPORT_SYMBOL(schedule_delayed_work);
 
 int schedule_delayed_work_on(int cpu,
 			struct work_struct *work, unsigned long delay)
 {
 	return queue_delayed_work_on(cpu, keventd_wq, work, delay);
 }
+EXPORT_SYMBOL(schedule_delayed_work_on);
 
 /**
  * schedule_on_each_cpu - call a function on each online CPU from keventd
@@ -470,6 +479,7 @@ void flush_scheduled_work(void)
 {
 	flush_workqueue(keventd_wq);
 }
+EXPORT_SYMBOL(flush_scheduled_work);
 
 /**
  * cancel_rearming_delayed_workqueue - reliably kill off a delayed
@@ -626,14 +636,3 @@ void init_workqueues(void)
 	BUG_ON(!keventd_wq);
 }
 
-EXPORT_SYMBOL_GPL(__create_workqueue);
-EXPORT_SYMBOL_GPL(queue_work);
-EXPORT_SYMBOL_GPL(queue_delayed_work);
-EXPORT_SYMBOL_GPL(queue_delayed_work_on);
-EXPORT_SYMBOL_GPL(flush_workqueue);
-EXPORT_SYMBOL_GPL(destroy_workqueue);
-
-EXPORT_SYMBOL(schedule_work);
-EXPORT_SYMBOL(schedule_delayed_work);
-EXPORT_SYMBOL(schedule_delayed_work_on);
-EXPORT_SYMBOL(flush_scheduled_work);
-- 
cgit v0.10.2


From af96179a8298832cc58be212d0e4988d8a1e11bf Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:32 -0400
Subject: ACPI: ac: Add struct acpi_device to struct acpi_ac.

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index 36ca365..206f56d 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -66,6 +66,7 @@ static struct acpi_driver acpi_ac_driver = {
 
 struct acpi_ac {
 	acpi_handle handle;
+	struct acpi_device * device;
 	unsigned long state;
 };
 
@@ -191,9 +192,7 @@ static void acpi_ac_notify(acpi_handle handle, u32 event, void *data)
 	if (!ac)
 		return;
 
-	if (acpi_bus_get_device(ac->handle, &device))
-		return;
-
+	device = ac->device;
 	switch (event) {
 	case ACPI_AC_NOTIFY_STATUS:
 		acpi_ac_get_state(ac);
@@ -224,6 +223,7 @@ static int acpi_ac_add(struct acpi_device *device)
 	memset(ac, 0, sizeof(struct acpi_ac));
 
 	ac->handle = device->handle;
+	ac->device = device;
 	strcpy(acpi_device_name(device), ACPI_AC_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_AC_CLASS);
 	acpi_driver_data(device) = ac;
-- 
cgit v0.10.2


From 3b74863df5d46f794052b5ee010cfc8fd66819dd Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:38 -0400
Subject: ACPI: acpi_memhotplug: add struct acpi_device to struct
 acpi_memory_device.

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 6f5e395..c562224 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -81,6 +81,7 @@ struct acpi_memory_info {
 
 struct acpi_memory_device {
 	acpi_handle handle;
+	struct acpi_device * device;
 	unsigned int state;	/* State of the memory device */
 	struct list_head res_list;
 };
@@ -399,6 +400,7 @@ static int acpi_memory_device_add(struct acpi_device *device)
 
 	INIT_LIST_HEAD(&mem_device->res_list);
 	mem_device->handle = device->handle;
+	mem_device->device = device;
 	sprintf(acpi_device_name(device), "%s", ACPI_MEMORY_DEVICE_NAME);
 	sprintf(acpi_device_class(device), "%s", ACPI_MEMORY_DEVICE_CLASS);
 	acpi_driver_data(device) = mem_device;
-- 
cgit v0.10.2


From 145def84a177c01cf3cc6cfbb67a029f39a8ac35 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:39 -0400
Subject: ACPI: battery: add struct acpi_device to struct acpi_battery.

- Use it instead of acpi_bus_get_device()..

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 00b0728..ba34ca6 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -109,6 +109,7 @@ struct acpi_battery_trips {
 
 struct acpi_battery {
 	acpi_handle handle;
+	struct acpi_device * device;
 	struct acpi_battery_flags flags;
 	struct acpi_battery_trips trips;
 	unsigned long alarm;
@@ -278,9 +279,7 @@ static int acpi_battery_check(struct acpi_battery *battery)
 	if (!battery)
 		return -EINVAL;
 
-	result = acpi_bus_get_device(battery->handle, &device);
-	if (result)
-		return result;
+	device = battery->device;
 
 	result = acpi_bus_get_status(device);
 	if (result)
@@ -662,8 +661,7 @@ static void acpi_battery_notify(acpi_handle handle, u32 event, void *data)
 	if (!battery)
 		return;
 
-	if (acpi_bus_get_device(handle, &device))
-		return;
+	device = battery->device;
 
 	switch (event) {
 	case ACPI_BATTERY_NOTIFY_STATUS:
@@ -696,6 +694,7 @@ static int acpi_battery_add(struct acpi_device *device)
 	memset(battery, 0, sizeof(struct acpi_battery));
 
 	battery->handle = device->handle;
+	battery->device = device;
 	strcpy(acpi_device_name(device), ACPI_BATTERY_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_BATTERY_CLASS);
 	acpi_driver_data(device) = battery;
-- 
cgit v0.10.2


From 74b142e0fe039fcde42030c064763577e11ca004 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:39 -0400
Subject: ACPI: fan: add struct acpi_device to struct acpi_fan.

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
index 38acc69..8abf5ac 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c
@@ -65,6 +65,7 @@ static struct acpi_driver acpi_fan_driver = {
 
 struct acpi_fan {
 	acpi_handle handle;
+	struct acpi_device * device;
 };
 
 /* --------------------------------------------------------------------------
@@ -192,6 +193,7 @@ static int acpi_fan_add(struct acpi_device *device)
 	memset(fan, 0, sizeof(struct acpi_fan));
 
 	fan->handle = device->handle;
+	fan->device = device;
 	strcpy(acpi_device_name(device), "Fan");
 	strcpy(acpi_device_class(device), ACPI_FAN_CLASS);
 	acpi_driver_data(device) = fan;
-- 
cgit v0.10.2


From 32917e5b589d813c9dc0f2d140d8c52898ddb6fb Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:39 -0400
Subject: ACPI: pci root: add struct acpi_device to struct acpi_pci_root.

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 8f10442..2d63a38 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -59,6 +59,7 @@ static struct acpi_driver acpi_pci_root_driver = {
 struct acpi_pci_root {
 	struct list_head node;
 	acpi_handle handle;
+	struct acpi_device * device;
 	struct acpi_pci_id id;
 	struct pci_bus *bus;
 };
@@ -171,6 +172,7 @@ static int acpi_pci_root_add(struct acpi_device *device)
 	INIT_LIST_HEAD(&root->node);
 
 	root->handle = device->handle;
+	root->device = device;
 	strcpy(acpi_device_name(device), ACPI_PCI_ROOT_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_PCI_ROOT_CLASS);
 	acpi_driver_data(device) = root;
-- 
cgit v0.10.2


From 8348e1b19a06b1932f65e84e1d59be29e1626c2b Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:40 -0400
Subject: ACPI: thermal: add struct acpi_device to struct acpi_thermal.

- Use it instead of acpi_bus_get_device() where we can..

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index c855f44..1d8c250 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -163,6 +163,7 @@ struct acpi_thermal_flags {
 
 struct acpi_thermal {
 	acpi_handle handle;
+	struct acpi_device * device;
 	acpi_bus_id name;
 	unsigned long temperature;
 	unsigned long last_temperature;
@@ -453,10 +454,6 @@ static int acpi_thermal_call_usermode(char *path)
 
 static int acpi_thermal_critical(struct acpi_thermal *tz)
 {
-	int result = 0;
-	struct acpi_device *device = NULL;
-
-
 	if (!tz || !tz->trips.critical.flags.valid)
 		return -EINVAL;
 
@@ -466,14 +463,10 @@ static int acpi_thermal_critical(struct acpi_thermal *tz)
 	} else if (tz->trips.critical.flags.enabled)
 		tz->trips.critical.flags.enabled = 0;
 
-	result = acpi_bus_get_device(tz->handle, &device);
-	if (result)
-		return result;
-
 	printk(KERN_EMERG
 	       "Critical temperature reached (%ld C), shutting down.\n",
 	       KELVIN_TO_CELSIUS(tz->temperature));
-	acpi_bus_generate_event(device, ACPI_THERMAL_NOTIFY_CRITICAL,
+	acpi_bus_generate_event(tz->device, ACPI_THERMAL_NOTIFY_CRITICAL,
 				tz->trips.critical.flags.enabled);
 
 	acpi_thermal_call_usermode(ACPI_THERMAL_PATH_POWEROFF);
@@ -483,10 +476,6 @@ static int acpi_thermal_critical(struct acpi_thermal *tz)
 
 static int acpi_thermal_hot(struct acpi_thermal *tz)
 {
-	int result = 0;
-	struct acpi_device *device = NULL;
-
-
 	if (!tz || !tz->trips.hot.flags.valid)
 		return -EINVAL;
 
@@ -496,11 +485,7 @@ static int acpi_thermal_hot(struct acpi_thermal *tz)
 	} else if (tz->trips.hot.flags.enabled)
 		tz->trips.hot.flags.enabled = 0;
 
-	result = acpi_bus_get_device(tz->handle, &device);
-	if (result)
-		return result;
-
-	acpi_bus_generate_event(device, ACPI_THERMAL_NOTIFY_HOT,
+	acpi_bus_generate_event(tz->device, ACPI_THERMAL_NOTIFY_HOT,
 				tz->trips.hot.flags.enabled);
 
 	/* TBD: Call user-mode "sleep(S4)" function */
@@ -1193,8 +1178,7 @@ static void acpi_thermal_notify(acpi_handle handle, u32 event, void *data)
 	if (!tz)
 		return;
 
-	if (acpi_bus_get_device(tz->handle, &device))
-		return;
+	device = tz->device;
 
 	switch (event) {
 	case ACPI_THERMAL_NOTIFY_TEMPERATURE:
@@ -1294,6 +1278,7 @@ static int acpi_thermal_add(struct acpi_device *device)
 	memset(tz, 0, sizeof(struct acpi_thermal));
 
 	tz->handle = device->handle;
+	tz->device = device;
 	strcpy(tz->name, device->pnp.bus_id);
 	strcpy(acpi_device_name(device), ACPI_THERMAL_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_THERMAL_CLASS);
-- 
cgit v0.10.2


From 415985728895ba3127116bc4f999caf94420ed85 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:40 -0400
Subject: ACPI: power: add struct acpi_device to struct acpi_power_resource

- Use it instead of acpi_bus_get_device() where we can..

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 224f729..2b61719 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -71,6 +71,7 @@ static struct acpi_driver acpi_power_driver = {
 
 struct acpi_power_resource {
 	acpi_handle handle;
+	struct acpi_device * device;
 	acpi_bus_id name;
 	u32 system_level;
 	u32 order;
@@ -203,10 +204,8 @@ static int acpi_power_on(acpi_handle handle)
 		return -ENOEXEC;
 
 	/* Update the power resource's _device_ power state */
-	result = acpi_bus_get_device(resource->handle, &device);
-	if (result)
-		return result;
-	device->power.state = ACPI_STATE_D0;
+	device = resource->device;
+	resource->device->power.state = ACPI_STATE_D0;
 
 	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Resource [%s] turned on\n",
 			  resource->name));
@@ -253,9 +252,7 @@ static int acpi_power_off_device(acpi_handle handle)
 		return -ENOEXEC;
 
 	/* Update the power resource's _device_ power state */
-	result = acpi_bus_get_device(resource->handle, &device);
-	if (result)
-		return result;
+	device = resource->device;
 	device->power.state = ACPI_STATE_D3;
 
 	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Resource [%s] turned off\n",
@@ -545,6 +542,7 @@ static int acpi_power_add(struct acpi_device *device)
 	memset(resource, 0, sizeof(struct acpi_power_resource));
 
 	resource->handle = device->handle;
+	resource->device = device;
 	strcpy(resource->name, device->pnp.bus_id);
 	strcpy(acpi_device_name(device), ACPI_POWER_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_POWER_CLASS);
-- 
cgit v0.10.2


From e6afa0de1476290a876dfd1237a97cce7735581c Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:40 -0400
Subject: ACPI: video: add struct acpi_device to struct acpi_video_bus.

- Use it instead of acpi_bus_get_device() in acpi_video_bus_notify()
  and use the one from struct acpi_video_device in
  acpi_video_device_notify().

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 9feb633..cee9a50 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -118,6 +118,7 @@ struct acpi_video_enumerated_device {
 
 struct acpi_video_bus {
 	acpi_handle handle;
+	struct acpi_device *device;
 	u8 dos_setting;
 	struct acpi_video_enumerated_device *attached_array;
 	u8 attached_count;
@@ -1624,8 +1625,7 @@ static void acpi_video_bus_notify(acpi_handle handle, u32 event, void *data)
 	if (!video)
 		return;
 
-	if (acpi_bus_get_device(handle, &device))
-		return;
+	device = video->device;
 
 	switch (event) {
 	case ACPI_VIDEO_NOTIFY_SWITCH:	/* User request that a switch occur,
@@ -1668,8 +1668,7 @@ static void acpi_video_device_notify(acpi_handle handle, u32 event, void *data)
 	if (!video_device)
 		return;
 
-	if (acpi_bus_get_device(handle, &device))
-		return;
+	device = video_device->dev;
 
 	switch (event) {
 	case ACPI_VIDEO_NOTIFY_SWITCH:	/* change in status (cycle output device) */
@@ -1708,6 +1707,7 @@ static int acpi_video_bus_add(struct acpi_device *device)
 	memset(video, 0, sizeof(struct acpi_video_bus));
 
 	video->handle = device->handle;
+	video->device = device;
 	strcpy(acpi_device_name(device), ACPI_VIDEO_BUS_NAME);
 	strcpy(acpi_device_class(device), ACPI_VIDEO_CLASS);
 	acpi_driver_data(device) = video;
-- 
cgit v0.10.2


From a6ba5ebef91a59fabd45962e576c02468dbcd33f Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:41 -0400
Subject: ACPI: ac: Use acpi_device's handle instead of driver's

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index 206f56d..447de54 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -89,7 +89,7 @@ static int acpi_ac_get_state(struct acpi_ac *ac)
 	if (!ac)
 		return -EINVAL;
 
-	status = acpi_evaluate_integer(ac->handle, "_PSR", NULL, &ac->state);
+	status = acpi_evaluate_integer(ac->device->handle, "_PSR", NULL, &ac->state);
 	if (ACPI_FAILURE(status)) {
 		ACPI_EXCEPTION((AE_INFO, status, "Error reading AC Adapter state"));
 		ac->state = ACPI_AC_STATUS_UNKNOWN;
@@ -236,7 +236,7 @@ static int acpi_ac_add(struct acpi_device *device)
 	if (result)
 		goto end;
 
-	status = acpi_install_notify_handler(ac->handle,
+	status = acpi_install_notify_handler(device->handle,
 					     ACPI_DEVICE_NOTIFY, acpi_ac_notify,
 					     ac);
 	if (ACPI_FAILURE(status)) {
@@ -268,7 +268,7 @@ static int acpi_ac_remove(struct acpi_device *device, int type)
 
 	ac = (struct acpi_ac *)acpi_driver_data(device);
 
-	status = acpi_remove_notify_handler(ac->handle,
+	status = acpi_remove_notify_handler(device->handle,
 					    ACPI_DEVICE_NOTIFY, acpi_ac_notify);
 
 	acpi_ac_remove_fs(device);
-- 
cgit v0.10.2


From b863278523f7adbacb9e34133f4b6397cdab9977 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:41 -0400
Subject: ACPI: acpi_memhotplug: Use acpi_device's handle instead of driver's

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index c562224..a8860de 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -130,7 +130,7 @@ acpi_memory_get_device_resources(struct acpi_memory_device *mem_device)
 	struct acpi_memory_info *info, *n;
 
 
-	status = acpi_walk_resources(mem_device->handle, METHOD_NAME__CRS,
+	status = acpi_walk_resources(mem_device->device->handle, METHOD_NAME__CRS,
 				     acpi_memory_get_resource, mem_device);
 	if (ACPI_FAILURE(status)) {
 		list_for_each_entry_safe(info, n, &mem_device->res_list, list)
@@ -193,7 +193,7 @@ static int acpi_memory_check_device(struct acpi_memory_device *mem_device)
 
 
 	/* Get device present/absent information from the _STA */
-	if (ACPI_FAILURE(acpi_evaluate_integer(mem_device->handle, "_STA",
+	if (ACPI_FAILURE(acpi_evaluate_integer(mem_device->device->handle, "_STA",
 					       NULL, &current_status)))
 		return -ENODEV;
 	/*
@@ -223,7 +223,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
 		return result;
 	}
 
-	node = acpi_get_node(mem_device->handle);
+	node = acpi_get_node(mem_device->device->handle);
 	/*
 	 * Tell the VM there is more memory here...
 	 * Note: Assume that this function returns zero on success
@@ -270,7 +270,7 @@ static int acpi_memory_powerdown_device(struct acpi_memory_device *mem_device)
 	arg_list.pointer = &arg;
 	arg.type = ACPI_TYPE_INTEGER;
 	arg.integer.value = 1;
-	status = acpi_evaluate_object(mem_device->handle,
+	status = acpi_evaluate_object(mem_device->device->handle,
 				      "_EJ0", &arg_list, NULL);
 	/* Return on _EJ0 failure */
 	if (ACPI_FAILURE(status)) {
@@ -279,7 +279,7 @@ static int acpi_memory_powerdown_device(struct acpi_memory_device *mem_device)
 	}
 
 	/* Evalute _STA to check if the device is disabled */
-	status = acpi_evaluate_integer(mem_device->handle, "_STA",
+	status = acpi_evaluate_integer(mem_device->device->handle, "_STA",
 				       NULL, &current_status);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
@@ -399,7 +399,7 @@ static int acpi_memory_device_add(struct acpi_device *device)
 	memset(mem_device, 0, sizeof(struct acpi_memory_device));
 
 	INIT_LIST_HEAD(&mem_device->res_list);
-	mem_device->handle = device->handle;
+	mem_device->device->handle = device->handle;
 	mem_device->device = device;
 	sprintf(acpi_device_name(device), "%s", ACPI_MEMORY_DEVICE_NAME);
 	sprintf(acpi_device_class(device), "%s", ACPI_MEMORY_DEVICE_CLASS);
-- 
cgit v0.10.2


From 3b073ec3667ee63e35b66752a30eeedef1e1e772 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:41 -0400
Subject: ACPI: battery: Use acpi_device's handle instead of driver's

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index ba34ca6..5de735f 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -139,7 +139,7 @@ acpi_battery_get_info(struct acpi_battery *battery,
 
 	/* Evalute _BIF */
 
-	status = acpi_evaluate_object(battery->handle, "_BIF", NULL, &buffer);
+	status = acpi_evaluate_object(battery->device->handle, "_BIF", NULL, &buffer);
 	if (ACPI_FAILURE(status)) {
 		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _BIF"));
 		return -ENODEV;
@@ -199,7 +199,7 @@ acpi_battery_get_status(struct acpi_battery *battery,
 
 	/* Evalute _BST */
 
-	status = acpi_evaluate_object(battery->handle, "_BST", NULL, &buffer);
+	status = acpi_evaluate_object(battery->device->handle, "_BST", NULL, &buffer);
 	if (ACPI_FAILURE(status)) {
 		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _BST"));
 		return -ENODEV;
@@ -256,7 +256,7 @@ acpi_battery_set_alarm(struct acpi_battery *battery, unsigned long alarm)
 
 	arg0.integer.value = alarm;
 
-	status = acpi_evaluate_object(battery->handle, "_BTP", &arg_list, NULL);
+	status = acpi_evaluate_object(battery->device->handle, "_BTP", &arg_list, NULL);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
@@ -304,7 +304,7 @@ static int acpi_battery_check(struct acpi_battery *battery)
 
 		/* See if alarms are supported, and if so, set default */
 
-		status = acpi_get_handle(battery->handle, "_BTP", &handle);
+		status = acpi_get_handle(battery->device->handle, "_BTP", &handle);
 		if (ACPI_SUCCESS(status)) {
 			battery->flags.alarm = 1;
 			acpi_battery_set_alarm(battery, battery->trips.warning);
@@ -707,7 +707,7 @@ static int acpi_battery_add(struct acpi_device *device)
 	if (result)
 		goto end;
 
-	status = acpi_install_notify_handler(battery->handle,
+	status = acpi_install_notify_handler(device->handle,
 					     ACPI_DEVICE_NOTIFY,
 					     acpi_battery_notify, battery);
 	if (ACPI_FAILURE(status)) {
@@ -739,7 +739,7 @@ static int acpi_battery_remove(struct acpi_device *device, int type)
 
 	battery = (struct acpi_battery *)acpi_driver_data(device);
 
-	status = acpi_remove_notify_handler(battery->handle,
+	status = acpi_remove_notify_handler(device->handle,
 					    ACPI_DEVICE_NOTIFY,
 					    acpi_battery_notify);
 
-- 
cgit v0.10.2


From 27b1d3e85b1dfd9037d3fbb98b2e2aacca01da39 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:42 -0400
Subject: ACPI: button: Use acpi_device's handle instead of driver's

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index 0259463..eefb9e4 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -137,7 +137,7 @@ static int acpi_button_state_seq_show(struct seq_file *seq, void *offset)
 	if (!button || !button->device)
 		return 0;
 
-	status = acpi_evaluate_integer(button->handle, "_LID", NULL, &state);
+	status = acpi_evaluate_integer(button->device->handle, "_LID", NULL, &state);
 	if (ACPI_FAILURE(status)) {
 		seq_printf(seq, "state:      unsupported\n");
 	} else {
@@ -282,7 +282,7 @@ static acpi_status acpi_button_notify_fixed(void *data)
 	if (!button)
 		return AE_BAD_PARAMETER;
 
-	acpi_button_notify(button->handle, ACPI_BUTTON_NOTIFY_STATUS, button);
+	acpi_button_notify(button->device->handle, ACPI_BUTTON_NOTIFY_STATUS, button);
 
 	return AE_OK;
 }
@@ -362,7 +362,7 @@ static int acpi_button_add(struct acpi_device *device)
 						     button);
 		break;
 	default:
-		status = acpi_install_notify_handler(button->handle,
+		status = acpi_install_notify_handler(device->handle,
 						     ACPI_DEVICE_NOTIFY,
 						     acpi_button_notify,
 						     button);
@@ -420,7 +420,7 @@ static int acpi_button_remove(struct acpi_device *device, int type)
 						    acpi_button_notify_fixed);
 		break;
 	default:
-		status = acpi_remove_notify_handler(button->handle,
+		status = acpi_remove_notify_handler(device->handle,
 						    ACPI_DEVICE_NOTIFY,
 						    acpi_button_notify);
 		break;
-- 
cgit v0.10.2


From dc8c2b2744f8563aa5feb07488e4cc207a70ac70 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:42 -0400
Subject: ACPI: fan: Use acpi_device's handle instead of driver's

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
index 8abf5ac..a56acdb 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c
@@ -81,7 +81,7 @@ static int acpi_fan_read_state(struct seq_file *seq, void *offset)
 
 
 	if (fan) {
-		if (acpi_bus_get_power(fan->handle, &state))
+		if (acpi_bus_get_power(fan->device->handle, &state))
 			seq_printf(seq, "status:                  ERROR\n");
 		else
 			seq_printf(seq, "status:                  %s\n",
@@ -113,7 +113,7 @@ acpi_fan_write_state(struct file *file, const char __user * buffer,
 
 	state_string[count] = '\0';
 
-	result = acpi_bus_set_power(fan->handle,
+	result = acpi_bus_set_power(fan->device->handle,
 				    simple_strtoul(state_string, NULL, 0));
 	if (result)
 		return result;
@@ -198,7 +198,7 @@ static int acpi_fan_add(struct acpi_device *device)
 	strcpy(acpi_device_class(device), ACPI_FAN_CLASS);
 	acpi_driver_data(device) = fan;
 
-	result = acpi_bus_get_power(fan->handle, &state);
+	result = acpi_bus_get_power(device->handle, &state);
 	if (result) {
 		printk(KERN_ERR PREFIX "Reading power state\n");
 		goto end;
-- 
cgit v0.10.2


From 67a7136573b24a0d1f85a4aab131558a02910d25 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:42 -0400
Subject: ACPI: pci_link: Use acpi_device's handle instead of driver's

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index 1badce2..68ee80a 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -175,7 +175,7 @@ static int acpi_pci_link_get_possible(struct acpi_pci_link *link)
 	if (!link)
 		return -EINVAL;
 
-	status = acpi_walk_resources(link->handle, METHOD_NAME__PRS,
+	status = acpi_walk_resources(link->device->handle, METHOD_NAME__PRS,
 				     acpi_pci_link_check_possible, link);
 	if (ACPI_FAILURE(status)) {
 		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PRS"));
@@ -274,7 +274,7 @@ static int acpi_pci_link_get_current(struct acpi_pci_link *link)
 	 * Query and parse _CRS to get the current IRQ assignment. 
 	 */
 
-	status = acpi_walk_resources(link->handle, METHOD_NAME__CRS,
+	status = acpi_walk_resources(link->device->handle, METHOD_NAME__CRS,
 				     acpi_pci_link_check_current, &irq);
 	if (ACPI_FAILURE(status)) {
 		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _CRS"));
@@ -360,7 +360,7 @@ static int acpi_pci_link_set(struct acpi_pci_link *link, int irq)
 	resource->end.type = ACPI_RESOURCE_TYPE_END_TAG;
 
 	/* Attempt to set the resource */
-	status = acpi_set_current_resources(link->handle, &buffer);
+	status = acpi_set_current_resources(link->device->handle, &buffer);
 
 	/* check for total failure */
 	if (ACPI_FAILURE(status)) {
@@ -699,7 +699,7 @@ int acpi_pci_link_free_irq(acpi_handle handle)
 			  acpi_device_bid(link->device)));
 
 	if (link->refcnt == 0) {
-		acpi_ut_evaluate_object(link->handle, "_DIS", 0, NULL);
+		acpi_ut_evaluate_object(link->device->handle, "_DIS", 0, NULL);
 	}
 	mutex_unlock(&acpi_link_lock);
 	return (link->irq.active);
@@ -765,7 +765,7 @@ static int acpi_pci_link_add(struct acpi_device *device)
 
       end:
 	/* disable all links -- to be activated on use */
-	acpi_ut_evaluate_object(link->handle, "_DIS", 0, NULL);
+	acpi_ut_evaluate_object(device->handle, "_DIS", 0, NULL);
 	mutex_unlock(&acpi_link_lock);
 
 	if (result)
-- 
cgit v0.10.2


From 2d1e0a02f16f84c2358843d91d6ca0131a0587ce Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:43 -0400
Subject: ACPI: pci_root: Use acpi_device's handle instead of driver's

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 2d63a38..22ca3a1 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -84,7 +84,7 @@ int acpi_pci_register_driver(struct acpi_pci_driver *driver)
 	list_for_each(entry, &acpi_pci_roots) {
 		struct acpi_pci_root *root;
 		root = list_entry(entry, struct acpi_pci_root, node);
-		driver->add(root->handle);
+		driver->add(root->device->handle);
 		n++;
 	}
 
@@ -111,7 +111,7 @@ void acpi_pci_unregister_driver(struct acpi_pci_driver *driver)
 	list_for_each(entry, &acpi_pci_roots) {
 		struct acpi_pci_root *root;
 		root = list_entry(entry, struct acpi_pci_root, node);
-		driver->remove(root->handle);
+		driver->remove(root->device->handle);
 	}
 }
 
@@ -187,7 +187,7 @@ static int acpi_pci_root_add(struct acpi_device *device)
 	 * -------
 	 * Obtained via _SEG, if exists, otherwise assumed to be zero (0).
 	 */
-	status = acpi_evaluate_integer(root->handle, METHOD_NAME__SEG, NULL,
+	status = acpi_evaluate_integer(device->handle, METHOD_NAME__SEG, NULL,
 				       &value);
 	switch (status) {
 	case AE_OK:
@@ -209,7 +209,7 @@ static int acpi_pci_root_add(struct acpi_device *device)
 	 * ---
 	 * Obtained via _BBN, if exists, otherwise assumed to be zero (0).
 	 */
-	status = acpi_evaluate_integer(root->handle, METHOD_NAME__BBN, NULL,
+	status = acpi_evaluate_integer(device->handle, METHOD_NAME__BBN, NULL,
 				       &value);
 	switch (status) {
 	case AE_OK:
@@ -236,7 +236,7 @@ static int acpi_pci_root_add(struct acpi_device *device)
 				    "Wrong _BBN value, reboot"
 				    " and use option 'pci=noacpi'\n");
 
-			status = try_get_root_bridge_busnr(root->handle, &bus);
+			status = try_get_root_bridge_busnr(device->handle, &bus);
 			if (ACPI_FAILURE(status))
 				break;
 			if (bus != root->id.bus) {
@@ -296,9 +296,9 @@ static int acpi_pci_root_add(struct acpi_device *device)
 	 * -----------------
 	 * Evaluate and parse _PRT, if exists.
 	 */
-	status = acpi_get_handle(root->handle, METHOD_NAME__PRT, &handle);
+	status = acpi_get_handle(device->handle, METHOD_NAME__PRT, &handle);
 	if (ACPI_SUCCESS(status))
-		result = acpi_pci_irq_add_prt(root->handle, root->id.segment,
+		result = acpi_pci_irq_add_prt(device->handle, root->id.segment,
 					      root->id.bus);
 
       end:
-- 
cgit v0.10.2


From 5fbc19efdbedf9c9125774f66f80d6a6ccce4566 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:43 -0400
Subject: ACPI: power: Use acpi_device's handle instead of driver's

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 2b61719..2144289 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -125,7 +125,7 @@ static int acpi_power_get_state(struct acpi_power_resource *resource)
 	if (!resource)
 		return -EINVAL;
 
-	status = acpi_evaluate_integer(resource->handle, "_STA", NULL, &sta);
+	status = acpi_evaluate_integer(resource->device->handle, "_STA", NULL, &sta);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
@@ -193,7 +193,7 @@ static int acpi_power_on(acpi_handle handle)
 		return 0;
 	}
 
-	status = acpi_evaluate_object(resource->handle, "_ON", NULL, NULL);
+	status = acpi_evaluate_object(resource->device->handle, "_ON", NULL, NULL);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
@@ -241,7 +241,7 @@ static int acpi_power_off_device(acpi_handle handle)
 		return 0;
 	}
 
-	status = acpi_evaluate_object(resource->handle, "_OFF", NULL, NULL);
+	status = acpi_evaluate_object(resource->device->handle, "_OFF", NULL, NULL);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
@@ -549,7 +549,7 @@ static int acpi_power_add(struct acpi_device *device)
 	acpi_driver_data(device) = resource;
 
 	/* Evalute the object to get the system level and resource order. */
-	status = acpi_evaluate_object(resource->handle, NULL, NULL, &buffer);
+	status = acpi_evaluate_object(device->handle, NULL, NULL, &buffer);
 	if (ACPI_FAILURE(status)) {
 		result = -ENODEV;
 		goto end;
-- 
cgit v0.10.2


From 38ba7c9ed2e1a222103332031f76c28b726573f5 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:48 -0400
Subject: ACPI: thermal: Use acpi_device's handle instead of driver's

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 1d8c250..56358e1 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -230,7 +230,7 @@ static int acpi_thermal_get_temperature(struct acpi_thermal *tz)
 	tz->last_temperature = tz->temperature;
 
 	status =
-	    acpi_evaluate_integer(tz->handle, "_TMP", NULL, &tz->temperature);
+	    acpi_evaluate_integer(tz->device->handle, "_TMP", NULL, &tz->temperature);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
@@ -249,7 +249,7 @@ static int acpi_thermal_get_polling_frequency(struct acpi_thermal *tz)
 		return -EINVAL;
 
 	status =
-	    acpi_evaluate_integer(tz->handle, "_TZP", NULL,
+	    acpi_evaluate_integer(tz->device->handle, "_TZP", NULL,
 				  &tz->polling_frequency);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
@@ -286,7 +286,7 @@ static int acpi_thermal_set_cooling_mode(struct acpi_thermal *tz, int mode)
 	if (!tz)
 		return -EINVAL;
 
-	status = acpi_get_handle(tz->handle, "_SCP", &handle);
+	status = acpi_get_handle(tz->device->handle, "_SCP", &handle);
 	if (ACPI_FAILURE(status)) {
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "_SCP not present\n"));
 		return -ENODEV;
@@ -317,7 +317,7 @@ static int acpi_thermal_get_trip_points(struct acpi_thermal *tz)
 
 	/* Critical Shutdown (required) */
 
-	status = acpi_evaluate_integer(tz->handle, "_CRT", NULL,
+	status = acpi_evaluate_integer(tz->device->handle, "_CRT", NULL,
 				       &tz->trips.critical.temperature);
 	if (ACPI_FAILURE(status)) {
 		tz->trips.critical.flags.valid = 0;
@@ -333,7 +333,7 @@ static int acpi_thermal_get_trip_points(struct acpi_thermal *tz)
 	/* Critical Sleep (optional) */
 
 	status =
-	    acpi_evaluate_integer(tz->handle, "_HOT", NULL,
+	    acpi_evaluate_integer(tz->device->handle, "_HOT", NULL,
 				  &tz->trips.hot.temperature);
 	if (ACPI_FAILURE(status)) {
 		tz->trips.hot.flags.valid = 0;
@@ -347,7 +347,7 @@ static int acpi_thermal_get_trip_points(struct acpi_thermal *tz)
 	/* Passive: Processors (optional) */
 
 	status =
-	    acpi_evaluate_integer(tz->handle, "_PSV", NULL,
+	    acpi_evaluate_integer(tz->device->handle, "_PSV", NULL,
 				  &tz->trips.passive.temperature);
 	if (ACPI_FAILURE(status)) {
 		tz->trips.passive.flags.valid = 0;
@@ -356,25 +356,25 @@ static int acpi_thermal_get_trip_points(struct acpi_thermal *tz)
 		tz->trips.passive.flags.valid = 1;
 
 		status =
-		    acpi_evaluate_integer(tz->handle, "_TC1", NULL,
+		    acpi_evaluate_integer(tz->device->handle, "_TC1", NULL,
 					  &tz->trips.passive.tc1);
 		if (ACPI_FAILURE(status))
 			tz->trips.passive.flags.valid = 0;
 
 		status =
-		    acpi_evaluate_integer(tz->handle, "_TC2", NULL,
+		    acpi_evaluate_integer(tz->device->handle, "_TC2", NULL,
 					  &tz->trips.passive.tc2);
 		if (ACPI_FAILURE(status))
 			tz->trips.passive.flags.valid = 0;
 
 		status =
-		    acpi_evaluate_integer(tz->handle, "_TSP", NULL,
+		    acpi_evaluate_integer(tz->device->handle, "_TSP", NULL,
 					  &tz->trips.passive.tsp);
 		if (ACPI_FAILURE(status))
 			tz->trips.passive.flags.valid = 0;
 
 		status =
-		    acpi_evaluate_reference(tz->handle, "_PSL", NULL,
+		    acpi_evaluate_reference(tz->device->handle, "_PSL", NULL,
 					    &tz->trips.passive.devices);
 		if (ACPI_FAILURE(status))
 			tz->trips.passive.flags.valid = 0;
@@ -394,14 +394,14 @@ static int acpi_thermal_get_trip_points(struct acpi_thermal *tz)
 		char name[5] = { '_', 'A', 'C', ('0' + i), '\0' };
 
 		status =
-		    acpi_evaluate_integer(tz->handle, name, NULL,
+		    acpi_evaluate_integer(tz->device->handle, name, NULL,
 					  &tz->trips.active[i].temperature);
 		if (ACPI_FAILURE(status))
 			break;
 
 		name[2] = 'L';
 		status =
-		    acpi_evaluate_reference(tz->handle, name, NULL,
+		    acpi_evaluate_reference(tz->device->handle, name, NULL,
 					    &tz->trips.active[i].devices);
 		if (ACPI_SUCCESS(status)) {
 			tz->trips.active[i].flags.valid = 1;
@@ -425,7 +425,7 @@ static int acpi_thermal_get_devices(struct acpi_thermal *tz)
 		return -EINVAL;
 
 	status =
-	    acpi_evaluate_reference(tz->handle, "_TZD", NULL, &tz->devices);
+	    acpi_evaluate_reference(tz->device->handle, "_TZD", NULL, &tz->devices);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
@@ -1296,7 +1296,7 @@ static int acpi_thermal_add(struct acpi_device *device)
 
 	acpi_thermal_check(tz);
 
-	status = acpi_install_notify_handler(tz->handle,
+	status = acpi_install_notify_handler(device->handle,
 					     ACPI_DEVICE_NOTIFY,
 					     acpi_thermal_notify, tz);
 	if (ACPI_FAILURE(status)) {
@@ -1337,7 +1337,7 @@ static int acpi_thermal_remove(struct acpi_device *device, int type)
 	/* deferred task may reinsert timer */
 	del_timer_sync(&(tz->timer));
 
-	status = acpi_remove_notify_handler(tz->handle,
+	status = acpi_remove_notify_handler(device->handle,
 					    ACPI_DEVICE_NOTIFY,
 					    acpi_thermal_notify);
 
-- 
cgit v0.10.2


From 901302688cb85b49a9551ec1f6aa86fb081ae49e Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:48 -0400
Subject: ACPI: video: Use acpi_device's handle instead of driver's

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index cee9a50..862cefa 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -273,7 +273,8 @@ static int
 acpi_video_device_query(struct acpi_video_device *device, unsigned long *state)
 {
 	int status;
-	status = acpi_evaluate_integer(device->handle, "_DGS", NULL, state);
+
+	status = acpi_evaluate_integer(device->dev->handle, "_DGS", NULL, state);
 
 	return status;
 }
@@ -284,8 +285,7 @@ acpi_video_device_get_state(struct acpi_video_device *device,
 {
 	int status;
 
-
-	status = acpi_evaluate_integer(device->handle, "_DCS", NULL, state);
+	status = acpi_evaluate_integer(device->dev->handle, "_DCS", NULL, state);
 
 	return status;
 }
@@ -300,7 +300,7 @@ acpi_video_device_set_state(struct acpi_video_device *device, int state)
 
 
 	arg0.integer.value = state;
-	status = acpi_evaluate_integer(device->handle, "_DSS", &args, &ret);
+	status = acpi_evaluate_integer(device->dev->handle, "_DSS", &args, &ret);
 
 	return status;
 }
@@ -316,7 +316,7 @@ acpi_video_device_lcd_query_levels(struct acpi_video_device *device,
 
 	*levels = NULL;
 
-	status = acpi_evaluate_object(device->handle, "_BCL", NULL, &buffer);
+	status = acpi_evaluate_object(device->dev->handle, "_BCL", NULL, &buffer);
 	if (!ACPI_SUCCESS(status))
 		return status;
 	obj = (union acpi_object *)buffer.pointer;
@@ -345,7 +345,7 @@ acpi_video_device_lcd_set_level(struct acpi_video_device *device, int level)
 
 
 	arg0.integer.value = level;
-	status = acpi_evaluate_object(device->handle, "_BCM", &args, NULL);
+	status = acpi_evaluate_object(device->dev->handle, "_BCM", &args, NULL);
 
 	printk(KERN_DEBUG "set_level status: %x\n", status);
 	return status;
@@ -357,7 +357,7 @@ acpi_video_device_lcd_get_level_current(struct acpi_video_device *device,
 {
 	int status;
 
-	status = acpi_evaluate_integer(device->handle, "_BQC", NULL, level);
+	status = acpi_evaluate_integer(device->dev->handle, "_BQC", NULL, level);
 
 	return status;
 }
@@ -384,7 +384,7 @@ acpi_video_device_EDID(struct acpi_video_device *device,
 	else
 		return -EINVAL;
 
-	status = acpi_evaluate_object(device->handle, "_DDC", &args, &buffer);
+	status = acpi_evaluate_object(device->dev->handle, "_DDC", &args, &buffer);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
@@ -414,7 +414,7 @@ acpi_video_bus_set_POST(struct acpi_video_bus *video, unsigned long option)
 
 	arg0.integer.value = option;
 
-	status = acpi_evaluate_integer(video->handle, "_SPD", &args, &tmp);
+	status = acpi_evaluate_integer(video->device->handle, "_SPD", &args, &tmp);
 	if (ACPI_SUCCESS(status))
 		status = tmp ? (-EINVAL) : (AE_OK);
 
@@ -426,8 +426,7 @@ acpi_video_bus_get_POST(struct acpi_video_bus *video, unsigned long *id)
 {
 	int status;
 
-
-	status = acpi_evaluate_integer(video->handle, "_GPD", NULL, id);
+	status = acpi_evaluate_integer(video->device->handle, "_GPD", NULL, id);
 
 	return status;
 }
@@ -438,7 +437,7 @@ acpi_video_bus_POST_options(struct acpi_video_bus *video,
 {
 	int status;
 
-	status = acpi_evaluate_integer(video->handle, "_VPO", NULL, options);
+	status = acpi_evaluate_integer(video->device->handle, "_VPO", NULL, options);
 	*options &= 3;
 
 	return status;
@@ -479,7 +478,7 @@ acpi_video_bus_DOS(struct acpi_video_bus *video, int bios_flag, int lcd_flag)
 	}
 	arg0.integer.value = (lcd_flag << 2) | bios_flag;
 	video->dos_setting = arg0.integer.value;
-	acpi_evaluate_object(video->handle, "_DOS", &args, NULL);
+	acpi_evaluate_object(video->device->handle, "_DOS", &args, NULL);
 
       Failed:
 	return status;
@@ -507,25 +506,25 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 
 	memset(&device->cap, 0, 4);
 
-	if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_ADR", &h_dummy1))) {
+	if (ACPI_SUCCESS(acpi_get_handle(device->dev->handle, "_ADR", &h_dummy1))) {
 		device->cap._ADR = 1;
 	}
-	if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_BCL", &h_dummy1))) {
+	if (ACPI_SUCCESS(acpi_get_handle(device->dev->handle, "_BCL", &h_dummy1))) {
 		device->cap._BCL = 1;
 	}
-	if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_BCM", &h_dummy1))) {
+	if (ACPI_SUCCESS(acpi_get_handle(device->dev->handle, "_BCM", &h_dummy1))) {
 		device->cap._BCM = 1;
 	}
-	if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_DDC", &h_dummy1))) {
+	if (ACPI_SUCCESS(acpi_get_handle(device->dev->handle, "_DDC", &h_dummy1))) {
 		device->cap._DDC = 1;
 	}
-	if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_DCS", &h_dummy1))) {
+	if (ACPI_SUCCESS(acpi_get_handle(device->dev->handle, "_DCS", &h_dummy1))) {
 		device->cap._DCS = 1;
 	}
-	if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_DGS", &h_dummy1))) {
+	if (ACPI_SUCCESS(acpi_get_handle(device->dev->handle, "_DGS", &h_dummy1))) {
 		device->cap._DGS = 1;
 	}
-	if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_DSS", &h_dummy1))) {
+	if (ACPI_SUCCESS(acpi_get_handle(device->dev->handle, "_DSS", &h_dummy1))) {
 		device->cap._DSS = 1;
 	}
 
@@ -589,22 +588,22 @@ static void acpi_video_bus_find_cap(struct acpi_video_bus *video)
 	acpi_handle h_dummy1;
 
 	memset(&video->cap, 0, 4);
-	if (ACPI_SUCCESS(acpi_get_handle(video->handle, "_DOS", &h_dummy1))) {
+	if (ACPI_SUCCESS(acpi_get_handle(video->device->handle, "_DOS", &h_dummy1))) {
 		video->cap._DOS = 1;
 	}
-	if (ACPI_SUCCESS(acpi_get_handle(video->handle, "_DOD", &h_dummy1))) {
+	if (ACPI_SUCCESS(acpi_get_handle(video->device->handle, "_DOD", &h_dummy1))) {
 		video->cap._DOD = 1;
 	}
-	if (ACPI_SUCCESS(acpi_get_handle(video->handle, "_ROM", &h_dummy1))) {
+	if (ACPI_SUCCESS(acpi_get_handle(video->device->handle, "_ROM", &h_dummy1))) {
 		video->cap._ROM = 1;
 	}
-	if (ACPI_SUCCESS(acpi_get_handle(video->handle, "_GPD", &h_dummy1))) {
+	if (ACPI_SUCCESS(acpi_get_handle(video->device->handle, "_GPD", &h_dummy1))) {
 		video->cap._GPD = 1;
 	}
-	if (ACPI_SUCCESS(acpi_get_handle(video->handle, "_SPD", &h_dummy1))) {
+	if (ACPI_SUCCESS(acpi_get_handle(video->device->handle, "_SPD", &h_dummy1))) {
 		video->cap._SPD = 1;
 	}
-	if (ACPI_SUCCESS(acpi_get_handle(video->handle, "_VPO", &h_dummy1))) {
+	if (ACPI_SUCCESS(acpi_get_handle(video->device->handle, "_VPO", &h_dummy1))) {
 		video->cap._VPO = 1;
 	}
 }
@@ -1299,7 +1298,7 @@ acpi_video_bus_get_one_device(struct acpi_device *device,
 		acpi_video_device_bind(video, data);
 		acpi_video_device_find_cap(data);
 
-		status = acpi_install_notify_handler(data->handle,
+		status = acpi_install_notify_handler(device->handle,
 						     ACPI_DEVICE_NOTIFY,
 						     acpi_video_device_notify,
 						     data);
@@ -1401,8 +1400,7 @@ static int acpi_video_device_enumerate(struct acpi_video_bus *video)
 	union acpi_object *dod = NULL;
 	union acpi_object *obj;
 
-
-	status = acpi_evaluate_object(video->handle, "_DOD", NULL, &buffer);
+	status = acpi_evaluate_object(video->device->handle, "_DOD", NULL, &buffer);
 	if (!ACPI_SUCCESS(status)) {
 		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _DOD"));
 		return status;
@@ -1570,7 +1568,7 @@ static int acpi_video_bus_put_one_device(struct acpi_video_device *device)
 	up(&video->sem);
 	acpi_video_device_remove_fs(device->dev);
 
-	status = acpi_remove_notify_handler(device->handle,
+	status = acpi_remove_notify_handler(device->dev->handle,
 					    ACPI_DEVICE_NOTIFY,
 					    acpi_video_device_notify);
 
@@ -1727,7 +1725,7 @@ static int acpi_video_bus_add(struct acpi_device *device)
 	acpi_video_bus_get_devices(video, device);
 	acpi_video_bus_start_devices(video);
 
-	status = acpi_install_notify_handler(video->handle,
+	status = acpi_install_notify_handler(device->handle,
 					     ACPI_DEVICE_NOTIFY,
 					     acpi_video_bus_notify, video);
 	if (ACPI_FAILURE(status)) {
@@ -1767,7 +1765,7 @@ static int acpi_video_bus_remove(struct acpi_device *device, int type)
 
 	acpi_video_bus_stop_devices(video);
 
-	status = acpi_remove_notify_handler(video->handle,
+	status = acpi_remove_notify_handler(video->device->handle,
 					    ACPI_DEVICE_NOTIFY,
 					    acpi_video_bus_notify);
 
-- 
cgit v0.10.2


From 1b5b8b81bddd1c5dcf690f43422e20b0e964c349 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:49 -0400
Subject: ACPI: ac: Remove unneeded acpi_handle from driver.

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index 447de54..4537ae4 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -65,7 +65,6 @@ static struct acpi_driver acpi_ac_driver = {
 };
 
 struct acpi_ac {
-	acpi_handle handle;
 	struct acpi_device * device;
 	unsigned long state;
 };
@@ -222,7 +221,6 @@ static int acpi_ac_add(struct acpi_device *device)
 		return -ENOMEM;
 	memset(ac, 0, sizeof(struct acpi_ac));
 
-	ac->handle = device->handle;
 	ac->device = device;
 	strcpy(acpi_device_name(device), ACPI_AC_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_AC_CLASS);
-- 
cgit v0.10.2


From 9453ece92688fedd7755d2ea54b2efe88822a91b Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:49 -0400
Subject: ACPI: acpi_memhotplug: Remove unneeded acpi_handle from driver.

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index a8860de..9786314 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -80,7 +80,6 @@ struct acpi_memory_info {
 };
 
 struct acpi_memory_device {
-	acpi_handle handle;
 	struct acpi_device * device;
 	unsigned int state;	/* State of the memory device */
 	struct list_head res_list;
@@ -399,7 +398,6 @@ static int acpi_memory_device_add(struct acpi_device *device)
 	memset(mem_device, 0, sizeof(struct acpi_memory_device));
 
 	INIT_LIST_HEAD(&mem_device->res_list);
-	mem_device->device->handle = device->handle;
 	mem_device->device = device;
 	sprintf(acpi_device_name(device), "%s", ACPI_MEMORY_DEVICE_NAME);
 	sprintf(acpi_device_class(device), "%s", ACPI_MEMORY_DEVICE_CLASS);
-- 
cgit v0.10.2


From 39cb61e26771891f843cb433ee6febd9159bce73 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:49 -0400
Subject: ACPI: battery: Remove unneeded acpi_handle from driver.

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 5de735f..6ce9330 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -108,7 +108,6 @@ struct acpi_battery_trips {
 };
 
 struct acpi_battery {
-	acpi_handle handle;
 	struct acpi_device * device;
 	struct acpi_battery_flags flags;
 	struct acpi_battery_trips trips;
@@ -693,7 +692,6 @@ static int acpi_battery_add(struct acpi_device *device)
 		return -ENOMEM;
 	memset(battery, 0, sizeof(struct acpi_battery));
 
-	battery->handle = device->handle;
 	battery->device = device;
 	strcpy(acpi_device_name(device), ACPI_BATTERY_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_BATTERY_CLASS);
-- 
cgit v0.10.2


From 6c689537726ec665246d2f60c48475be2efac2d0 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:50 -0400
Subject: ACPI: button: Remove unneeded acpi_handle from driver.

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index eefb9e4..fd1ba05 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -82,7 +82,6 @@ static struct acpi_driver acpi_button_driver = {
 };
 
 struct acpi_button {
-	acpi_handle handle;
 	struct acpi_device *device;	/* Fixed button kludge */
 	u8 type;
 	unsigned long pushed;
@@ -303,7 +302,6 @@ static int acpi_button_add(struct acpi_device *device)
 	memset(button, 0, sizeof(struct acpi_button));
 
 	button->device = device;
-	button->handle = device->handle;
 	acpi_driver_data(device) = button;
 
 	/*
-- 
cgit v0.10.2


From 579c896cc91434e4feb938f780eba580c93fa0da Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:50 -0400
Subject: ACPI: fan: Remove unneeded acpi_handle from driver.

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
index a56acdb..daed246 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c
@@ -64,7 +64,6 @@ static struct acpi_driver acpi_fan_driver = {
 };
 
 struct acpi_fan {
-	acpi_handle handle;
 	struct acpi_device * device;
 };
 
@@ -192,7 +191,6 @@ static int acpi_fan_add(struct acpi_device *device)
 		return -ENOMEM;
 	memset(fan, 0, sizeof(struct acpi_fan));
 
-	fan->handle = device->handle;
 	fan->device = device;
 	strcpy(acpi_device_name(device), "Fan");
 	strcpy(acpi_device_class(device), ACPI_FAN_CLASS);
-- 
cgit v0.10.2


From e0e4e117d4c898b0df749d5b88c86955151abf53 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:50 -0400
Subject: ACPI: pci_link: Remove unneeded acpi_handle from driver.

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index 68ee80a..8197c0e 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -83,7 +83,6 @@ struct acpi_pci_link_irq {
 struct acpi_pci_link {
 	struct list_head node;
 	struct acpi_device *device;
-	acpi_handle handle;
 	struct acpi_pci_link_irq irq;
 	int refcnt;
 };
@@ -249,8 +248,7 @@ static int acpi_pci_link_get_current(struct acpi_pci_link *link)
 	acpi_status status = AE_OK;
 	int irq = 0;
 
-
-	if (!link || !link->handle)
+	if (!link)
 		return -EINVAL;
 
 	link->irq.active = 0;
@@ -726,7 +724,6 @@ static int acpi_pci_link_add(struct acpi_device *device)
 	memset(link, 0, sizeof(struct acpi_pci_link));
 
 	link->device = device;
-	link->handle = device->handle;
 	strcpy(acpi_device_name(device), ACPI_PCI_LINK_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_PCI_LINK_CLASS);
 	acpi_driver_data(device) = link;
-- 
cgit v0.10.2


From 432bfaba7d4e70483fc5af164e020066f4921bff Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:51 -0400
Subject: ACPI: pci_root: Remove unneeded acpi_handle from driver.

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 22ca3a1..0984a1e 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -58,7 +58,6 @@ static struct acpi_driver acpi_pci_root_driver = {
 
 struct acpi_pci_root {
 	struct list_head node;
-	acpi_handle handle;
 	struct acpi_device * device;
 	struct acpi_pci_id id;
 	struct pci_bus *bus;
@@ -171,7 +170,6 @@ static int acpi_pci_root_add(struct acpi_device *device)
 	memset(root, 0, sizeof(struct acpi_pci_root));
 	INIT_LIST_HEAD(&root->node);
 
-	root->handle = device->handle;
 	root->device = device;
 	strcpy(acpi_device_name(device), ACPI_PCI_ROOT_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_PCI_ROOT_CLASS);
@@ -317,7 +315,7 @@ static int acpi_pci_root_start(struct acpi_device *device)
 
 
 	list_for_each_entry(root, &acpi_pci_roots, node) {
-		if (root->handle == device->handle) {
+		if (root->device == device) {
 			pci_bus_add_devices(root->bus);
 			return 0;
 		}
-- 
cgit v0.10.2


From 14747204055d4b8fb2f8517beca91985ac617c17 Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:51 -0400
Subject: ACPI: power: Remove unneeded acpi_handle from driver.

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 2144289..5d3447f 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -70,7 +70,6 @@ static struct acpi_driver acpi_power_driver = {
 };
 
 struct acpi_power_resource {
-	acpi_handle handle;
 	struct acpi_device * device;
 	acpi_bus_id name;
 	u32 system_level;
@@ -541,7 +540,6 @@ static int acpi_power_add(struct acpi_device *device)
 		return -ENOMEM;
 	memset(resource, 0, sizeof(struct acpi_power_resource));
 
-	resource->handle = device->handle;
 	resource->device = device;
 	strcpy(resource->name, device->pnp.bus_id);
 	strcpy(acpi_device_name(device), ACPI_POWER_DEVICE_NAME);
-- 
cgit v0.10.2


From 8a4444bf5a3fd890441e6cbd5022a3c24edbe69a Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:51 -0400
Subject: ACPI: thermal: Remove unneeded acpi_handle from driver.

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 56358e1..503c0b9 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -162,7 +162,6 @@ struct acpi_thermal_flags {
 };
 
 struct acpi_thermal {
-	acpi_handle handle;
 	struct acpi_device * device;
 	acpi_bus_id name;
 	unsigned long temperature;
@@ -1277,7 +1276,6 @@ static int acpi_thermal_add(struct acpi_device *device)
 		return -ENOMEM;
 	memset(tz, 0, sizeof(struct acpi_thermal));
 
-	tz->handle = device->handle;
 	tz->device = device;
 	strcpy(tz->name, device->pnp.bus_id);
 	strcpy(acpi_device_name(device), ACPI_THERMAL_DEVICE_NAME);
-- 
cgit v0.10.2


From d07a8577f695c807977af003b6e75f996e01a15f Mon Sep 17 00:00:00 2001
From: Patrick Mochel <mochel@linux.intel.com>
Date: Fri, 19 May 2006 16:54:52 -0400
Subject: ACPI: video: Remove unneeded acpi_handle from driver.

Signed-off-by: Patrick Mochel <mochel@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 862cefa..7459ca5 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -117,7 +117,6 @@ struct acpi_video_enumerated_device {
 };
 
 struct acpi_video_bus {
-	acpi_handle handle;
 	struct acpi_device *device;
 	u8 dos_setting;
 	struct acpi_video_enumerated_device *attached_array;
@@ -156,7 +155,6 @@ struct acpi_video_device_brightness {
 };
 
 struct acpi_video_device {
-	acpi_handle handle;
 	unsigned long device_id;
 	struct acpi_video_device_flags flags;
 	struct acpi_video_device_cap cap;
@@ -1271,7 +1269,6 @@ acpi_video_bus_get_one_device(struct acpi_device *device,
 
 		memset(data, 0, sizeof(struct acpi_video_device));
 
-		data->handle = device->handle;
 		strcpy(acpi_device_name(device), ACPI_VIDEO_DEVICE_NAME);
 		strcpy(acpi_device_class(device), ACPI_VIDEO_CLASS);
 		acpi_driver_data(device) = data;
@@ -1704,7 +1701,6 @@ static int acpi_video_bus_add(struct acpi_device *device)
 		return -ENOMEM;
 	memset(video, 0, sizeof(struct acpi_video_bus));
 
-	video->handle = device->handle;
 	video->device = device;
 	strcpy(acpi_device_name(device), ACPI_VIDEO_BUS_NAME);
 	strcpy(acpi_device_class(device), ACPI_VIDEO_CLASS);
-- 
cgit v0.10.2


From 02438d8771ae6a4b215938959827692026380bf9 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 30 Jun 2006 03:19:10 -0400
Subject: ACPI: delete acpi_os_free(), use kfree() directly

Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/arch/ia64/kernel/acpi-ext.c b/arch/ia64/kernel/acpi-ext.c
index fff8292..ccd0165 100644
--- a/arch/ia64/kernel/acpi-ext.c
+++ b/arch/ia64/kernel/acpi-ext.c
@@ -51,7 +51,7 @@ static acpi_status hp_ccsr_locate(acpi_handle obj, u64 *base, u64 *length)
 	memcpy(length, vendor->byte_data + 8, sizeof(*length));
 
   exit:
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 	return status;
 }
 
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index ca16d95..c92c0aa 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -857,7 +857,7 @@ int acpi_map_lsapic(acpi_handle handle, int *pcpu)
 	obj = buffer.pointer;
 	if (obj->type != ACPI_TYPE_BUFFER ||
 	    obj->buffer.length < sizeof(*lsapic)) {
-		acpi_os_free(buffer.pointer);
+		kfree(buffer.pointer);
 		return -EINVAL;
 	}
 
@@ -865,13 +865,13 @@ int acpi_map_lsapic(acpi_handle handle, int *pcpu)
 
 	if ((lsapic->header.type != ACPI_MADT_LSAPIC) ||
 	    (!lsapic->flags.enabled)) {
-		acpi_os_free(buffer.pointer);
+		kfree(buffer.pointer);
 		return -EINVAL;
 	}
 
 	physid = ((lsapic->id << 8) | (lsapic->eid));
 
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 	buffer.length = ACPI_ALLOCATE_BUFFER;
 	buffer.pointer = NULL;
 
@@ -935,20 +935,20 @@ acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
 	obj = buffer.pointer;
 	if (obj->type != ACPI_TYPE_BUFFER ||
 	    obj->buffer.length < sizeof(*iosapic)) {
-		acpi_os_free(buffer.pointer);
+		kfree(buffer.pointer);
 		return AE_OK;
 	}
 
 	iosapic = (struct acpi_table_iosapic *)obj->buffer.pointer;
 
 	if (iosapic->header.type != ACPI_MADT_IOSAPIC) {
-		acpi_os_free(buffer.pointer);
+		kfree(buffer.pointer);
 		return AE_OK;
 	}
 
 	gsi_base = iosapic->global_irq_base;
 
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 
 	/*
 	 * OK, it's an IOSAPIC MADT entry, look for a _PXM value to tell
diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 6f5e395..2c626e8 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -466,7 +466,7 @@ static acpi_status is_memory_device(acpi_handle handle)
 
 	info = buffer.pointer;
 	if (!(info->valid & ACPI_VALID_HID)) {
-		acpi_os_free(buffer.pointer);
+		kfree(buffer.pointer);
 		return AE_ERROR;
 	}
 
@@ -475,7 +475,7 @@ static acpi_status is_memory_device(acpi_handle handle)
 	    (strcmp(hardware_id, ACPI_MEMORY_DEVICE_HID)))
 		status = AE_ERROR;
 
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 	return status;
 }
 
diff --git a/drivers/acpi/asus_acpi.c b/drivers/acpi/asus_acpi.c
index 055cfd5..eb0b8fb 100644
--- a/drivers/acpi/asus_acpi.c
+++ b/drivers/acpi/asus_acpi.c
@@ -1017,7 +1017,7 @@ static int asus_hotk_get_info(void)
 		}
 		hotk->methods = &model_conf[hotk->model];
 		
-		acpi_os_free(model);
+		kfree(model);
 
 		return AE_OK;
 	}
@@ -1096,7 +1096,7 @@ static int asus_hotk_get_info(void)
 		/* S1300A reports L84F, but L1400B too, account for that */
 	}
 
-	acpi_os_free(model);
+	kfree(model);
 
 	return AE_OK;
 }
@@ -1256,7 +1256,7 @@ static void __exit asus_acpi_exit(void)
 	acpi_bus_unregister_driver(&asus_hotk_driver);
 	remove_proc_entry(PROC_ASUS, acpi_root_dir);
 
-	acpi_os_free(asus_info);
+	kfree(asus_info);
 
 	return;
 }
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 00b0728..7d92f73 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -171,7 +171,7 @@ acpi_battery_get_info(struct acpi_battery *battery,
 	}
 
       end:
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 
 	if (!result)
 		(*bif) = (struct acpi_battery_info *)data.pointer;
@@ -231,7 +231,7 @@ acpi_battery_get_status(struct acpi_battery *battery,
 	}
 
       end:
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 
 	if (!result)
 		(*bst) = (struct acpi_battery_status *)data.pointer;
diff --git a/drivers/acpi/container.c b/drivers/acpi/container.c
index 7f7e41d..871aa52 100644
--- a/drivers/acpi/container.c
+++ b/drivers/acpi/container.c
@@ -236,7 +236,7 @@ container_walk_namespace_cb(acpi_handle handle,
 	}
 
       end:
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 
 	return AE_OK;
 }
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 8daef57..10f160d 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -152,7 +152,7 @@ static int get_root_bridge_busnr(acpi_handle handle)
 		bbn = bus;
 	}
       exit:
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 	return (int)bbn;
 }
 
@@ -192,7 +192,7 @@ find_pci_rootbridge(acpi_handle handle, u32 lvl, void *context, void **rv)
 		find->handle = handle;
 	status = AE_OK;
       exit:
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 	return status;
 }
 
@@ -224,7 +224,7 @@ do_acpi_find_child(acpi_handle handle, u32 lvl, void *context, void **rv)
 		info = buffer.pointer;
 		if (info->address == find->address)
 			find->handle = handle;
-		acpi_os_free(buffer.pointer);
+		kfree(buffer.pointer);
 	}
 	return AE_OK;
 }
@@ -330,7 +330,7 @@ static int acpi_platform_notify(struct device *dev)
 
 		acpi_get_name(dev->firmware_data, ACPI_FULL_PATHNAME, &buffer);
 		DBG("Device %s -> %s\n", dev->bus_id, (char *)buffer.pointer);
-		acpi_os_free(buffer.pointer);
+		kfree(buffer.pointer);
 	} else
 		DBG("Device %s -> No ACPI support\n", dev->bus_id);
 #endif
diff --git a/drivers/acpi/namespace/nsxfeval.c b/drivers/acpi/namespace/nsxfeval.c
index 6d9bd45a..dca6799 100644
--- a/drivers/acpi/namespace/nsxfeval.c
+++ b/drivers/acpi/namespace/nsxfeval.c
@@ -133,7 +133,7 @@ acpi_evaluate_object_typed(acpi_handle handle,
 
 		/* Caller used ACPI_ALLOCATE_BUFFER, free the return buffer */
 
-		acpi_os_free(return_buffer->pointer);
+		ACPI_FREE(return_buffer->pointer);
 		return_buffer->pointer = NULL;
 	}
 
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index afd937b..c68b1bb 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -147,13 +147,6 @@ void *acpi_os_allocate(acpi_size size)
 		return kmalloc(size, GFP_KERNEL);
 }
 
-void acpi_os_free(void *ptr)
-{
-	kfree(ptr);
-}
-
-EXPORT_SYMBOL(acpi_os_free);
-
 acpi_status acpi_os_get_root_pointer(u32 flags, struct acpi_pointer *addr)
 {
 	if (efi_enabled) {
@@ -743,7 +736,7 @@ acpi_status acpi_os_delete_semaphore(acpi_handle handle)
 
 	ACPI_DEBUG_PRINT((ACPI_DB_MUTEX, "Deleting semaphore[%p].\n", handle));
 
-	acpi_os_free(sem);
+	kfree(sem);
 	sem = NULL;
 
 	return AE_OK;
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index e439eb7..8e9c26a 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -768,7 +768,7 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr)
 		status = -EFAULT;
 
       end:
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 
 	return status;
 }
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index 14a00e5..7ba5e49 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -216,7 +216,7 @@ static int acpi_processor_get_performance_control(struct acpi_processor *pr)
 	       sizeof(struct acpi_pct_register));
 
       end:
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 
 	return result;
 }
@@ -294,7 +294,7 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
 	}
 
       end:
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 
 	return result;
 }
@@ -592,7 +592,7 @@ static int acpi_processor_get_psd(struct acpi_processor	*pr)
 	}
 
 end:
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 	return result;
 }
 
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 401e723..a05b3df 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -319,7 +319,7 @@ static int acpi_bus_get_wakeup_device_flags(struct acpi_device *device)
 		goto end;
 	}
 
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 
 	device->wakeup.flags.valid = 1;
 	/* Power button, Lid switch always enable wakeup */
@@ -854,7 +854,7 @@ static void acpi_device_set_id(struct acpi_device *device,
 			printk(KERN_ERR "Memory allocation error\n");
 	}
 
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 }
 
 static int acpi_device_set_context(struct acpi_device *device, int type)
diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c
index c90bd2f..c3bb7faa 100644
--- a/drivers/acpi/system.c
+++ b/drivers/acpi/system.c
@@ -86,7 +86,7 @@ acpi_system_read_dsdt(struct file *file,
 
 	res = simple_read_from_buffer(buffer, count, ppos,
 				      dsdt.pointer, dsdt.length);
-	acpi_os_free(dsdt.pointer);
+	kfree(dsdt.pointer);
 
 	return res;
 }
@@ -113,7 +113,7 @@ acpi_system_read_fadt(struct file *file,
 
 	res = simple_read_from_buffer(buffer, count, ppos,
 				      fadt.pointer, fadt.length);
-	acpi_os_free(fadt.pointer);
+	kfree(fadt.pointer);
 
 	return res;
 }
diff --git a/drivers/acpi/utilities/utalloc.c b/drivers/acpi/utilities/utalloc.c
index 7940fc1..5cff17d 100644
--- a/drivers/acpi/utilities/utalloc.c
+++ b/drivers/acpi/utilities/utalloc.c
@@ -166,10 +166,10 @@ acpi_status acpi_ut_delete_caches(void)
 
 	/* Free memory lists */
 
-	acpi_os_free(acpi_gbl_global_list);
+	ACPI_FREE(acpi_gbl_global_list);
 	acpi_gbl_global_list = NULL;
 
-	acpi_os_free(acpi_gbl_ns_node_list);
+	ACPI_FREE(acpi_gbl_ns_node_list);
 	acpi_gbl_ns_node_list = NULL;
 #endif
 
diff --git a/drivers/acpi/utilities/utcache.c b/drivers/acpi/utilities/utcache.c
index 56270a3..1a1f810 100644
--- a/drivers/acpi/utilities/utcache.c
+++ b/drivers/acpi/utilities/utcache.c
@@ -162,7 +162,7 @@ acpi_status acpi_os_delete_cache(struct acpi_memory_list * cache)
 
 	/* Now we can delete the cache object */
 
-	acpi_os_free(cache);
+	ACPI_FREE(cache);
 	return (AE_OK);
 }
 
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index 1930e1a..f48227f 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -332,7 +332,7 @@ acpi_evaluate_string(acpi_handle handle,
 
 	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Return value [%s]\n", *data));
 
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 
 	return AE_OK;
 }
@@ -418,7 +418,7 @@ acpi_evaluate_reference(acpi_handle handle,
 		//kfree(list->handles);
 	}
 
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 
 	return status;
 }
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 9feb633..1f3ffb3 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -1450,7 +1450,7 @@ static int acpi_video_device_enumerate(struct acpi_video_bus *video)
 	video->attached_array = active_device_list;
 	video->attached_count = count;
       out:
-	acpi_os_free(buffer.pointer);
+	kfree(buffer.pointer);
 	return status;
 }
 
diff --git a/include/acpi/acmacros.h b/include/acpi/acmacros.h
index 4bb3806..f1ac610 100644
--- a/include/acpi/acmacros.h
+++ b/include/acpi/acmacros.h
@@ -726,7 +726,7 @@
 
 #define ACPI_ALLOCATE(a)            acpi_ut_allocate((acpi_size)(a),_COMPONENT,_acpi_module_name,__LINE__)
 #define ACPI_ALLOCATE_ZEROED(a)     acpi_ut_allocate_zeroed((acpi_size)(a), _COMPONENT,_acpi_module_name,__LINE__)
-#define ACPI_FREE(a)                acpi_os_free(a)
+#define ACPI_FREE(a)                kfree(a)
 #define ACPI_MEM_TRACKING(a)
 
 #else
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index 89bc4a1..0cd63bc 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -143,8 +143,6 @@ void acpi_os_release_mutex(acpi_mutex handle);
  */
 void *acpi_os_allocate(acpi_size size);
 
-void acpi_os_free(void *memory);
-
 acpi_status
 acpi_os_map_memory(acpi_physical_address physical_address,
 		   acpi_size size, void __iomem ** logical_address);
-- 
cgit v0.10.2


From a170a5317c0298538deb170028376ec1631acc2f Mon Sep 17 00:00:00 2001
From: Karol Kozimor <sziwan@hell.org.pl>
Date: Fri, 30 Jun 2006 19:03:00 -0400
Subject: ACPI: asus_acpi: misc cleanups

This patch updates the version string, copyright notices and does
whitespace cleanup (it looks weird, blame Lindent).

Signed-off-by: Karol Kozimor <sziwan@hell.org.pl>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/asus_acpi.c b/drivers/acpi/asus_acpi.c
index eb0b8fb..38b69cb 100644
--- a/drivers/acpi/asus_acpi.c
+++ b/drivers/acpi/asus_acpi.c
@@ -2,7 +2,7 @@
  *  asus_acpi.c - Asus Laptop ACPI Extras
  *
  *
- *  Copyright (C) 2002, 2003, 2004 Julien Lerouge, Karol Kozimor
+ *  Copyright (C) 2002-2005 Julien Lerouge, 2003-2006 Karol Kozimor
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -27,10 +27,6 @@
  *  Johann Wiesner - Small compile fixes
  *  John Belmonte  - ACPI code for Toshiba laptop was a good starting point.
  *
- *  TODO:
- *  add Fn key status
- *  Add mode selection on module loading (parameter) -> still necessary?
- *  Complete display switching -- may require dirty hacks or calling _DOS?
  */
 
 #include <linux/kernel.h>
@@ -42,7 +38,7 @@
 #include <acpi/acpi_bus.h>
 #include <asm/uaccess.h>
 
-#define ASUS_ACPI_VERSION "0.29"
+#define ASUS_ACPI_VERSION "0.30"
 
 #define PROC_ASUS       "asus"	//the directory
 #define PROC_MLED       "mled"
@@ -149,17 +145,8 @@ struct asus_hotk {
 
 static struct model_data model_conf[END_MODEL] = {
 	/*
-	 * Those pathnames are relative to the HOTK / ATKD device :
-	 *       - mt_mled
-	 *       - mt_wled
-	 *       - brightness_set
-	 *       - brightness_get
-	 *       - display_set
-	 *       - display_get
-	 *
 	 * TODO I have seen a SWBX and AIBX method on some models, like L1400B,
 	 * it seems to be a kind of switch, but what for ?
-	 *
 	 */
 
 	{
@@ -993,7 +980,7 @@ static int asus_hotk_get_info(void)
 	if (buffer.pointer == NULL)
 		return -EINVAL;
 
-	model = (union acpi_object *) buffer.pointer;
+	model = (union acpi_object *)buffer.pointer;
 	/*
 	 * Samsung P30 has a device with a valid _HID whose INIT does not 
 	 * return anything. It used to be possible to catch this exception,
@@ -1002,7 +989,8 @@ static int asus_hotk_get_info(void)
 	 * identifier but it's still possible to get completely bogus data.
 	 */
 	if (model->type == ACPI_TYPE_STRING) {
-		printk(KERN_NOTICE "  %s model detected, ", model->string.pointer);
+		printk(KERN_NOTICE "  %s model detected, ",
+		       model->string.pointer);
 	} else {
 		if (asus_info &&	/* Samsung P30 */
 		    strncmp(asus_info->oem_table_id, "ODEM", 4) == 0) {
@@ -1016,7 +1004,7 @@ static int asus_hotk_get_info(void)
 			       "the developers with your DSDT\n");
 		}
 		hotk->methods = &model_conf[hotk->model];
-		
+
 		kfree(model);
 
 		return AE_OK;
@@ -1164,8 +1152,7 @@ static int asus_hotk_add(struct acpi_device *device)
 	/* For laptops without GPLV: init the hotk->brightness value */
 	if ((!hotk->methods->brightness_get)
 	    && (!hotk->methods->brightness_status)
-	    && (hotk->methods->brightness_up
-		&& hotk->methods->brightness_down)) {
+	    && (hotk->methods->brightness_up && hotk->methods->brightness_down)) {
 		status =
 		    acpi_evaluate_object(NULL, hotk->methods->brightness_down,
 					 NULL, NULL);
-- 
cgit v0.10.2


From ed2cb07b2bb04f14793cdeecb0b384374e979525 Mon Sep 17 00:00:00 2001
From: Karol Kozimor <sziwan@hell.org.pl>
Date: Fri, 30 Jun 2006 19:03:00 -0400
Subject: ACPI: asus_acpi: support A3G

This patch adds support for Asus A3G.

Signed-off-by: Karol Kozimor <sziwan@hell.org.pl>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/asus_acpi.c b/drivers/acpi/asus_acpi.c
index 38b69cb..fdc3995 100644
--- a/drivers/acpi/asus_acpi.c
+++ b/drivers/acpi/asus_acpi.c
@@ -123,7 +123,7 @@ struct asus_hotk {
 		M1A,		//M1300A
 		M2E,		//M2400E, L4400L
 		M6N,		//M6800N
-		M6R,		//M6700R
+		M6R,		//M6700R, A3000G
 		P30,		//Samsung P30
 		S1x,		//S1300A, but also L1400B and M2400A (L84F)
 		S2x,		//S200 (J1 reported), Victor MP-XP7210
@@ -1025,7 +1025,8 @@ static int asus_hotk_get_info(void)
 		hotk->model = L4R;
 	else if (strncmp(model->string.pointer, "M6N", 3) == 0)
 		hotk->model = M6N;
-	else if (strncmp(model->string.pointer, "M6R", 3) == 0)
+	else if (strncmp(model->string.pointer, "M6R", 3) == 0 ||
+		 strncmp(model->string.pointer, "A3G", 3) == 0)
 		hotk->model = M6R;
 	else if (strncmp(model->string.pointer, "M2N", 3) == 0 ||
 		 strncmp(model->string.pointer, "M3N", 3) == 0 ||
@@ -1070,6 +1071,9 @@ static int asus_hotk_get_info(void)
 		hotk->methods->lcd_status = NULL;
 	/* L2B is similar enough to L3C to use its settings, with this only 
 	   exception */
+	else if (strncmp(model->string.pointer, "A3G", 3) == 0)
+		hotk->methods->lcd_status = "\\BLFG";
+	/* A3G is like M6R */
 	else if (strncmp(model->string.pointer, "S5N", 3) == 0 ||
 		 strncmp(model->string.pointer, "M5N", 3) == 0)
 		hotk->methods->mt_mled = NULL;
-- 
cgit v0.10.2


From 42cb891295795ed9b3048c8922d93f7a71f63968 Mon Sep 17 00:00:00 2001
From: Karol Kozimor <sziwan@hell.org.pl>
Date: Fri, 30 Jun 2006 19:04:00 -0400
Subject: ACPI: asus_acpi: LED display support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds handling for front LED displays found on W1N and the like.
Additionally, W1N is given its own model_data instance.

Patch originally by Éric Burghard.

Signed-off-by: Karol Kozimor <sziwan@hell.org.pl>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/asus_acpi.c b/drivers/acpi/asus_acpi.c
index fdc3995..b95b52e 100644
--- a/drivers/acpi/asus_acpi.c
+++ b/drivers/acpi/asus_acpi.c
@@ -26,6 +26,7 @@
  *  Pontus Fuchs   - Helper functions, cleanup
  *  Johann Wiesner - Small compile fixes
  *  John Belmonte  - ACPI code for Toshiba laptop was a good starting point.
+ *  �ric Burghard  - LED display support for W1N
  *
  */
 
@@ -44,6 +45,7 @@
 #define PROC_MLED       "mled"
 #define PROC_WLED       "wled"
 #define PROC_TLED       "tled"
+#define PROC_LEDD       "ledd"
 #define PROC_INFO       "info"
 #define PROC_LCD        "lcd"
 #define PROC_BRN        "brn"
@@ -88,6 +90,7 @@ struct model_data {
 	char *wled_status;	//node to handle wled reading_______A
 	char *mt_tled;		//method to handle tled_____________R
 	char *tled_status;	//node to handle tled reading_______A
+	char *mt_ledd;		//method to handle LED display______R
 	char *mt_lcd_switch;	//method to turn LCD ON/OFF_________A
 	char *lcd_status;	//node to read LCD panel state______A
 	char *brightness_up;	//method to set brightness up_______A
@@ -107,6 +110,7 @@ struct asus_hotk {
 	struct acpi_device *device;	//the device we are in
 	acpi_handle handle;	//the handle of the hotk device
 	char status;		//status of the hotk, for LEDs, ...
+	u32 ledd_status;	//status of the LED display
 	struct model_data *methods;	//methods available on the laptop
 	u8 brightness;		//brightness level
 	enum {
@@ -127,7 +131,8 @@ struct asus_hotk {
 		P30,		//Samsung P30
 		S1x,		//S1300A, but also L1400B and M2400A (L84F)
 		S2x,		//S200 (J1 reported), Victor MP-XP7210
-		xxN,		//M2400N, M3700N, M5200N, S1300N, S5200N, W1OOON
+		W1N,		//W1000N
+		xxN,		//M2400N, M3700N, M5200N, M6800N, S1300N, S5200N
 		//(Centrino)
 		END_MODEL
 	} model;		//Models currently supported
@@ -332,6 +337,18 @@ static struct model_data model_conf[END_MODEL] = {
 	 .brightness_down = S2x_PREFIX "_Q0A"},
 
 	{
+	 .name = "W1N",
+	 .mt_mled = "MLED",
+	 .mt_wled = "WLED",
+	 .mt_ledd = "SLCM",
+	 .mt_lcd_switch = xxN_PREFIX "_Q10",
+	 .lcd_status = "\\BKLT",
+	 .brightness_set = "SPLV",
+	 .brightness_get = "GPLV",
+	 .display_set = "SDSP",
+	 .display_get = "\\ADVG"},
+
+	{
 	 .name = "xxN",
 	 .mt_mled = "MLED",
 /* WLED present, but not controlled by ACPI */
@@ -550,6 +567,36 @@ proc_write_mled(struct file *file, const char __user * buffer,
 }
 
 /*
+ * Proc handlers for LED display
+ */
+static int
+proc_read_ledd(char *page, char **start, off_t off, int count, int *eof,
+	       void *data)
+{
+	return sprintf(page, "0x%08x\n", hotk->ledd_status);
+}
+
+static int
+proc_write_ledd(struct file *file, const char __user * buffer,
+		unsigned long count, void *data)
+{
+	int value;
+
+	count = parse_arg(buffer, count, &value);
+	if (count > 0) {
+		if (!write_acpi_int
+		    (hotk->handle, hotk->methods->mt_ledd, value, NULL))
+			printk(KERN_WARNING
+			       "Asus ACPI: LED display write failed\n");
+		else
+			hotk->ledd_status = (u32) value;
+	} else if (count < 0)
+		printk(KERN_WARNING "Asus ACPI: Error reading user input\n");
+
+	return count;
+}
+
+/*
  * Proc handlers for WLED
  */
 static int
@@ -863,6 +910,11 @@ static int asus_hotk_add_fs(struct acpi_device *device)
 			      mode, device);
 	}
 
+	if (hotk->methods->mt_ledd) {
+		asus_proc_add(PROC_LEDD, &proc_write_ledd, &proc_read_ledd,
+			      mode, device);
+	}
+
 	if (hotk->methods->mt_mled) {
 		asus_proc_add(PROC_MLED, &proc_write_mled, &proc_read_mled,
 			      mode, device);
@@ -906,6 +958,8 @@ static int asus_hotk_remove_fs(struct acpi_device *device)
 			remove_proc_entry(PROC_MLED, acpi_device_dir(device));
 		if (hotk->methods->mt_tled)
 			remove_proc_entry(PROC_TLED, acpi_device_dir(device));
+		if (hotk->methods->mt_ledd)
+			remove_proc_entry(PROC_LEDD, acpi_device_dir(device));
 		if (hotk->methods->mt_lcd_switch && hotk->methods->lcd_status)
 			remove_proc_entry(PROC_LCD, acpi_device_dir(device));
 		if ((hotk->methods->brightness_up
@@ -1033,8 +1087,7 @@ static int asus_hotk_get_info(void)
 		 strncmp(model->string.pointer, "M5N", 3) == 0 ||
 		 strncmp(model->string.pointer, "M6N", 3) == 0 ||
 		 strncmp(model->string.pointer, "S1N", 3) == 0 ||
-		 strncmp(model->string.pointer, "S5N", 3) == 0 ||
-		 strncmp(model->string.pointer, "W1N", 3) == 0)
+		 strncmp(model->string.pointer, "S5N", 3) == 0)
 		hotk->model = xxN;
 	else if (strncmp(model->string.pointer, "M1", 2) == 0)
 		hotk->model = M1A;
@@ -1055,6 +1108,8 @@ static int asus_hotk_get_info(void)
 		hotk->model = S2x;
 	else if (strncmp(model->string.pointer, "L5", 2) == 0)
 		hotk->model = L5x;
+	else if (strncmp(model->string.pointer, "W1N", 3) == 0)
+		hotk->model = W1N;
 
 	if (hotk->model == END_MODEL) {
 		printk("unsupported, trying default values, supply the "
@@ -1078,10 +1133,9 @@ static int asus_hotk_get_info(void)
 		 strncmp(model->string.pointer, "M5N", 3) == 0)
 		hotk->methods->mt_mled = NULL;
 	/* S5N and M5N have no MLED */
-	else if (strncmp(model->string.pointer, "M2N", 3) == 0 ||
-		 strncmp(model->string.pointer, "W1N", 3) == 0)
+	else if (strncmp(model->string.pointer, "M2N", 3) == 0)
 		hotk->methods->mt_wled = "WLED";
-	/* M2N and W1N have a usable WLED */
+	/* M2N has a usable WLED */
 	else if (asus_info) {
 		if (strncmp(asus_info->oem_table_id, "L1", 2) == 0)
 			hotk->methods->mled_status = NULL;
@@ -1175,6 +1229,9 @@ static int asus_hotk_add(struct acpi_device *device)
 
 	asus_hotk_found = 1;
 
+	/* LED display is off by default */
+	hotk->ledd_status = 0xFFF;
+
       end:
 	if (result) {
 		kfree(hotk);
-- 
cgit v0.10.2


From c067a7899790ed4c03b00ed186c6e3b6a3964379 Mon Sep 17 00:00:00 2001
From: Karol Kozimor <sziwan@hell.org.pl>
Date: Fri, 30 Jun 2006 19:05:00 -0400
Subject: ACPI: asus_acpi: support W3400N

This patch adds support for Asus W3400N.

Signed-off-by: Karol Kozimor <sziwan@hell.org.pl>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/asus_acpi.c b/drivers/acpi/asus_acpi.c
index b95b52e..a497f42 100644
--- a/drivers/acpi/asus_acpi.c
+++ b/drivers/acpi/asus_acpi.c
@@ -126,7 +126,7 @@ struct asus_hotk {
 		L8L,		//L8400L
 		M1A,		//M1300A
 		M2E,		//M2400E, L4400L
-		M6N,		//M6800N
+		M6N,		//M6800N, W3400N
 		M6R,		//M6700R, A3000G
 		P30,		//Samsung P30
 		S1x,		//S1300A, but also L1400B and M2400A (L84F)
@@ -1077,7 +1077,8 @@ static int asus_hotk_get_info(void)
 		hotk->model = L8L;
 	else if (strncmp(model->string.pointer, "L4R", 3) == 0)
 		hotk->model = L4R;
-	else if (strncmp(model->string.pointer, "M6N", 3) == 0)
+	else if (strncmp(model->string.pointer, "M6N", 3) == 0 ||
+		 strncmp(model->string.pointer, "W3N", 3) == 0)
 		hotk->model = M6N;
 	else if (strncmp(model->string.pointer, "M6R", 3) == 0 ||
 		 strncmp(model->string.pointer, "A3G", 3) == 0)
@@ -1130,9 +1131,10 @@ static int asus_hotk_get_info(void)
 		hotk->methods->lcd_status = "\\BLFG";
 	/* A3G is like M6R */
 	else if (strncmp(model->string.pointer, "S5N", 3) == 0 ||
-		 strncmp(model->string.pointer, "M5N", 3) == 0)
+		 strncmp(model->string.pointer, "M5N", 3) == 0 ||
+		 strncmp(model->string.pointer, "W3N", 3) == 0)
 		hotk->methods->mt_mled = NULL;
-	/* S5N and M5N have no MLED */
+	/* S5N, M5N and W3N have no MLED */
 	else if (strncmp(model->string.pointer, "M2N", 3) == 0)
 		hotk->methods->mt_wled = "WLED";
 	/* M2N has a usable WLED */
-- 
cgit v0.10.2


From f78c589d108f4b06a012817536c9ced37f473eae Mon Sep 17 00:00:00 2001
From: Karol Kozimor <sziwan@hell.org.pl>
Date: Fri, 30 Jun 2006 19:06:00 -0400
Subject: ACPI: asus_acpi: support A4G

This patch adds support for Asus A4G.
Originally by Giuseppe Rota.

Signed-off-by: Karol Kozimor <sziwan@hell.org.pl>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/asus_acpi.c b/drivers/acpi/asus_acpi.c
index a497f42..c613a43 100644
--- a/drivers/acpi/asus_acpi.c
+++ b/drivers/acpi/asus_acpi.c
@@ -116,6 +116,7 @@ struct asus_hotk {
 	enum {
 		A1x = 0,	//A1340D, A1300F
 		A2x,		//A2500H
+		A4G,		//A4700G
 		D1x,		//D1
 		L2D,		//L2000D
 		L3C,		//L3800C
@@ -176,6 +177,16 @@ static struct model_data model_conf[END_MODEL] = {
 	 .display_get = "\\INFB"},
 
 	{
+	 .name = "A4G",
+	 .mt_mled = "MLED",
+/* WLED present, but not controlled by ACPI */
+	 .mt_lcd_switch = xxN_PREFIX "_Q10",
+	 .brightness_set = "SPLV",
+	 .brightness_get = "GPLV",
+	 .display_set = "SDSP",
+	 .display_get = "\\ADVG"},
+
+	{
 	 .name = "D1x",
 	 .mt_mled = "MLED",
 	 .mt_lcd_switch = "\\Q0D",
@@ -1109,6 +1120,8 @@ static int asus_hotk_get_info(void)
 		hotk->model = S2x;
 	else if (strncmp(model->string.pointer, "L5", 2) == 0)
 		hotk->model = L5x;
+	else if (strncmp(model->string.pointer, "A4G", 3) == 0)
+		hotk->model = A4G;
 	else if (strncmp(model->string.pointer, "W1N", 3) == 0)
 		hotk->model = W1N;
 
-- 
cgit v0.10.2


From e067aaa7612c273d4bfd70d1bd8d80313a57685c Mon Sep 17 00:00:00 2001
From: Karol Kozimor <sziwan@hell.org.pl>
Date: Fri, 30 Jun 2006 19:07:00 -0400
Subject: ACPI: asus_acpi: handle internal Bluetooth / support W5A

This patch creates a new file named "bluetooth" under /proc/acpi/asus/.
This file controls both the internal Bluetooth adapter's presence on the
USB bus and the associated LED.

echo 1 > /proc/acpi/asus/bluetooth to enable, 0 to disable.

Additionally, the patch add support for Asus W5A, the first model that uses
this feature.

Patch originally by Fernando A. P. Gomes.

Signed-off-by: Karol Kozimor <sziwan@hell.org.pl>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/asus_acpi.c b/drivers/acpi/asus_acpi.c
index c613a43..f39aef1 100644
--- a/drivers/acpi/asus_acpi.c
+++ b/drivers/acpi/asus_acpi.c
@@ -45,6 +45,7 @@
 #define PROC_MLED       "mled"
 #define PROC_WLED       "wled"
 #define PROC_TLED       "tled"
+#define PROC_BT         "bluetooth"
 #define PROC_LEDD       "ledd"
 #define PROC_INFO       "info"
 #define PROC_LCD        "lcd"
@@ -65,9 +66,10 @@
 /*
  * Flags for hotk status
  */
-#define MLED_ON     0x01	//is MLED ON ?
-#define WLED_ON     0x02
-#define TLED_ON     0x04
+#define MLED_ON     0x01	//mail LED
+#define WLED_ON     0x02	//wireless LED
+#define TLED_ON     0x04	//touchpad LED
+#define BT_ON       0x08	//internal Bluetooth
 
 MODULE_AUTHOR("Julien Lerouge, Karol Kozimor");
 MODULE_DESCRIPTION(ACPI_HOTK_NAME);
@@ -91,7 +93,9 @@ struct model_data {
 	char *mt_tled;		//method to handle tled_____________R
 	char *tled_status;	//node to handle tled reading_______A
 	char *mt_ledd;		//method to handle LED display______R
-	char *mt_lcd_switch;	//method to turn LCD ON/OFF_________A
+	char *mt_bt_switch;	//method to switch Bluetooth on/off_R
+	char *bt_status;	//no model currently supports this__?
+	char *mt_lcd_switch;	//method to turn LCD on/off_________A
 	char *lcd_status;	//node to read LCD panel state______A
 	char *brightness_up;	//method to set brightness up_______A
 	char *brightness_down;	//guess what ?______________________A
@@ -133,6 +137,7 @@ struct asus_hotk {
 		S1x,		//S1300A, but also L1400B and M2400A (L84F)
 		S2x,		//S200 (J1 reported), Victor MP-XP7210
 		W1N,		//W1000N
+		W5A,		//W5A
 		xxN,		//M2400N, M3700N, M5200N, M6800N, S1300N, S5200N
 		//(Centrino)
 		END_MODEL
@@ -360,6 +365,16 @@ static struct model_data model_conf[END_MODEL] = {
 	 .display_get = "\\ADVG"},
 
 	{
+	 .name = "W5A",
+	 .mt_bt_switch = "BLED",
+	 .mt_wled = "WLED",
+	 .mt_lcd_switch = xxN_PREFIX "_Q10",
+	 .brightness_set = "SPLV",
+	 .brightness_get = "GPLV",
+	 .display_set = "SDSP",
+	 .display_get = "\\ADVG"},
+
+	{
 	 .name = "xxN",
 	 .mt_mled = "MLED",
 /* WLED present, but not controlled by ACPI */
@@ -626,6 +641,25 @@ proc_write_wled(struct file *file, const char __user * buffer,
 }
 
 /*
+ * Proc handlers for Bluetooth
+ */
+static int
+proc_read_bluetooth(char *page, char **start, off_t off, int count, int *eof,
+		    void *data)
+{
+	return sprintf(page, "%d\n", read_led(hotk->methods->bt_status, BT_ON));
+}
+
+static int
+proc_write_bluetooth(struct file *file, const char __user * buffer,
+		     unsigned long count, void *data)
+{
+	/* Note: mt_bt_switch controls both internal Bluetooth adapter's 
+	   presence and its LED */
+	return write_led(buffer, count, hotk->methods->mt_bt_switch, BT_ON, 0);
+}
+
+/*
  * Proc handlers for TLED
  */
 static int
@@ -936,6 +970,11 @@ static int asus_hotk_add_fs(struct acpi_device *device)
 			      mode, device);
 	}
 
+	if (hotk->methods->mt_bt_switch) {
+		asus_proc_add(PROC_BT, &proc_write_bluetooth,
+			      &proc_read_bluetooth, mode, device);
+	}
+
 	/* 
 	 * We need both read node and write method as LCD switch is also accessible
 	 * from keyboard 
@@ -971,6 +1010,8 @@ static int asus_hotk_remove_fs(struct acpi_device *device)
 			remove_proc_entry(PROC_TLED, acpi_device_dir(device));
 		if (hotk->methods->mt_ledd)
 			remove_proc_entry(PROC_LEDD, acpi_device_dir(device));
+		if (hotk->methods->mt_bt_switch)
+			remove_proc_entry(PROC_BT, acpi_device_dir(device));
 		if (hotk->methods->mt_lcd_switch && hotk->methods->lcd_status)
 			remove_proc_entry(PROC_LCD, acpi_device_dir(device));
 		if ((hotk->methods->brightness_up
@@ -1124,6 +1165,8 @@ static int asus_hotk_get_info(void)
 		hotk->model = A4G;
 	else if (strncmp(model->string.pointer, "W1N", 3) == 0)
 		hotk->model = W1N;
+	else if (strncmp(model->string.pointer, "W5A", 3) == 0)
+		hotk->model = W5A;
 
 	if (hotk->model == END_MODEL) {
 		printk("unsupported, trying default values, supply the "
-- 
cgit v0.10.2


From ebccb84810729f0e86a83a65681ba2de45ff84d8 Mon Sep 17 00:00:00 2001
From: Karol Kozimor <sziwan@hell.org.pl>
Date: Fri, 30 Jun 2006 19:08:00 -0400
Subject: ACPI: asus_acpi: support L5D

This patch adds support for Asus L5D and thus fixes
http://bugme.osdl.org/show_bug.cgi?id=4695

Signed-off-by: Karol Kozimor <sziwan@hell.org.pl>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/asus_acpi.c b/drivers/acpi/asus_acpi.c
index f39aef1..099f92a 100644
--- a/drivers/acpi/asus_acpi.c
+++ b/drivers/acpi/asus_acpi.c
@@ -125,7 +125,7 @@ struct asus_hotk {
 		L2D,		//L2000D
 		L3C,		//L3800C
 		L3D,		//L3400D
-		L3H,		//L3H, but also L2000E
+		L3H,		//L3H, L2000E, L5D
 		L4R,		//L4500R
 		L5x,		//L5800C 
 		L8L,		//L8400L
@@ -1119,8 +1119,9 @@ static int asus_hotk_get_info(void)
 	hotk->model = END_MODEL;
 	if (strncmp(model->string.pointer, "L3D", 3) == 0)
 		hotk->model = L3D;
-	else if (strncmp(model->string.pointer, "L3H", 3) == 0 ||
-		 strncmp(model->string.pointer, "L2E", 3) == 0)
+	else if (strncmp(model->string.pointer, "L2E", 3) == 0 ||
+		 strncmp(model->string.pointer, "L3H", 3) == 0 ||
+		 strncmp(model->string.pointer, "L5D", 3) == 0)
 		hotk->model = L3H;
 	else if (strncmp(model->string.pointer, "L3", 2) == 0 ||
 		 strncmp(model->string.pointer, "L2B", 3) == 0)
@@ -1191,6 +1192,9 @@ static int asus_hotk_get_info(void)
 		 strncmp(model->string.pointer, "W3N", 3) == 0)
 		hotk->methods->mt_mled = NULL;
 	/* S5N, M5N and W3N have no MLED */
+	else if (strncmp(model->string.pointer, "L5D", 3) == 0)
+		hotk->methods->mt_wled = NULL;
+	/* L5D's WLED is not controlled by ACPI */
 	else if (strncmp(model->string.pointer, "M2N", 3) == 0)
 		hotk->methods->mt_wled = "WLED";
 	/* M2N has a usable WLED */
-- 
cgit v0.10.2


From ffab0d9507dc527ff6d704ec5e7e7ccfee119fb1 Mon Sep 17 00:00:00 2001
From: Karol Kozimor <sziwan@hell.org.pl>
Date: Fri, 30 Jun 2006 19:11:00 -0400
Subject: ACPI: asus_acpi: rework model detection

This patch reworks laptop model detection.

This addresses the Samsung P30 issue, where the INIT method would return no
object, but the implicit return in the AML interpreter would confuse the
driver. It also accounts for a newer batch of Asus models whose INIT
returns ACPI_TYPE_BUFFER instead of STRING.

The handling is now much leaner, if we get a buffer or a string, we check
against known values, in every other case we use a different path
(currently DSDT signatures). The bulk of this patch is separating the
string matching from asus_hotk_get_info() into a separate function.

This patch properly fixes http://bugme.osdl.org/show_bug.cgi?id=5067 and
http://bugme.osdl.org/show_bug.cgi?id=5092 and makes the driver fully
functional again with acpi=strict on all machines.

Signed-off-by: Karol Kozimor <sziwan@hell.org.pl>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/asus_acpi.c b/drivers/acpi/asus_acpi.c
index 099f92a..07b4c8b 100644
--- a/drivers/acpi/asus_acpi.c
+++ b/drivers/acpi/asus_acpi.c
@@ -1044,6 +1044,65 @@ static void asus_hotk_notify(acpi_handle handle, u32 event, void *data)
 }
 
 /*
+ * Match the model string to the list of supported models. Return END_MODEL if
+ * no match or model is NULL.
+ */
+static int asus_model_match(char *model)
+{
+	if (model == NULL)
+		return END_MODEL;
+
+	if (strncmp(model, "L3D", 3) == 0)
+		return L3D;
+	else if (strncmp(model, "L2E", 3) == 0 ||
+		 strncmp(model, "L3H", 3) == 0 || strncmp(model, "L5D", 3) == 0)
+		return L3H;
+	else if (strncmp(model, "L3", 2) == 0 || strncmp(model, "L2B", 3) == 0)
+		return L3C;
+	else if (strncmp(model, "L8L", 3) == 0)
+		return L8L;
+	else if (strncmp(model, "L4R", 3) == 0)
+		return L4R;
+	else if (strncmp(model, "M6N", 3) == 0 || strncmp(model, "W3N", 3) == 0)
+		return M6N;
+	else if (strncmp(model, "M6R", 3) == 0 || strncmp(model, "A3G", 3) == 0)
+		return M6R;
+	else if (strncmp(model, "M2N", 3) == 0 ||
+		 strncmp(model, "M3N", 3) == 0 ||
+		 strncmp(model, "M5N", 3) == 0 ||
+		 strncmp(model, "M6N", 3) == 0 ||
+		 strncmp(model, "S1N", 3) == 0 ||
+		 strncmp(model, "S5N", 3) == 0 || strncmp(model, "W1N", 3) == 0)
+		return xxN;
+	else if (strncmp(model, "M1", 2) == 0)
+		return M1A;
+	else if (strncmp(model, "M2", 2) == 0 || strncmp(model, "L4E", 3) == 0)
+		return M2E;
+	else if (strncmp(model, "L2", 2) == 0)
+		return L2D;
+	else if (strncmp(model, "L8", 2) == 0)
+		return S1x;
+	else if (strncmp(model, "D1", 2) == 0)
+		return D1x;
+	else if (strncmp(model, "A1", 2) == 0)
+		return A1x;
+	else if (strncmp(model, "A2", 2) == 0)
+		return A2x;
+	else if (strncmp(model, "J1", 2) == 0)
+		return S2x;
+	else if (strncmp(model, "L5", 2) == 0)
+		return L5x;
+	else if (strncmp(model, "A4G", 3) == 0)
+		return A4G;
+	else if (strncmp(model, "W1N", 3) == 0)
+		return W1N;
+	else if (strncmp(model, "W5A", 3) == 0)
+		return W5A;
+	else
+		return END_MODEL;
+}
+
+/*
  * This function is used to initialize the hotk with right values. In this
  * method, we can make all the detection we want, and modify the hotk struct
  */
@@ -1053,6 +1112,7 @@ static int asus_hotk_get_info(void)
 	struct acpi_buffer dsdt = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *model = NULL;
 	int bsts_result;
+	char *string = NULL;
 	acpi_status status;
 
 	/*
@@ -1082,120 +1142,63 @@ static int asus_hotk_get_info(void)
 		printk(KERN_NOTICE "  BSTS called, 0x%02x returned\n",
 		       bsts_result);
 
-	/* This is unlikely with implicit return */
-	if (buffer.pointer == NULL)
-		return -EINVAL;
-
-	model = (union acpi_object *)buffer.pointer;
 	/*
-	 * Samsung P30 has a device with a valid _HID whose INIT does not 
-	 * return anything. It used to be possible to catch this exception,
-	 * but the implicit return code will now happily confuse the 
-	 * driver. We assume that every ACPI_TYPE_STRING is a valid model
-	 * identifier but it's still possible to get completely bogus data.
+	 * Try to match the object returned by INIT to the specific model.
+	 * Handle every possible object (or the lack of thereof) the DSDT 
+	 * writers might throw at us. When in trouble, we pass NULL to 
+	 * asus_model_match() and try something completely different.
 	 */
-	if (model->type == ACPI_TYPE_STRING) {
-		printk(KERN_NOTICE "  %s model detected, ",
-		       model->string.pointer);
-	} else {
-		if (asus_info &&	/* Samsung P30 */
+	if (buffer.pointer) {
+		model = (union acpi_object *)buffer.pointer;
+		switch (model->type) {
+		case ACPI_TYPE_STRING:
+			string = model->string.pointer;
+			break;
+		case ACPI_TYPE_BUFFER:
+			string = model->buffer.pointer;
+			break;
+		default:
+			kfree(model);
+			break;
+		}
+	}
+	hotk->model = asus_model_match(string);
+	if (hotk->model == END_MODEL) {	/* match failed */
+		if (asus_info &&
 		    strncmp(asus_info->oem_table_id, "ODEM", 4) == 0) {
 			hotk->model = P30;
 			printk(KERN_NOTICE
 			       "  Samsung P30 detected, supported\n");
 		} else {
 			hotk->model = M2E;
-			printk(KERN_WARNING "  no string returned by INIT\n");
-			printk(KERN_WARNING "  trying default values, supply "
-			       "the developers with your DSDT\n");
+			printk(KERN_NOTICE "  unsupported model %s, trying "
+			       "default values\n", string);
+			printk(KERN_NOTICE
+			       "  send /proc/acpi/dsdt to the developers\n");
 		}
 		hotk->methods = &model_conf[hotk->model];
-
-		kfree(model);
-
 		return AE_OK;
 	}
-
-	hotk->model = END_MODEL;
-	if (strncmp(model->string.pointer, "L3D", 3) == 0)
-		hotk->model = L3D;
-	else if (strncmp(model->string.pointer, "L2E", 3) == 0 ||
-		 strncmp(model->string.pointer, "L3H", 3) == 0 ||
-		 strncmp(model->string.pointer, "L5D", 3) == 0)
-		hotk->model = L3H;
-	else if (strncmp(model->string.pointer, "L3", 2) == 0 ||
-		 strncmp(model->string.pointer, "L2B", 3) == 0)
-		hotk->model = L3C;
-	else if (strncmp(model->string.pointer, "L8L", 3) == 0)
-		hotk->model = L8L;
-	else if (strncmp(model->string.pointer, "L4R", 3) == 0)
-		hotk->model = L4R;
-	else if (strncmp(model->string.pointer, "M6N", 3) == 0 ||
-		 strncmp(model->string.pointer, "W3N", 3) == 0)
-		hotk->model = M6N;
-	else if (strncmp(model->string.pointer, "M6R", 3) == 0 ||
-		 strncmp(model->string.pointer, "A3G", 3) == 0)
-		hotk->model = M6R;
-	else if (strncmp(model->string.pointer, "M2N", 3) == 0 ||
-		 strncmp(model->string.pointer, "M3N", 3) == 0 ||
-		 strncmp(model->string.pointer, "M5N", 3) == 0 ||
-		 strncmp(model->string.pointer, "M6N", 3) == 0 ||
-		 strncmp(model->string.pointer, "S1N", 3) == 0 ||
-		 strncmp(model->string.pointer, "S5N", 3) == 0)
-		hotk->model = xxN;
-	else if (strncmp(model->string.pointer, "M1", 2) == 0)
-		hotk->model = M1A;
-	else if (strncmp(model->string.pointer, "M2", 2) == 0 ||
-		 strncmp(model->string.pointer, "L4E", 3) == 0)
-		hotk->model = M2E;
-	else if (strncmp(model->string.pointer, "L2", 2) == 0)
-		hotk->model = L2D;
-	else if (strncmp(model->string.pointer, "L8", 2) == 0)
-		hotk->model = S1x;
-	else if (strncmp(model->string.pointer, "D1", 2) == 0)
-		hotk->model = D1x;
-	else if (strncmp(model->string.pointer, "A1", 2) == 0)
-		hotk->model = A1x;
-	else if (strncmp(model->string.pointer, "A2", 2) == 0)
-		hotk->model = A2x;
-	else if (strncmp(model->string.pointer, "J1", 2) == 0)
-		hotk->model = S2x;
-	else if (strncmp(model->string.pointer, "L5", 2) == 0)
-		hotk->model = L5x;
-	else if (strncmp(model->string.pointer, "A4G", 3) == 0)
-		hotk->model = A4G;
-	else if (strncmp(model->string.pointer, "W1N", 3) == 0)
-		hotk->model = W1N;
-	else if (strncmp(model->string.pointer, "W5A", 3) == 0)
-		hotk->model = W5A;
-
-	if (hotk->model == END_MODEL) {
-		printk("unsupported, trying default values, supply the "
-		       "developers with your DSDT\n");
-		hotk->model = M2E;
-	} else {
-		printk("supported\n");
-	}
-
 	hotk->methods = &model_conf[hotk->model];
+	printk(KERN_NOTICE "  %s model detected, supported\n", string);
 
 	/* Sort of per-model blacklist */
-	if (strncmp(model->string.pointer, "L2B", 3) == 0)
+	if (strncmp(string, "L2B", 3) == 0)
 		hotk->methods->lcd_status = NULL;
 	/* L2B is similar enough to L3C to use its settings, with this only 
 	   exception */
-	else if (strncmp(model->string.pointer, "A3G", 3) == 0)
+	else if (strncmp(string, "A3G", 3) == 0)
 		hotk->methods->lcd_status = "\\BLFG";
 	/* A3G is like M6R */
-	else if (strncmp(model->string.pointer, "S5N", 3) == 0 ||
-		 strncmp(model->string.pointer, "M5N", 3) == 0 ||
-		 strncmp(model->string.pointer, "W3N", 3) == 0)
+	else if (strncmp(string, "S5N", 3) == 0 ||
+		 strncmp(string, "M5N", 3) == 0 ||
+		 strncmp(string, "W3N", 3) == 0)
 		hotk->methods->mt_mled = NULL;
 	/* S5N, M5N and W3N have no MLED */
-	else if (strncmp(model->string.pointer, "L5D", 3) == 0)
+	else if (strncmp(string, "L5D", 3) == 0)
 		hotk->methods->mt_wled = NULL;
 	/* L5D's WLED is not controlled by ACPI */
-	else if (strncmp(model->string.pointer, "M2N", 3) == 0)
+	else if (strncmp(string, "M2N", 3) == 0)
 		hotk->methods->mt_wled = "WLED";
 	/* M2N has a usable WLED */
 	else if (asus_info) {
-- 
cgit v0.10.2


From 96d1142084281ae4601fab02be061e1267e431a3 Mon Sep 17 00:00:00 2001
From: Karol Kozimor <sziwan@hell.org.pl>
Date: Fri, 30 Jun 2006 19:13:00 -0400
Subject: ACPI: asus_acpi: add S1N WLED control

This small patch adds back WLED control for S1N models, this was
accidentally removed a while ago.

Signed-off-by: Karol Kozimor <sziwan@hell.org.pl>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/asus_acpi.c b/drivers/acpi/asus_acpi.c
index 07b4c8b..d1ff387 100644
--- a/drivers/acpi/asus_acpi.c
+++ b/drivers/acpi/asus_acpi.c
@@ -1198,9 +1198,10 @@ static int asus_hotk_get_info(void)
 	else if (strncmp(string, "L5D", 3) == 0)
 		hotk->methods->mt_wled = NULL;
 	/* L5D's WLED is not controlled by ACPI */
-	else if (strncmp(string, "M2N", 3) == 0)
+	else if (strncmp(string, "M2N", 3) == 0 ||
+		 strncmp(string, "S1N", 3) == 0)
 		hotk->methods->mt_wled = "WLED";
-	/* M2N has a usable WLED */
+	/* M2N and S1N have a usable WLED */
 	else if (asus_info) {
 		if (strncmp(asus_info->oem_table_id, "L1", 2) == 0)
 			hotk->methods->mled_status = NULL;
-- 
cgit v0.10.2


From 2df8386cec47520b76822cb39d96709f5d353cf8 Mon Sep 17 00:00:00 2001
From: Karol Kozimor <sziwan@hell.org.pl>
Date: Fri, 30 Jun 2006 19:15:00 -0400
Subject: ACPI: asus_acpi: add S1N WLED control

This patch switches back the display nodes for M6R and M6N -- this happened
a while ago when a patch was misapplied (only the in-tree version was
affected).

Signed-off-by: Karol Kozimor <sziwan@hell.org.pl>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/asus_acpi.c b/drivers/acpi/asus_acpi.c
index d1ff387..e9ee4c5 100644
--- a/drivers/acpi/asus_acpi.c
+++ b/drivers/acpi/asus_acpi.c
@@ -310,7 +310,8 @@ static struct model_data model_conf[END_MODEL] = {
 	 .brightness_set = "SPLV",
 	 .brightness_get = "GPLV",
 	 .display_set = "SDSP",
-	 .display_get = "\\_SB.PCI0.P0P1.VGA.GETD"},
+	 .display_get = "\\SSTE"},
+
 	{
 	 .name = "M6R",
 	 .mt_mled = "MLED",
@@ -320,7 +321,7 @@ static struct model_data model_conf[END_MODEL] = {
 	 .brightness_set = "SPLV",
 	 .brightness_get = "GPLV",
 	 .display_set = "SDSP",
-	 .display_get = "\\SSTE"},
+	 .display_get = "\\_SB.PCI0.P0P1.VGA.GETD"},
 
 	{
 	 .name = "P30",
-- 
cgit v0.10.2


From 454e8957eb60841016deb319dbbf83042fb32a39 Mon Sep 17 00:00:00 2001
From: Ishai Rabinovitz <ishai@mellanox.co.il>
Date: Thu, 29 Jun 2006 16:39:54 +0300
Subject: [SCSI] sg.c: Fix bad error handling in

I got a NULL derefrence in cdev_del+1 when called from sg_remove. By looking at
the code of sg_add, sg_alloc and sg_remove (all in drivers/scsi/sg.c) I found
out that sg_add is calling sg_alloc but if it fails afterwards it does not
deallocate the space that was allocated in sg_alloc and the redundant entry has
NULL in cdev. When sg_remove is being called, it tries to perform cdev_del to
this NULL cdev and fails.

Signed-off-by: Ishai Rabinovitz <ishai@mellanox.co.il>
Acked-by: Douglas Gilbert <dougg@torque.net>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 98b9312..d5bbb97 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1402,6 +1402,7 @@ sg_add(struct class_device *cl_dev, struct class_interface *cl_intf)
 	Sg_device *sdp = NULL;
 	struct cdev * cdev = NULL;
 	int error, k;
+	unsigned long iflags;
 
 	disk = alloc_disk(1);
 	if (!disk) {
@@ -1429,7 +1430,7 @@ sg_add(struct class_device *cl_dev, struct class_interface *cl_intf)
 
 	error = cdev_add(cdev, MKDEV(SCSI_GENERIC_MAJOR, k), 1);
 	if (error)
-		goto out;
+		goto cdev_add_err;
 
 	sdp->cdev = cdev;
 	if (sg_sysfs_valid) {
@@ -1456,6 +1457,13 @@ sg_add(struct class_device *cl_dev, struct class_interface *cl_intf)
 
 	return 0;
 
+cdev_add_err:
+	write_lock_irqsave(&sg_dev_arr_lock, iflags);
+	kfree(sg_dev_arr[k]);
+	sg_dev_arr[k] = NULL;
+	sg_nr_dev--;
+	write_unlock_irqrestore(&sg_dev_arr_lock, iflags);
+
 out:
 	put_disk(disk);
 	if (cdev)
-- 
cgit v0.10.2


From 376ac8307868f93a0b9aa277f43dee0f63c41c1b Mon Sep 17 00:00:00 2001
From: Eric Moore <eric.moore@lsil.com>
Date: Thu, 29 Jun 2006 17:36:26 -0600
Subject: [SCSI] mptsas: make two functions static

Make two needlessly global functions static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.com>
Signed-off-by: Eric Moore <Eric.Moore@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index bc36f5f..85eff15 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -337,7 +337,7 @@ mptsas_is_end_device(struct mptsas_devinfo * attached)
 }
 
 /* no mutex */
-void
+static void
 mptsas_port_delete(struct mptsas_portinfo_details * port_details)
 {
 	struct mptsas_portinfo *port_info;
@@ -438,7 +438,7 @@ starget)
  * Updates for new and existing narrow/wide port configuration
  * in the sas_topology
  */
-void
+static void
 mptsas_setup_wide_ports(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info)
 {
 	struct mptsas_portinfo_details * port_details;
-- 
cgit v0.10.2


From 3e00a5b28782d65b7ac91e1e9812c281c2ec7af0 Mon Sep 17 00:00:00 2001
From: Eric Moore <eric.moore@lsil.com>
Date: Thu, 29 Jun 2006 17:38:43 -0600
Subject: [SCSI] mptbase: mpt_interrupt should return IRQ_NONE

The way mpt_interrupt() was coded, it was impossible for the unhandled
interrupt detection logic to ever trigger. All interrupt handlers should
return IRQ_NONE when they have nothing to do.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.com>
Signed-off-by: Eric Moore <Eric.Moore@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 8ac77ca..2060996 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -369,20 +369,21 @@ static irqreturn_t
 mpt_interrupt(int irq, void *bus_id, struct pt_regs *r)
 {
 	MPT_ADAPTER *ioc = bus_id;
-	u32 pa;
+	u32 pa = CHIPREG_READ32_dmasync(&ioc->chip->ReplyFifo);
+
+	if (pa == 0xFFFFFFFF)
+		return IRQ_NONE;
 
 	/*
 	 *  Drain the reply FIFO!
 	 */
-	while (1) {
-		pa = CHIPREG_READ32_dmasync(&ioc->chip->ReplyFifo);
-		if (pa == 0xFFFFFFFF)
-			return IRQ_HANDLED;
-		else if (pa & MPI_ADDRESS_REPLY_A_BIT)
+	do {
+		if (pa & MPI_ADDRESS_REPLY_A_BIT)
 			mpt_reply(ioc, pa);
 		else
 			mpt_turbo_reply(ioc, pa);
-	}
+		pa = CHIPREG_READ32_dmasync(&ioc->chip->ReplyFifo);
+	} while (pa != 0xFFFFFFFF);
 
 	return IRQ_HANDLED;
 }
-- 
cgit v0.10.2


From 900d9f98737b502fbd0303cfcab2773b465bb1f8 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jesper.juhl@gmail.com>
Date: Fri, 30 Jun 2006 02:33:07 -0700
Subject: [SCSI] small whitespace cleanup for qlogic driver

Add a few spaces to MODULE_PARM_DESC() text for qla2xxx. Without these
spaces text runs together when modinfo prints the text.

Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
Acked-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 3516e36..01fc30c 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -39,14 +39,14 @@ MODULE_PARM_DESC(ql2xlogintimeout,
 int qlport_down_retry = 30;
 module_param(qlport_down_retry, int, S_IRUGO|S_IRUSR);
 MODULE_PARM_DESC(qlport_down_retry,
-		"Maximum number of command retries to a port that returns"
+		"Maximum number of command retries to a port that returns "
 		"a PORT-DOWN status.");
 
 int ql2xplogiabsentdevice;
 module_param(ql2xplogiabsentdevice, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(ql2xplogiabsentdevice,
 		"Option to enable PLOGI to devices that are not present after "
-		"a Fabric scan.  This is needed for several broken switches."
+		"a Fabric scan.  This is needed for several broken switches. "
 		"Default is 0 - no PLOGI. 1 - perfom PLOGI.");
 
 int ql2xloginretrycount = 0;
-- 
cgit v0.10.2


From 413975a0f1a43a896490fc5c589d05c95624c6d2 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Fri, 30 Jun 2006 02:33:06 -0700
Subject: [SCSI] qla2xxx: make some more functions static

Make some needlessly global functions static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Andrew Vasquez <andrew.vasquez@qlogic.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 750aef1..8311ac2 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -31,13 +31,9 @@ extern void qla2x00_update_fw_options(struct scsi_qla_host *);
 extern void qla24xx_update_fw_options(scsi_qla_host_t *);
 extern int qla2x00_load_risc(struct scsi_qla_host *, uint32_t *);
 extern int qla24xx_load_risc(scsi_qla_host_t *, uint32_t *);
-extern int qla24xx_load_risc_flash(scsi_qla_host_t *, uint32_t *);
-
-extern fc_port_t *qla2x00_alloc_fcport(scsi_qla_host_t *, gfp_t);
 
 extern int qla2x00_loop_resync(scsi_qla_host_t *);
 
-extern int qla2x00_find_new_loop_id(scsi_qla_host_t *, fc_port_t *);
 extern int qla2x00_fabric_login(scsi_qla_host_t *, fc_port_t *, uint16_t *);
 extern int qla2x00_local_device_login(scsi_qla_host_t *, fc_port_t *);
 
@@ -84,8 +80,6 @@ extern void qla2xxx_wake_dpc(scsi_qla_host_t *);
 /*
  * Global Function Prototypes in qla_iocb.c source file.
  */
-extern void qla2x00_isp_cmd(scsi_qla_host_t *);
-
 extern uint16_t qla2x00_calc_iocbs_32(uint16_t);
 extern uint16_t qla2x00_calc_iocbs_64(uint16_t);
 extern void qla2x00_build_scsi_iocbs_32(srb_t *, cmd_entry_t *, uint16_t);
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index ce74a60..9758dba 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -39,6 +39,8 @@ static int qla2x00_fabric_dev_login(scsi_qla_host_t *, fc_port_t *,
 
 static int qla2x00_restart_isp(scsi_qla_host_t *);
 
+static int qla2x00_find_new_loop_id(scsi_qla_host_t *ha, fc_port_t *dev);
+
 /****************************************************************************/
 /*                QLogic ISP2x00 Hardware Support Functions.                */
 /****************************************************************************/
@@ -1790,7 +1792,7 @@ qla2x00_rport_del(void *data)
  *
  * Returns a pointer to the allocated fcport, or NULL, if none available.
  */
-fc_port_t *
+static fc_port_t *
 qla2x00_alloc_fcport(scsi_qla_host_t *ha, gfp_t flags)
 {
 	fc_port_t *fcport;
@@ -2586,7 +2588,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *ha, struct list_head *new_fcports)
  * Context:
  *	Kernel context.
  */
-int
+static int
 qla2x00_find_new_loop_id(scsi_qla_host_t *ha, fc_port_t *dev)
 {
 	int	rval;
@@ -3561,7 +3563,7 @@ qla24xx_nvram_config(scsi_qla_host_t *ha)
 	return (rval);
 }
 
-int
+static int
 qla24xx_load_risc_flash(scsi_qla_host_t *ha, uint32_t *srisc_addr)
 {
 	int	rval;
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 8c769cf..2b60a27 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -15,6 +15,7 @@ static inline uint16_t qla2x00_get_cmd_direction(struct scsi_cmnd *cmd);
 static inline cont_entry_t *qla2x00_prep_cont_type0_iocb(scsi_qla_host_t *);
 static inline cont_a64_entry_t *qla2x00_prep_cont_type1_iocb(scsi_qla_host_t *);
 static request_t *qla2x00_req_pkt(scsi_qla_host_t *ha);
+static void qla2x00_isp_cmd(scsi_qla_host_t *ha);
 
 /**
  * qla2x00_get_cmd_direction() - Determine control_flag data direction.
@@ -574,7 +575,7 @@ qla2x00_req_pkt(scsi_qla_host_t *ha)
  *
  * Note: The caller must hold the hardware lock before calling this routine.
  */
-void
+static void
 qla2x00_isp_cmd(scsi_qla_host_t *ha)
 {
 	device_reg_t __iomem *reg = ha->iobase;
-- 
cgit v0.10.2


From 2686de27a356914e098329463d5a8cd7aa6f29a4 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Fri, 30 Jun 2006 12:54:02 -0500
Subject: [SCSI] mptsas: eliminate ghost devices

One of the current problems the mptsas driver has is that of "ghost"
devices (these are devices the firmware reports as existing, but what
they actually represent are the parents of a lower device), so for
example in my dual expander configuration, three expanders actually show
up, two for the real expanders but a third is created because the
firmware reports that the lower expander also has another expander
connected (which is simply the port going back to the upper expander).

The attached patch eliminates all these ghosts by not allocating any
devices for them if the SAS address is the SAS address of the parent.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index 7bf0855..2697b70 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -632,6 +632,7 @@ typedef struct _MPT_ADAPTER
 	struct mutex		 sas_discovery_mutex;
 	u8			 sas_discovery_runtime;
 	u8			 sas_discovery_ignore_events;
+	u16			 handle;
 	int			 sas_index; /* index refrencing */
 	MPT_SAS_MGMT		 sas_mgmt;
 	int			 num_ports;
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 85eff15..f7bd8b1 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -1635,8 +1635,10 @@ static int mptsas_probe_one_phy(struct device *dev,
 	if (!mptsas_get_rphy(phy_info) && port && !port->rphy) {
 
 		struct sas_rphy *rphy;
+		struct device *parent;
 		struct sas_identify identify;
 
+		parent = dev->parent->parent;
 		/*
 		 * Let the hotplug_work thread handle processing
 		 * the adding/removing of devices that occur
@@ -1647,6 +1649,27 @@ static int mptsas_probe_one_phy(struct device *dev,
 				goto out;
 
 		mptsas_parse_device_info(&identify, &phy_info->attached);
+		if (scsi_is_host_device(parent)) {
+			struct mptsas_portinfo *port_info;
+			int i;
+
+			mutex_lock(&ioc->sas_topology_mutex);
+			port_info = mptsas_find_portinfo_by_handle(ioc,
+								   ioc->handle);
+			mutex_unlock(&ioc->sas_topology_mutex);
+
+			for (i = 0; i < port_info->num_phys; i++)
+				if (port_info->phy_info[i].identify.sas_address ==
+				    identify.sas_address)
+					goto out;
+
+		} else if (scsi_is_sas_rphy(parent)) {
+			struct sas_rphy *parent_rphy = dev_to_rphy(parent);
+			if (identify.sas_address ==
+			    parent_rphy->identify.sas_address)
+				goto out;
+		}
+
 		switch (identify.device_type) {
 		case SAS_END_DEVICE:
 			rphy = sas_end_device_alloc(port);
@@ -1698,6 +1721,7 @@ mptsas_probe_hba_phys(MPT_ADAPTER *ioc)
 		goto out_free_port_info;
 
 	mutex_lock(&ioc->sas_topology_mutex);
+	ioc->handle = hba->handle;
 	port_info = mptsas_find_portinfo_by_handle(ioc, hba->handle);
 	if (!port_info) {
 		port_info = hba;
-- 
cgit v0.10.2


From f58f8313a6f552d1e7889926a29f0cf91c02f55d Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Fri, 30 Jun 2006 02:33:14 -0700
Subject: [SCSI] qla1280: fix section mismatch warnings

Fix section mismatch warnings:
WARNING: drivers/scsi/qla1280.o - Section mismatch: reference to
.init.data: from .text between 'qla1280_get_token' (at offset 0x2a16)
and 'qla1280_probe_one'
WARNING: drivers/scsi/qla1280.o - Section mismatch: reference to
.init.data: from .text between 'qla1280_get_token' (at offset 0x2a3c)
and 'qla1280_probe_one'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index 680f606..f09d540 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -4210,7 +4210,7 @@ qla1280_setup(char *s)
 }
 
 
-static int
+static int __init
 qla1280_get_token(char *str)
 {
 	char *sep;
-- 
cgit v0.10.2


From 03d782524e2d0511317769521c8d5daadbab8482 Mon Sep 17 00:00:00 2001
From: Christian Lupien <lupien@physique.usherbrooke.ca>
Date: Thu, 19 Aug 2004 01:26:00 -0400
Subject: ACPI: handle AC notify event on broken BIOS

http://bugzilla.kernel.org/show_bug.cgi?id=3241

updated by Vladimir Lebedev

Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index 4537ae4..69a98da 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -194,6 +194,8 @@ static void acpi_ac_notify(acpi_handle handle, u32 event, void *data)
 	device = ac->device;
 	switch (event) {
 	case ACPI_AC_NOTIFY_STATUS:
+	case ACPI_NOTIFY_BUS_CHECK:
+	case ACPI_NOTIFY_DEVICE_CHECK:
 		acpi_ac_get_state(ac);
 		acpi_bus_generate_event(device, event, (u32) ac->state);
 		break;
@@ -235,7 +237,7 @@ static int acpi_ac_add(struct acpi_device *device)
 		goto end;
 
 	status = acpi_install_notify_handler(device->handle,
-					     ACPI_DEVICE_NOTIFY, acpi_ac_notify,
+					     ACPI_ALL_NOTIFY, acpi_ac_notify,
 					     ac);
 	if (ACPI_FAILURE(status)) {
 		result = -ENODEV;
@@ -267,7 +269,7 @@ static int acpi_ac_remove(struct acpi_device *device, int type)
 	ac = (struct acpi_ac *)acpi_driver_data(device);
 
 	status = acpi_remove_notify_handler(device->handle,
-					    ACPI_DEVICE_NOTIFY, acpi_ac_notify);
+					    ACPI_ALL_NOTIFY, acpi_ac_notify);
 
 	acpi_ac_remove_fs(device);
 
-- 
cgit v0.10.2


From 9fdae727645215d4dbb88912b9a176ef87911a05 Mon Sep 17 00:00:00 2001
From: Vladimir Lebedev <vladimir.p.lebedev@intel.com>
Date: Tue, 27 Jun 2006 04:49:00 -0400
Subject: ACPI: handle battery notify event on broken BIOS

http://bugzilla.kernel.org/show_bug.cgi?id=3241

Signed-off-by: Vladimir Lebedev <vladimir.p.lebedev@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 2b8aab5..a192d2b 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -665,6 +665,8 @@ static void acpi_battery_notify(acpi_handle handle, u32 event, void *data)
 	switch (event) {
 	case ACPI_BATTERY_NOTIFY_STATUS:
 	case ACPI_BATTERY_NOTIFY_INFO:
+	case ACPI_NOTIFY_BUS_CHECK:
+	case ACPI_NOTIFY_DEVICE_CHECK:
 		acpi_battery_check(battery);
 		acpi_bus_generate_event(device, event, battery->flags.present);
 		break;
@@ -706,7 +708,7 @@ static int acpi_battery_add(struct acpi_device *device)
 		goto end;
 
 	status = acpi_install_notify_handler(device->handle,
-					     ACPI_DEVICE_NOTIFY,
+					     ACPI_ALL_NOTIFY,
 					     acpi_battery_notify, battery);
 	if (ACPI_FAILURE(status)) {
 		result = -ENODEV;
@@ -738,7 +740,7 @@ static int acpi_battery_remove(struct acpi_device *device, int type)
 	battery = (struct acpi_battery *)acpi_driver_data(device);
 
 	status = acpi_remove_notify_handler(device->handle,
-					    ACPI_DEVICE_NOTIFY,
+					    ACPI_ALL_NOTIFY,
 					    acpi_battery_notify);
 
 	acpi_battery_remove_fs(device);
-- 
cgit v0.10.2


From 9becf5b91ec7b600a3cfea12724165aaaf4d4527 Mon Sep 17 00:00:00 2001
From: Karol Kozimor <sziwan@hell.org.pl>
Date: Fri, 30 Jun 2006 19:15:00 -0400
Subject: ACPI: asus_acpi: correct M6N/M6R display nodes

This patch switches back the display nodes for M6R and M6N -- this happened
a while ago when a patch was misapplied (only the in-tree version was
affected).

Signed-off-by: Karol Kozimor <sziwan@hell.org.pl>
Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/asus_acpi.c b/drivers/acpi/asus_acpi.c
index d1ff387..e9ee4c5 100644
--- a/drivers/acpi/asus_acpi.c
+++ b/drivers/acpi/asus_acpi.c
@@ -310,7 +310,8 @@ static struct model_data model_conf[END_MODEL] = {
 	 .brightness_set = "SPLV",
 	 .brightness_get = "GPLV",
 	 .display_set = "SDSP",
-	 .display_get = "\\_SB.PCI0.P0P1.VGA.GETD"},
+	 .display_get = "\\SSTE"},
+
 	{
 	 .name = "M6R",
 	 .mt_mled = "MLED",
@@ -320,7 +321,7 @@ static struct model_data model_conf[END_MODEL] = {
 	 .brightness_set = "SPLV",
 	 .brightness_get = "GPLV",
 	 .display_set = "SDSP",
-	 .display_get = "\\SSTE"},
+	 .display_get = "\\_SB.PCI0.P0P1.VGA.GETD"},
 
 	{
 	 .name = "P30",
-- 
cgit v0.10.2


From 3f86b83243d59bb50caf5938d284d22e10d082a4 Mon Sep 17 00:00:00 2001
From: Rich Townsend <rhdt@bartol.udel.edu>
Date: Sat, 1 Jul 2006 11:36:54 -0400
Subject: ACPI: add support for Smart Battery

Most batteries today are ACPI "Control Method" batteries,
but some models ship with the older "Smart Battery"
that requires this code.

Rich Townsend and Bruno Ducrot were the original authors.
Vladimir Lebedev updated to run on latest kernel.

http://bugzilla.kernel.org/show_bug.cgi?id=3734

Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index bc2652d..fef7bab 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -352,6 +352,18 @@ config ACPI_HOTPLUG_MEMORY
 	  If one selects "m," this driver can be loaded using the following
 	  command: 
 		$>modprobe acpi_memhotplug 
+
+config ACPI_SBS
+	tristate "Smart Battery System (EXPERIMENTAL)"
+	depends on X86 && I2C
+	depends on EXPERIMENTAL
+	default y
+	help
+	  This driver adds support for the Smart Battery System.
+	  Depends on I2C (Device Drivers ---> I2C support)
+	  A "Smart Battery" is quite old and quite rare compared
+	  to today's ACPI "Control Method" battery.
+
 endif	# ACPI
 
 endmenu
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index f0a68ec..bce7ca2 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -58,3 +58,5 @@ obj-$(CONFIG_ACPI_IBM)		+= ibm_acpi.o
 obj-$(CONFIG_ACPI_TOSHIBA)	+= toshiba_acpi.o
 obj-y				+= scan.o motherboard.o
 obj-$(CONFIG_ACPI_HOTPLUG_MEMORY)	+= acpi_memhotplug.o
+obj-y				+= cm_sbs.o
+obj-$(CONFIG_ACPI_SBS)		+= i2c_ec.o sbs.o
diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index 4537ae4..e0a1b15 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -50,6 +50,9 @@ ACPI_MODULE_NAME("acpi_ac")
 MODULE_DESCRIPTION(ACPI_AC_DRIVER_NAME);
 MODULE_LICENSE("GPL");
 
+extern struct proc_dir_entry *acpi_lock_ac_dir(void);
+extern void *acpi_unlock_ac_dir(struct proc_dir_entry *acpi_ac_dir);
+
 static int acpi_ac_add(struct acpi_device *device);
 static int acpi_ac_remove(struct acpi_device *device, int type);
 static int acpi_ac_open_fs(struct inode *inode, struct file *file);
@@ -278,17 +281,16 @@ static int acpi_ac_remove(struct acpi_device *device, int type)
 
 static int __init acpi_ac_init(void)
 {
-	int result = 0;
+	int result;
 
 
-	acpi_ac_dir = proc_mkdir(ACPI_AC_CLASS, acpi_root_dir);
+	acpi_ac_dir = acpi_lock_ac_dir();
 	if (!acpi_ac_dir)
 		return -ENODEV;
-	acpi_ac_dir->owner = THIS_MODULE;
 
 	result = acpi_bus_register_driver(&acpi_ac_driver);
 	if (result < 0) {
-		remove_proc_entry(ACPI_AC_CLASS, acpi_root_dir);
+		acpi_unlock_ac_dir(acpi_ac_dir);
 		return -ENODEV;
 	}
 
@@ -300,7 +302,7 @@ static void __exit acpi_ac_exit(void)
 
 	acpi_bus_unregister_driver(&acpi_ac_driver);
 
-	remove_proc_entry(ACPI_AC_CLASS, acpi_root_dir);
+	acpi_unlock_ac_dir(acpi_ac_dir);
 
 	return;
 }
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 2b8aab5..3ea79de 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -59,6 +59,9 @@ ACPI_MODULE_NAME("acpi_battery")
 MODULE_DESCRIPTION(ACPI_BATTERY_DRIVER_NAME);
 MODULE_LICENSE("GPL");
 
+extern struct proc_dir_entry *acpi_lock_battery_dir(void);
+extern void *acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir);
+
 static int acpi_battery_add(struct acpi_device *device);
 static int acpi_battery_remove(struct acpi_device *device, int type);
 
@@ -750,17 +753,15 @@ static int acpi_battery_remove(struct acpi_device *device, int type)
 
 static int __init acpi_battery_init(void)
 {
-	int result = 0;
-
+	int result;
 
-	acpi_battery_dir = proc_mkdir(ACPI_BATTERY_CLASS, acpi_root_dir);
+	acpi_battery_dir = acpi_lock_battery_dir();
 	if (!acpi_battery_dir)
 		return -ENODEV;
-	acpi_battery_dir->owner = THIS_MODULE;
 
 	result = acpi_bus_register_driver(&acpi_battery_driver);
 	if (result < 0) {
-		remove_proc_entry(ACPI_BATTERY_CLASS, acpi_root_dir);
+		acpi_unlock_battery_dir(acpi_battery_dir);
 		return -ENODEV;
 	}
 
@@ -772,7 +773,7 @@ static void __exit acpi_battery_exit(void)
 
 	acpi_bus_unregister_driver(&acpi_battery_driver);
 
-	remove_proc_entry(ACPI_BATTERY_CLASS, acpi_root_dir);
+	acpi_unlock_battery_dir(acpi_battery_dir);
 
 	return;
 }
diff --git a/drivers/acpi/cm_sbs.c b/drivers/acpi/cm_sbs.c
new file mode 100644
index 0000000..d11507c
--- /dev/null
+++ b/drivers/acpi/cm_sbs.c
@@ -0,0 +1,135 @@
+/*
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+#include <acpi/acmacros.h>
+#include <acpi/actypes.h>
+#include <acpi/acutils.h>
+
+ACPI_MODULE_NAME("cm_sbs")
+#define ACPI_AC_CLASS		"ac_adapter"
+#define ACPI_BATTERY_CLASS	"battery"
+#define ACPI_SBS_COMPONENT	0x00080000
+#define _COMPONENT		ACPI_SBS_COMPONENT
+static struct proc_dir_entry *acpi_ac_dir;
+static struct proc_dir_entry *acpi_battery_dir;
+
+static struct semaphore cm_sbs_sem;
+
+static int lock_ac_dir_cnt = 0;
+static int lock_battery_dir_cnt = 0;
+
+struct proc_dir_entry *acpi_lock_ac_dir(void)
+{
+	ACPI_FUNCTION_TRACE("acpi_lock_ac_dir");
+
+	down(&cm_sbs_sem);
+	if (!acpi_ac_dir) {
+		acpi_ac_dir = proc_mkdir(ACPI_AC_CLASS, acpi_root_dir);
+	}
+	if (acpi_ac_dir) {
+		lock_ac_dir_cnt++;
+	} else {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "Cannot create %s\n", ACPI_AC_CLASS));
+	}
+	up(&cm_sbs_sem);
+	return (acpi_ac_dir);
+}
+
+EXPORT_SYMBOL(acpi_lock_ac_dir);
+
+void acpi_unlock_ac_dir(struct proc_dir_entry *acpi_ac_dir_param)
+{
+	ACPI_FUNCTION_TRACE("acpi_unlock_ac_dir");
+
+	down(&cm_sbs_sem);
+	if (acpi_ac_dir_param) {
+		lock_ac_dir_cnt--;
+	}
+	if (lock_ac_dir_cnt == 0 && acpi_ac_dir_param && acpi_ac_dir) {
+		remove_proc_entry(ACPI_AC_CLASS, acpi_root_dir);
+		acpi_ac_dir = 0;
+	}
+	up(&cm_sbs_sem);
+}
+
+EXPORT_SYMBOL(acpi_unlock_ac_dir);
+
+struct proc_dir_entry *acpi_lock_battery_dir(void)
+{
+	ACPI_FUNCTION_TRACE("acpi_lock_battery_dir");
+
+	down(&cm_sbs_sem);
+	if (!acpi_battery_dir) {
+		acpi_battery_dir =
+		    proc_mkdir(ACPI_BATTERY_CLASS, acpi_root_dir);
+	}
+	if (acpi_battery_dir) {
+		lock_battery_dir_cnt++;
+	} else {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "Cannot create %s\n", ACPI_BATTERY_CLASS));
+	}
+	up(&cm_sbs_sem);
+	return (acpi_battery_dir);
+}
+
+EXPORT_SYMBOL(acpi_lock_battery_dir);
+
+void acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir_param)
+{
+	ACPI_FUNCTION_TRACE("acpi_unlock_battery_dir");
+
+	down(&cm_sbs_sem);
+	if (acpi_battery_dir_param) {
+		lock_battery_dir_cnt--;
+	}
+	if (lock_battery_dir_cnt == 0 && acpi_battery_dir_param
+	    && acpi_battery_dir) {
+		remove_proc_entry(ACPI_BATTERY_CLASS, acpi_root_dir);
+		acpi_battery_dir = 0;
+	}
+	up(&cm_sbs_sem);
+}
+
+EXPORT_SYMBOL(acpi_unlock_battery_dir);
+
+static int __init acpi_cm_sbs_init(void)
+{
+	ACPI_FUNCTION_TRACE("acpi_cm_sbs_init");
+
+	if (acpi_disabled)
+		return_VALUE(0);
+
+	init_MUTEX(&cm_sbs_sem);
+
+	return_VALUE(0);
+}
+
+subsys_initcall(acpi_cm_sbs_init);
diff --git a/drivers/acpi/i2c_ec.c b/drivers/acpi/i2c_ec.c
new file mode 100644
index 0000000..72478a6
--- /dev/null
+++ b/drivers/acpi/i2c_ec.c
@@ -0,0 +1,420 @@
+/*
+ * SMBus driver for ACPI Embedded Controller ($Revision: 1.3 $)
+ *
+ * Copyright (c) 2002, 2005 Ducrot Bruno
+ * Copyright (c) 2005 Rich Townsend (tiny hacks & tweaks)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation version 2.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/acpi.h>
+#include <linux/delay.h>
+
+#include "i2c_ec.h"
+
+#define	xudelay(t)	udelay(t)
+#define	xmsleep(t)	msleep(t)
+
+#define ACPI_EC_HC_COMPONENT	0x00080000
+#define ACPI_EC_HC_CLASS	"ec_hc_smbus"
+#define ACPI_EC_HC_HID		"ACPI0001"
+#define ACPI_EC_HC_DRIVER_NAME	"ACPI EC HC smbus driver"
+#define ACPI_EC_HC_DEVICE_NAME	"EC HC smbus"
+
+#define _COMPONENT		ACPI_EC_HC_COMPONENT
+
+ACPI_MODULE_NAME("acpi_smbus")
+
+static int acpi_ec_hc_add(struct acpi_device *device);
+static int acpi_ec_hc_remove(struct acpi_device *device, int type);
+
+static struct acpi_driver acpi_ec_hc_driver = {
+	.name = ACPI_EC_HC_DRIVER_NAME,
+	.class = ACPI_EC_HC_CLASS,
+	.ids = ACPI_EC_HC_HID,
+	.ops = {
+		.add = acpi_ec_hc_add,
+		.remove = acpi_ec_hc_remove,
+		},
+};
+
+/* Various bit mask for EC_SC (R) */
+#define OBF		0x01
+#define IBF		0x02
+#define CMD		0x08
+#define BURST		0x10
+#define SCI_EVT		0x20
+#define SMI_EVT		0x40
+
+/* Commands for EC_SC (W) */
+#define RD_EC		0x80
+#define WR_EC		0x81
+#define BE_EC		0x82
+#define BD_EC		0x83
+#define QR_EC		0x84
+
+/*
+ * ACPI 2.0 chapter 13 SMBus 2.0 EC register model
+ */
+
+#define ACPI_EC_SMB_PRTCL	0x00	/* protocol, PEC */
+#define ACPI_EC_SMB_STS		0x01	/* status */
+#define ACPI_EC_SMB_ADDR	0x02	/* address */
+#define ACPI_EC_SMB_CMD		0x03	/* command */
+#define ACPI_EC_SMB_DATA	0x04	/* 32 data registers */
+#define ACPI_EC_SMB_BCNT	0x24	/* number of data bytes */
+#define ACPI_EC_SMB_ALRM_A	0x25	/* alarm address */
+#define ACPI_EC_SMB_ALRM_D	0x26	/* 2 bytes alarm data */
+
+#define ACPI_EC_SMB_STS_DONE	0x80
+#define ACPI_EC_SMB_STS_ALRM	0x40
+#define ACPI_EC_SMB_STS_RES	0x20
+#define ACPI_EC_SMB_STS_STATUS	0x1f
+
+#define ACPI_EC_SMB_STATUS_OK		0x00
+#define ACPI_EC_SMB_STATUS_FAIL		0x07
+#define ACPI_EC_SMB_STATUS_DNAK		0x10
+#define ACPI_EC_SMB_STATUS_DERR		0x11
+#define ACPI_EC_SMB_STATUS_CMD_DENY	0x12
+#define ACPI_EC_SMB_STATUS_UNKNOWN	0x13
+#define ACPI_EC_SMB_STATUS_ACC_DENY	0x17
+#define ACPI_EC_SMB_STATUS_TIMEOUT	0x18
+#define ACPI_EC_SMB_STATUS_NOTSUP	0x19
+#define ACPI_EC_SMB_STATUS_BUSY		0x1A
+#define ACPI_EC_SMB_STATUS_PEC		0x1F
+
+#define ACPI_EC_SMB_PRTCL_WRITE			0x00
+#define ACPI_EC_SMB_PRTCL_READ			0x01
+#define ACPI_EC_SMB_PRTCL_QUICK			0x02
+#define ACPI_EC_SMB_PRTCL_BYTE			0x04
+#define ACPI_EC_SMB_PRTCL_BYTE_DATA		0x06
+#define ACPI_EC_SMB_PRTCL_WORD_DATA		0x08
+#define ACPI_EC_SMB_PRTCL_BLOCK_DATA		0x0a
+#define ACPI_EC_SMB_PRTCL_PROC_CALL		0x0c
+#define ACPI_EC_SMB_PRTCL_BLOCK_PROC_CALL	0x0d
+#define ACPI_EC_SMB_PRTCL_I2C_BLOCK_DATA	0x4a
+#define ACPI_EC_SMB_PRTCL_PEC			0x80
+
+/* Length of pre/post transaction sleep (msec) */
+#define ACPI_EC_SMB_TRANSACTION_SLEEP		1
+#define ACPI_EC_SMB_ACCESS_SLEEP1		1
+#define ACPI_EC_SMB_ACCESS_SLEEP2		10
+
+static int acpi_ec_smb_read(struct acpi_ec_smbus *smbus, u8 address, u8 * data)
+{
+	u8 val;
+	int err;
+
+	ACPI_FUNCTION_TRACE("acpi_ec_smb_read");
+
+	err = ec_read(smbus->base + address, &val);
+	if (!err) {
+		*data = val;
+	}
+	xmsleep(ACPI_EC_SMB_TRANSACTION_SLEEP);
+	return (err);
+}
+
+static int acpi_ec_smb_write(struct acpi_ec_smbus *smbus, u8 address, u8 data)
+{
+	int err;
+
+	ACPI_FUNCTION_TRACE("acpi_ec_smb_write");
+
+	err = ec_write(smbus->base + address, data);
+	return (err);
+}
+
+static int
+acpi_ec_smb_access(struct i2c_adapter *adap, u16 addr, unsigned short flags,
+		   char read_write, u8 command, int size,
+		   union i2c_smbus_data *data)
+{
+	struct acpi_ec_smbus *smbus = adap->algo_data;
+	unsigned char protocol, len = 0, pec, temp[2] = { 0, 0 };
+	int i;
+
+	ACPI_FUNCTION_TRACE("acpi_ec_smb_access");
+
+	if (read_write == I2C_SMBUS_READ) {
+		protocol = ACPI_EC_SMB_PRTCL_READ;
+	} else {
+		protocol = ACPI_EC_SMB_PRTCL_WRITE;
+	}
+	pec = (flags & I2C_CLIENT_PEC) ? ACPI_EC_SMB_PRTCL_PEC : 0;
+
+	switch (size) {
+
+	case I2C_SMBUS_QUICK:
+		protocol |= ACPI_EC_SMB_PRTCL_QUICK;
+		read_write = I2C_SMBUS_WRITE;
+		break;
+
+	case I2C_SMBUS_BYTE:
+		if (read_write == I2C_SMBUS_WRITE) {
+			acpi_ec_smb_write(smbus, ACPI_EC_SMB_DATA, data->byte);
+		}
+		protocol |= ACPI_EC_SMB_PRTCL_BYTE;
+		break;
+
+	case I2C_SMBUS_BYTE_DATA:
+		acpi_ec_smb_write(smbus, ACPI_EC_SMB_CMD, command);
+		if (read_write == I2C_SMBUS_WRITE) {
+			acpi_ec_smb_write(smbus, ACPI_EC_SMB_DATA, data->byte);
+		}
+		protocol |= ACPI_EC_SMB_PRTCL_BYTE_DATA;
+		break;
+
+	case I2C_SMBUS_WORD_DATA:
+		acpi_ec_smb_write(smbus, ACPI_EC_SMB_CMD, command);
+		if (read_write == I2C_SMBUS_WRITE) {
+			acpi_ec_smb_write(smbus, ACPI_EC_SMB_DATA, data->word);
+			acpi_ec_smb_write(smbus, ACPI_EC_SMB_DATA + 1,
+					  data->word >> 8);
+		}
+		protocol |= ACPI_EC_SMB_PRTCL_WORD_DATA | pec;
+		break;
+
+	case I2C_SMBUS_BLOCK_DATA:
+		acpi_ec_smb_write(smbus, ACPI_EC_SMB_CMD, command);
+		if (read_write == I2C_SMBUS_WRITE) {
+			len = min_t(u8, data->block[0], 32);
+			acpi_ec_smb_write(smbus, ACPI_EC_SMB_BCNT, len);
+			for (i = 0; i < len; i++)
+				acpi_ec_smb_write(smbus, ACPI_EC_SMB_DATA + i,
+						  data->block[i + 1]);
+		}
+		protocol |= ACPI_EC_SMB_PRTCL_BLOCK_DATA | pec;
+		break;
+
+	case I2C_SMBUS_I2C_BLOCK_DATA:
+		len = min_t(u8, data->block[0], 32);
+		acpi_ec_smb_write(smbus, ACPI_EC_SMB_CMD, command);
+		acpi_ec_smb_write(smbus, ACPI_EC_SMB_BCNT, len);
+		if (read_write == I2C_SMBUS_WRITE) {
+			for (i = 0; i < len; i++) {
+				acpi_ec_smb_write(smbus, ACPI_EC_SMB_DATA + i,
+						  data->block[i + 1]);
+			}
+		}
+		protocol |= ACPI_EC_SMB_PRTCL_I2C_BLOCK_DATA;
+		break;
+
+	case I2C_SMBUS_PROC_CALL:
+		acpi_ec_smb_write(smbus, ACPI_EC_SMB_CMD, command);
+		acpi_ec_smb_write(smbus, ACPI_EC_SMB_DATA, data->word);
+		acpi_ec_smb_write(smbus, ACPI_EC_SMB_DATA + 1, data->word >> 8);
+		protocol = ACPI_EC_SMB_PRTCL_PROC_CALL | pec;
+		read_write = I2C_SMBUS_READ;
+		break;
+
+	case I2C_SMBUS_BLOCK_PROC_CALL:
+		protocol |= pec;
+		len = min_t(u8, data->block[0], 31);
+		acpi_ec_smb_write(smbus, ACPI_EC_SMB_CMD, command);
+		acpi_ec_smb_write(smbus, ACPI_EC_SMB_BCNT, len);
+		for (i = 0; i < len; i++)
+			acpi_ec_smb_write(smbus, ACPI_EC_SMB_DATA + i,
+					  data->block[i + 1]);
+		protocol = ACPI_EC_SMB_PRTCL_BLOCK_PROC_CALL | pec;
+		read_write = I2C_SMBUS_READ;
+		break;
+
+	default:
+		ACPI_DEBUG_PRINT((ACPI_DB_WARN, "EC SMBus adapter: "
+				  "Unsupported transaction %d\n", size));
+		return (-1);
+	}
+
+	acpi_ec_smb_write(smbus, ACPI_EC_SMB_ADDR, addr << 1);
+	acpi_ec_smb_write(smbus, ACPI_EC_SMB_PRTCL, protocol);
+
+	acpi_ec_smb_read(smbus, ACPI_EC_SMB_STS, temp + 0);
+
+	if (~temp[0] & ACPI_EC_SMB_STS_DONE) {
+		xudelay(500);
+		acpi_ec_smb_read(smbus, ACPI_EC_SMB_STS, temp + 0);
+	}
+	if (~temp[0] & ACPI_EC_SMB_STS_DONE) {
+		xmsleep(ACPI_EC_SMB_ACCESS_SLEEP2);
+		acpi_ec_smb_read(smbus, ACPI_EC_SMB_STS, temp + 0);
+	}
+	if ((~temp[0] & ACPI_EC_SMB_STS_DONE)
+	    || (temp[0] & ACPI_EC_SMB_STS_STATUS)) {
+		return (-1);
+	}
+
+	if (read_write == I2C_SMBUS_WRITE) {
+		return (0);
+	}
+
+	switch (size) {
+
+	case I2C_SMBUS_BYTE:
+	case I2C_SMBUS_BYTE_DATA:
+		acpi_ec_smb_read(smbus, ACPI_EC_SMB_DATA, &data->byte);
+		break;
+
+	case I2C_SMBUS_WORD_DATA:
+	case I2C_SMBUS_PROC_CALL:
+		acpi_ec_smb_read(smbus, ACPI_EC_SMB_DATA, temp + 0);
+		acpi_ec_smb_read(smbus, ACPI_EC_SMB_DATA + 1, temp + 1);
+		data->word = (temp[1] << 8) | temp[0];
+		break;
+
+	case I2C_SMBUS_BLOCK_DATA:
+	case I2C_SMBUS_BLOCK_PROC_CALL:
+		len = 0;
+		acpi_ec_smb_read(smbus, ACPI_EC_SMB_BCNT, &len);
+		len = min_t(u8, len, 32);
+	case I2C_SMBUS_I2C_BLOCK_DATA:
+		for (i = 0; i < len; i++)
+			acpi_ec_smb_read(smbus, ACPI_EC_SMB_DATA + i,
+					 data->block + i + 1);
+		data->block[0] = len;
+		break;
+	}
+
+	return (0);
+}
+
+static u32 acpi_ec_smb_func(struct i2c_adapter *adapter)
+{
+	ACPI_FUNCTION_TRACE("acpi_ec_smb_func");
+
+	return (I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
+		I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
+		I2C_FUNC_SMBUS_BLOCK_DATA |
+		I2C_FUNC_SMBUS_PROC_CALL |
+		I2C_FUNC_SMBUS_BLOCK_PROC_CALL |
+		I2C_FUNC_SMBUS_I2C_BLOCK | I2C_FUNC_SMBUS_HWPEC_CALC);
+}
+
+static struct i2c_algorithm acpi_ec_smbus_algorithm = {
+	.smbus_xfer = acpi_ec_smb_access,
+	.functionality = acpi_ec_smb_func,
+};
+
+static int acpi_ec_hc_add(struct acpi_device *device)
+{
+	int status;
+	unsigned long val;
+	struct acpi_ec_hc *ec_hc;
+	struct acpi_ec_smbus *smbus;
+
+	ACPI_FUNCTION_TRACE("acpi_ec_hc_add");
+
+	if (!device) {
+		return_VALUE(-EINVAL);
+	}
+
+	ec_hc = kmalloc(sizeof(struct acpi_ec_hc), GFP_KERNEL);
+	if (!ec_hc) {
+		return_VALUE(-ENOMEM);
+	}
+	memset(ec_hc, 0, sizeof(struct acpi_ec_hc));
+
+	smbus = kmalloc(sizeof(struct acpi_ec_smbus), GFP_KERNEL);
+	if (!smbus) {
+		kfree(ec_hc);
+		return_VALUE(-ENOMEM);
+	}
+	memset(smbus, 0, sizeof(struct acpi_ec_smbus));
+
+	ec_hc->handle = device->handle;
+	strcpy(acpi_device_name(device), ACPI_EC_HC_DEVICE_NAME);
+	strcpy(acpi_device_class(device), ACPI_EC_HC_CLASS);
+	acpi_driver_data(device) = ec_hc;
+
+	status = acpi_evaluate_integer(ec_hc->handle, "_EC", NULL, &val);
+	if (ACPI_FAILURE(status)) {
+		ACPI_DEBUG_PRINT((ACPI_DB_WARN, "Error obtaining _EC\n"));
+		kfree(ec_hc->smbus);
+		kfree(smbus);
+		return_VALUE(-EIO);
+	}
+
+	smbus->ec = acpi_driver_data(device->parent);
+	smbus->base = (val & 0xff00ull) >> 8;
+	smbus->alert = val & 0xffull;
+
+	smbus->adapter.owner = THIS_MODULE;
+	smbus->adapter.algo = &acpi_ec_smbus_algorithm;
+	smbus->adapter.algo_data = smbus;
+
+	if (i2c_add_adapter(&smbus->adapter)) {
+		ACPI_DEBUG_PRINT((ACPI_DB_WARN,
+				  "EC SMBus adapter: Failed to register adapter\n"));
+		kfree(smbus);
+		kfree(ec_hc);
+		return_VALUE(-EIO);
+	}
+
+	ec_hc->smbus = smbus;
+
+	printk(KERN_INFO PREFIX "%s [%s]\n",
+	       acpi_device_name(device), acpi_device_bid(device));
+
+	return_VALUE(AE_OK);
+}
+
+static int acpi_ec_hc_remove(struct acpi_device *device, int type)
+{
+	struct acpi_ec_hc *ec_hc;
+
+	ACPI_FUNCTION_TRACE("acpi_ec_hc_remove");
+
+	if (!device) {
+		return_VALUE(-EINVAL);
+	}
+	ec_hc = acpi_driver_data(device);
+
+	i2c_del_adapter(&ec_hc->smbus->adapter);
+	kfree(ec_hc->smbus);
+	kfree(ec_hc);
+
+	return_VALUE(AE_OK);
+}
+
+static int __init acpi_ec_hc_init(void)
+{
+	int result;
+
+	ACPI_FUNCTION_TRACE("acpi_ec_hc_init");
+	result = acpi_bus_register_driver(&acpi_ec_hc_driver);
+	if (result < 0) {
+		return_VALUE(-ENODEV);
+	}
+	return_VALUE(0);
+}
+
+static void __exit acpi_ec_hc_exit(void)
+{
+	ACPI_FUNCTION_TRACE("acpi_ec_hc_exit");
+	acpi_bus_unregister_driver(&acpi_ec_hc_driver);
+}
+
+struct acpi_ec_hc *acpi_get_ec_hc(struct acpi_device *device)
+{
+	ACPI_FUNCTION_TRACE("acpi_get_ec_hc");
+	return ((struct acpi_ec_hc *)acpi_driver_data(device->parent));
+}
+
+EXPORT_SYMBOL(acpi_get_ec_hc);
+
+module_init(acpi_ec_hc_init);
+module_exit(acpi_ec_hc_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ducrot Bruno");
+MODULE_DESCRIPTION("ACPI EC SMBus driver");
diff --git a/drivers/acpi/i2c_ec.h b/drivers/acpi/i2c_ec.h
new file mode 100644
index 0000000..7c53fb7
--- /dev/null
+++ b/drivers/acpi/i2c_ec.h
@@ -0,0 +1,23 @@
+/*
+ * SMBus driver for ACPI Embedded Controller ($Revision: 1.2 $)
+ *
+ * Copyright (c) 2002, 2005 Ducrot Bruno
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation version 2.
+ */
+
+struct acpi_ec_smbus {
+	struct i2c_adapter adapter;
+	union acpi_ec *ec;
+	int base;
+	int alert;
+};
+
+struct acpi_ec_hc {
+	acpi_handle handle;
+	struct acpi_ec_smbus *smbus;
+};
+
+struct acpi_ec_hc *acpi_get_ec_hc(struct acpi_device *device);
diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
new file mode 100644
index 0000000..8bebceb
--- /dev/null
+++ b/drivers/acpi/sbs.c
@@ -0,0 +1,1828 @@
+/*
+ *  acpi_sbs.c - ACPI Smart Battery System Driver ($Revision: 1.16 $)
+ *
+ *  Copyright (c) 2005 Rich Townsend <rhdt@bartol.udel.edu>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <asm/uaccess.h>
+#include <linux/acpi.h>
+#include <linux/i2c.h>
+#include <linux/delay.h>
+
+#include "i2c_ec.h"
+
+#define	DEF_CAPACITY_UNIT	3
+#define	MAH_CAPACITY_UNIT	1
+#define	MWH_CAPACITY_UNIT	2
+#define	CAPACITY_UNIT		DEF_CAPACITY_UNIT
+
+#define	REQUEST_UPDATE_MODE	1
+#define	QUEUE_UPDATE_MODE	2
+
+#define	DATA_TYPE_COMMON	0
+#define	DATA_TYPE_INFO		1
+#define	DATA_TYPE_STATE		2
+#define	DATA_TYPE_ALARM		3
+#define	DATA_TYPE_AC_STATE	4
+
+extern struct proc_dir_entry *acpi_lock_ac_dir(void);
+extern struct proc_dir_entry *acpi_lock_battery_dir(void);
+extern void acpi_unlock_ac_dir(struct proc_dir_entry *acpi_ac_dir);
+extern void acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir);
+
+#define ACPI_SBS_COMPONENT		0x00080000
+#define ACPI_SBS_CLASS			"sbs"
+#define ACPI_AC_CLASS			"ac_adapter"
+#define ACPI_BATTERY_CLASS		"battery"
+#define ACPI_SBS_HID			"ACPI0002"
+#define ACPI_SBS_DRIVER_NAME		"ACPI Smart Battery System Driver"
+#define ACPI_SBS_DEVICE_NAME		"Smart Battery System"
+#define ACPI_SBS_FILE_INFO		"info"
+#define ACPI_SBS_FILE_STATE		"state"
+#define ACPI_SBS_FILE_ALARM		"alarm"
+#define ACPI_BATTERY_DIR_NAME		"BAT%i"
+#define ACPI_AC_DIR_NAME		"AC0"
+#define ACPI_SBC_SMBUS_ADDR		0x9
+#define ACPI_SBSM_SMBUS_ADDR		0xa
+#define ACPI_SB_SMBUS_ADDR		0xb
+#define ACPI_SBS_AC_NOTIFY_STATUS	0x80
+#define ACPI_SBS_BATTERY_NOTIFY_STATUS	0x80
+#define ACPI_SBS_BATTERY_NOTIFY_INFO	0x81
+
+#define _COMPONENT			ACPI_SBS_COMPONENT
+
+#define	MAX_SBS_BAT			4
+#define	MAX_SMBUS_ERR			1
+
+ACPI_MODULE_NAME("acpi_sbs");
+
+MODULE_AUTHOR("Rich Townsend");
+MODULE_DESCRIPTION("Smart Battery System ACPI interface driver");
+MODULE_LICENSE("GPL");
+
+static struct semaphore sbs_sem;
+
+#define	UPDATE_MODE		QUEUE_UPDATE_MODE
+/* REQUEST_UPDATE_MODE  QUEUE_UPDATE_MODE */
+#define	UPDATE_INFO_MODE	0
+#define	UPDATE_TIME		60
+#define	UPDATE_TIME2		0
+
+static int capacity_mode = CAPACITY_UNIT;
+static int update_mode = UPDATE_MODE;
+static int update_info_mode = UPDATE_INFO_MODE;
+static int update_time = UPDATE_TIME;
+static int update_time2 = UPDATE_TIME2;
+
+module_param(capacity_mode, int, CAPACITY_UNIT);
+module_param(update_mode, int, UPDATE_MODE);
+module_param(update_info_mode, int, UPDATE_INFO_MODE);
+module_param(update_time, int, UPDATE_TIME);
+module_param(update_time2, int, UPDATE_TIME2);
+
+static int acpi_sbs_add(struct acpi_device *device);
+static int acpi_sbs_remove(struct acpi_device *device, int type);
+static void acpi_battery_smbus_err_handler(struct acpi_ec_smbus *smbus);
+static void acpi_sbs_update_queue(void *data);
+
+static struct acpi_driver acpi_sbs_driver = {
+	.name = ACPI_SBS_DRIVER_NAME,
+	.class = ACPI_SBS_CLASS,
+	.ids = ACPI_SBS_HID,
+	.ops = {
+		.add = acpi_sbs_add,
+		.remove = acpi_sbs_remove,
+		},
+};
+
+struct acpi_battery_info {
+	int capacity_mode;
+	s16 full_charge_capacity;
+	s16 design_capacity;
+	s16 design_voltage;
+	int vscale;
+	int ipscale;
+	s16 serial_number;
+	char manufacturer_name[I2C_SMBUS_BLOCK_MAX + 3];
+	char device_name[I2C_SMBUS_BLOCK_MAX + 3];
+	char device_chemistry[I2C_SMBUS_BLOCK_MAX + 3];
+};
+
+struct acpi_battery_state {
+	s16 voltage;
+	s16 amperage;
+	s16 remaining_capacity;
+	s16 average_time_to_empty;
+	s16 average_time_to_full;
+	s16 battery_status;
+};
+
+struct acpi_battery_alarm {
+	s16 remaining_capacity;
+};
+
+struct acpi_battery {
+	int alive;
+	int battery_present;
+	int id;
+	int init_state;
+	struct acpi_sbs *sbs;
+	struct acpi_battery_info info;
+	struct acpi_battery_state state;
+	struct acpi_battery_alarm alarm;
+	struct proc_dir_entry *battery_entry;
+};
+
+struct acpi_sbs {
+	acpi_handle handle;
+	struct acpi_device *device;
+	struct acpi_ec_smbus *smbus;
+	int sbsm_present;
+	int sbsm_batteries_supported;
+	int ac_present;
+	struct proc_dir_entry *ac_entry;
+	struct acpi_battery battery[MAX_SBS_BAT];
+	int update_info_mode;
+	int zombie;
+	int update_time;
+	int update_time2;
+	struct timer_list update_timer;
+};
+
+static void acpi_update_delay(struct acpi_sbs *sbs);
+static int acpi_sbs_update_run(struct acpi_sbs *sbs, int data_type);
+
+/* --------------------------------------------------------------------------
+                               SMBus Communication
+   -------------------------------------------------------------------------- */
+
+static void acpi_battery_smbus_err_handler(struct acpi_ec_smbus *smbus)
+{
+	union i2c_smbus_data data;
+	int result = 0;
+	char *err_str;
+	int err_number;
+
+	ACPI_FUNCTION_TRACE("acpi_battery_smbus_err_handler");
+
+	data.word = 0;
+
+	result = smbus->adapter.algo->
+	    smbus_xfer(&smbus->adapter,
+		       ACPI_SB_SMBUS_ADDR,
+		       0, I2C_SMBUS_READ, 0x16, I2C_SMBUS_BLOCK_DATA, &data);
+
+	err_number = (data.word & 0x000f);
+
+	switch (data.word & 0x000f) {
+	case 0x0000:
+		err_str = "unexpected bus error";
+		break;
+	case 0x0001:
+		err_str = "busy";
+		break;
+	case 0x0002:
+		err_str = "reserved command";
+		break;
+	case 0x0003:
+		err_str = "unsupported command";
+		break;
+	case 0x0004:
+		err_str = "access denied";
+		break;
+	case 0x0005:
+		err_str = "overflow/underflow";
+		break;
+	case 0x0006:
+		err_str = "bad size";
+		break;
+	case 0x0007:
+		err_str = "unknown error";
+		break;
+	default:
+		err_str = "unrecognized error";
+	}
+	ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+			  "%s: ret %i, err %i\n", err_str, result, err_number));
+}
+
+static int
+acpi_sbs_smbus_read_word(struct acpi_ec_smbus *smbus, int addr, int func,
+			 u16 * word,
+			 void (*err_handler) (struct acpi_ec_smbus * smbus))
+{
+	union i2c_smbus_data data;
+	int result = 0;
+	int i;
+
+	ACPI_FUNCTION_TRACE("acpi_sbs_smbus_read_word");
+
+	if (err_handler == NULL) {
+		err_handler = acpi_battery_smbus_err_handler;
+	}
+
+	for (i = 0; i < MAX_SMBUS_ERR; i++) {
+		result =
+		    smbus->adapter.algo->smbus_xfer(&smbus->adapter, addr, 0,
+						    I2C_SMBUS_READ, func,
+						    I2C_SMBUS_WORD_DATA, &data);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "try %i: smbus->adapter.algo->smbus_xfer() failed\n",
+					  i));
+			if (err_handler) {
+				err_handler(smbus);
+			}
+		} else {
+			*word = data.word;
+			break;
+		}
+	}
+
+	return_VALUE(result);
+}
+
+static int
+acpi_sbs_smbus_read_str(struct acpi_ec_smbus *smbus, int addr, int func,
+			char *str,
+			void (*err_handler) (struct acpi_ec_smbus * smbus))
+{
+	union i2c_smbus_data data;
+	int result = 0;
+	int i;
+
+	ACPI_FUNCTION_TRACE("acpi_sbs_smbus_read_str");
+
+	if (err_handler == NULL) {
+		err_handler = acpi_battery_smbus_err_handler;
+	}
+
+	for (i = 0; i < MAX_SMBUS_ERR; i++) {
+		result =
+		    smbus->adapter.algo->smbus_xfer(&smbus->adapter, addr, 0,
+						    I2C_SMBUS_READ, func,
+						    I2C_SMBUS_BLOCK_DATA,
+						    &data);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "try %i: smbus->adapter.algo->smbus_xfer() failed\n",
+					  i));
+			if (err_handler) {
+				err_handler(smbus);
+			}
+		} else {
+			strncpy(str, (const char *)data.block + 1,
+				data.block[0]);
+			str[data.block[0]] = 0;
+			break;
+		}
+	}
+
+	return_VALUE(result);
+}
+
+static int
+acpi_sbs_smbus_write_word(struct acpi_ec_smbus *smbus, int addr, int func,
+			  int word,
+			  void (*err_handler) (struct acpi_ec_smbus * smbus))
+{
+	union i2c_smbus_data data;
+	int result = 0;
+	int i;
+
+	ACPI_FUNCTION_TRACE("acpi_sbs_smbus_write_word");
+
+	if (err_handler == NULL) {
+		err_handler = acpi_battery_smbus_err_handler;
+	}
+
+	data.word = word;
+
+	for (i = 0; i < MAX_SMBUS_ERR; i++) {
+		result =
+		    smbus->adapter.algo->smbus_xfer(&smbus->adapter, addr, 0,
+						    I2C_SMBUS_WRITE, func,
+						    I2C_SMBUS_WORD_DATA, &data);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "try %i: smbus->adapter.algo"
+					  "->smbus_xfer() failed\n", i));
+			if (err_handler) {
+				err_handler(smbus);
+			}
+		} else {
+			break;
+		}
+	}
+
+	return_VALUE(result);
+}
+
+/* --------------------------------------------------------------------------
+                            Smart Battery System Management
+   -------------------------------------------------------------------------- */
+
+/* Smart Battery */
+
+static int acpi_sbs_generate_event(struct acpi_device *device,
+				   int event, int state, char *bid, char *class)
+{
+	char bid_saved[5];
+	char class_saved[20];
+	int result = 0;
+
+	ACPI_FUNCTION_TRACE("acpi_sbs_generate_event");
+
+	strcpy(bid_saved, acpi_device_bid(device));
+	strcpy(class_saved, acpi_device_class(device));
+
+	strcpy(acpi_device_bid(device), bid);
+	strcpy(acpi_device_class(device), class);
+
+	result = acpi_bus_generate_event(device, event, state);
+
+	strcpy(acpi_device_bid(device), bid_saved);
+	strcpy(acpi_device_class(device), class_saved);
+
+	return_VALUE(result);
+}
+
+static int acpi_battery_get_present(struct acpi_battery *battery)
+{
+	s16 state;
+	int result = 0;
+	int is_present = 0;
+
+	ACPI_FUNCTION_TRACE("acpi_battery_get_present");
+
+	result = acpi_sbs_smbus_read_word(battery->sbs->smbus,
+					  ACPI_SBSM_SMBUS_ADDR, 0x01,
+					  &state, NULL);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_word() failed"));
+	}
+	if (!result) {
+		is_present = (state & 0x000f) & (1 << battery->id);
+	}
+	battery->battery_present = is_present;
+
+	return_VALUE(result);
+}
+
+static int acpi_battery_is_present(struct acpi_battery *battery)
+{
+	return (battery->battery_present);
+}
+
+static int acpi_ac_is_present(struct acpi_sbs *sbs)
+{
+	return (sbs->ac_present);
+}
+
+static int acpi_battery_select(struct acpi_battery *battery)
+{
+	struct acpi_ec_smbus *smbus = battery->sbs->smbus;
+	int result = 0;
+	s16 state;
+	int foo;
+
+	ACPI_FUNCTION_TRACE("acpi_battery_select");
+
+	if (battery->sbs->sbsm_present) {
+
+		/* Take special care not to knobble other nibbles of
+		 * state (aka selector_state), since
+		 * it causes charging to halt on SBSELs */
+
+		result =
+		    acpi_sbs_smbus_read_word(smbus, ACPI_SBSM_SMBUS_ADDR, 0x01,
+					     &state, NULL);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "acpi_sbs_smbus_read_word() failed\n"));
+			goto end;
+		}
+
+		foo = (state & 0x0fff) | (1 << (battery->id + 12));
+		result =
+		    acpi_sbs_smbus_write_word(smbus, ACPI_SBSM_SMBUS_ADDR, 0x01,
+					      foo, NULL);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "acpi_sbs_smbus_write_word() failed\n"));
+			goto end;
+		}
+	}
+
+      end:
+	return_VALUE(result);
+}
+
+static int acpi_sbsm_get_info(struct acpi_sbs *sbs)
+{
+	struct acpi_ec_smbus *smbus = sbs->smbus;
+	int result = 0;
+	s16 battery_system_info;
+
+	ACPI_FUNCTION_TRACE("acpi_sbsm_get_info");
+
+	result = acpi_sbs_smbus_read_word(smbus, ACPI_SBSM_SMBUS_ADDR, 0x04,
+					  &battery_system_info, NULL);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_word() failed\n"));
+		goto end;
+	}
+
+	sbs->sbsm_batteries_supported = battery_system_info & 0x000f;
+
+      end:
+
+	return_VALUE(result);
+}
+
+static int acpi_battery_get_info(struct acpi_battery *battery)
+{
+	struct acpi_ec_smbus *smbus = battery->sbs->smbus;
+	int result = 0;
+	s16 battery_mode;
+	s16 specification_info;
+
+	ACPI_FUNCTION_TRACE("acpi_battery_get_info");
+
+	result = acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x03,
+					  &battery_mode,
+					  &acpi_battery_smbus_err_handler);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_word() failed\n"));
+		goto end;
+	}
+	battery->info.capacity_mode = (battery_mode & 0x8000) >> 15;
+
+	result = acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x10,
+					  &battery->info.full_charge_capacity,
+					  &acpi_battery_smbus_err_handler);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_word() failed\n"));
+		goto end;
+	}
+
+	result = acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x18,
+					  &battery->info.design_capacity,
+					  &acpi_battery_smbus_err_handler);
+
+	if (result) {
+		goto end;
+	}
+
+	result = acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x19,
+					  &battery->info.design_voltage,
+					  &acpi_battery_smbus_err_handler);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_word() failed\n"));
+		goto end;
+	}
+
+	result = acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x1a,
+					  &specification_info,
+					  &acpi_battery_smbus_err_handler);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_word() failed\n"));
+		goto end;
+	}
+
+	switch ((specification_info & 0x0f00) >> 8) {
+	case 1:
+		battery->info.vscale = 10;
+		break;
+	case 2:
+		battery->info.vscale = 100;
+		break;
+	case 3:
+		battery->info.vscale = 1000;
+		break;
+	default:
+		battery->info.vscale = 1;
+	}
+
+	switch ((specification_info & 0xf000) >> 12) {
+	case 1:
+		battery->info.ipscale = 10;
+		break;
+	case 2:
+		battery->info.ipscale = 100;
+		break;
+	case 3:
+		battery->info.ipscale = 1000;
+		break;
+	default:
+		battery->info.ipscale = 1;
+	}
+
+	result = acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x1c,
+					  &battery->info.serial_number,
+					  &acpi_battery_smbus_err_handler);
+	if (result) {
+		goto end;
+	}
+
+	result = acpi_sbs_smbus_read_str(smbus, ACPI_SB_SMBUS_ADDR, 0x20,
+					 battery->info.manufacturer_name,
+					 &acpi_battery_smbus_err_handler);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_str() failed\n"));
+		goto end;
+	}
+
+	result = acpi_sbs_smbus_read_str(smbus, ACPI_SB_SMBUS_ADDR, 0x21,
+					 battery->info.device_name,
+					 &acpi_battery_smbus_err_handler);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_str() failed\n"));
+		goto end;
+	}
+
+	result = acpi_sbs_smbus_read_str(smbus, ACPI_SB_SMBUS_ADDR, 0x22,
+					 battery->info.device_chemistry,
+					 &acpi_battery_smbus_err_handler);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_str() failed\n"));
+		goto end;
+	}
+
+      end:
+	return_VALUE(result);
+}
+
+static void acpi_update_delay(struct acpi_sbs *sbs)
+{
+	ACPI_FUNCTION_TRACE("acpi_update_delay");
+	if (sbs->zombie) {
+		return;
+	}
+	if (sbs->update_time2 > 0) {
+		msleep(sbs->update_time2 * 1000);
+	}
+}
+
+static int acpi_battery_get_state(struct acpi_battery *battery)
+{
+	struct acpi_ec_smbus *smbus = battery->sbs->smbus;
+	int result = 0;
+
+	ACPI_FUNCTION_TRACE("acpi_battery_get_state");
+
+	acpi_update_delay(battery->sbs);
+	result = acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x09,
+					  &battery->state.voltage,
+					  &acpi_battery_smbus_err_handler);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_word() failed\n"));
+		goto end;
+	}
+
+	acpi_update_delay(battery->sbs);
+	result = acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x0a,
+					  &battery->state.amperage,
+					  &acpi_battery_smbus_err_handler);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_word() failed\n"));
+		goto end;
+	}
+
+	acpi_update_delay(battery->sbs);
+	result = acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x0f,
+					  &battery->state.remaining_capacity,
+					  &acpi_battery_smbus_err_handler);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_word() failed\n"));
+		goto end;
+	}
+
+	acpi_update_delay(battery->sbs);
+	result = acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x12,
+					  &battery->state.average_time_to_empty,
+					  &acpi_battery_smbus_err_handler);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_word() failed\n"));
+		goto end;
+	}
+
+	acpi_update_delay(battery->sbs);
+	result = acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x13,
+					  &battery->state.average_time_to_full,
+					  &acpi_battery_smbus_err_handler);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_word() failed\n"));
+		goto end;
+	}
+
+	acpi_update_delay(battery->sbs);
+	result = acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x16,
+					  &battery->state.battery_status,
+					  &acpi_battery_smbus_err_handler);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_word() failed\n"));
+		goto end;
+	}
+
+	acpi_update_delay(battery->sbs);
+
+      end:
+	return_VALUE(result);
+}
+
+static int acpi_battery_get_alarm(struct acpi_battery *battery)
+{
+	struct acpi_ec_smbus *smbus = battery->sbs->smbus;
+	int result = 0;
+
+	ACPI_FUNCTION_TRACE("acpi_battery_get_alarm");
+
+	result = acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x01,
+					  &battery->alarm.remaining_capacity,
+					  &acpi_battery_smbus_err_handler);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_word() failed\n"));
+		goto end;
+	}
+
+	acpi_update_delay(battery->sbs);
+
+      end:
+
+	return_VALUE(result);
+}
+
+static int acpi_battery_set_alarm(struct acpi_battery *battery,
+				  unsigned long alarm)
+{
+	struct acpi_ec_smbus *smbus = battery->sbs->smbus;
+	int result = 0;
+	s16 battery_mode;
+	int foo;
+
+	ACPI_FUNCTION_TRACE("acpi_battery_set_alarm");
+
+	result = acpi_battery_select(battery);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_battery_select() failed\n"));
+		goto end;
+	}
+
+	/* If necessary, enable the alarm */
+
+	if (alarm > 0) {
+		result =
+		    acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x03,
+					     &battery_mode,
+					     &acpi_battery_smbus_err_handler);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "acpi_sbs_smbus_read_word() failed\n"));
+			goto end;
+		}
+
+		result =
+		    acpi_sbs_smbus_write_word(smbus, ACPI_SB_SMBUS_ADDR, 0x01,
+					      battery_mode & 0xbfff,
+					      &acpi_battery_smbus_err_handler);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "acpi_sbs_smbus_write_word() failed\n"));
+			goto end;
+		}
+	}
+
+	foo = alarm / (battery->info.capacity_mode ? 10 : 1);
+	result = acpi_sbs_smbus_write_word(smbus, ACPI_SB_SMBUS_ADDR, 0x01,
+					   foo,
+					   &acpi_battery_smbus_err_handler);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_write_word() failed\n"));
+		goto end;
+	}
+
+      end:
+
+	return_VALUE(result);
+}
+
+static int acpi_battery_set_mode(struct acpi_battery *battery)
+{
+	int result = 0;
+	s16 battery_mode;
+
+	ACPI_FUNCTION_TRACE("acpi_battery_set_mode");
+
+	if (capacity_mode == DEF_CAPACITY_UNIT) {
+		goto end;
+	}
+
+	result = acpi_sbs_smbus_read_word(battery->sbs->smbus,
+					  ACPI_SB_SMBUS_ADDR, 0x03,
+					  &battery_mode, NULL);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_word() failed\n"));
+		goto end;
+	}
+
+	if (capacity_mode == MAH_CAPACITY_UNIT) {
+		battery_mode &= 0x7fff;
+	} else {
+		battery_mode |= 0x8000;
+	}
+	result = acpi_sbs_smbus_write_word(battery->sbs->smbus,
+					   ACPI_SB_SMBUS_ADDR, 0x03,
+					   battery_mode, NULL);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_write_word() failed\n"));
+		goto end;
+	}
+
+	result = acpi_sbs_smbus_read_word(battery->sbs->smbus,
+					  ACPI_SB_SMBUS_ADDR, 0x03,
+					  &battery_mode, NULL);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_word() failed\n"));
+		goto end;
+	}
+
+      end:
+	return_VALUE(result);
+}
+
+static int acpi_battery_init(struct acpi_battery *battery)
+{
+	int result = 0;
+
+	ACPI_FUNCTION_TRACE("acpi_battery_init");
+
+	result = acpi_battery_select(battery);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_battery_init() failed\n"));
+		goto end;
+	}
+
+	result = acpi_battery_set_mode(battery);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_battery_set_mode() failed\n"));
+		goto end;
+	}
+
+	result = acpi_battery_get_info(battery);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_battery_get_info() failed\n"));
+		goto end;
+	}
+
+	result = acpi_battery_get_state(battery);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_battery_get_state() failed\n"));
+		goto end;
+	}
+
+	result = acpi_battery_get_alarm(battery);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_battery_get_alarm() failed\n"));
+		goto end;
+	}
+
+      end:
+	return_VALUE(result);
+}
+
+static int acpi_ac_get_present(struct acpi_sbs *sbs)
+{
+	struct acpi_ec_smbus *smbus = sbs->smbus;
+	int result = 0;
+	s16 charger_status;
+
+	ACPI_FUNCTION_TRACE("acpi_ac_get_present");
+
+	result = acpi_sbs_smbus_read_word(smbus, ACPI_SBC_SMBUS_ADDR, 0x13,
+					  &charger_status, NULL);
+
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_smbus_read_word() failed\n"));
+		goto end;
+	}
+
+	sbs->ac_present = (charger_status & 0x8000) >> 15;
+
+      end:
+
+	return_VALUE(result);
+}
+
+/* --------------------------------------------------------------------------
+                              FS Interface (/proc/acpi)
+   -------------------------------------------------------------------------- */
+
+/* Generic Routines */
+
+static int
+acpi_sbs_generic_add_fs(struct proc_dir_entry **dir,
+			struct proc_dir_entry *parent_dir,
+			char *dir_name,
+			struct file_operations *info_fops,
+			struct file_operations *state_fops,
+			struct file_operations *alarm_fops, void *data)
+{
+	struct proc_dir_entry *entry = NULL;
+
+	ACPI_FUNCTION_TRACE("acpi_sbs_generic_add_fs");
+
+	if (!*dir) {
+		*dir = proc_mkdir(dir_name, parent_dir);
+		if (!*dir) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "proc_mkdir() failed\n"));
+			return_VALUE(-ENODEV);
+		}
+		(*dir)->owner = THIS_MODULE;
+	}
+
+	/* 'info' [R] */
+	if (info_fops) {
+		entry = create_proc_entry(ACPI_SBS_FILE_INFO, S_IRUGO, *dir);
+		if (!entry) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "create_proc_entry() failed\n"));
+		} else {
+			entry->proc_fops = info_fops;
+			entry->data = data;
+			entry->owner = THIS_MODULE;
+		}
+	}
+
+	/* 'state' [R] */
+	if (state_fops) {
+		entry = create_proc_entry(ACPI_SBS_FILE_STATE, S_IRUGO, *dir);
+		if (!entry) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "create_proc_entry() failed\n"));
+		} else {
+			entry->proc_fops = state_fops;
+			entry->data = data;
+			entry->owner = THIS_MODULE;
+		}
+	}
+
+	/* 'alarm' [R/W] */
+	if (alarm_fops) {
+		entry = create_proc_entry(ACPI_SBS_FILE_ALARM, S_IRUGO, *dir);
+		if (!entry) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "create_proc_entry() failed\n"));
+		} else {
+			entry->proc_fops = alarm_fops;
+			entry->data = data;
+			entry->owner = THIS_MODULE;
+		}
+	}
+
+	return_VALUE(0);
+}
+
+static void
+acpi_sbs_generic_remove_fs(struct proc_dir_entry **dir,
+			   struct proc_dir_entry *parent_dir)
+{
+	ACPI_FUNCTION_TRACE("acpi_sbs_generic_remove_fs");
+
+	if (*dir) {
+		remove_proc_entry(ACPI_SBS_FILE_INFO, *dir);
+		remove_proc_entry(ACPI_SBS_FILE_STATE, *dir);
+		remove_proc_entry(ACPI_SBS_FILE_ALARM, *dir);
+		remove_proc_entry((*dir)->name, parent_dir);
+		*dir = NULL;
+	}
+
+}
+
+/* Smart Battery Interface */
+
+static struct proc_dir_entry *acpi_battery_dir = NULL;
+
+static int acpi_battery_read_info(struct seq_file *seq, void *offset)
+{
+	struct acpi_battery *battery = (struct acpi_battery *)seq->private;
+	int cscale;
+	int result = 0;
+
+	ACPI_FUNCTION_TRACE("acpi_battery_read_info");
+
+	if (battery->sbs->zombie) {
+		return_VALUE(-ENODEV);
+	}
+
+	down(&sbs_sem);
+
+	if (update_mode == REQUEST_UPDATE_MODE) {
+		result = acpi_sbs_update_run(battery->sbs, DATA_TYPE_INFO);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "acpi_sbs_update_run() failed\n"));
+		}
+	}
+
+	if (acpi_battery_is_present(battery)) {
+		seq_printf(seq, "present:                 yes\n");
+	} else {
+		seq_printf(seq, "present:                 no\n");
+		goto end;
+	}
+
+	if (battery->info.capacity_mode) {
+		cscale = battery->info.vscale * battery->info.ipscale;
+	} else {
+		cscale = battery->info.ipscale;
+	}
+	seq_printf(seq, "design capacity:         %i%s",
+		   battery->info.design_capacity * cscale,
+		   battery->info.capacity_mode ? "0 mWh\n" : " mAh\n");
+
+	seq_printf(seq, "last full capacity:      %i%s",
+		   battery->info.full_charge_capacity * cscale,
+		   battery->info.capacity_mode ? "0 mWh\n" : " mAh\n");
+
+	seq_printf(seq, "battery technology:      rechargeable\n");
+
+	seq_printf(seq, "design voltage:          %i mV\n",
+		   battery->info.design_voltage * battery->info.vscale);
+
+	seq_printf(seq, "design capacity warning: unknown\n");
+	seq_printf(seq, "design capacity low:     unknown\n");
+	seq_printf(seq, "capacity granularity 1:  unknown\n");
+	seq_printf(seq, "capacity granularity 2:  unknown\n");
+
+	seq_printf(seq, "model number:            %s\n",
+		   battery->info.device_name);
+
+	seq_printf(seq, "serial number:           %i\n",
+		   battery->info.serial_number);
+
+	seq_printf(seq, "battery type:            %s\n",
+		   battery->info.device_chemistry);
+
+	seq_printf(seq, "OEM info:                %s\n",
+		   battery->info.manufacturer_name);
+
+      end:
+
+	up(&sbs_sem);
+
+	return_VALUE(result);
+}
+
+static int acpi_battery_info_open_fs(struct inode *inode, struct file *file)
+{
+	return single_open(file, acpi_battery_read_info, PDE(inode)->data);
+}
+
+static int acpi_battery_read_state(struct seq_file *seq, void *offset)
+{
+	struct acpi_battery *battery = (struct acpi_battery *)seq->private;
+	int result = 0;
+	int cscale;
+	int foo;
+
+	ACPI_FUNCTION_TRACE("acpi_battery_read_state");
+
+	if (battery->sbs->zombie) {
+		return_VALUE(-ENODEV);
+	}
+
+	down(&sbs_sem);
+
+	if (update_mode == REQUEST_UPDATE_MODE) {
+		result = acpi_sbs_update_run(battery->sbs, DATA_TYPE_STATE);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "acpi_sbs_update_run() failed\n"));
+		}
+	}
+
+	if (acpi_battery_is_present(battery)) {
+		seq_printf(seq, "present:                 yes\n");
+	} else {
+		seq_printf(seq, "present:                 no\n");
+		goto end;
+	}
+
+	if (battery->info.capacity_mode) {
+		cscale = battery->info.vscale * battery->info.ipscale;
+	} else {
+		cscale = battery->info.ipscale;
+	}
+
+	if (battery->state.battery_status & 0x0010) {
+		seq_printf(seq, "capacity state:          critical\n");
+	} else {
+		seq_printf(seq, "capacity state:          ok\n");
+	}
+	if (battery->state.amperage < 0) {
+		seq_printf(seq, "charging state:          discharging\n");
+		foo = battery->state.remaining_capacity * cscale * 60 /
+		    (battery->state.average_time_to_empty == 0 ? 1 :
+		     battery->state.average_time_to_empty);
+		seq_printf(seq, "present rate:            %i%s\n",
+			   foo, battery->info.capacity_mode ? "0 mW" : " mA");
+	} else if (battery->state.amperage > 0) {
+		seq_printf(seq, "charging state:          charging\n");
+		foo = (battery->info.full_charge_capacity -
+		       battery->state.remaining_capacity) * cscale * 60 /
+		    (battery->state.average_time_to_full == 0 ? 1 :
+		     battery->state.average_time_to_full);
+		seq_printf(seq, "present rate:            %i%s\n",
+			   foo, battery->info.capacity_mode ? "0 mW" : " mA");
+	} else {
+		seq_printf(seq, "charging state:          charged\n");
+		seq_printf(seq, "present rate:            0 %s\n",
+			   battery->info.capacity_mode ? "mW" : "mA");
+	}
+
+	seq_printf(seq, "remaining capacity:      %i%s",
+		   battery->state.remaining_capacity * cscale,
+		   battery->info.capacity_mode ? "0 mWh\n" : " mAh\n");
+
+	seq_printf(seq, "present voltage:         %i mV\n",
+		   battery->state.voltage * battery->info.vscale);
+
+      end:
+
+	up(&sbs_sem);
+
+	return_VALUE(result);
+}
+
+static int acpi_battery_state_open_fs(struct inode *inode, struct file *file)
+{
+	return single_open(file, acpi_battery_read_state, PDE(inode)->data);
+}
+
+static int acpi_battery_read_alarm(struct seq_file *seq, void *offset)
+{
+	struct acpi_battery *battery = (struct acpi_battery *)seq->private;
+	int result = 0;
+	int cscale;
+
+	ACPI_FUNCTION_TRACE("acpi_battery_read_alarm");
+
+	if (battery->sbs->zombie) {
+		return_VALUE(-ENODEV);
+	}
+
+	down(&sbs_sem);
+
+	if (update_mode == REQUEST_UPDATE_MODE) {
+		result = acpi_sbs_update_run(battery->sbs, DATA_TYPE_ALARM);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "acpi_sbs_update_run() failed\n"));
+		}
+	}
+
+	if (!acpi_battery_is_present(battery)) {
+		seq_printf(seq, "present:                 no\n");
+		goto end;
+	}
+
+	if (battery->info.capacity_mode) {
+		cscale = battery->info.vscale * battery->info.ipscale;
+	} else {
+		cscale = battery->info.ipscale;
+	}
+
+	seq_printf(seq, "alarm:                   ");
+	if (battery->alarm.remaining_capacity) {
+		seq_printf(seq, "%i%s",
+			   battery->alarm.remaining_capacity * cscale,
+			   battery->info.capacity_mode ? "0 mWh\n" : " mAh\n");
+	} else {
+		seq_printf(seq, "disabled\n");
+	}
+
+      end:
+
+	up(&sbs_sem);
+
+	return_VALUE(result);
+}
+
+static ssize_t
+acpi_battery_write_alarm(struct file *file, const char __user * buffer,
+			 size_t count, loff_t * ppos)
+{
+	struct seq_file *seq = (struct seq_file *)file->private_data;
+	struct acpi_battery *battery = (struct acpi_battery *)seq->private;
+	char alarm_string[12] = { '\0' };
+	int result, old_alarm, new_alarm;
+
+	ACPI_FUNCTION_TRACE("acpi_battery_write_alarm");
+
+	if (battery->sbs->zombie) {
+		return_VALUE(-ENODEV);
+	}
+
+	down(&sbs_sem);
+
+	if (!acpi_battery_is_present(battery)) {
+		result = -ENODEV;
+		goto end;
+	}
+
+	if (count > sizeof(alarm_string) - 1) {
+		result = -EINVAL;
+		goto end;
+	}
+
+	if (copy_from_user(alarm_string, buffer, count)) {
+		result = -EFAULT;
+		goto end;
+	}
+
+	alarm_string[count] = 0;
+
+	old_alarm = battery->alarm.remaining_capacity;
+	new_alarm = simple_strtoul(alarm_string, NULL, 0);
+
+	result = acpi_battery_set_alarm(battery, new_alarm);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_battery_set_alarm() failed\n"));
+		(void)acpi_battery_set_alarm(battery, old_alarm);
+		goto end;
+	}
+	result = acpi_battery_get_alarm(battery);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_battery_get_alarm() failed\n"));
+		(void)acpi_battery_set_alarm(battery, old_alarm);
+		goto end;
+	}
+
+      end:
+	up(&sbs_sem);
+
+	if (result) {
+		return_VALUE(result);
+	} else {
+		return_VALUE(count);
+	}
+}
+
+static int acpi_battery_alarm_open_fs(struct inode *inode, struct file *file)
+{
+	return single_open(file, acpi_battery_read_alarm, PDE(inode)->data);
+}
+
+static struct file_operations acpi_battery_info_fops = {
+	.open = acpi_battery_info_open_fs,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+	.owner = THIS_MODULE,
+};
+
+static struct file_operations acpi_battery_state_fops = {
+	.open = acpi_battery_state_open_fs,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+	.owner = THIS_MODULE,
+};
+
+static struct file_operations acpi_battery_alarm_fops = {
+	.open = acpi_battery_alarm_open_fs,
+	.read = seq_read,
+	.write = acpi_battery_write_alarm,
+	.llseek = seq_lseek,
+	.release = single_release,
+	.owner = THIS_MODULE,
+};
+
+/* Legacy AC Adapter Interface */
+
+static struct proc_dir_entry *acpi_ac_dir = NULL;
+
+static int acpi_ac_read_state(struct seq_file *seq, void *offset)
+{
+	struct acpi_sbs *sbs = (struct acpi_sbs *)seq->private;
+	int result;
+
+	ACPI_FUNCTION_TRACE("acpi_ac_read_state");
+
+	if (sbs->zombie) {
+		return_VALUE(-ENODEV);
+	}
+
+	down(&sbs_sem);
+
+	if (update_mode == REQUEST_UPDATE_MODE) {
+		result = acpi_sbs_update_run(sbs, DATA_TYPE_AC_STATE);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "acpi_sbs_update_run() failed\n"));
+		}
+	}
+
+	seq_printf(seq, "state:                   %s\n",
+		   sbs->ac_present ? "on-line" : "off-line");
+
+	up(&sbs_sem);
+
+	return_VALUE(0);
+}
+
+static int acpi_ac_state_open_fs(struct inode *inode, struct file *file)
+{
+	return single_open(file, acpi_ac_read_state, PDE(inode)->data);
+}
+
+static struct file_operations acpi_ac_state_fops = {
+	.open = acpi_ac_state_open_fs,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+	.owner = THIS_MODULE,
+};
+
+/* --------------------------------------------------------------------------
+                                 Driver Interface
+   -------------------------------------------------------------------------- */
+
+/* Smart Battery */
+
+static int acpi_battery_add(struct acpi_sbs *sbs, int id)
+{
+	int is_present;
+	int result;
+	char dir_name[32];
+	struct acpi_battery *battery;
+
+	ACPI_FUNCTION_TRACE("acpi_battery_add");
+
+	battery = &sbs->battery[id];
+
+	battery->alive = 0;
+
+	battery->init_state = 0;
+	battery->id = id;
+	battery->sbs = sbs;
+
+	result = acpi_battery_select(battery);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_battery_select() failed\n"));
+		goto end;
+	}
+
+	result = acpi_battery_get_present(battery);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_battery_get_present() failed\n"));
+		goto end;
+	}
+
+	is_present = acpi_battery_is_present(battery);
+
+	if (is_present) {
+		result = acpi_battery_init(battery);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "acpi_battery_init() failed\n"));
+			goto end;
+		}
+		battery->init_state = 1;
+	}
+
+	(void)sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
+
+	result = acpi_sbs_generic_add_fs(&battery->battery_entry,
+					 acpi_battery_dir,
+					 dir_name,
+					 &acpi_battery_info_fops,
+					 &acpi_battery_state_fops,
+					 &acpi_battery_alarm_fops, battery);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_generic_add_fs() failed\n"));
+		goto end;
+	}
+	battery->alive = 1;
+
+      end:
+	return_VALUE(result);
+}
+
+static void acpi_battery_remove(struct acpi_sbs *sbs, int id)
+{
+	ACPI_FUNCTION_TRACE("acpi_battery_remove");
+
+	if (sbs->battery[id].battery_entry) {
+		acpi_sbs_generic_remove_fs(&(sbs->battery[id].battery_entry),
+					   acpi_battery_dir);
+	}
+}
+
+static int acpi_ac_add(struct acpi_sbs *sbs)
+{
+	int result;
+
+	ACPI_FUNCTION_TRACE("acpi_ac_add");
+
+	result = acpi_ac_get_present(sbs);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_ac_get_present() failed\n"));
+		goto end;
+	}
+
+	result = acpi_sbs_generic_add_fs(&sbs->ac_entry,
+					 acpi_ac_dir,
+					 ACPI_AC_DIR_NAME,
+					 NULL, &acpi_ac_state_fops, NULL, sbs);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_generic_add_fs() failed\n"));
+		goto end;
+	}
+
+      end:
+
+	return_VALUE(result);
+}
+
+static void acpi_ac_remove(struct acpi_sbs *sbs)
+{
+	ACPI_FUNCTION_TRACE("acpi_ac_remove");
+
+	if (sbs->ac_entry) {
+		acpi_sbs_generic_remove_fs(&sbs->ac_entry, acpi_ac_dir);
+	}
+}
+
+static void acpi_sbs_update_queue_run(unsigned long data)
+{
+	ACPI_FUNCTION_TRACE("acpi_sbs_update_queue_run");
+	acpi_os_execute(OSL_GPE_HANDLER, acpi_sbs_update_queue, (void *)data);
+}
+
+static int acpi_sbs_update_run(struct acpi_sbs *sbs, int data_type)
+{
+	struct acpi_battery *battery;
+	int result = 0;
+	int old_ac_present;
+	int old_battery_present;
+	int new_ac_present;
+	int new_battery_present;
+	int id;
+	char dir_name[32];
+	int do_battery_init, do_ac_init;
+	s16 old_remaining_capacity;
+
+	ACPI_FUNCTION_TRACE("acpi_sbs_update_run");
+
+	if (sbs->zombie) {
+		goto end;
+	}
+
+	old_ac_present = acpi_ac_is_present(sbs);
+
+	result = acpi_ac_get_present(sbs);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_ac_get_present() failed\n"));
+	}
+
+	new_ac_present = acpi_ac_is_present(sbs);
+
+	do_ac_init = (old_ac_present != new_ac_present);
+
+	if (data_type == DATA_TYPE_AC_STATE) {
+		goto end;
+	}
+
+	for (id = 0; id < MAX_SBS_BAT; id++) {
+		battery = &sbs->battery[id];
+		if (battery->alive == 0) {
+			continue;
+		}
+
+		old_remaining_capacity = battery->state.remaining_capacity;
+
+		old_battery_present = acpi_battery_is_present(battery);
+
+		result = acpi_battery_select(battery);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "acpi_battery_select() failed\n"));
+		}
+		if (sbs->zombie) {
+			goto end;
+		}
+
+		result = acpi_battery_get_present(battery);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "acpi_battery_get_present() failed\n"));
+		}
+		if (sbs->zombie) {
+			goto end;
+		}
+
+		new_battery_present = acpi_battery_is_present(battery);
+
+		do_battery_init = ((old_battery_present != new_battery_present)
+				   && new_battery_present);
+
+		if (sbs->zombie) {
+			goto end;
+		}
+		if (do_ac_init || do_battery_init ||
+		    update_info_mode || sbs->update_info_mode) {
+			if (sbs->update_info_mode) {
+				sbs->update_info_mode = 0;
+			} else {
+				sbs->update_info_mode = 1;
+			}
+			result = acpi_battery_init(battery);
+			if (result) {
+				ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+						  "acpi_battery_init() "
+						  "failed\n"));
+			}
+		}
+		if (data_type == DATA_TYPE_INFO) {
+			continue;
+		}
+
+		if (sbs->zombie) {
+			goto end;
+		}
+		if (new_battery_present) {
+			result = acpi_battery_get_alarm(battery);
+			if (result) {
+				ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+						  "acpi_battery_get_alarm() "
+						  "failed\n"));
+			}
+			if (data_type == DATA_TYPE_ALARM) {
+				continue;
+			}
+
+			result = acpi_battery_get_state(battery);
+			if (result) {
+				ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+						  "acpi_battery_get_state() "
+						  "failed\n"));
+			}
+		}
+		if (sbs->zombie) {
+			goto end;
+		}
+		if (data_type != DATA_TYPE_COMMON) {
+			continue;
+		}
+
+		if (old_battery_present != new_battery_present) {
+			(void)sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
+			result = acpi_sbs_generate_event(sbs->device,
+							 ACPI_SBS_BATTERY_NOTIFY_STATUS,
+							 new_battery_present,
+							 dir_name,
+							 ACPI_BATTERY_CLASS);
+			if (result) {
+				ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+						  "acpi_sbs_generate_event() "
+						  "failed\n"));
+			}
+		}
+		if (old_remaining_capacity != battery->state.remaining_capacity) {
+			(void)sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
+			result = acpi_sbs_generate_event(sbs->device,
+							 ACPI_SBS_BATTERY_NOTIFY_STATUS,
+							 new_battery_present,
+							 dir_name,
+							 ACPI_BATTERY_CLASS);
+			if (result) {
+				ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+						  "acpi_sbs_generate_event() failed\n"));
+			}
+		}
+
+	}
+	if (sbs->zombie) {
+		goto end;
+	}
+	if (data_type != DATA_TYPE_COMMON) {
+		goto end;
+	}
+
+	if (old_ac_present != new_ac_present) {
+		result = acpi_sbs_generate_event(sbs->device,
+						 ACPI_SBS_AC_NOTIFY_STATUS,
+						 new_ac_present,
+						 ACPI_AC_DIR_NAME,
+						 ACPI_AC_CLASS);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "acpi_sbs_generate_event() failed\n"));
+		}
+	}
+
+      end:
+	return_VALUE(result);
+}
+
+static void acpi_sbs_update_queue(void *data)
+{
+	struct acpi_sbs *sbs = data;
+	unsigned long delay = -1;
+	int result;
+
+	ACPI_FUNCTION_TRACE("acpi_sbs_update_queue");
+
+	if (sbs->zombie) {
+		goto end;
+	}
+
+	result = acpi_sbs_update_run(sbs, DATA_TYPE_COMMON);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_sbs_update_run() failed\n"));
+	}
+
+	if (sbs->zombie) {
+		goto end;
+	}
+
+	if (update_mode == REQUEST_UPDATE_MODE) {
+		goto end;
+	}
+
+	delay = jiffies + HZ * update_time;
+	sbs->update_timer.data = (unsigned long)data;
+	sbs->update_timer.function = acpi_sbs_update_queue_run;
+	sbs->update_timer.expires = delay;
+	add_timer(&sbs->update_timer);
+      end:
+	;
+}
+
+static int acpi_sbs_add(struct acpi_device *device)
+{
+	struct acpi_sbs *sbs = NULL;
+	struct acpi_ec_hc *ec_hc = NULL;
+	int result, remove_result = 0;
+	unsigned long sbs_obj;
+	int id, cnt;
+	acpi_status status = AE_OK;
+
+	ACPI_FUNCTION_TRACE("acpi_sbs_add");
+
+	sbs = kmalloc(sizeof(struct acpi_sbs), GFP_KERNEL);
+	if (!sbs) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "kmalloc() failed\n"));
+		return_VALUE(-ENOMEM);
+	}
+	memset(sbs, 0, sizeof(struct acpi_sbs));
+
+	cnt = 0;
+	while (cnt < 10) {
+		cnt++;
+		ec_hc = acpi_get_ec_hc(device);
+		if (ec_hc) {
+			break;
+		}
+		msleep(1000);
+	}
+
+	if (!ec_hc) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_get_ec_hc() failed: "
+				  "NO driver found for EC HC SMBus\n"));
+		result = -ENODEV;
+		goto end;
+	}
+
+	sbs->device = device;
+	sbs->smbus = ec_hc->smbus;
+
+	strcpy(acpi_device_name(device), ACPI_SBS_DEVICE_NAME);
+	strcpy(acpi_device_class(device), ACPI_SBS_CLASS);
+	acpi_driver_data(device) = sbs;
+
+	sbs->update_time = 0;
+	sbs->update_time2 = 0;
+
+	result = acpi_ac_add(sbs);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "acpi_ac_add() failed\n"));
+		goto end;
+	}
+	result = acpi_evaluate_integer(device->handle, "_SBS", NULL, &sbs_obj);
+	if (ACPI_FAILURE(result)) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_evaluate_integer() failed\n"));
+		result = -EIO;
+		goto end;
+	}
+
+	if (sbs_obj > 0) {
+		result = acpi_sbsm_get_info(sbs);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "acpi_sbsm_get_info() failed\n"));
+			goto end;
+		}
+		sbs->sbsm_present = 1;
+	}
+	if (sbs->sbsm_present == 0) {
+		result = acpi_battery_add(sbs, 0);
+		if (result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "acpi_battery_add() failed\n"));
+			goto end;
+		}
+	} else {
+		for (id = 0; id < MAX_SBS_BAT; id++) {
+			if ((sbs->sbsm_batteries_supported & (1 << id))) {
+				result = acpi_battery_add(sbs, id);
+				if (result) {
+					ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+							  "acpi_battery_add() "
+							  "failed\n"));
+					goto end;
+				}
+			}
+		}
+	}
+
+	sbs->handle = device->handle;
+
+	init_timer(&sbs->update_timer);
+	if (update_mode == QUEUE_UPDATE_MODE) {
+		status = acpi_os_execute(OSL_GPE_HANDLER,
+					 acpi_sbs_update_queue, (void *)sbs);
+		if (status != AE_OK) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "acpi_os_execute() failed\n"));
+		}
+	}
+	sbs->update_time = update_time;
+	sbs->update_time2 = update_time2;
+
+	printk(KERN_INFO PREFIX "%s [%s]\n",
+	       acpi_device_name(device), acpi_device_bid(device));
+
+      end:
+	if (result) {
+		remove_result = acpi_sbs_remove(device, 0);
+		if (remove_result) {
+			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+					  "acpi_sbs_remove() failed\n"));
+		}
+	}
+
+	return_VALUE(result);
+}
+
+int acpi_sbs_remove(struct acpi_device *device, int type)
+{
+	struct acpi_sbs *sbs = (struct acpi_sbs *)acpi_driver_data(device);
+	int id;
+
+	ACPI_FUNCTION_TRACE("acpi_sbs_remove");
+
+	if (!device || !sbs) {
+		return_VALUE(-EINVAL);
+	}
+
+	sbs->zombie = 1;
+	sbs->update_time = 0;
+	sbs->update_time2 = 0;
+	del_timer_sync(&sbs->update_timer);
+	acpi_os_wait_events_complete(NULL);
+	del_timer_sync(&sbs->update_timer);
+
+	for (id = 0; id < MAX_SBS_BAT; id++) {
+		acpi_battery_remove(sbs, id);
+	}
+
+	acpi_ac_remove(sbs);
+
+	kfree(sbs);
+
+	return_VALUE(0);
+}
+
+static int __init acpi_sbs_init(void)
+{
+	int result = 0;
+
+	ACPI_FUNCTION_TRACE("acpi_sbs_init");
+
+	init_MUTEX(&sbs_sem);
+
+	if (capacity_mode != DEF_CAPACITY_UNIT
+	    && capacity_mode != MAH_CAPACITY_UNIT
+	    && capacity_mode != MWH_CAPACITY_UNIT) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "acpi_sbs_init: "
+				  "invalid capacity_mode = %d\n",
+				  capacity_mode));
+		return_VALUE(-EINVAL);
+	}
+
+	acpi_ac_dir = acpi_lock_ac_dir();
+	if (!acpi_ac_dir) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_lock_ac_dir() failed\n"));
+		return_VALUE(-ENODEV);
+	}
+
+	acpi_battery_dir = acpi_lock_battery_dir();
+	if (!acpi_battery_dir) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_lock_battery_dir() failed\n"));
+		return_VALUE(-ENODEV);
+	}
+
+	result = acpi_bus_register_driver(&acpi_sbs_driver);
+	if (result < 0) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "acpi_bus_register_driver() failed\n"));
+		return_VALUE(-ENODEV);
+	}
+
+	return_VALUE(0);
+}
+
+static void __exit acpi_sbs_exit(void)
+{
+	ACPI_FUNCTION_TRACE("acpi_sbs_exit");
+
+	acpi_bus_unregister_driver(&acpi_sbs_driver);
+
+	acpi_unlock_ac_dir(acpi_ac_dir);
+	acpi_ac_dir = NULL;
+	acpi_unlock_battery_dir(acpi_battery_dir);
+	acpi_battery_dir = NULL;
+
+	return_VOID;
+}
+
+module_init(acpi_sbs_init);
+module_exit(acpi_sbs_exit);
-- 
cgit v0.10.2


From 635227ee89929a6e2920fc8aa1cd48f7225d3d93 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Sat, 1 Jul 2006 16:48:23 -0400
Subject: ACPI: remove function tracing macros from drivers/acpi/*.c

a few invocations appeared due to the SBS and other patches.

Signed-off-by: Len Brown <len.brown@intel.com>

diff --git a/drivers/acpi/cm_sbs.c b/drivers/acpi/cm_sbs.c
index d11507c..574a75a 100644
--- a/drivers/acpi/cm_sbs.c
+++ b/drivers/acpi/cm_sbs.c
@@ -46,7 +46,6 @@ static int lock_battery_dir_cnt = 0;
 
 struct proc_dir_entry *acpi_lock_ac_dir(void)
 {
-	ACPI_FUNCTION_TRACE("acpi_lock_ac_dir");
 
 	down(&cm_sbs_sem);
 	if (!acpi_ac_dir) {
@@ -59,14 +58,13 @@ struct proc_dir_entry *acpi_lock_ac_dir(void)
 				  "Cannot create %s\n", ACPI_AC_CLASS));
 	}
 	up(&cm_sbs_sem);
-	return (acpi_ac_dir);
+	return acpi_ac_dir;
 }
 
 EXPORT_SYMBOL(acpi_lock_ac_dir);
 
 void acpi_unlock_ac_dir(struct proc_dir_entry *acpi_ac_dir_param)
 {
-	ACPI_FUNCTION_TRACE("acpi_unlock_ac_dir");
 
 	down(&cm_sbs_sem);
 	if (acpi_ac_dir_param) {
@@ -83,7 +81,6 @@ EXPORT_SYMBOL(acpi_unlock_ac_dir);
 
 struct proc_dir_entry *acpi_lock_battery_dir(void)
 {
-	ACPI_FUNCTION_TRACE("acpi_lock_battery_dir");
 
 	down(&cm_sbs_sem);
 	if (!acpi_battery_dir) {
@@ -97,14 +94,13 @@ struct proc_dir_entry *acpi_lock_battery_dir(void)
 				  "Cannot create %s\n", ACPI_BATTERY_CLASS));
 	}
 	up(&cm_sbs_sem);
-	return (acpi_battery_dir);
+	return acpi_battery_dir;
 }
 
 EXPORT_SYMBOL(acpi_lock_battery_dir);
 
 void acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir_param)
 {
-	ACPI_FUNCTION_TRACE("acpi_unlock_battery_dir");
 
 	down(&cm_sbs_sem);
 	if (acpi_battery_dir_param) {
@@ -116,20 +112,20 @@ void acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir_param)
 		acpi_battery_dir = 0;
 	}
 	up(&cm_sbs_sem);
+	return;
 }
 
 EXPORT_SYMBOL(acpi_unlock_battery_dir);
 
 static int __init acpi_cm_sbs_init(void)
 {
-	ACPI_FUNCTION_TRACE("acpi_cm_sbs_init");
 
 	if (acpi_disabled)
-		return_VALUE(0);
+		return 0;
 
 	init_MUTEX(&cm_sbs_sem);
 
-	return_VALUE(0);
+	return 0;
 }
 
 subsys_initcall(acpi_cm_sbs_init);
diff --git a/drivers/acpi/i2c_ec.c b/drivers/acpi/i2c_ec.c
index 72478a6..84239d5 100644
--- a/drivers/acpi/i2c_ec.c
+++ b/drivers/acpi/i2c_ec.c
@@ -115,8 +115,6 @@ static int acpi_ec_smb_read(struct acpi_ec_smbus *smbus, u8 address, u8 * data)
 	u8 val;
 	int err;
 
-	ACPI_FUNCTION_TRACE("acpi_ec_smb_read");
-
 	err = ec_read(smbus->base + address, &val);
 	if (!err) {
 		*data = val;
@@ -129,8 +127,6 @@ static int acpi_ec_smb_write(struct acpi_ec_smbus *smbus, u8 address, u8 data)
 {
 	int err;
 
-	ACPI_FUNCTION_TRACE("acpi_ec_smb_write");
-
 	err = ec_write(smbus->base + address, data);
 	return (err);
 }
@@ -144,8 +140,6 @@ acpi_ec_smb_access(struct i2c_adapter *adap, u16 addr, unsigned short flags,
 	unsigned char protocol, len = 0, pec, temp[2] = { 0, 0 };
 	int i;
 
-	ACPI_FUNCTION_TRACE("acpi_ec_smb_access");
-
 	if (read_write == I2C_SMBUS_READ) {
 		protocol = ACPI_EC_SMB_PRTCL_READ;
 	} else {
@@ -290,7 +284,6 @@ acpi_ec_smb_access(struct i2c_adapter *adap, u16 addr, unsigned short flags,
 
 static u32 acpi_ec_smb_func(struct i2c_adapter *adapter)
 {
-	ACPI_FUNCTION_TRACE("acpi_ec_smb_func");
 
 	return (I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
 		I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
@@ -312,22 +305,20 @@ static int acpi_ec_hc_add(struct acpi_device *device)
 	struct acpi_ec_hc *ec_hc;
 	struct acpi_ec_smbus *smbus;
 
-	ACPI_FUNCTION_TRACE("acpi_ec_hc_add");
-
 	if (!device) {
-		return_VALUE(-EINVAL);
+		return -EINVAL;
 	}
 
 	ec_hc = kmalloc(sizeof(struct acpi_ec_hc), GFP_KERNEL);
 	if (!ec_hc) {
-		return_VALUE(-ENOMEM);
+		return -ENOMEM;
 	}
 	memset(ec_hc, 0, sizeof(struct acpi_ec_hc));
 
 	smbus = kmalloc(sizeof(struct acpi_ec_smbus), GFP_KERNEL);
 	if (!smbus) {
 		kfree(ec_hc);
-		return_VALUE(-ENOMEM);
+		return -ENOMEM;
 	}
 	memset(smbus, 0, sizeof(struct acpi_ec_smbus));
 
@@ -341,7 +332,7 @@ static int acpi_ec_hc_add(struct acpi_device *device)
 		ACPI_DEBUG_PRINT((ACPI_DB_WARN, "Error obtaining _EC\n"));
 		kfree(ec_hc->smbus);
 		kfree(smbus);
-		return_VALUE(-EIO);
+		return -EIO;
 	}
 
 	smbus->ec = acpi_driver_data(device->parent);
@@ -357,7 +348,7 @@ static int acpi_ec_hc_add(struct acpi_device *device)
 				  "EC SMBus adapter: Failed to register adapter\n"));
 		kfree(smbus);
 		kfree(ec_hc);
-		return_VALUE(-EIO);
+		return -EIO;
 	}
 
 	ec_hc->smbus = smbus;
@@ -365,17 +356,15 @@ static int acpi_ec_hc_add(struct acpi_device *device)
 	printk(KERN_INFO PREFIX "%s [%s]\n",
 	       acpi_device_name(device), acpi_device_bid(device));
 
-	return_VALUE(AE_OK);
+	return AE_OK;
 }
 
 static int acpi_ec_hc_remove(struct acpi_device *device, int type)
 {
 	struct acpi_ec_hc *ec_hc;
 
-	ACPI_FUNCTION_TRACE("acpi_ec_hc_remove");
-
 	if (!device) {
-		return_VALUE(-EINVAL);
+		return -EINVAL;
 	}
 	ec_hc = acpi_driver_data(device);
 
@@ -383,30 +372,27 @@ static int acpi_ec_hc_remove(struct acpi_device *device, int type)
 	kfree(ec_hc->smbus);
 	kfree(ec_hc);
 
-	return_VALUE(AE_OK);
+	return AE_OK;
 }
 
 static int __init acpi_ec_hc_init(void)
 {
 	int result;
 
-	ACPI_FUNCTION_TRACE("acpi_ec_hc_init");
 	result = acpi_bus_register_driver(&acpi_ec_hc_driver);
 	if (result < 0) {
-		return_VALUE(-ENODEV);
+		return -ENODEV;
 	}
-	return_VALUE(0);
+	return 0;
 }
 
 static void __exit acpi_ec_hc_exit(void)
 {
-	ACPI_FUNCTION_TRACE("acpi_ec_hc_exit");
 	acpi_bus_unregister_driver(&acpi_ec_hc_driver);
 }
 
 struct acpi_ec_hc *acpi_get_ec_hc(struct acpi_device *device)
 {
-	ACPI_FUNCTION_TRACE("acpi_get_ec_hc");
 	return ((struct acpi_ec_hc *)acpi_driver_data(device->parent));
 }
 
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 4d62298..e5e448e 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -259,12 +259,10 @@ int acpi_get_node(acpi_handle *handle)
 {
 	int pxm, node = -1;
 
-	ACPI_FUNCTION_TRACE("acpi_get_node");
-
 	pxm = acpi_get_pxm(handle);
 	if (pxm >= 0)
 		node = acpi_map_pxm_to_node(pxm);
 
-	return_VALUE(node);
+	return node;
 }
 EXPORT_SYMBOL(acpi_get_node);
diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index 8bebceb..db7b350 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -187,8 +187,6 @@ static void acpi_battery_smbus_err_handler(struct acpi_ec_smbus *smbus)
 	char *err_str;
 	int err_number;
 
-	ACPI_FUNCTION_TRACE("acpi_battery_smbus_err_handler");
-
 	data.word = 0;
 
 	result = smbus->adapter.algo->
@@ -239,8 +237,6 @@ acpi_sbs_smbus_read_word(struct acpi_ec_smbus *smbus, int addr, int func,
 	int result = 0;
 	int i;
 
-	ACPI_FUNCTION_TRACE("acpi_sbs_smbus_read_word");
-
 	if (err_handler == NULL) {
 		err_handler = acpi_battery_smbus_err_handler;
 	}
@@ -263,7 +259,7 @@ acpi_sbs_smbus_read_word(struct acpi_ec_smbus *smbus, int addr, int func,
 		}
 	}
 
-	return_VALUE(result);
+	return result;
 }
 
 static int
@@ -275,8 +271,6 @@ acpi_sbs_smbus_read_str(struct acpi_ec_smbus *smbus, int addr, int func,
 	int result = 0;
 	int i;
 
-	ACPI_FUNCTION_TRACE("acpi_sbs_smbus_read_str");
-
 	if (err_handler == NULL) {
 		err_handler = acpi_battery_smbus_err_handler;
 	}
@@ -302,7 +296,7 @@ acpi_sbs_smbus_read_str(struct acpi_ec_smbus *smbus, int addr, int func,
 		}
 	}
 
-	return_VALUE(result);
+	return result;
 }
 
 static int
@@ -314,8 +308,6 @@ acpi_sbs_smbus_write_word(struct acpi_ec_smbus *smbus, int addr, int func,
 	int result = 0;
 	int i;
 
-	ACPI_FUNCTION_TRACE("acpi_sbs_smbus_write_word");
-
 	if (err_handler == NULL) {
 		err_handler = acpi_battery_smbus_err_handler;
 	}
@@ -339,7 +331,7 @@ acpi_sbs_smbus_write_word(struct acpi_ec_smbus *smbus, int addr, int func,
 		}
 	}
 
-	return_VALUE(result);
+	return result;
 }
 
 /* --------------------------------------------------------------------------
@@ -355,8 +347,6 @@ static int acpi_sbs_generate_event(struct acpi_device *device,
 	char class_saved[20];
 	int result = 0;
 
-	ACPI_FUNCTION_TRACE("acpi_sbs_generate_event");
-
 	strcpy(bid_saved, acpi_device_bid(device));
 	strcpy(class_saved, acpi_device_class(device));
 
@@ -368,7 +358,7 @@ static int acpi_sbs_generate_event(struct acpi_device *device,
 	strcpy(acpi_device_bid(device), bid_saved);
 	strcpy(acpi_device_class(device), class_saved);
 
-	return_VALUE(result);
+	return result;
 }
 
 static int acpi_battery_get_present(struct acpi_battery *battery)
@@ -377,8 +367,6 @@ static int acpi_battery_get_present(struct acpi_battery *battery)
 	int result = 0;
 	int is_present = 0;
 
-	ACPI_FUNCTION_TRACE("acpi_battery_get_present");
-
 	result = acpi_sbs_smbus_read_word(battery->sbs->smbus,
 					  ACPI_SBSM_SMBUS_ADDR, 0x01,
 					  &state, NULL);
@@ -391,7 +379,7 @@ static int acpi_battery_get_present(struct acpi_battery *battery)
 	}
 	battery->battery_present = is_present;
 
-	return_VALUE(result);
+	return result;
 }
 
 static int acpi_battery_is_present(struct acpi_battery *battery)
@@ -411,8 +399,6 @@ static int acpi_battery_select(struct acpi_battery *battery)
 	s16 state;
 	int foo;
 
-	ACPI_FUNCTION_TRACE("acpi_battery_select");
-
 	if (battery->sbs->sbsm_present) {
 
 		/* Take special care not to knobble other nibbles of
@@ -440,7 +426,7 @@ static int acpi_battery_select(struct acpi_battery *battery)
 	}
 
       end:
-	return_VALUE(result);
+	return result;
 }
 
 static int acpi_sbsm_get_info(struct acpi_sbs *sbs)
@@ -449,8 +435,6 @@ static int acpi_sbsm_get_info(struct acpi_sbs *sbs)
 	int result = 0;
 	s16 battery_system_info;
 
-	ACPI_FUNCTION_TRACE("acpi_sbsm_get_info");
-
 	result = acpi_sbs_smbus_read_word(smbus, ACPI_SBSM_SMBUS_ADDR, 0x04,
 					  &battery_system_info, NULL);
 	if (result) {
@@ -463,7 +447,7 @@ static int acpi_sbsm_get_info(struct acpi_sbs *sbs)
 
       end:
 
-	return_VALUE(result);
+	return result;
 }
 
 static int acpi_battery_get_info(struct acpi_battery *battery)
@@ -473,8 +457,6 @@ static int acpi_battery_get_info(struct acpi_battery *battery)
 	s16 battery_mode;
 	s16 specification_info;
 
-	ACPI_FUNCTION_TRACE("acpi_battery_get_info");
-
 	result = acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x03,
 					  &battery_mode,
 					  &acpi_battery_smbus_err_handler);
@@ -583,12 +565,11 @@ static int acpi_battery_get_info(struct acpi_battery *battery)
 	}
 
       end:
-	return_VALUE(result);
+	return result;
 }
 
 static void acpi_update_delay(struct acpi_sbs *sbs)
 {
-	ACPI_FUNCTION_TRACE("acpi_update_delay");
 	if (sbs->zombie) {
 		return;
 	}
@@ -602,8 +583,6 @@ static int acpi_battery_get_state(struct acpi_battery *battery)
 	struct acpi_ec_smbus *smbus = battery->sbs->smbus;
 	int result = 0;
 
-	ACPI_FUNCTION_TRACE("acpi_battery_get_state");
-
 	acpi_update_delay(battery->sbs);
 	result = acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x09,
 					  &battery->state.voltage,
@@ -667,7 +646,7 @@ static int acpi_battery_get_state(struct acpi_battery *battery)
 	acpi_update_delay(battery->sbs);
 
       end:
-	return_VALUE(result);
+	return result;
 }
 
 static int acpi_battery_get_alarm(struct acpi_battery *battery)
@@ -675,8 +654,6 @@ static int acpi_battery_get_alarm(struct acpi_battery *battery)
 	struct acpi_ec_smbus *smbus = battery->sbs->smbus;
 	int result = 0;
 
-	ACPI_FUNCTION_TRACE("acpi_battery_get_alarm");
-
 	result = acpi_sbs_smbus_read_word(smbus, ACPI_SB_SMBUS_ADDR, 0x01,
 					  &battery->alarm.remaining_capacity,
 					  &acpi_battery_smbus_err_handler);
@@ -690,7 +667,7 @@ static int acpi_battery_get_alarm(struct acpi_battery *battery)
 
       end:
 
-	return_VALUE(result);
+	return result;
 }
 
 static int acpi_battery_set_alarm(struct acpi_battery *battery,
@@ -701,8 +678,6 @@ static int acpi_battery_set_alarm(struct acpi_battery *battery,
 	s16 battery_mode;
 	int foo;
 
-	ACPI_FUNCTION_TRACE("acpi_battery_set_alarm");
-
 	result = acpi_battery_select(battery);
 	if (result) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
@@ -746,7 +721,7 @@ static int acpi_battery_set_alarm(struct acpi_battery *battery,
 
       end:
 
-	return_VALUE(result);
+	return result;
 }
 
 static int acpi_battery_set_mode(struct acpi_battery *battery)
@@ -754,8 +729,6 @@ static int acpi_battery_set_mode(struct acpi_battery *battery)
 	int result = 0;
 	s16 battery_mode;
 
-	ACPI_FUNCTION_TRACE("acpi_battery_set_mode");
-
 	if (capacity_mode == DEF_CAPACITY_UNIT) {
 		goto end;
 	}
@@ -793,15 +766,13 @@ static int acpi_battery_set_mode(struct acpi_battery *battery)
 	}
 
       end:
-	return_VALUE(result);
+	return result;
 }
 
 static int acpi_battery_init(struct acpi_battery *battery)
 {
 	int result = 0;
 
-	ACPI_FUNCTION_TRACE("acpi_battery_init");
-
 	result = acpi_battery_select(battery);
 	if (result) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
@@ -838,7 +809,7 @@ static int acpi_battery_init(struct acpi_battery *battery)
 	}
 
       end:
-	return_VALUE(result);
+	return result;
 }
 
 static int acpi_ac_get_present(struct acpi_sbs *sbs)
@@ -847,8 +818,6 @@ static int acpi_ac_get_present(struct acpi_sbs *sbs)
 	int result = 0;
 	s16 charger_status;
 
-	ACPI_FUNCTION_TRACE("acpi_ac_get_present");
-
 	result = acpi_sbs_smbus_read_word(smbus, ACPI_SBC_SMBUS_ADDR, 0x13,
 					  &charger_status, NULL);
 
@@ -862,7 +831,7 @@ static int acpi_ac_get_present(struct acpi_sbs *sbs)
 
       end:
 
-	return_VALUE(result);
+	return result;
 }
 
 /* --------------------------------------------------------------------------
@@ -881,14 +850,12 @@ acpi_sbs_generic_add_fs(struct proc_dir_entry **dir,
 {
 	struct proc_dir_entry *entry = NULL;
 
-	ACPI_FUNCTION_TRACE("acpi_sbs_generic_add_fs");
-
 	if (!*dir) {
 		*dir = proc_mkdir(dir_name, parent_dir);
 		if (!*dir) {
 			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
 					  "proc_mkdir() failed\n"));
-			return_VALUE(-ENODEV);
+			return -ENODEV;
 		}
 		(*dir)->owner = THIS_MODULE;
 	}
@@ -932,14 +899,13 @@ acpi_sbs_generic_add_fs(struct proc_dir_entry **dir,
 		}
 	}
 
-	return_VALUE(0);
+	return 0;
 }
 
 static void
 acpi_sbs_generic_remove_fs(struct proc_dir_entry **dir,
 			   struct proc_dir_entry *parent_dir)
 {
-	ACPI_FUNCTION_TRACE("acpi_sbs_generic_remove_fs");
 
 	if (*dir) {
 		remove_proc_entry(ACPI_SBS_FILE_INFO, *dir);
@@ -961,10 +927,8 @@ static int acpi_battery_read_info(struct seq_file *seq, void *offset)
 	int cscale;
 	int result = 0;
 
-	ACPI_FUNCTION_TRACE("acpi_battery_read_info");
-
 	if (battery->sbs->zombie) {
-		return_VALUE(-ENODEV);
+		return -ENODEV;
 	}
 
 	down(&sbs_sem);
@@ -1023,7 +987,7 @@ static int acpi_battery_read_info(struct seq_file *seq, void *offset)
 
 	up(&sbs_sem);
 
-	return_VALUE(result);
+	return result;
 }
 
 static int acpi_battery_info_open_fs(struct inode *inode, struct file *file)
@@ -1038,10 +1002,8 @@ static int acpi_battery_read_state(struct seq_file *seq, void *offset)
 	int cscale;
 	int foo;
 
-	ACPI_FUNCTION_TRACE("acpi_battery_read_state");
-
 	if (battery->sbs->zombie) {
-		return_VALUE(-ENODEV);
+		return -ENODEV;
 	}
 
 	down(&sbs_sem);
@@ -1104,7 +1066,7 @@ static int acpi_battery_read_state(struct seq_file *seq, void *offset)
 
 	up(&sbs_sem);
 
-	return_VALUE(result);
+	return result;
 }
 
 static int acpi_battery_state_open_fs(struct inode *inode, struct file *file)
@@ -1118,10 +1080,8 @@ static int acpi_battery_read_alarm(struct seq_file *seq, void *offset)
 	int result = 0;
 	int cscale;
 
-	ACPI_FUNCTION_TRACE("acpi_battery_read_alarm");
-
 	if (battery->sbs->zombie) {
-		return_VALUE(-ENODEV);
+		return -ENODEV;
 	}
 
 	down(&sbs_sem);
@@ -1158,7 +1118,7 @@ static int acpi_battery_read_alarm(struct seq_file *seq, void *offset)
 
 	up(&sbs_sem);
 
-	return_VALUE(result);
+	return result;
 }
 
 static ssize_t
@@ -1170,10 +1130,8 @@ acpi_battery_write_alarm(struct file *file, const char __user * buffer,
 	char alarm_string[12] = { '\0' };
 	int result, old_alarm, new_alarm;
 
-	ACPI_FUNCTION_TRACE("acpi_battery_write_alarm");
-
 	if (battery->sbs->zombie) {
-		return_VALUE(-ENODEV);
+		return -ENODEV;
 	}
 
 	down(&sbs_sem);
@@ -1217,9 +1175,9 @@ acpi_battery_write_alarm(struct file *file, const char __user * buffer,
 	up(&sbs_sem);
 
 	if (result) {
-		return_VALUE(result);
+		return result;
 	} else {
-		return_VALUE(count);
+		return count;
 	}
 }
 
@@ -1262,10 +1220,8 @@ static int acpi_ac_read_state(struct seq_file *seq, void *offset)
 	struct acpi_sbs *sbs = (struct acpi_sbs *)seq->private;
 	int result;
 
-	ACPI_FUNCTION_TRACE("acpi_ac_read_state");
-
 	if (sbs->zombie) {
-		return_VALUE(-ENODEV);
+		return -ENODEV;
 	}
 
 	down(&sbs_sem);
@@ -1283,7 +1239,7 @@ static int acpi_ac_read_state(struct seq_file *seq, void *offset)
 
 	up(&sbs_sem);
 
-	return_VALUE(0);
+	return 0;
 }
 
 static int acpi_ac_state_open_fs(struct inode *inode, struct file *file)
@@ -1312,8 +1268,6 @@ static int acpi_battery_add(struct acpi_sbs *sbs, int id)
 	char dir_name[32];
 	struct acpi_battery *battery;
 
-	ACPI_FUNCTION_TRACE("acpi_battery_add");
-
 	battery = &sbs->battery[id];
 
 	battery->alive = 0;
@@ -1364,12 +1318,11 @@ static int acpi_battery_add(struct acpi_sbs *sbs, int id)
 	battery->alive = 1;
 
       end:
-	return_VALUE(result);
+	return result;
 }
 
 static void acpi_battery_remove(struct acpi_sbs *sbs, int id)
 {
-	ACPI_FUNCTION_TRACE("acpi_battery_remove");
 
 	if (sbs->battery[id].battery_entry) {
 		acpi_sbs_generic_remove_fs(&(sbs->battery[id].battery_entry),
@@ -1381,8 +1334,6 @@ static int acpi_ac_add(struct acpi_sbs *sbs)
 {
 	int result;
 
-	ACPI_FUNCTION_TRACE("acpi_ac_add");
-
 	result = acpi_ac_get_present(sbs);
 	if (result) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
@@ -1402,12 +1353,11 @@ static int acpi_ac_add(struct acpi_sbs *sbs)
 
       end:
 
-	return_VALUE(result);
+	return result;
 }
 
 static void acpi_ac_remove(struct acpi_sbs *sbs)
 {
-	ACPI_FUNCTION_TRACE("acpi_ac_remove");
 
 	if (sbs->ac_entry) {
 		acpi_sbs_generic_remove_fs(&sbs->ac_entry, acpi_ac_dir);
@@ -1416,7 +1366,6 @@ static void acpi_ac_remove(struct acpi_sbs *sbs)
 
 static void acpi_sbs_update_queue_run(unsigned long data)
 {
-	ACPI_FUNCTION_TRACE("acpi_sbs_update_queue_run");
 	acpi_os_execute(OSL_GPE_HANDLER, acpi_sbs_update_queue, (void *)data);
 }
 
@@ -1433,8 +1382,6 @@ static int acpi_sbs_update_run(struct acpi_sbs *sbs, int data_type)
 	int do_battery_init, do_ac_init;
 	s16 old_remaining_capacity;
 
-	ACPI_FUNCTION_TRACE("acpi_sbs_update_run");
-
 	if (sbs->zombie) {
 		goto end;
 	}
@@ -1584,7 +1531,7 @@ static int acpi_sbs_update_run(struct acpi_sbs *sbs, int data_type)
 	}
 
       end:
-	return_VALUE(result);
+	return result;
 }
 
 static void acpi_sbs_update_queue(void *data)
@@ -1593,8 +1540,6 @@ static void acpi_sbs_update_queue(void *data)
 	unsigned long delay = -1;
 	int result;
 
-	ACPI_FUNCTION_TRACE("acpi_sbs_update_queue");
-
 	if (sbs->zombie) {
 		goto end;
 	}
@@ -1631,12 +1576,10 @@ static int acpi_sbs_add(struct acpi_device *device)
 	int id, cnt;
 	acpi_status status = AE_OK;
 
-	ACPI_FUNCTION_TRACE("acpi_sbs_add");
-
 	sbs = kmalloc(sizeof(struct acpi_sbs), GFP_KERNEL);
 	if (!sbs) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "kmalloc() failed\n"));
-		return_VALUE(-ENOMEM);
+		return -ENOMEM;
 	}
 	memset(sbs, 0, sizeof(struct acpi_sbs));
 
@@ -1737,7 +1680,7 @@ static int acpi_sbs_add(struct acpi_device *device)
 		}
 	}
 
-	return_VALUE(result);
+	return result;
 }
 
 int acpi_sbs_remove(struct acpi_device *device, int type)
@@ -1745,10 +1688,8 @@ int acpi_sbs_remove(struct acpi_device *device, int type)
 	struct acpi_sbs *sbs = (struct acpi_sbs *)acpi_driver_data(device);
 	int id;
 
-	ACPI_FUNCTION_TRACE("acpi_sbs_remove");
-
 	if (!device || !sbs) {
-		return_VALUE(-EINVAL);
+		return -EINVAL;
 	}
 
 	sbs->zombie = 1;
@@ -1766,15 +1707,13 @@ int acpi_sbs_remove(struct acpi_device *device, int type)
 
 	kfree(sbs);
 
-	return_VALUE(0);
+	return 0;
 }
 
 static int __init acpi_sbs_init(void)
 {
 	int result = 0;
 
-	ACPI_FUNCTION_TRACE("acpi_sbs_init");
-
 	init_MUTEX(&sbs_sem);
 
 	if (capacity_mode != DEF_CAPACITY_UNIT
@@ -1783,36 +1722,35 @@ static int __init acpi_sbs_init(void)
 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "acpi_sbs_init: "
 				  "invalid capacity_mode = %d\n",
 				  capacity_mode));
-		return_VALUE(-EINVAL);
+		return -EINVAL;
 	}
 
 	acpi_ac_dir = acpi_lock_ac_dir();
 	if (!acpi_ac_dir) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
 				  "acpi_lock_ac_dir() failed\n"));
-		return_VALUE(-ENODEV);
+		return -ENODEV;
 	}
 
 	acpi_battery_dir = acpi_lock_battery_dir();
 	if (!acpi_battery_dir) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
 				  "acpi_lock_battery_dir() failed\n"));
-		return_VALUE(-ENODEV);
+		return -ENODEV;
 	}
 
 	result = acpi_bus_register_driver(&acpi_sbs_driver);
 	if (result < 0) {
 		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
 				  "acpi_bus_register_driver() failed\n"));
-		return_VALUE(-ENODEV);
+		return -ENODEV;
 	}
 
-	return_VALUE(0);
+	return 0;
 }
 
 static void __exit acpi_sbs_exit(void)
 {
-	ACPI_FUNCTION_TRACE("acpi_sbs_exit");
 
 	acpi_bus_unregister_driver(&acpi_sbs_driver);
 
@@ -1821,7 +1759,7 @@ static void __exit acpi_sbs_exit(void)
 	acpi_unlock_battery_dir(acpi_battery_dir);
 	acpi_battery_dir = NULL;
 
-	return_VOID;
+	return;
 }
 
 module_init(acpi_sbs_init);
-- 
cgit v0.10.2


From c7afb0f97700e73109564f83c35bfeeb14cb653b Mon Sep 17 00:00:00 2001
From: KaiGai Kohei <kaigai@ak.jp.nec.com>
Date: Sun, 2 Jul 2006 15:13:46 +0100
Subject: [JFFS2][XATTR] Fix memory leak in POSIX-ACL support

jffs2_clear_acl() which releases acl caches allocated by kmalloc()
was defined but it was never called. Thus, we faced to the risk
of memory leaking.

This patch plugs jffs2_clear_acl() into jffs2_do_clear_inode().
It ensures to release acl cache when inode is cleared.

Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 9c2077e..0ae3cd1 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -345,10 +345,8 @@ int jffs2_init_acl(struct inode *inode, struct inode *dir)
 	return rc;
 }
 
-void jffs2_clear_acl(struct inode *inode)
+void jffs2_clear_acl(struct jffs2_inode_info *f)
 {
-	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
-
 	if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) {
 		posix_acl_release(f->i_acl_access);
 		f->i_acl_access = JFFS2_ACL_NOT_CACHED;
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 8893bd1..fa327db 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -30,7 +30,7 @@ struct jffs2_acl_header {
 extern int jffs2_permission(struct inode *, int, struct nameidata *);
 extern int jffs2_acl_chmod(struct inode *);
 extern int jffs2_init_acl(struct inode *, struct inode *);
-extern void jffs2_clear_acl(struct inode *);
+extern void jffs2_clear_acl(struct jffs2_inode_info *);
 
 extern struct xattr_handler jffs2_acl_access_xattr_handler;
 extern struct xattr_handler jffs2_acl_default_xattr_handler;
@@ -40,6 +40,6 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler;
 #define jffs2_permission NULL
 #define jffs2_acl_chmod(inode)		(0)
 #define jffs2_init_acl(inode,dir)	(0)
-#define jffs2_clear_acl(inode)
+#define jffs2_clear_acl(f)
 
 #endif	/* CONFIG_JFFS2_FS_POSIX_ACL */
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index cc18992..266423b 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -968,6 +968,7 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
 	struct jffs2_full_dirent *fd, *fds;
 	int deleted;
 
+	jffs2_clear_acl(f);
 	jffs2_xattr_delete_inode(c, f->inocache);
 	down(&f->sem);
 	deleted = f->inocache && !f->inocache->nlink;
-- 
cgit v0.10.2


From d6b0c53723753fc0cfda63f56735b225c43e1e9a Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@steeleye.com>
Date: Sun, 2 Jul 2006 10:06:28 -0500
Subject: [SCSI] fix error handling in scsi_io_completion

There was a logic fault in scsi_io_completion() where zero transfer
commands that complete successfully were sent to the block layer as
not up to date.  This patch removes the if (good_bytes > 0) gate
around the successful completion, since zero transfer commands do have
good_bytes == 0.

Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index bf5191f..08af9aa 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -920,22 +920,20 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 	 * Next deal with any sectors which we were able to correctly
 	 * handle.
 	 */
-	if (good_bytes > 0) {
-		SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, "
-					      "%d bytes done.\n",
-					      req->nr_sectors, good_bytes));
-		SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n", cmd->use_sg));
-
-		if (clear_errors)
-			req->errors = 0;
-
-		/* A number of bytes were successfully read.  If there
-		 * is leftovers and there is some kind of error
-		 * (result != 0), retry the rest.
-		 */
-		if (scsi_end_request(cmd, 1, good_bytes, !!result) == NULL)
-			return;
-	}
+	SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, "
+				      "%d bytes done.\n",
+				      req->nr_sectors, good_bytes));
+	SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n", cmd->use_sg));
+
+	if (clear_errors)
+		req->errors = 0;
+
+	/* A number of bytes were successfully read.  If there
+	 * are leftovers and there is some kind of error
+	 * (result != 0), retry the rest.
+	 */
+	if (scsi_end_request(cmd, 1, good_bytes, result == 0) == NULL)
+		return;
 
 	/* good_bytes = 0, or (inclusive) there were leftovers and
 	 * result = 0, so scsi_end_request couldn't retry.
-- 
cgit v0.10.2


From 34c162f79e374556dd1384437f0dab558b5dc657 Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@mars.ravnborg.org>
Date: Sun, 2 Jul 2006 20:21:49 +0200
Subject: kbuild: explicit turn off gcc stack-protector

Ubuntu has enabled -fstack-protector per default in gcc
breaking kernel build. Explicit turn it off for now.
Later we may decide to make it configurable if the
kernel starts to support it.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/Makefile b/Makefile
index 4dcf25d..1cc793f 100644
--- a/Makefile
+++ b/Makefile
@@ -309,6 +309,9 @@ CPPFLAGS        := -D__KERNEL__ $(LINUXINCLUDE)
 
 CFLAGS          := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
                    -fno-strict-aliasing -fno-common
+# Force gcc to behave correct even for buggy distributions
+CFLAGS          += $(call cc-option, -fno-stack-protector-all \
+                                     -fno-stack-protector)
 AFLAGS          := -D__ASSEMBLY__
 
 # Read KERNELRELEASE from include/config/kernel.release (if it exists)
-- 
cgit v0.10.2


From 947deee8904b3c2edc7f59ab6e6242499e4dc434 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Sun, 2 Jul 2006 20:45:51 +0100
Subject: [SERIAL] Convert fifosize to an unsigned int

Some UARTs have more than 255 bytes of FIFO, which can't be
represented by an unsigned char.  Change the kernel's internal
structure to be an unsigned int, but still export an unsigned char
via the TIOCGSERIAL ioctl.  If the TIOCSSERIAL ioctl provides a
fifo size of 0, assume this means "don't change" otherwise we'll
corrupt the larger fifo sizes.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c
index 6459edc..1b2cdc9 100644
--- a/drivers/serial/mpc52xx_uart.c
+++ b/drivers/serial/mpc52xx_uart.c
@@ -728,8 +728,7 @@ mpc52xx_uart_probe(struct platform_device *dev)
 
 	spin_lock_init(&port->lock);
 	port->uartclk	= __res.bi_ipbfreq / 2; /* Look at CTLR doc */
-	port->fifosize	= 255; /* Should be 512 ! But it can't be */
-	                       /* stored in a unsigned char       */
+	port->fifosize	= 512;
 	port->iotype	= UPIO_MEM;
 	port->flags	= UPF_BOOT_AUTOCONF |
 			  ( uart_console(port) ? 0 : UPF_IOREMAP );
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index 17839e7..3805f46 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -691,7 +691,8 @@ static int uart_set_info(struct uart_state *state,
 		    (new_serial.baud_base != port->uartclk / 16) ||
 		    (close_delay != state->close_delay) ||
 		    (closing_wait != state->closing_wait) ||
-		    (new_serial.xmit_fifo_size != port->fifosize) ||
+		    (new_serial.xmit_fifo_size &&
+		     new_serial.xmit_fifo_size != port->fifosize) ||
 		    (((new_flags ^ old_flags) & ~UPF_USR_MASK) != 0))
 			goto exit;
 		port->flags = ((port->flags & ~UPF_USR_MASK) |
@@ -796,7 +797,8 @@ static int uart_set_info(struct uart_state *state,
 	port->custom_divisor   = new_serial.custom_divisor;
 	state->close_delay     = close_delay;
 	state->closing_wait    = closing_wait;
-	port->fifosize         = new_serial.xmit_fifo_size;
+	if (new_serial.xmit_fifo_size)
+		port->fifosize = new_serial.xmit_fifo_size;
 	if (state->info->tty)
 		state->info->tty->low_latency =
 			(port->flags & UPF_LOW_LATENCY) ? 1 : 0;
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index bd14858..966e2e8 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -214,10 +214,11 @@ struct uart_port {
 	unsigned char __iomem	*membase;		/* read/write[bwl] */
 	unsigned int		irq;			/* irq number */
 	unsigned int		uartclk;		/* base uart clock */
-	unsigned char		fifosize;		/* tx fifo size */
+	unsigned int		fifosize;		/* tx fifo size */
 	unsigned char		x_char;			/* xon/xoff char */
 	unsigned char		regshift;		/* reg offset shift */
 	unsigned char		iotype;			/* io access style */
+	unsigned char		unused1;
 
 #define UPIO_PORT		(0)
 #define UPIO_HUB6		(1)
-- 
cgit v0.10.2


From a1af5b2fd49eb24ab8c024da5d853b09841d1f8f Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@ozlabs.org>
Date: Thu, 29 Jun 2006 20:28:18 +1000
Subject: [POWERPC] change get_property to return void *

Change the get_property() function to return a void *. This allows us
to later remove the cast done in the majority of callers.

Built for pseries, iseries, pmac32, cell, cbesim, g5, systemsim, maple,
and mpc* defconfigs

Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 4c524cb..ef3619c 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -1975,8 +1975,7 @@ struct property *of_find_property(struct device_node *np, const char *name,
  * Find a property with a given name for a given node
  * and return the value.
  */
-unsigned char *get_property(struct device_node *np, const char *name,
-			    int *lenp)
+void *get_property(struct device_node *np, const char *name, int *lenp)
 {
 	struct property *pp = of_find_property(np,name,lenp);
 	return pp ? pp->value : NULL;
diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h
index 010d186..b0768f4 100644
--- a/include/asm-powerpc/prom.h
+++ b/include/asm-powerpc/prom.h
@@ -167,8 +167,8 @@ extern void unflatten_device_tree(void);
 extern void early_init_devtree(void *);
 extern int device_is_compatible(struct device_node *device, const char *);
 extern int machine_is_compatible(const char *compat);
-extern unsigned char *get_property(struct device_node *node, const char *name,
-				   int *lenp);
+extern void *get_property(struct device_node *node, const char *name,
+		int *lenp);
 extern void print_properties(struct device_node *node);
 extern int prom_n_addr_cells(struct device_node* np);
 extern int prom_n_size_cells(struct device_node* np);
-- 
cgit v0.10.2


From 609c9991b193f99a9a9e941beffcdb540752f761 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Thu, 29 Jun 2006 16:52:53 -0400
Subject: [POWERPC] fix implicit declaration on cell.

(Only fails with -Werror-implicit-function-declaration, but
 it should still be fixed).

arch/powerpc/platforms/cell/setup.c:145: error: implicit declaration of function 'udbg_init_rtas_console'

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index d8c2a29b..01ec394 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -49,6 +49,7 @@
 #include <asm/irq.h>
 #include <asm/spu.h>
 #include <asm/spu_priv1.h>
+#include <asm/udbg.h>
 
 #include "interrupt.h"
 #include "iommu.h"
-- 
cgit v0.10.2


From 4ce631e7a229a0e073078a197ed37d437cabcde0 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 3 Jul 2006 16:36:17 +1000
Subject: [POWERPC] Add a default config for 32-bit CHRP machines

Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig
new file mode 100644
index 0000000..0fa010a
--- /dev/null
+++ b/arch/powerpc/configs/chrp32_defconfig
@@ -0,0 +1,1378 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.17
+# Mon Jul  3 12:08:41 2006
+#
+# CONFIG_PPC64 is not set
+CONFIG_PPC32=y
+CONFIG_PPC_MERGE=y
+CONFIG_MMU=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_PPC=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_GENERIC_NVRAM=y
+CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_PPC_OF=y
+CONFIG_PPC_UDBG_16550=y
+CONFIG_GENERIC_TBSYNC=y
+# CONFIG_DEFAULT_UIMAGE is not set
+
+#
+# Processor support
+#
+CONFIG_CLASSIC32=y
+# CONFIG_PPC_52xx is not set
+# CONFIG_PPC_82xx is not set
+# CONFIG_PPC_83xx is not set
+# CONFIG_PPC_85xx is not set
+# CONFIG_PPC_86xx is not set
+# CONFIG_40x is not set
+# CONFIG_44x is not set
+# CONFIG_8xx is not set
+# CONFIG_E200 is not set
+CONFIG_6xx=y
+CONFIG_PPC_FPU=y
+# CONFIG_ALTIVEC is not set
+CONFIG_PPC_STD_MMU=y
+CONFIG_PPC_STD_MMU_32=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+# CONFIG_CPUSETS is not set
+# CONFIG_RELAY is not set
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SHMEM=y
+CONFIG_SLAB=y
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+# CONFIG_SLOB is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
+
+#
+# Block layer
+#
+CONFIG_LBD=y
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
+#
+# Platform support
+#
+CONFIG_PPC_MULTIPLATFORM=y
+# CONFIG_PPC_ISERIES is not set
+# CONFIG_EMBEDDED6xx is not set
+# CONFIG_APUS is not set
+CONFIG_PPC_CHRP=y
+# CONFIG_PPC_PMAC is not set
+# CONFIG_PPC_CELL is not set
+# CONFIG_PPC_CELL_NATIVE is not set
+CONFIG_MPIC=y
+CONFIG_PPC_RTAS=y
+# CONFIG_RTAS_ERROR_LOGGING is not set
+CONFIG_RTAS_PROC=y
+# CONFIG_MMIO_NVRAM is not set
+CONFIG_PPC_MPC106=y
+# CONFIG_PPC_970_NAP is not set
+# CONFIG_CPU_FREQ is not set
+# CONFIG_TAU is not set
+# CONFIG_WANT_EARLY_SERIAL is not set
+
+#
+# Kernel options
+#
+CONFIG_HIGHMEM=y
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_PREEMPT_BKL=y
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_MISC=y
+# CONFIG_KEXEC is not set
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_PROC_DEVICETREE=y
+# CONFIG_CMDLINE_BOOL is not set
+# CONFIG_PM is not set
+CONFIG_SECCOMP=y
+CONFIG_ISA_DMA_API=y
+
+#
+# Bus options
+#
+CONFIG_ISA=y
+CONFIG_GENERIC_ISA_DMA=y
+CONFIG_PPC_I8259=y
+CONFIG_PPC_INDIRECT_PCI=y
+CONFIG_PCI=y
+CONFIG_PCI_DOMAINS=y
+# CONFIG_PCIEPORTBUS is not set
+# CONFIG_PCI_DEBUG is not set
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# PCI Hotplug Support
+#
+# CONFIG_HOTPLUG_PCI is not set
+
+#
+# Advanced setup
+#
+# CONFIG_ADVANCED_OPTIONS is not set
+
+#
+# Default settings for advanced configuration options are used
+#
+CONFIG_HIGHMEM_START=0xfe000000
+CONFIG_LOWMEM_SIZE=0x30000000
+CONFIG_KERNEL_START=0xc0000000
+CONFIG_TASK_SIZE=0x80000000
+CONFIG_BOOT_LOAD=0x00800000
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+# CONFIG_NETDEBUG is not set
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_BIC=y
+
+#
+# IP: Virtual Server Configuration
+#
+# CONFIG_IP_VS is not set
+# CONFIG_IPV6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
+# CONFIG_NETWORK_SECMARK is not set
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+
+#
+# Core Netfilter Configuration
+#
+# CONFIG_NETFILTER_NETLINK is not set
+# CONFIG_NETFILTER_XTABLES is not set
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=m
+# CONFIG_IP_NF_CT_ACCT is not set
+# CONFIG_IP_NF_CONNTRACK_MARK is not set
+# CONFIG_IP_NF_CONNTRACK_EVENTS is not set
+# CONFIG_IP_NF_CT_PROTO_SCTP is not set
+CONFIG_IP_NF_FTP=m
+CONFIG_IP_NF_IRC=m
+# CONFIG_IP_NF_NETBIOS_NS is not set
+CONFIG_IP_NF_TFTP=m
+CONFIG_IP_NF_AMANDA=m
+# CONFIG_IP_NF_PPTP is not set
+# CONFIG_IP_NF_H323 is not set
+# CONFIG_IP_NF_SIP is not set
+# CONFIG_IP_NF_QUEUE is not set
+
+#
+# DCCP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_DCCP is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+
+#
+# TIPC Configuration (EXPERIMENTAL)
+#
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_IEEE80211 is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+# CONFIG_STANDALONE is not set
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_SYS_HYPERVISOR is not set
+
+#
+# Connector - unified userspace <-> kernelspace linker
+#
+# CONFIG_CONNECTOR is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+# CONFIG_PNP is not set
+
+#
+# Block devices
+#
+CONFIG_BLK_DEV_FD=y
+# CONFIG_BLK_DEV_XD is not set
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_SX8 is not set
+# CONFIG_BLK_DEV_UB is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_IDE_SATA is not set
+CONFIG_BLK_DEV_IDEDISK=y
+CONFIG_IDEDISK_MULTI_MODE=y
+CONFIG_BLK_DEV_IDECD=y
+# CONFIG_BLK_DEV_IDETAPE is not set
+# CONFIG_BLK_DEV_IDEFLOPPY is not set
+# CONFIG_BLK_DEV_IDESCSI is not set
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_IDEPCI_SHARE_IRQ=y
+# CONFIG_BLK_DEV_OFFBOARD is not set
+CONFIG_BLK_DEV_GENERIC=y
+# CONFIG_BLK_DEV_OPTI621 is not set
+CONFIG_BLK_DEV_SL82C105=y
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+# CONFIG_IDEDMA_ONLYDISK is not set
+# CONFIG_BLK_DEV_AEC62XX is not set
+# CONFIG_BLK_DEV_ALI15X3 is not set
+# CONFIG_BLK_DEV_AMD74XX is not set
+# CONFIG_BLK_DEV_CMD64X is not set
+# CONFIG_BLK_DEV_TRIFLEX is not set
+# CONFIG_BLK_DEV_CY82C693 is not set
+# CONFIG_BLK_DEV_CS5520 is not set
+# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_HPT34X is not set
+# CONFIG_BLK_DEV_HPT366 is not set
+# CONFIG_BLK_DEV_SC1200 is not set
+# CONFIG_BLK_DEV_PIIX is not set
+# CONFIG_BLK_DEV_IT821X is not set
+# CONFIG_BLK_DEV_NS87415 is not set
+# CONFIG_BLK_DEV_PDC202XX_OLD is not set
+# CONFIG_BLK_DEV_PDC202XX_NEW is not set
+# CONFIG_BLK_DEV_SVWKS is not set
+# CONFIG_BLK_DEV_SIIMAGE is not set
+# CONFIG_BLK_DEV_SLC90E66 is not set
+# CONFIG_BLK_DEV_TRM290 is not set
+CONFIG_BLK_DEV_VIA82CXXX=y
+# CONFIG_IDE_ARM is not set
+# CONFIG_IDE_CHIPSETS is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+# CONFIG_CHR_DEV_OSST is not set
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+CONFIG_SCSI_CONSTANTS=y
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=y
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_ATTRS is not set
+
+#
+# SCSI low-level drivers
+#
+# CONFIG_ISCSI_TCP is not set
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+# CONFIG_SCSI_3W_9XXX is not set
+# CONFIG_SCSI_7000FASST is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AHA152X is not set
+# CONFIG_SCSI_AHA1542 is not set
+# CONFIG_SCSI_AACRAID is not set
+# CONFIG_SCSI_AIC7XXX is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+# CONFIG_SCSI_AIC79XX is not set
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_IN2000 is not set
+# CONFIG_MEGARAID_NEWGEN is not set
+# CONFIG_MEGARAID_LEGACY is not set
+# CONFIG_MEGARAID_SAS is not set
+# CONFIG_SCSI_SATA is not set
+# CONFIG_SCSI_HPTIOP is not set
+# CONFIG_SCSI_BUSLOGIC is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_DTC3280 is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_GDTH is not set
+# CONFIG_SCSI_GENERIC_NCR5380 is not set
+# CONFIG_SCSI_GENERIC_NCR5380_MMIO is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_NCR53C406A is not set
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
+CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
+CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
+CONFIG_SCSI_SYM53C8XX_MMIO=y
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_PAS16 is not set
+# CONFIG_SCSI_PSI240I is not set
+# CONFIG_SCSI_QLOGIC_FAS is not set
+# CONFIG_SCSI_QLOGIC_1280 is not set
+# CONFIG_SCSI_QLA_FC is not set
+# CONFIG_SCSI_LPFC is not set
+# CONFIG_SCSI_SYM53C416 is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_T128 is not set
+# CONFIG_SCSI_U14_34F is not set
+# CONFIG_SCSI_NSP32 is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Old CD-ROM drivers (not SCSI, not IDE)
+#
+# CONFIG_CD_NO_IDESCSI is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+# CONFIG_FUSION_SPI is not set
+# CONFIG_FUSION_FC is not set
+# CONFIG_FUSION_SAS is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+
+#
+# Macintosh device drivers
+#
+# CONFIG_WINDFARM is not set
+
+#
+# Network device support
+#
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# PHY device support
+#
+# CONFIG_PHYLIB is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_CASSINI is not set
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_LANCE is not set
+# CONFIG_NET_VENDOR_SMC is not set
+# CONFIG_NET_VENDOR_RACAL is not set
+
+#
+# Tulip family network device support
+#
+CONFIG_NET_TULIP=y
+# CONFIG_DE2104X is not set
+# CONFIG_TULIP is not set
+CONFIG_DE4X5=y
+# CONFIG_WINBOND_840 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_ULI526X is not set
+# CONFIG_AT1700 is not set
+# CONFIG_DEPCA is not set
+# CONFIG_HP100 is not set
+# CONFIG_NET_ISA is not set
+CONFIG_NET_PCI=y
+CONFIG_PCNET32=y
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_AC3200 is not set
+# CONFIG_APRICOT is not set
+# CONFIG_B44 is not set
+# CONFIG_FORCEDETH is not set
+# CONFIG_CS89x0 is not set
+# CONFIG_DGRS is not set
+# CONFIG_EEPRO100 is not set
+# CONFIG_E100 is not set
+# CONFIG_FEALNX is not set
+# CONFIG_NATSEMI is not set
+# CONFIG_NE2K_PCI is not set
+CONFIG_8139CP=y
+CONFIG_8139TOO=y
+# CONFIG_8139TOO_PIO is not set
+# CONFIG_8139TOO_TUNE_TWISTER is not set
+# CONFIG_8139TOO_8129 is not set
+# CONFIG_8139_OLD_RX_RESET is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_TLAN is not set
+CONFIG_VIA_RHINE=y
+# CONFIG_VIA_RHINE_MMIO is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+# CONFIG_E1000 is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+# CONFIG_SIS190 is not set
+# CONFIG_SKGE is not set
+# CONFIG_SKY2 is not set
+# CONFIG_SK98LIN is not set
+# CONFIG_VIA_VELOCITY is not set
+# CONFIG_TIGON3 is not set
+# CONFIG_BNX2 is not set
+CONFIG_MV643XX_ETH=y
+# CONFIG_MV643XX_ETH_0 is not set
+# CONFIG_MV643XX_ETH_1 is not set
+# CONFIG_MV643XX_ETH_2 is not set
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_CHELSIO_T1 is not set
+# CONFIG_IXGB is not set
+# CONFIG_S2IO is not set
+# CONFIG_MYRI10GE is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+CONFIG_PPP=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+# CONFIG_SLIP is not set
+# CONFIG_NET_FC is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+CONFIG_INPUT_MOUSE=y
+CONFIG_MOUSE_PS2=y
+# CONFIG_MOUSE_SERIAL is not set
+# CONFIG_MOUSE_INPORT is not set
+# CONFIG_MOUSE_LOGIBM is not set
+# CONFIG_MOUSE_PC110PAD is not set
+# CONFIG_MOUSE_VSXXXAA is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+CONFIG_INPUT_MISC=y
+# CONFIG_INPUT_PCSPKR is not set
+CONFIG_INPUT_UINPUT=y
+
+#
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=y
+# CONFIG_SERIO_PCIPS2 is not set
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_PCI=y
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+# CONFIG_SERIAL_8250_EXTENDED is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+# CONFIG_HVC_RTAS is not set
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+CONFIG_NVRAM=y
+CONFIG_GEN_RTC=y
+# CONFIG_GEN_RTC_X is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_AGP is not set
+# CONFIG_DRM is not set
+# CONFIG_RAW_DRIVER is not set
+
+#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
+
+#
+# I2C support
+#
+CONFIG_I2C=y
+# CONFIG_I2C_CHARDEV is not set
+
+#
+# I2C Algorithms
+#
+CONFIG_I2C_ALGOBIT=y
+# CONFIG_I2C_ALGOPCF is not set
+# CONFIG_I2C_ALGOPCA is not set
+
+#
+# I2C Hardware Bus support
+#
+# CONFIG_I2C_ALI1535 is not set
+# CONFIG_I2C_ALI1563 is not set
+# CONFIG_I2C_ALI15X3 is not set
+# CONFIG_I2C_AMD756 is not set
+# CONFIG_I2C_AMD8111 is not set
+# CONFIG_I2C_HYDRA is not set
+# CONFIG_I2C_I801 is not set
+# CONFIG_I2C_I810 is not set
+# CONFIG_I2C_PIIX4 is not set
+# CONFIG_I2C_MPC is not set
+# CONFIG_I2C_NFORCE2 is not set
+# CONFIG_I2C_OCORES is not set
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_PROSAVAGE is not set
+# CONFIG_I2C_SAVAGE4 is not set
+# CONFIG_I2C_SIS5595 is not set
+# CONFIG_I2C_SIS630 is not set
+# CONFIG_I2C_SIS96X is not set
+# CONFIG_I2C_STUB is not set
+# CONFIG_I2C_VIA is not set
+# CONFIG_I2C_VIAPRO is not set
+# CONFIG_I2C_VOODOO3 is not set
+# CONFIG_I2C_PCA_ISA is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_SENSORS_DS1337 is not set
+# CONFIG_SENSORS_DS1374 is not set
+# CONFIG_SENSORS_EEPROM is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_SENSORS_M41T00 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+
+#
+# SPI support
+#
+# CONFIG_SPI is not set
+# CONFIG_SPI_MASTER is not set
+
+#
+# Dallas's 1-wire bus
+#
+
+#
+# Hardware Monitoring support
+#
+# CONFIG_HWMON is not set
+# CONFIG_HWMON_VID is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+CONFIG_VIDEO_V4L2=y
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+# CONFIG_USB_DABUSB is not set
+
+#
+# Graphics support
+#
+CONFIG_FB=y
+CONFIG_FB_CFB_FILLRECT=y
+CONFIG_FB_CFB_COPYAREA=y
+CONFIG_FB_CFB_IMAGEBLIT=y
+CONFIG_FB_MACMODES=y
+CONFIG_FB_FIRMWARE_EDID=y
+# CONFIG_FB_BACKLIGHT is not set
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_TILEBLITTING=y
+# CONFIG_FB_CIRRUS is not set
+# CONFIG_FB_PM2 is not set
+# CONFIG_FB_CYBER2000 is not set
+CONFIG_FB_OF=y
+# CONFIG_FB_CT65550 is not set
+# CONFIG_FB_ASILIANT is not set
+# CONFIG_FB_IMSTT is not set
+# CONFIG_FB_VGA16 is not set
+# CONFIG_FB_S1D13XXX is not set
+# CONFIG_FB_NVIDIA is not set
+# CONFIG_FB_RIVA is not set
+CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_MATROX_G=y
+# CONFIG_FB_MATROX_I2C is not set
+# CONFIG_FB_MATROX_MULTIHEAD is not set
+CONFIG_FB_RADEON=y
+CONFIG_FB_RADEON_I2C=y
+# CONFIG_FB_RADEON_DEBUG is not set
+# CONFIG_FB_ATY128 is not set
+CONFIG_FB_ATY=y
+CONFIG_FB_ATY_CT=y
+# CONFIG_FB_ATY_GENERIC_LCD is not set
+CONFIG_FB_ATY_GX=y
+# CONFIG_FB_SAVAGE is not set
+# CONFIG_FB_SIS is not set
+# CONFIG_FB_NEOMAGIC is not set
+# CONFIG_FB_KYRO is not set
+CONFIG_FB_3DFX=y
+# CONFIG_FB_3DFX_ACCEL is not set
+# CONFIG_FB_VOODOO1 is not set
+# CONFIG_FB_TRIDENT is not set
+# CONFIG_FB_VIRTUAL is not set
+
+#
+# Console display driver support
+#
+CONFIG_VGA_CONSOLE=y
+# CONFIG_VGACON_SOFT_SCROLLBACK is not set
+# CONFIG_MDA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
+# CONFIG_FONTS is not set
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+
+#
+# Logo configuration
+#
+CONFIG_LOGO=y
+CONFIG_LOGO_LINUX_MONO=y
+CONFIG_LOGO_LINUX_VGA16=y
+CONFIG_LOGO_LINUX_CLUT224=y
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB_ARCH_HAS_EHCI=y
+CONFIG_USB=y
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_OTG is not set
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_EHCI_HCD=m
+# CONFIG_USB_EHCI_SPLIT_ISO is not set
+# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+# CONFIG_USB_ISP116X_HCD is not set
+CONFIG_USB_OHCI_HCD=y
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
+CONFIG_USB_UHCI_HCD=y
+# CONFIG_USB_SL811_HCD is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
+#
+CONFIG_USB_STORAGE=m
+# CONFIG_USB_STORAGE_DEBUG is not set
+# CONFIG_USB_STORAGE_DATAFAB is not set
+# CONFIG_USB_STORAGE_FREECOM is not set
+# CONFIG_USB_STORAGE_ISD200 is not set
+# CONFIG_USB_STORAGE_DPCM is not set
+# CONFIG_USB_STORAGE_USBAT is not set
+# CONFIG_USB_STORAGE_SDDR09 is not set
+# CONFIG_USB_STORAGE_SDDR55 is not set
+# CONFIG_USB_STORAGE_JUMPSHOT is not set
+# CONFIG_USB_STORAGE_ALAUDA is not set
+# CONFIG_USB_STORAGE_ONETOUCH is not set
+# CONFIG_USB_LIBUSUAL is not set
+
+#
+# USB Input Devices
+#
+CONFIG_USB_HID=y
+CONFIG_USB_HIDINPUT=y
+# CONFIG_USB_HIDINPUT_POWERBOOK is not set
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
+# CONFIG_USB_AIPTEK is not set
+# CONFIG_USB_WACOM is not set
+# CONFIG_USB_ACECAD is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_TOUCHSCREEN is not set
+# CONFIG_USB_YEALINK is not set
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+# CONFIG_USB_ATI_REMOTE2 is not set
+# CONFIG_USB_KEYSPAN_REMOTE is not set
+# CONFIG_USB_APPLETOUCH is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+CONFIG_USB_MON=y
+
+#
+# USB port drivers
+#
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CY7C63 is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGETKIT is not set
+# CONFIG_USB_PHIDGETSERVO is not set
+# CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_APPLEDISPLAY is not set
+# CONFIG_USB_SISUSBVGA is not set
+# CONFIG_USB_LD is not set
+# CONFIG_USB_TEST is not set
+
+#
+# USB DSL modem support
+#
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# LED devices
+#
+# CONFIG_NEW_LEDS is not set
+
+#
+# LED drivers
+#
+
+#
+# LED Triggers
+#
+
+#
+# InfiniBand support
+#
+# CONFIG_INFINIBAND is not set
+
+#
+# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
+#
+
+#
+# Real Time Clock
+#
+# CONFIG_RTC_CLASS is not set
+
+#
+# DMA Engine support
+#
+# CONFIG_DMA_ENGINE is not set
+
+#
+# DMA Clients
+#
+
+#
+# DMA Devices
+#
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+# CONFIG_QUOTA is not set
+CONFIG_DNOTIFY=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=y
+# CONFIG_JOLIET is not set
+# CONFIG_ZISOFS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_FAT_DEFAULT_CODEPAGE=437
+CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+# CONFIG_NFS_FS is not set
+# CONFIG_NFSD is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+# CONFIG_9P_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+CONFIG_MAC_PARTITION=y
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_BSD_DISKLABEL is not set
+# CONFIG_MINIX_SUBPARTITION is not set
+# CONFIG_SOLARIS_X86_PARTITION is not set
+# CONFIG_UNIXWARE_DISKLABEL is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_KARMA_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+# CONFIG_NLS_CODEPAGE_437 is not set
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=m
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+
+#
+# Library routines
+#
+CONFIG_CRC_CCITT=m
+# CONFIG_CRC16 is not set
+CONFIG_CRC32=y
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=m
+CONFIG_ZLIB_DEFLATE=m
+CONFIG_TEXTSEARCH=y
+CONFIG_TEXTSEARCH_KMP=m
+
+#
+# Instrumentation Support
+#
+# CONFIG_PROFILING is not set
+
+#
+# Kernel hacking
+#
+# CONFIG_PRINTK_TIME is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_LOG_BUF_SHIFT=15
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+CONFIG_DEBUG_MUTEXES=y
+# CONFIG_DEBUG_SPINLOCK is not set
+CONFIG_DEBUG_SPINLOCK_SLEEP=y
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
+CONFIG_FORCED_INLINING=y
+# CONFIG_RCU_TORTURE_TEST is not set
+CONFIG_DEBUGGER=y
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+# CONFIG_BDI_SWITCH is not set
+# CONFIG_BOOTX_TEXT is not set
+# CONFIG_PPC_EARLY_DEBUG is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_MD4 is not set
+# CONFIG_CRYPTO_MD5 is not set
+CONFIG_CRYPTO_SHA1=m
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_TEA is not set
+CONFIG_CRYPTO_ARC4=m
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Hardware crypto devices
+#
-- 
cgit v0.10.2


From ab13446616118dc61c00ea50cc49919400717dd0 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 3 Jul 2006 17:19:48 +1000
Subject: [POWERPC] Fix various offb and BootX-related issues

This patch fixes various issues with offb (the default fbdev used on
powerpc when no proper fbdev is supported). It was broken when using
BootX under some circumstances and would fail to properly get the
framebuffer base address in others.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index a692091..f4e5e14e 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -111,7 +111,7 @@ void __init btext_setup_display(int width, int height, int depth, int pitch,
 	logicalDisplayBase = (unsigned char *)address;
 	dispDeviceBase = (unsigned char *)address;
 	dispDeviceRowBytes = pitch;
-	dispDeviceDepth = depth;
+	dispDeviceDepth = depth == 15 ? 16 : depth;
 	dispDeviceRect[0] = dispDeviceRect[1] = 0;
 	dispDeviceRect[2] = width;
 	dispDeviceRect[3] = height;
@@ -160,20 +160,28 @@ int btext_initialize(struct device_node *np)
 	unsigned long address = 0;
 	u32 *prop;
 
-	prop = (u32 *)get_property(np, "width", NULL);
+	prop = (u32 *)get_property(np, "linux,bootx-width", NULL);
+	if (prop == NULL)
+		prop = (u32 *)get_property(np, "width", NULL);
 	if (prop == NULL)
 		return -EINVAL;
 	width = *prop;
-	prop = (u32 *)get_property(np, "height", NULL);
+	prop = (u32 *)get_property(np, "linux,bootx-height", NULL);
+	if (prop == NULL)
+		prop = (u32 *)get_property(np, "height", NULL);
 	if (prop == NULL)
 		return -EINVAL;
 	height = *prop;
-	prop = (u32 *)get_property(np, "depth", NULL);
+	prop = (u32 *)get_property(np, "linux,bootx-depth", NULL);
+	if (prop == NULL)
+		prop = (u32 *)get_property(np, "depth", NULL);
 	if (prop == NULL)
 		return -EINVAL;
 	depth = *prop;
 	pitch = width * ((depth + 7) / 8);
-	prop = (u32 *)get_property(np, "linebytes", NULL);
+	prop = (u32 *)get_property(np, "linux,bootx-linebytes", NULL);
+	if (prop == NULL)
+		prop = (u32 *)get_property(np, "linebytes", NULL);
 	if (prop)
 		pitch = *prop;
 	if (pitch == 1)
@@ -194,7 +202,7 @@ int btext_initialize(struct device_node *np)
 	g_max_loc_Y = height / 16;
 	dispDeviceBase = (unsigned char *)address;
 	dispDeviceRowBytes = pitch;
-	dispDeviceDepth = depth;
+	dispDeviceDepth = depth == 15 ? 16 : depth;
 	dispDeviceRect[0] = dispDeviceRect[1] = 0;
 	dispDeviceRect[2] = width;
 	dispDeviceRect[3] = height;
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index ba7cd50..0c21de3 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -51,7 +51,6 @@
 
 extern void bootx_init(unsigned long r4, unsigned long phys);
 
-boot_infos_t *boot_infos;
 struct ide_machdep_calls ppc_ide_md;
 
 int boot_cpuid;
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index cb257ae..5685ad9 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -181,8 +181,25 @@ static void __init bootx_add_chosen_props(unsigned long base,
 static void __init bootx_add_display_props(unsigned long base,
 					   unsigned long *mem_end)
 {
+	boot_infos_t *bi = bootx_info;
+	u32 tmp;
+
 	bootx_dt_add_prop("linux,boot-display", NULL, 0, mem_end);
 	bootx_dt_add_prop("linux,opened", NULL, 0, mem_end);
+	tmp = bi->dispDeviceDepth;
+	bootx_dt_add_prop("linux,bootx-depth", &tmp, 4, mem_end);
+	tmp = bi->dispDeviceRect[2] - bi->dispDeviceRect[0];
+	bootx_dt_add_prop("linux,bootx-width", &tmp, 4, mem_end);
+	tmp = bi->dispDeviceRect[3] - bi->dispDeviceRect[1];
+	bootx_dt_add_prop("linux,bootx-height", &tmp, 4, mem_end);
+	tmp = bi->dispDeviceRowBytes;
+	bootx_dt_add_prop("linux,bootx-linebytes", &tmp, 4, mem_end);
+	tmp = (u32)bi->dispDeviceBase;
+	if (tmp == 0)
+		tmp = (u32)bi->logicalDisplayBase;
+	tmp += bi->dispDeviceRect[1] * bi->dispDeviceRowBytes;
+	tmp += bi->dispDeviceRect[0] * ((bi->dispDeviceDepth + 7) / 8);
+	bootx_dt_add_prop("linux,bootx-addr", &tmp, 4, mem_end);
 }
 
 static void __init bootx_dt_add_string(char *s, unsigned long *mem_end)
@@ -222,6 +239,11 @@ static void __init bootx_scan_dt_build_strings(unsigned long base,
 		DBG(" detected display ! adding properties names !\n");
 		bootx_dt_add_string("linux,boot-display", mem_end);
 		bootx_dt_add_string("linux,opened", mem_end);
+		bootx_dt_add_string("linux,bootx-depth", mem_end);
+		bootx_dt_add_string("linux,bootx-width", mem_end);
+		bootx_dt_add_string("linux,bootx-height", mem_end);
+		bootx_dt_add_string("linux,bootx-linebytes", mem_end);
+		bootx_dt_add_string("linux,bootx-addr", mem_end);
 		strncpy(bootx_disp_path, namep, 255);
 	}
 
@@ -443,7 +465,14 @@ void __init bootx_init(unsigned long r3, unsigned long r4)
 	if (!BOOT_INFO_IS_V2_COMPATIBLE(bi))
 		bi->logicalDisplayBase = bi->dispDeviceBase;
 
+	/* Fixup depth 16 -> 15 as that's what MacOS calls 16bpp */
+	if (bi->dispDeviceDepth == 16)
+		bi->dispDeviceDepth = 15;
+
 #ifdef CONFIG_BOOTX_TEXT
+	ptr = (unsigned long)bi->logicalDisplayBase;
+	ptr += bi->dispDeviceRect[1] * bi->dispDeviceRowBytes;
+	ptr += bi->dispDeviceRect[0] * ((bi->dispDeviceDepth + 7) / 8);
 	btext_setup_display(bi->dispDeviceRect[2] - bi->dispDeviceRect[0],
 			    bi->dispDeviceRect[3] - bi->dispDeviceRect[1],
 			    bi->dispDeviceDepth, bi->dispDeviceRowBytes,
diff --git a/drivers/video/offb.c b/drivers/video/offb.c
index bfeb11b..71ce1fa 100644
--- a/drivers/video/offb.c
+++ b/drivers/video/offb.c
@@ -97,14 +97,43 @@ static int offb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 			  u_int transp, struct fb_info *info)
 {
 	struct offb_par *par = (struct offb_par *) info->par;
+	int i, depth;
+	u32 *pal = info->pseudo_palette;
 
-	if (!par->cmap_adr || regno > 255)
+	depth = info->var.bits_per_pixel;
+	if (depth == 16)
+		depth = (info->var.green.length == 5) ? 15 : 16;
+
+	if (regno > 255 ||
+	    (depth == 16 && regno > 63) ||
+	    (depth == 15 && regno > 31))
 		return 1;
 
+	if (regno < 16) {
+		switch (depth) {
+		case 15:
+			pal[regno] = (regno << 10) | (regno << 5) | regno;
+			break;
+		case 16:
+			pal[regno] = (regno << 11) | (regno << 5) | regno;
+			break;
+		case 24:
+			pal[regno] = (regno << 16) | (regno << 8) | regno;
+			break;
+		case 32:
+			i = (regno << 8) | regno;
+			pal[regno] = (i << 16) | i;
+			break;
+		}
+	}
+
 	red >>= 8;
 	green >>= 8;
 	blue >>= 8;
 
+	if (!par->cmap_adr)
+		return 0;
+
 	switch (par->cmap_type) {
 	case cmap_m64:
 		writeb(regno, par->cmap_adr);
@@ -141,20 +170,6 @@ static int offb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 		break;
 	}
 
-	if (regno < 16)
-		switch (info->var.bits_per_pixel) {
-		case 16:
-			((u16 *) (info->pseudo_palette))[regno] =
-			    (regno << 10) | (regno << 5) | regno;
-			break;
-		case 32:
-			{
-				int i = (regno << 8) | regno;
-				((u32 *) (info->pseudo_palette))[regno] =
-				    (i << 16) | i;
-				break;
-			}
-		}
 	return 0;
 }
 
@@ -223,81 +238,9 @@ int __init offb_init(void)
 {
 	struct device_node *dp = NULL, *boot_disp = NULL;
 
-#if defined(CONFIG_BOOTX_TEXT) && defined(CONFIG_PPC32)
-	struct device_node *macos_display = NULL;
-#endif
 	if (fb_get_options("offb", NULL))
 		return -ENODEV;
 
-#if defined(CONFIG_BOOTX_TEXT) && defined(CONFIG_PPC32)
-	/* If we're booted from BootX... */
-	if (boot_infos != 0) {
-		unsigned long addr =
-		    (unsigned long) boot_infos->dispDeviceBase;
-		u32 *addrp;
-		u64 daddr, dsize;
-		unsigned int flags;
-
-		/* find the device node corresponding to the macos display */
-		while ((dp = of_find_node_by_type(dp, "display"))) {
-			int i;
-
-			/*
-			 * Look for an AAPL,address property first.
-			 */
-			unsigned int na;
-			unsigned int *ap =
-				(unsigned int *)get_property(dp, "AAPL,address",
-							     &na);
-			if (ap != 0) {
-				for (na /= sizeof(unsigned int); na > 0;
-				     --na, ++ap)
-					if (*ap <= addr &&
-					    addr < *ap + 0x1000000) {
-						macos_display = dp;
-						goto foundit;
-					}
-			}
-
-			/*
-			 * See if the display address is in one of the address
-			 * ranges for this display.
-			 */
-			i = 0;
-			for (;;) {
-				addrp = of_get_address(dp, i++, &dsize, &flags);
-				if (addrp == NULL)
-					break;
-				if (!(flags & IORESOURCE_MEM))
-					continue;
-				daddr = of_translate_address(dp, addrp);
-				if (daddr == OF_BAD_ADDR)
-					continue;
-				if (daddr <= addr && addr < (daddr + dsize)) {
-					macos_display = dp;
-					goto foundit;
-				}
-			}
-		foundit:
-			if (macos_display) {
-				printk(KERN_INFO "MacOS display is %s\n",
-				       dp->full_name);
-				break;
-			}
-		}
-
-		/* initialize it */
-		offb_init_fb(macos_display ? macos_display->
-			     name : "MacOS display",
-			     macos_display ? macos_display->
-			     full_name : "MacOS display",
-			     boot_infos->dispDeviceRect[2],
-			     boot_infos->dispDeviceRect[3],
-			     boot_infos->dispDeviceDepth,
-			     boot_infos->dispDeviceRowBytes, addr, NULL);
-	}
-#endif /* defined(CONFIG_BOOTX_TEXT) && defined(CONFIG_PPC32) */
-
 	for (dp = NULL; (dp = of_find_node_by_type(dp, "display"));) {
 		if (get_property(dp, "linux,opened", NULL) &&
 		    get_property(dp, "linux,boot-display", NULL)) {
@@ -317,94 +260,93 @@ int __init offb_init(void)
 
 static void __init offb_init_nodriver(struct device_node *dp)
 {
-	int *pp, i;
 	unsigned int len;
-	int width = 640, height = 480, depth = 8, pitch;
-	unsigned int flags, rsize, *up;
-	u64 address = OF_BAD_ADDR;
-	u32 *addrp;
+	int i, width = 640, height = 480, depth = 8, pitch = 640;
+	unsigned int flags, rsize, addr_prop = 0;
+	unsigned long max_size = 0;
+	u64 rstart, address = OF_BAD_ADDR;
+	u32 *pp, *addrp, *up;
 	u64 asize;
 
-	if ((pp = (int *) get_property(dp, "depth", &len)) != NULL
-	    && len == sizeof(int))
+	pp = (u32 *)get_property(dp, "linux,bootx-depth", &len);
+	if (pp == NULL)
+		pp = (u32 *)get_property(dp, "depth", &len);
+	if (pp && len == sizeof(u32))
 		depth = *pp;
-	if ((pp = (int *) get_property(dp, "width", &len)) != NULL
-	    && len == sizeof(int))
+
+	pp = (u32 *)get_property(dp, "linux,bootx-width", &len);
+	if (pp == NULL)
+		pp = (u32 *)get_property(dp, "width", &len);
+	if (pp && len == sizeof(u32))
 		width = *pp;
-	if ((pp = (int *) get_property(dp, "height", &len)) != NULL
-	    && len == sizeof(int))
+
+	pp = (u32 *)get_property(dp, "linux,bootx-height", &len);
+	if (pp == NULL)
+		pp = (u32 *)get_property(dp, "height", &len);
+	if (pp && len == sizeof(u32))
 		height = *pp;
-	if ((pp = (int *) get_property(dp, "linebytes", &len)) != NULL
-	    && len == sizeof(int)) {
+
+	pp = (u32 *)get_property(dp, "linux,bootx-linebytes", &len);
+	if (pp == NULL)
+		pp = (u32 *)get_property(dp, "linebytes", &len);
+	if (pp && len == sizeof(u32))
 		pitch = *pp;
-		if (pitch == 1)
-			pitch = 0x1000;
-	} else
-		pitch = width;
-
-       rsize = (unsigned long)pitch * (unsigned long)height *
-               (unsigned long)(depth / 8);
-
-       /* Try to match device to a PCI device in order to get a properly
-	* translated address rather then trying to decode the open firmware
-	* stuff in various incorrect ways
-	*/
-#ifdef CONFIG_PCI
-       /* First try to locate the PCI device if any */
-       {
-               struct pci_dev *pdev = NULL;
-
-	       for_each_pci_dev(pdev) {
-                       if (dp == pci_device_to_OF_node(pdev))
-                               break;
-	       }
-               if (pdev) {
-                       for (i = 0; i < 6 && address == OF_BAD_ADDR; i++) {
-                               if ((pci_resource_flags(pdev, i) &
-				    IORESOURCE_MEM) &&
-				   (pci_resource_len(pdev, i) >= rsize))
-                                       address = pci_resource_start(pdev, i);
-                       }
-		       pci_dev_put(pdev);
-               }
-        }
-#endif /* CONFIG_PCI */
-
-       /* This one is dodgy, we may drop it ... */
-       if (address == OF_BAD_ADDR &&
-	   (up = (unsigned *) get_property(dp, "address", &len)) != NULL &&
-	   len == sizeof(unsigned int))
-	       address = (u64) * up;
-
-       if (address == OF_BAD_ADDR) {
-	       for (i = 0; (addrp = of_get_address(dp, i, &asize, &flags))
-			    != NULL; i++) {
-		       if (!(flags & IORESOURCE_MEM))
-			       continue;
-		       if (asize >= pitch * height * depth / 8)
-			       break;
-	       }
-		if (addrp == NULL) {
-			printk(KERN_ERR
-			       "no framebuffer address found for %s\n",
-			       dp->full_name);
-			return;
-		}
-		address = of_translate_address(dp, addrp);
-		if (address == OF_BAD_ADDR) {
-			printk(KERN_ERR
-			       "can't translate framebuffer address for %s\n",
-			       dp->full_name);
-			return;
+	else
+		pitch = width * ((depth + 7) / 8);
+
+	rsize = (unsigned long)pitch * (unsigned long)height;
+
+	/* Ok, now we try to figure out the address of the framebuffer.
+	 *
+	 * Unfortunately, Open Firmware doesn't provide a standard way to do
+	 * so. All we can do is a dodgy heuristic that happens to work in
+	 * practice. On most machines, the "address" property contains what
+	 * we need, though not on Matrox cards found in IBM machines. What I've
+	 * found that appears to give good results is to go through the PCI
+	 * ranges and pick one that is both big enough and if possible encloses
+	 * the "address" property. If none match, we pick the biggest
+	 */
+	up = (u32 *)get_property(dp, "linux,bootx-addr", &len);
+	if (up == NULL)
+		up = (u32 *)get_property(dp, "address", &len);
+	if (up && len == sizeof(u32))
+		addr_prop = *up;
+
+	for (i = 0; (addrp = of_get_address(dp, i, &asize, &flags))
+		     != NULL; i++) {
+		int match_addrp = 0;
+
+		if (!(flags & IORESOURCE_MEM))
+			continue;
+		if (asize < rsize)
+			continue;
+		rstart = of_translate_address(dp, addrp);
+		if (rstart == OF_BAD_ADDR)
+			continue;
+		if (addr_prop && (rstart <= addr_prop) &&
+		    ((rstart + asize) >= (addr_prop + rsize)))
+			match_addrp = 1;
+		if (match_addrp) {
+			address = addr_prop;
+			break;
 		}
+		if (rsize > max_size) {
+			max_size = rsize;
+			address = OF_BAD_ADDR;
+ 		}
 
+		if (address == OF_BAD_ADDR)
+			address = rstart;
+	}
+	if (address == OF_BAD_ADDR && addr_prop)
+		address = (u64)addr_prop;
+	if (address != OF_BAD_ADDR) {
 		/* kludge for valkyrie */
 		if (strcmp(dp->name, "valkyrie") == 0)
 			address += 0x1000;
+		offb_init_fb(dp->name, dp->full_name, width, height, depth,
+			     pitch, address, dp);
 	}
-	offb_init_fb(dp->name, dp->full_name, width, height, depth,
-		     pitch, address, dp);
-
 }
 
 static void __init offb_init_fb(const char *name, const char *full_name,
@@ -412,7 +354,7 @@ static void __init offb_init_fb(const char *name, const char *full_name,
 				int pitch, unsigned long address,
 				struct device_node *dp)
 {
-	unsigned long res_size = pitch * height * depth / 8;
+	unsigned long res_size = pitch * height * (depth + 7) / 8;
 	struct offb_par *par = &default_par;
 	unsigned long res_start = address;
 	struct fb_fix_screeninfo *fix;
@@ -426,7 +368,7 @@ static void __init offb_init_fb(const char *name, const char *full_name,
 	printk(KERN_INFO
 	       "Using unsupported %dx%d %s at %lx, depth=%d, pitch=%d\n",
 	       width, height, name, address, depth, pitch);
-	if (depth != 8 && depth != 16 && depth != 32) {
+	if (depth != 8 && depth != 15 && depth != 16 && depth != 32) {
 		printk(KERN_ERR "%s: can't use depth = %d\n", full_name,
 		       depth);
 		release_mem_region(res_start, res_size);
@@ -502,7 +444,6 @@ static void __init offb_init_fb(const char *name, const char *full_name,
 				   : */ FB_VISUAL_TRUECOLOR;
 
 	var->xoffset = var->yoffset = 0;
-	var->bits_per_pixel = depth;
 	switch (depth) {
 	case 8:
 		var->bits_per_pixel = 8;
@@ -515,7 +456,7 @@ static void __init offb_init_fb(const char *name, const char *full_name,
 		var->transp.offset = 0;
 		var->transp.length = 0;
 		break;
-	case 16:		/* RGB 555 */
+	case 15:		/* RGB 555 */
 		var->bits_per_pixel = 16;
 		var->red.offset = 10;
 		var->red.length = 5;
@@ -526,6 +467,17 @@ static void __init offb_init_fb(const char *name, const char *full_name,
 		var->transp.offset = 0;
 		var->transp.length = 0;
 		break;
+	case 16:		/* RGB 565 */
+		var->bits_per_pixel = 16;
+		var->red.offset = 11;
+		var->red.length = 5;
+		var->green.offset = 5;
+		var->green.length = 6;
+		var->blue.offset = 0;
+		var->blue.length = 5;
+		var->transp.offset = 0;
+		var->transp.length = 0;
+		break;
 	case 32:		/* RGB 888 */
 		var->bits_per_pixel = 32;
 		var->red.offset = 16;
-- 
cgit v0.10.2


From 980a65136569b841cebaad524a34482b0b1627a9 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 3 Jul 2006 17:22:05 +1000
Subject: [POWERPC] Fix booting on Momentum "Apache" board (a Maple derivative)

This extends the maple device-tree workarounds to work on the
Apache board as well, and extends the maple platform probing code
to recognize the Apache board.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 1e95a9f..ebd501a 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1990,12 +1990,22 @@ static void __init flatten_device_tree(void)
 static void __init fixup_device_tree_maple(void)
 {
 	phandle isa;
+	u32 rloc = 0x01002000; /* IO space; PCI device = 4 */
 	u32 isa_ranges[6];
-
-	isa = call_prom("finddevice", 1, 1, ADDR("/ht@0/isa@4"));
+	char *name;
+
+	name = "/ht@0/isa@4";
+	isa = call_prom("finddevice", 1, 1, ADDR(name));
+	if (!PHANDLE_VALID(isa)) {
+		name = "/ht@0/isa@6";
+		isa = call_prom("finddevice", 1, 1, ADDR(name));
+		rloc = 0x01003000; /* IO space; PCI device = 6 */
+	}
 	if (!PHANDLE_VALID(isa))
 		return;
 
+	if (prom_getproplen(isa, "ranges") != 12)
+		return;
 	if (prom_getprop(isa, "ranges", isa_ranges, sizeof(isa_ranges))
 		== PROM_ERROR)
 		return;
@@ -2005,15 +2015,15 @@ static void __init fixup_device_tree_maple(void)
 		isa_ranges[2] != 0x00010000)
 		return;
 
-	prom_printf("fixing up bogus ISA range on Maple...\n");
+	prom_printf("Fixing up bogus ISA range on Maple/Apache...\n");
 
 	isa_ranges[0] = 0x1;
 	isa_ranges[1] = 0x0;
-	isa_ranges[2] = 0x01002000; /* IO space; PCI device = 4 */
+	isa_ranges[2] = rloc;
 	isa_ranges[3] = 0x0;
 	isa_ranges[4] = 0x0;
 	isa_ranges[5] = 0x00010000;
-	prom_setprop(isa, "/ht@0/isa@4", "ranges",
+	prom_setprop(isa, name, "ranges",
 			isa_ranges, sizeof(isa_ranges));
 }
 #else
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index 5cf90c2..611ca8e 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -11,7 +11,7 @@
  *
  */
 
-#define DEBUG
+#undef DEBUG
 
 #include <linux/init.h>
 #include <linux/errno.h>
@@ -256,7 +256,9 @@ static void __init maple_progress(char *s, unsigned short hex)
 static int __init maple_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
-	if (!of_flat_dt_is_compatible(root, "Momentum,Maple"))
+
+	if (!of_flat_dt_is_compatible(root, "Momentum,Maple") &&
+	    !of_flat_dt_is_compatible(root, "Momentum,Apache"))
 		return 0;
 	/*
 	 * On U3, the DART (iommu) must be allocated now since it
-- 
cgit v0.10.2


From 7c6efda5996c26c468eaba178af9bac8b70dbdcb Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 3 Jul 2006 17:24:15 +1000
Subject: [POWERPC] Fix error handling in detecting legacy serial ports

Previously we weren't checking for failures in translating device
addresses from the firmware.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 4cf0b97..a550566 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -126,6 +126,8 @@ static int __init add_legacy_soc_port(struct device_node *np,
 		return -1;
 
 	addr = of_translate_address(soc_dev, addrp);
+	if (addr == OF_BAD_ADDR)
+		return -1;
 
 	/* Add port, irq will be dealt with later. We passed a translated
 	 * IO port value. It will be fixed up later along with the irq
@@ -141,6 +143,8 @@ static int __init add_legacy_isa_port(struct device_node *np,
 	int index = -1;
 	phys_addr_t taddr;
 
+	DBG(" -> add_legacy_isa_port(%s)\n", np->full_name);
+
 	/* Get the ISA port number */
 	reg = (u32 *)get_property(np, "reg", NULL);
 	if (reg == NULL)
@@ -161,9 +165,12 @@ static int __init add_legacy_isa_port(struct device_node *np,
 
 	/* Translate ISA address */
 	taddr = of_translate_address(np, reg);
+	if (taddr == OF_BAD_ADDR)
+		return -1;
 
 	/* Add port, irq will be dealt with later */
-	return add_legacy_port(np, index, UPIO_PORT, reg[1], taddr, NO_IRQ, UPF_BOOT_AUTOCONF);
+	return add_legacy_port(np, index, UPIO_PORT, reg[1], taddr,
+			       NO_IRQ, UPF_BOOT_AUTOCONF);
 
 }
 
@@ -176,6 +183,8 @@ static int __init add_legacy_pci_port(struct device_node *np,
 	unsigned int flags;
 	int iotype, index = -1, lindex = 0;
 
+	DBG(" -> add_legacy_pci_port(%s)\n", np->full_name);
+
 	/* We only support ports that have a clock frequency properly
 	 * encoded in the device-tree (that is have an fcode). Anything
 	 * else can't be used that early and will be normally probed by
@@ -194,6 +203,8 @@ static int __init add_legacy_pci_port(struct device_node *np,
 	/* We only support BAR 0 for now */
 	iotype = (flags & IORESOURCE_MEM) ? UPIO_MEM : UPIO_PORT;
 	addr = of_translate_address(pci_dev, addrp);
+	if (addr == OF_BAD_ADDR)
+		return -1;
 
 	/* Set the IO base to the same as the translated address for MMIO,
 	 * or to the domain local IO base for PIO (it will be fixed up later)
-- 
cgit v0.10.2


From 3e9a69275f23baec86b54febc5dad0b2fc7fb200 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 3 Jul 2006 17:25:26 +1000
Subject: [POWERPC] Update the SWIM3 (powermac) floppy driver

Port the PowerMac floppy driver (swim3) to use the macio device
infrastructure.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 3721e12..cc42e76 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -250,8 +250,6 @@ static int floppy_open(struct inode *inode, struct file *filp);
 static int floppy_release(struct inode *inode, struct file *filp);
 static int floppy_check_change(struct gendisk *disk);
 static int floppy_revalidate(struct gendisk *disk);
-static int swim3_add_device(struct device_node *swims);
-int swim3_init(void);
 
 #ifndef CONFIG_PMAC_MEDIABAY
 #define check_media_bay(which, what)	1
@@ -1011,114 +1009,63 @@ static struct block_device_operations floppy_fops = {
 	.revalidate_disk= floppy_revalidate,
 };
 
-int swim3_init(void)
-{
-	struct device_node *swim;
-	int err = -ENOMEM;
-	int i;
-
-	swim = find_devices("floppy");
-	while (swim && (floppy_count < MAX_FLOPPIES))
-	{
-		swim3_add_device(swim);
-		swim = swim->next;
-	}
-
-	swim = find_devices("swim3");
-	while (swim && (floppy_count < MAX_FLOPPIES))
-	{
-		swim3_add_device(swim);
-		swim = swim->next;
-	}
-
-	if (!floppy_count)
-		return -ENODEV;
-
-	for (i = 0; i < floppy_count; i++) {
-		disks[i] = alloc_disk(1);
-		if (!disks[i])
-			goto out;
-	}
-
-	if (register_blkdev(FLOPPY_MAJOR, "fd")) {
-		err = -EBUSY;
-		goto out;
-	}
-
-	swim3_queue = blk_init_queue(do_fd_request, &swim3_lock);
-	if (!swim3_queue) {
-		err = -ENOMEM;
-		goto out_queue;
-	}
-
-	for (i = 0; i < floppy_count; i++) {
-		struct gendisk *disk = disks[i];
-		disk->major = FLOPPY_MAJOR;
-		disk->first_minor = i;
-		disk->fops = &floppy_fops;
-		disk->private_data = &floppy_states[i];
-		disk->queue = swim3_queue;
-		disk->flags |= GENHD_FL_REMOVABLE;
-		sprintf(disk->disk_name, "fd%d", i);
-		set_capacity(disk, 2880);
-		add_disk(disk);
-	}
-	return 0;
-
-out_queue:
-	unregister_blkdev(FLOPPY_MAJOR, "fd");
-out:
-	while (i--)
-		put_disk(disks[i]);
-	/* shouldn't we do something with results of swim_add_device()? */
-	return err;
-}
-
-static int swim3_add_device(struct device_node *swim)
+static int swim3_add_device(struct macio_dev *mdev, int index)
 {
+	struct device_node *swim = mdev->ofdev.node;
 	struct device_node *mediabay;
-	struct floppy_state *fs = &floppy_states[floppy_count];
-	struct resource res_reg, res_dma;
+	struct floppy_state *fs = &floppy_states[index];
+	int rc = -EBUSY;
 
-	if (of_address_to_resource(swim, 0, &res_reg) ||
-	    of_address_to_resource(swim, 1, &res_dma)) {
-		printk(KERN_ERR "swim3: Can't get addresses\n");
-		return -EINVAL;
+	/* Check & Request resources */
+	if (macio_resource_count(mdev) < 2) {
+		printk(KERN_WARNING "ifd%d: no address for %s\n",
+		       index, swim->full_name);
+		return -ENXIO;
 	}
-	if (request_mem_region(res_reg.start, res_reg.end - res_reg.start + 1,
-			       " (reg)") == NULL) {
-		printk(KERN_ERR "swim3: Can't request register space\n");
-		return -EINVAL;
+	if (macio_irq_count(mdev) < 2) {
+		printk(KERN_WARNING "fd%d: no intrs for device %s\n",
+			index, swim->full_name);
 	}
-	if (request_mem_region(res_dma.start, res_dma.end - res_dma.start + 1,
-			       " (dma)") == NULL) {
-		release_mem_region(res_reg.start,
-				   res_reg.end - res_reg.start + 1);
-		printk(KERN_ERR "swim3: Can't request DMA space\n");
-		return -EINVAL;
+	if (macio_request_resource(mdev, 0, "swim3 (mmio)")) {
+		printk(KERN_ERR "fd%d: can't request mmio resource for %s\n",
+		       index, swim->full_name);
+		return -EBUSY;
 	}
-
-	if (swim->n_intrs < 2) {
-		printk(KERN_INFO "swim3: expecting 2 intrs (n_intrs:%d)\n",
-		       swim->n_intrs);
-		release_mem_region(res_reg.start,
-				   res_reg.end - res_reg.start + 1);
-		release_mem_region(res_dma.start,
-				   res_dma.end - res_dma.start + 1);
-		return -EINVAL;
+	if (macio_request_resource(mdev, 1, "swim3 (dma)")) {
+		printk(KERN_ERR "fd%d: can't request dma resource for %s\n",
+		       index, swim->full_name);
+		macio_release_resource(mdev, 0);
+		return -EBUSY;
 	}
+	dev_set_drvdata(&mdev->ofdev.dev, fs);
 
-	mediabay = (strcasecmp(swim->parent->type, "media-bay") == 0) ? swim->parent : NULL;
+	mediabay = (strcasecmp(swim->parent->type, "media-bay") == 0) ?
+		swim->parent : NULL;
 	if (mediabay == NULL)
 		pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 1);
 	
 	memset(fs, 0, sizeof(*fs));
 	spin_lock_init(&fs->lock);
 	fs->state = idle;
-	fs->swim3 = (struct swim3 __iomem *)ioremap(res_reg.start, 0x200);
-	fs->dma = (struct dbdma_regs __iomem *)ioremap(res_dma.start, 0x200);
-	fs->swim3_intr = swim->intrs[0].line;
-	fs->dma_intr = swim->intrs[1].line;
+	fs->swim3 = (struct swim3 __iomem *)
+		ioremap(macio_resource_start(mdev, 0), 0x200);
+	if (fs->swim3 == NULL) {
+		printk("fd%d: couldn't map registers for %s\n",
+		       index, swim->full_name);
+		rc = -ENOMEM;
+		goto out_release;
+	}
+	fs->dma = (struct dbdma_regs __iomem *)
+		ioremap(macio_resource_start(mdev, 1), 0x200);
+	if (fs->dma == NULL) {
+		printk("fd%d: couldn't map DMA for %s\n",
+		       index, swim->full_name);
+		iounmap(fs->swim3);
+		rc = -ENOMEM;
+		goto out_release;
+	}
+	fs->swim3_intr = macio_irq(mdev, 0);
+	fs->dma_intr = macio_irq(mdev, 1);;
 	fs->cur_cyl = -1;
 	fs->cur_sector = -1;
 	fs->secpercyl = 36;
@@ -1132,15 +1079,16 @@ static int swim3_add_device(struct device_node *swim)
 	st_le16(&fs->dma_cmd[1].command, DBDMA_STOP);
 
 	if (request_irq(fs->swim3_intr, swim3_interrupt, 0, "SWIM3", fs)) {
-		printk(KERN_ERR "Couldn't get irq %d for SWIM3\n", fs->swim3_intr);
+		printk(KERN_ERR "fd%d: couldn't request irq %d for %s\n",
+		       index, fs->swim3_intr, swim->full_name);
 		pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 0);
+		goto out_unmap;
 		return -EBUSY;
 	}
 /*
 	if (request_irq(fs->dma_intr, fd_dma_interrupt, 0, "SWIM3-dma", fs)) {
 		printk(KERN_ERR "Couldn't get irq %d for SWIM3 DMA",
 		       fs->dma_intr);
-		pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 0);
 		return -EBUSY;
 	}
 */
@@ -1150,8 +1098,90 @@ static int swim3_add_device(struct device_node *swim)
 	printk(KERN_INFO "fd%d: SWIM3 floppy controller %s\n", floppy_count,
 		mediabay ? "in media bay" : "");
 
-	floppy_count++;
-	
+	return 0;
+
+ out_unmap:
+	iounmap(fs->dma);
+	iounmap(fs->swim3);
+
+ out_release:
+	macio_release_resource(mdev, 0);
+	macio_release_resource(mdev, 1);
+
+	return rc;
+}
+
+static int __devinit swim3_attach(struct macio_dev *mdev, const struct of_device_id *match)
+{
+	int i, rc;
+	struct gendisk *disk;
+
+	/* Add the drive */
+	rc = swim3_add_device(mdev, floppy_count);
+	if (rc)
+		return rc;
+
+	/* Now create the queue if not there yet */
+	if (swim3_queue == NULL) {
+		/* If we failed, there isn't much we can do as the driver is still
+		 * too dumb to remove the device, just bail out
+		 */
+		if (register_blkdev(FLOPPY_MAJOR, "fd"))
+			return 0;
+		swim3_queue = blk_init_queue(do_fd_request, &swim3_lock);
+		if (swim3_queue == NULL) {
+			unregister_blkdev(FLOPPY_MAJOR, "fd");
+			return 0;
+		}
+	}
+
+	/* Now register that disk. Same comment about failure handling */
+	i = floppy_count++;
+	disk = disks[i] = alloc_disk(1);
+	if (disk == NULL)
+		return 0;
+
+	disk->major = FLOPPY_MAJOR;
+	disk->first_minor = i;
+	disk->fops = &floppy_fops;
+	disk->private_data = &floppy_states[i];
+	disk->queue = swim3_queue;
+	disk->flags |= GENHD_FL_REMOVABLE;
+	sprintf(disk->disk_name, "fd%d", i);
+	set_capacity(disk, 2880);
+	add_disk(disk);
+
+	return 0;
+}
+
+static struct of_device_id swim3_match[] =
+{
+	{
+	.name		= "swim3",
+	},
+	{
+	.compatible	= "ohare-swim3"
+	},
+	{
+	.compatible	= "swim3"
+	},
+};
+
+static struct macio_driver swim3_driver =
+{
+	.name 		= "swim3",
+	.match_table	= swim3_match,
+	.probe		= swim3_attach,
+#if 0
+	.suspend	= swim3_suspend,
+	.resume		= swim3_resume,
+#endif
+};
+
+
+int swim3_init(void)
+{
+	macio_register_driver(&swim3_driver);
 	return 0;
 }
 
-- 
cgit v0.10.2


From 5a43a066b11ac2fe84cf67307f20b83bea390f83 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 3 Jul 2006 19:54:59 +1000
Subject: [PATCH] genirq: Allow fasteoi handler to retrigger disabled
 interrupts

Make the fasteoi handler mark disabled interrupts as pending if they
happen anyway. This allow implementation of a delayed disable scheme
with the fasteoi handler.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 54105bd..9336f2e 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -261,10 +261,13 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
 	 * keep it masked and get out of here
 	 */
 	action = desc->action;
-	if (unlikely(!action || (desc->status & IRQ_DISABLED)))
+	if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
+		desc->status |= IRQ_PENDING;
 		goto out;
+	}
 
 	desc->status |= IRQ_INPROGRESS;
+	desc->status &= ~IRQ_PENDING;
 	spin_unlock(&desc->lock);
 
 	action_ret = handle_IRQ_event(irq, regs, action);
-- 
cgit v0.10.2


From b9e5b4e6a991a5a6d521f2e20a65835404b4169f Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 3 Jul 2006 19:32:51 +1000
Subject: [POWERPC] Use the genirq framework

This adapts the generic powerpc interrupt handling code, and all of
the platforms except for the embedded 6xx machines, to use the new
genirq framework.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 525baab..9124855 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -62,28 +62,27 @@
 #endif
 
 int __irq_offset_value;
-#ifdef CONFIG_PPC32
-EXPORT_SYMBOL(__irq_offset_value);
-#endif
-
 static int ppc_spurious_interrupts;
 
 #ifdef CONFIG_PPC32
-#define NR_MASK_WORDS	((NR_IRQS + 31) / 32)
+EXPORT_SYMBOL(__irq_offset_value);
+atomic_t ppc_n_lost_interrupts;
 
+#ifndef CONFIG_PPC_MERGE
+#define NR_MASK_WORDS	((NR_IRQS + 31) / 32)
 unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
-atomic_t ppc_n_lost_interrupts;
+#endif
 
 #ifdef CONFIG_TAU_INT
 extern int tau_initialized;
 extern int tau_interrupts(int);
 #endif
+#endif /* CONFIG_PPC32 */
 
 #if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE)
 extern atomic_t ipi_recv;
 extern atomic_t ipi_sent;
 #endif
-#endif /* CONFIG_PPC32 */
 
 #ifdef CONFIG_PPC64
 EXPORT_SYMBOL(irq_desc);
@@ -219,15 +218,19 @@ void do_IRQ(struct pt_regs *regs)
 		curtp = current_thread_info();
 		irqtp = hardirq_ctx[smp_processor_id()];
 		if (curtp != irqtp) {
+			struct irq_desc *desc = irq_desc + irq;
+			void *handler = desc->handle_irq;
+			if (handler == NULL)
+				handler = &__do_IRQ;
 			irqtp->task = curtp->task;
 			irqtp->flags = 0;
-			call___do_IRQ(irq, regs, irqtp);
+			call_handle_irq(irq, desc, regs, irqtp, handler);
 			irqtp->task = NULL;
 			if (irqtp->flags)
 				set_bits(irqtp->flags, &curtp->flags);
 		} else
 #endif
-			__do_IRQ(irq, regs);
+			generic_handle_irq(irq, regs);
 	} else if (irq != -2)
 		/* That's not SMP safe ... but who cares ? */
 		ppc_spurious_interrupts++;
@@ -245,15 +248,6 @@ void do_IRQ(struct pt_regs *regs)
 
 void __init init_IRQ(void)
 {
-#ifdef CONFIG_PPC64
-	static int once = 0;
-
-	if (once)
-		return;
-
-	once++;
-
-#endif
 	ppc_md.init_IRQ();
 #ifdef CONFIG_PPC64
 	irq_ctx_init();
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 0c3c70d..bfb407f 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -51,12 +51,14 @@ _GLOBAL(call_do_softirq)
 	mtlr	r0
 	blr
 
-_GLOBAL(call___do_IRQ)
+_GLOBAL(call_handle_irq)
+	ld	r8,0(r7)
 	mflr	r0
 	std	r0,16(r1)
-	stdu	r1,THREAD_SIZE-112(r5)
-	mr	r1,r5
-	bl	.__do_IRQ
+	mtctr	r8
+	stdu	r1,THREAD_SIZE-112(r6)
+	mr	r1,r6
+	bctrl
 	ld	r1,0(r1)
 	ld	r0,16(r1)
 	mtlr	r0
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 22da133..97936f5 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -37,64 +37,51 @@
 struct iic {
 	struct cbe_iic_thread_regs __iomem *regs;
 	u8 target_id;
+	u8 eoi_stack[16];
+	int eoi_ptr;
 };
 
 static DEFINE_PER_CPU(struct iic, iic);
 
-void iic_local_enable(void)
+static void iic_mask(unsigned int irq)
 {
-	struct iic *iic = &__get_cpu_var(iic);
-	u64 tmp;
-
-	/*
-	 * There seems to be a bug that is present in DD2.x CPUs
-	 * and still only partially fixed in DD3.1.
-	 * This bug causes a value written to the priority register
-	 * not to make it there, resulting in a system hang unless we
-	 * write it again.
-	 * Masking with 0xf0 is done because the Cell BE does not
-	 * implement the lower four bits of the interrupt priority,
-	 * they always read back as zeroes, although future CPUs
-	 * might implement different bits.
-	 */
-	do {
-		out_be64(&iic->regs->prio, 0xff);
-		tmp = in_be64(&iic->regs->prio);
-	} while ((tmp & 0xf0) != 0xf0);
-}
-
-void iic_local_disable(void)
-{
-	out_be64(&__get_cpu_var(iic).regs->prio, 0x0);
-}
-
-static unsigned int iic_startup(unsigned int irq)
-{
-	return 0;
-}
-
-static void iic_enable(unsigned int irq)
-{
-	iic_local_enable();
 }
 
-static void iic_disable(unsigned int irq)
+static void iic_unmask(unsigned int irq)
 {
 }
 
-static void iic_end(unsigned int irq)
+static void iic_eoi(unsigned int irq)
 {
-	iic_local_enable();
+	struct iic *iic = &__get_cpu_var(iic);
+	out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]);
+	BUG_ON(iic->eoi_ptr < 0);
 }
 
-static struct hw_interrupt_type iic_pic = {
+static struct irq_chip iic_chip = {
 	.typename = " CELL-IIC ",
-	.startup = iic_startup,
-	.enable = iic_enable,
-	.disable = iic_disable,
-	.end = iic_end,
+	.mask = iic_mask,
+	.unmask = iic_unmask,
+	.eoi = iic_eoi,
 };
 
+/* XXX All of this has to be reworked completely. We need to assign a real
+ * interrupt numbers to the external interrupts and remove all the hard coded
+ * interrupt maps (rely on the device-tree whenever possible).
+ *
+ * Basically, my scheme is to define the "pendings" bits to be the HW interrupt
+ * number (ignoring the data and flags here). That means we can sort-of split
+ * external sources based on priority, and we can use request_irq() on pretty
+ * much anything.
+ *
+ * For spider or axon, they have their own interrupt space. spider will just have
+ * local "hardward" interrupts 0...xx * node stride. The node stride is not
+ * necessary (separate interrupt chips will have separate HW number space), but
+ * will allow to be compatible with existing device-trees.
+ *
+ * All of thise little world will get a standard remapping scheme to map those HW
+ * numbers into the linux flat irq number space.
+*/
 static int iic_external_get_irq(struct cbe_iic_pending_bits pending)
 {
 	int irq;
@@ -118,9 +105,10 @@ static int iic_external_get_irq(struct cbe_iic_pending_bits pending)
 		 */
 		if (pending.class != 2)
 			break;
-		irq = IIC_EXT_OFFSET
-			+ spider_get_irq(node)
-			+ node * IIC_NODE_STRIDE;
+		/* TODO: We might want to silently ignore cascade interrupts
+		 * when no cascade handler exist yet
+		 */
+		irq = IIC_EXT_CASCADE + node * IIC_NODE_STRIDE;
 		break;
 	case 0x01 ... 0x04:
 	case 0x07 ... 0x0a:
@@ -152,6 +140,8 @@ int iic_get_irq(struct pt_regs *regs)
 	iic = &__get_cpu_var(iic);
 	*(unsigned long *) &pending = 
 		in_be64((unsigned long __iomem *) &iic->regs->pending_destr);
+	iic->eoi_stack[++iic->eoi_ptr] = pending.prio;
+	BUG_ON(iic->eoi_ptr > 15);
 
 	irq = -1;
 	if (pending.flags & CBE_IIC_IRQ_VALID) {
@@ -172,7 +162,7 @@ int iic_get_irq(struct pt_regs *regs)
 
 /* hardcoded part to be compatible with older firmware */
 
-static int setup_iic_hardcoded(void)
+static int __init setup_iic_hardcoded(void)
 {
 	struct device_node *np;
 	int nodeid, cpu;
@@ -207,12 +197,13 @@ static int setup_iic_hardcoded(void)
 		printk(KERN_INFO "IIC for CPU %d at %lx\n", cpu, regs);
 		iic->regs = ioremap(regs, sizeof(struct cbe_iic_thread_regs));
 		iic->target_id = (nodeid << 4) + ((cpu & 1) ? 0xf : 0xe);
+		iic->eoi_stack[0] = 0xff;
 	}
 
 	return 0;
 }
 
-static int setup_iic(void)
+static int __init setup_iic(void)
 {
 	struct device_node *dn;
 	unsigned long *regs;
@@ -248,11 +239,14 @@ static int setup_iic(void)
  		iic = &per_cpu(iic, np[0]);
  		iic->regs = ioremap(regs[0], sizeof(struct cbe_iic_thread_regs));
 		iic->target_id = ((np[0] & 2) << 3) + ((np[0] & 1) ? 0xf : 0xe);
+		iic->eoi_stack[0] = 0xff;
  		printk("IIC for CPU %d at %lx mapped to %p\n", np[0], regs[0], iic->regs);
 
  		iic = &per_cpu(iic, np[1]);
  		iic->regs = ioremap(regs[2], sizeof(struct cbe_iic_thread_regs));
 		iic->target_id = ((np[1] & 2) << 3) + ((np[1] & 1) ? 0xf : 0xe);
+		iic->eoi_stack[0] = 0xff;
+
  		printk("IIC for CPU %d at %lx mapped to %p\n", np[1], regs[2], iic->regs);
 
 		found++;
@@ -304,10 +298,10 @@ static void iic_request_ipi(int ipi, const char *name)
 	int irq;
 
 	irq = iic_ipi_to_irq(ipi);
+
 	/* IPIs are marked IRQF_DISABLED as they must run with irqs
 	 * disabled */
-	get_irq_desc(irq)->chip = &iic_pic;
-	get_irq_desc(irq)->status |= IRQ_PER_CPU;
+ 	set_irq_chip_and_handler(irq, &iic_chip, handle_percpu_irq);
 	request_irq(irq, iic_ipi_action, IRQF_DISABLED, name, NULL);
 }
 
@@ -321,20 +315,26 @@ void iic_request_IPIs(void)
 }
 #endif /* CONFIG_SMP */
 
-static void iic_setup_spe_handlers(void)
+static void __init iic_setup_builtin_handlers(void)
 {
 	int be, isrc;
 
-	/* Assume two threads per BE are present */
+	/* XXX FIXME: Assume two threads per BE are present */
 	for (be=0; be < num_present_cpus() / 2; be++) {
+		int irq;
+
+		/* setup SPE chip and handlers */
 		for (isrc = 0; isrc < IIC_CLASS_STRIDE * 3; isrc++) {
-			int irq = IIC_NODE_STRIDE * be + IIC_SPE_OFFSET + isrc;
-			get_irq_desc(irq)->chip = &iic_pic;
+			irq = IIC_NODE_STRIDE * be + IIC_SPE_OFFSET + isrc;
+			set_irq_chip_and_handler(irq, &iic_chip, handle_fasteoi_irq);
 		}
+		/* setup cascade chip */
+		irq = IIC_EXT_CASCADE + be * IIC_NODE_STRIDE;
+		set_irq_chip_and_handler(irq, &iic_chip, handle_fasteoi_irq);
 	}
 }
 
-void iic_init_IRQ(void)
+void __init iic_init_IRQ(void)
 {
 	int cpu, irq_offset;
 	struct iic *iic;
@@ -348,5 +348,6 @@ void iic_init_IRQ(void)
 		if (iic->regs)
 			out_be64(&iic->regs->prio, 0xff);
 	}
-	iic_setup_spe_handlers();
+	iic_setup_builtin_handlers();
+
 }
diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h
index 799f77d..c74515a 100644
--- a/arch/powerpc/platforms/cell/interrupt.h
+++ b/arch/powerpc/platforms/cell/interrupt.h
@@ -38,6 +38,7 @@
 
 enum {
 	IIC_EXT_OFFSET   = 0x00, /* Start of south bridge IRQs */
+	IIC_EXT_CASCADE  = 0x20, /* There is no interrupt 32 on spider */
 	IIC_NUM_EXT      = 0x40, /* Number of south bridge IRQs */
 	IIC_SPE_OFFSET   = 0x40, /* Start of SPE interrupts */
 	IIC_CLASS_STRIDE = 0x10, /* SPE IRQs per class    */
@@ -51,13 +52,10 @@ extern int  iic_get_irq(struct pt_regs *regs);
 extern void iic_cause_IPI(int cpu, int mesg);
 extern void iic_request_IPIs(void);
 extern void iic_setup_cpu(void);
-extern void iic_local_enable(void);
-extern void iic_local_disable(void);
 
 extern u8 iic_get_target_id(int cpu);
 
 extern void spider_init_IRQ(void);
-extern int spider_get_irq(int node);
 
 #endif
 #endif /* ASM_CELL_PIC_H */
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index 01ec394..70a4e90 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -80,10 +80,14 @@ static void cell_progress(char *s, unsigned short hex)
 	printk("*** %04x : %s\n", hex, s ? s : "");
 }
 
+static void __init cell_init_irq(void)
+{
+	iic_init_IRQ();
+	spider_init_IRQ();
+}
+
 static void __init cell_setup_arch(void)
 {
-	ppc_md.init_IRQ       = iic_init_IRQ;
-	ppc_md.get_irq        = iic_get_irq;
 #ifdef CONFIG_SPU_BASE
 	spu_priv1_ops         = &spu_priv1_mmio_ops;
 #endif
@@ -109,7 +113,6 @@ static void __init cell_setup_arch(void)
 	/* Find and initialize PCI host bridges */
 	init_pci_config_tokens();
 	find_and_init_phbs();
-	spider_init_IRQ();
 	cbe_pervasive_init();
 #ifdef CONFIG_DUMMY_CONSOLE
 	conswitchp = &dummy_con;
@@ -174,6 +177,9 @@ define_machine(cell) {
 	.calibrate_decr		= generic_calibrate_decr,
 	.check_legacy_ioport	= cell_check_legacy_ioport,
 	.progress		= cell_progress,
+	.init_IRQ       	= cell_init_irq,
+	.get_irq        	= iic_get_irq,
+
 #ifdef CONFIG_KEXEC
 	.machine_kexec		= default_machine_kexec,
 	.machine_kexec_prepare	= default_machine_kexec_prepare,
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index 7c3a0b6..98425ac 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -82,17 +82,20 @@ static void __iomem *spider_get_irq_config(int irq)
 	return pic + TIR_CFGA + 8 * spider_get_nr(irq);
 }
 
-static void spider_enable_irq(unsigned int irq)
+static void spider_unmask_irq(unsigned int irq)
 {
 	int nodeid = (irq / IIC_NODE_STRIDE) * 0x10;
 	void __iomem *cfg = spider_get_irq_config(irq);
 	irq = spider_get_nr(irq);
 
+	/* FIXME: Most of that is configuration and has nothing to do with enabling/disable,
+	 * besides, it's also partially bogus.
+	 */
 	out_be32(cfg, (in_be32(cfg) & ~0xf0)| 0x3107000eu | nodeid);
 	out_be32(cfg + 4, in_be32(cfg + 4) | 0x00020000u | irq);
 }
 
-static void spider_disable_irq(unsigned int irq)
+static void spider_mask_irq(unsigned int irq)
 {
 	void __iomem *cfg = spider_get_irq_config(irq);
 	irq = spider_get_nr(irq);
@@ -100,39 +103,21 @@ static void spider_disable_irq(unsigned int irq)
 	out_be32(cfg, in_be32(cfg) & ~0x30000000u);
 }
 
-static unsigned int spider_startup_irq(unsigned int irq)
-{
-	spider_enable_irq(irq);
-	return 0;
-}
-
-static void spider_shutdown_irq(unsigned int irq)
-{
-	spider_disable_irq(irq);
-}
-
-static void spider_end_irq(unsigned int irq)
-{
-	spider_enable_irq(irq);
-}
-
 static void spider_ack_irq(unsigned int irq)
 {
-	spider_disable_irq(irq);
-	iic_local_enable();
+	/* Should reset edge detection logic but we don't configure any edge interrupt
+	 * at the moment.
+	 */
 }
 
-static struct hw_interrupt_type spider_pic = {
+static struct irq_chip spider_pic = {
 	.typename = " SPIDER   ",
-	.startup = spider_startup_irq,
-	.shutdown = spider_shutdown_irq,
-	.enable = spider_enable_irq,
-	.disable = spider_disable_irq,
+	.unmask = spider_unmask_irq,
+	.mask = spider_mask_irq,
 	.ack = spider_ack_irq,
-	.end = spider_end_irq,
 };
 
-int spider_get_irq(int node)
+static int spider_get_irq(int node)
 {
 	unsigned long cs;
 	void __iomem *regs = spider_pics[node];
@@ -145,95 +130,89 @@ int spider_get_irq(int node)
 		return cs;
 }
 
+static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc,
+			       struct pt_regs *regs)
+{
+	int node = (int)(long)desc->handler_data;
+	int cascade_irq;
+
+	cascade_irq = spider_get_irq(node);
+	generic_handle_irq(cascade_irq, regs);
+	desc->chip->eoi(irq);
+}
+
 /* hardcoded part to be compatible with older firmware */
 
-void spider_init_IRQ_hardcoded(void)
+static void __init spider_init_one(int node, unsigned long addr)
 {
-	int node;
-	long spiderpic;
-	long pics[] = { 0x24000008000, 0x34000008000 };
-	int n;
-
-	pr_debug("%s(%d): Using hardcoded defaults\n", __FUNCTION__, __LINE__);
-
-	for (node = 0; node < num_present_cpus()/2; node++) {
-		spiderpic = pics[node];
-		printk(KERN_DEBUG "SPIDER addr: %lx\n", spiderpic);
-		spider_pics[node] = ioremap(spiderpic, 0x800);
-		for (n = 0; n < IIC_NUM_EXT; n++) {
-			int irq = n + IIC_EXT_OFFSET + node * IIC_NODE_STRIDE;
-			get_irq_desc(irq)->chip = &spider_pic;
-		}
-
- 		/* do not mask any interrupts because of level */
- 		out_be32(spider_pics[node] + TIR_MSK, 0x0);
-
- 		/* disable edge detection clear */
- 		/* out_be32(spider_pics[node] + TIR_EDC, 0x0); */
-
- 		/* enable interrupt packets to be output */
- 		out_be32(spider_pics[node] + TIR_PIEN,
-			in_be32(spider_pics[node] + TIR_PIEN) | 0x1);
-
- 		/* Enable the interrupt detection enable bit. Do this last! */
- 		out_be32(spider_pics[node] + TIR_DEN,
-			in_be32(spider_pics[node] + TIR_DEN) | 0x1);
+	int n, irq;
+
+	spider_pics[node] = ioremap(addr, 0x800);
+	if (spider_pics[node] == NULL)
+		panic("spider_pic: can't map registers !");
+
+	printk(KERN_INFO "spider_pic: mapped for node %d, addr: 0x%lx mapped to %p\n",
+	       node, addr, spider_pics[node]);
+
+	for (n = 0; n < IIC_NUM_EXT; n++) {
+		if (n == IIC_EXT_CASCADE)
+			continue;
+		irq = n + IIC_EXT_OFFSET + node * IIC_NODE_STRIDE;
+		set_irq_chip_and_handler(irq, &spider_pic, handle_level_irq);
+		get_irq_desc(irq)->status |= IRQ_LEVEL;
 	}
+
+	/* do not mask any interrupts because of level */
+	out_be32(spider_pics[node] + TIR_MSK, 0x0);
+
+	/* disable edge detection clear */
+	/* out_be32(spider_pics[node] + TIR_EDC, 0x0); */
+
+	/* enable interrupt packets to be output */
+	out_be32(spider_pics[node] + TIR_PIEN,
+		 in_be32(spider_pics[node] + TIR_PIEN) | 0x1);
+
+	/* Hook up cascade */
+	irq = IIC_EXT_CASCADE + node * IIC_NODE_STRIDE;
+	set_irq_data(irq, (void *)(long)node);
+	set_irq_chained_handler(irq, spider_irq_cascade);
+
+	/* Enable the interrupt detection enable bit. Do this last! */
+	out_be32(spider_pics[node] + TIR_DEN,
+		 in_be32(spider_pics[node] + TIR_DEN) | 0x1);
 }
 
-void spider_init_IRQ(void)
+void __init spider_init_IRQ(void)
 {
-	long spider_reg;
+	unsigned long *spider_reg;
 	struct device_node *dn;
 	char *compatible;
-	int n, node = 0;
-
+	int node = 0;
+
+	/* XXX node numbers are totally bogus. We _hope_ we get the device nodes in the right
+	 * order here but that's definitely not guaranteed, we need to get the node from the
+	 * device tree instead. There is currently no proper property for it (but our whole
+	 * device-tree is bogus anyway) so all we can do is pray or maybe test the address
+	 * and deduce the node-id
+	 */
 	for (dn = NULL; (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
 		compatible = (char *)get_property(dn, "compatible", NULL);
 
 		if (!compatible)
 			continue;
 
-		if (strstr(compatible, "CBEA,platform-spider-pic"))
-			spider_reg = *(long *)get_property(dn,"reg", NULL);
-		else if (strstr(compatible, "sti,platform-spider-pic")) {
-			spider_init_IRQ_hardcoded();
-			return;
+ 		if (strstr(compatible, "CBEA,platform-spider-pic"))
+			spider_reg = (unsigned long *)get_property(dn, "reg", NULL);
+		else if (strstr(compatible, "sti,platform-spider-pic") && (node < 2)) {
+			static long hard_coded_pics[] = { 0x24000008000, 0x34000008000 };
+			spider_reg = &hard_coded_pics[node];
 		} else
 			continue;
 
-		if (!spider_reg)
-			printk("interrupt controller does not have reg property !\n");
-
-		n = prom_n_addr_cells(dn);
-
-		if ( n != 2)
-			printk("reg property with invalid number of elements \n");
-
-		spider_pics[node] = ioremap(spider_reg, 0x800);
-
-		printk("SPIDER addr: %lx with %i addr_cells mapped to %p\n",
-		       spider_reg, n, spider_pics[node]);
-
-		for (n = 0; n < IIC_NUM_EXT; n++) {
-			int irq = n + IIC_EXT_OFFSET + node * IIC_NODE_STRIDE;
-			get_irq_desc(irq)->chip = &spider_pic;
-		}
-
-		/* do not mask any interrupts because of level */
-		out_be32(spider_pics[node] + TIR_MSK, 0x0);
-
-		/* disable edge detection clear */
-		/* out_be32(spider_pics[node] + TIR_EDC, 0x0); */
-
-		/* enable interrupt packets to be output */
-		out_be32(spider_pics[node] + TIR_PIEN,
-			in_be32(spider_pics[node] + TIR_PIEN) | 0x1);
-
-		/* Enable the interrupt detection enable bit. Do this last! */
-		out_be32(spider_pics[node] + TIR_DEN,
-			in_be32(spider_pics[node] + TIR_DEN) | 0x1);
+		if (spider_reg == NULL)
+			printk(KERN_ERR "spider_pic: No address for node %d\n", node);
 
+		spider_init_one(node, *spider_reg);
 		node++;
 	}
 }
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index 1f1771b..a5dffc8 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -315,6 +315,21 @@ chrp_event_scan(unsigned long unused)
 		  jiffies + event_scan_interval);
 }
 
+void chrp_8259_cascade(unsigned int irq, struct irq_desc *desc,
+			struct pt_regs *regs)
+{
+	unsigned int max = 100;
+
+	while(max--) {
+		int irq = i8259_irq(regs);
+		if (max == 99)
+			desc->chip->eoi(irq);
+		if (irq < 0)
+			break;
+		generic_handle_irq(irq, regs);
+	};
+}
+
 /*
  * Finds the open-pic node and sets up the mpic driver.
  */
@@ -402,7 +417,7 @@ static void __init chrp_find_openpic(void)
 	}
 
 	mpic_init(chrp_mpic);
-	mpic_setup_cascade(NUM_ISA_INTERRUPTS, i8259_irq_cascade, NULL);
+	set_irq_chained_handler(NUM_ISA_INTERRUPTS, chrp_8259_cascade);
 }
 
 #if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(XMON)
diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c
index f70e820..7fb6a08 100644
--- a/arch/powerpc/platforms/iseries/irq.c
+++ b/arch/powerpc/platforms/iseries/irq.c
@@ -297,13 +297,13 @@ static void iseries_end_IRQ(unsigned int irq)
 		(REAL_IRQ_TO_IDSEL(rirq) << 4) + REAL_IRQ_TO_FUNC(rirq));
 }
 
-static hw_irq_controller iSeries_IRQ_handler = {
-	.typename = "iSeries irq controller",
-	.startup = iseries_startup_IRQ,
-	.shutdown = iseries_shutdown_IRQ,
-	.enable = iseries_enable_IRQ,
-	.disable = iseries_disable_IRQ,
-	.end = iseries_end_IRQ
+static struct irq_chip iseries_pic = {
+	.typename	= "iSeries irq controller",
+	.startup	= iseries_startup_IRQ,
+	.shutdown	= iseries_shutdown_IRQ,
+	.unmask		= iseries_enable_IRQ,
+	.mask		= iseries_disable_IRQ,
+	.eoi		= iseries_end_IRQ
 };
 
 /*
@@ -322,8 +322,7 @@ int __init iSeries_allocate_IRQ(HvBusNumber bus,
 	realirq = (((((sub_bus << 8) + (bus - 1)) << 3) + (idsel - 1)) << 3)
 		+ function;
 	virtirq = virt_irq_create_mapping(realirq);
-
-	irq_desc[virtirq].chip = &iSeries_IRQ_handler;
+	set_irq_chip_and_handler(virtirq, &iseries_pic, handle_fasteoi_irq);
 	return virtirq;
 }
 
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index c9b09a9..58a4c7b 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -70,18 +70,19 @@ static struct interrupt_info gatwick_int_pool[GATWICK_IRQ_POOL_SIZE];
 
 #define NR_MASK_WORDS	((NR_IRQS + 31) / 32)
 static unsigned long ppc_lost_interrupts[NR_MASK_WORDS];
+static unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
+static int pmac_irq_cascade = -1;
 
-/*
- * Mark an irq as "lost".  This is only used on the pmac
- * since it can lose interrupts (see pmac_set_irq_mask).
- * -- Cort
- */
-void __set_lost(unsigned long irq_nr, int nokick)
+static void __pmac_retrigger(unsigned int irq_nr)
 {
-	if (!test_and_set_bit(irq_nr, ppc_lost_interrupts)) {
+	if (irq_nr >= max_real_irqs && pmac_irq_cascade > 0) {
+		__set_bit(irq_nr, ppc_lost_interrupts);
+		irq_nr = pmac_irq_cascade;
+		mb();
+	}
+	if (!__test_and_set_bit(irq_nr, ppc_lost_interrupts)) {
 		atomic_inc(&ppc_n_lost_interrupts);
-		if (!nokick)
-			set_dec(1);
+		set_dec(1);
 	}
 }
 
@@ -94,10 +95,10 @@ static void pmac_mask_and_ack_irq(unsigned int irq_nr)
         if ((unsigned)irq_nr >= max_irqs)
                 return;
 
-        clear_bit(irq_nr, ppc_cached_irq_mask);
-        if (test_and_clear_bit(irq_nr, ppc_lost_interrupts))
-                atomic_dec(&ppc_n_lost_interrupts);
 	spin_lock_irqsave(&pmac_pic_lock, flags);
+        __clear_bit(irq_nr, ppc_cached_irq_mask);
+        if (__test_and_clear_bit(irq_nr, ppc_lost_interrupts))
+                atomic_dec(&ppc_n_lost_interrupts);
         out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]);
         out_le32(&pmac_irq_hw[i]->ack, bit);
         do {
@@ -109,7 +110,7 @@ static void pmac_mask_and_ack_irq(unsigned int irq_nr)
 	spin_unlock_irqrestore(&pmac_pic_lock, flags);
 }
 
-static void pmac_set_irq_mask(unsigned int irq_nr, int nokicklost)
+static void pmac_ack_irq(unsigned int irq_nr)
 {
         unsigned long bit = 1UL << (irq_nr & 0x1f);
         int i = irq_nr >> 5;
@@ -118,7 +119,22 @@ static void pmac_set_irq_mask(unsigned int irq_nr, int nokicklost)
         if ((unsigned)irq_nr >= max_irqs)
                 return;
 
-	spin_lock_irqsave(&pmac_pic_lock, flags);
+  	spin_lock_irqsave(&pmac_pic_lock, flags);
+	if (__test_and_clear_bit(irq_nr, ppc_lost_interrupts))
+                atomic_dec(&ppc_n_lost_interrupts);
+        out_le32(&pmac_irq_hw[i]->ack, bit);
+        (void)in_le32(&pmac_irq_hw[i]->ack);
+	spin_unlock_irqrestore(&pmac_pic_lock, flags);
+}
+
+static void __pmac_set_irq_mask(unsigned int irq_nr, int nokicklost)
+{
+        unsigned long bit = 1UL << (irq_nr & 0x1f);
+        int i = irq_nr >> 5;
+
+        if ((unsigned)irq_nr >= max_irqs)
+                return;
+
         /* enable unmasked interrupts */
         out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]);
 
@@ -135,8 +151,7 @@ static void pmac_set_irq_mask(unsigned int irq_nr, int nokicklost)
          * the bit in the flag register or request another interrupt.
          */
         if (bit & ppc_cached_irq_mask[i] & in_le32(&pmac_irq_hw[i]->level))
-		__set_lost((ulong)irq_nr, nokicklost);
-	spin_unlock_irqrestore(&pmac_pic_lock, flags);
+		__pmac_retrigger(irq_nr);
 }
 
 /* When an irq gets requested for the first client, if it's an
@@ -144,62 +159,67 @@ static void pmac_set_irq_mask(unsigned int irq_nr, int nokicklost)
  */
 static unsigned int pmac_startup_irq(unsigned int irq_nr)
 {
+	unsigned long flags;
         unsigned long bit = 1UL << (irq_nr & 0x1f);
         int i = irq_nr >> 5;
 
+  	spin_lock_irqsave(&pmac_pic_lock, flags);
 	if ((irq_desc[irq_nr].status & IRQ_LEVEL) == 0)
 		out_le32(&pmac_irq_hw[i]->ack, bit);
-        set_bit(irq_nr, ppc_cached_irq_mask);
-        pmac_set_irq_mask(irq_nr, 0);
+        __set_bit(irq_nr, ppc_cached_irq_mask);
+        __pmac_set_irq_mask(irq_nr, 0);
+  	spin_unlock_irqrestore(&pmac_pic_lock, flags);
 
 	return 0;
 }
 
 static void pmac_mask_irq(unsigned int irq_nr)
 {
-        clear_bit(irq_nr, ppc_cached_irq_mask);
-        pmac_set_irq_mask(irq_nr, 0);
-        mb();
+	unsigned long flags;
+
+  	spin_lock_irqsave(&pmac_pic_lock, flags);
+        __clear_bit(irq_nr, ppc_cached_irq_mask);
+        __pmac_set_irq_mask(irq_nr, 0);
+  	spin_unlock_irqrestore(&pmac_pic_lock, flags);
 }
 
 static void pmac_unmask_irq(unsigned int irq_nr)
 {
-        set_bit(irq_nr, ppc_cached_irq_mask);
-        pmac_set_irq_mask(irq_nr, 0);
+	unsigned long flags;
+
+	spin_lock_irqsave(&pmac_pic_lock, flags);
+	__set_bit(irq_nr, ppc_cached_irq_mask);
+        __pmac_set_irq_mask(irq_nr, 0);
+  	spin_unlock_irqrestore(&pmac_pic_lock, flags);
 }
 
-static void pmac_end_irq(unsigned int irq_nr)
+static int pmac_retrigger(unsigned int irq_nr)
 {
-	if (!(irq_desc[irq_nr].status & (IRQ_DISABLED|IRQ_INPROGRESS))
-	    && irq_desc[irq_nr].action) {
-        	set_bit(irq_nr, ppc_cached_irq_mask);
-	        pmac_set_irq_mask(irq_nr, 1);
-	}
-}
+	unsigned long flags;
 
+  	spin_lock_irqsave(&pmac_pic_lock, flags);
+	__pmac_retrigger(irq_nr);
+  	spin_unlock_irqrestore(&pmac_pic_lock, flags);
+	return 1;
+}
 
-struct hw_interrupt_type pmac_pic = {
+static struct irq_chip pmac_pic = {
 	.typename	= " PMAC-PIC ",
 	.startup	= pmac_startup_irq,
-	.enable		= pmac_unmask_irq,
-	.disable	= pmac_mask_irq,
-	.ack		= pmac_mask_and_ack_irq,
-	.end		= pmac_end_irq,
-};
-
-struct hw_interrupt_type gatwick_pic = {
-	.typename	= " GATWICK  ",
-	.startup	= pmac_startup_irq,
-	.enable		= pmac_unmask_irq,
-	.disable	= pmac_mask_irq,
-	.ack		= pmac_mask_and_ack_irq,
-	.end		= pmac_end_irq,
+	.mask		= pmac_mask_irq,
+	.ack		= pmac_ack_irq,
+	.mask_ack	= pmac_mask_and_ack_irq,
+	.unmask		= pmac_unmask_irq,
+	.retrigger	= pmac_retrigger,
 };
 
 static irqreturn_t gatwick_action(int cpl, void *dev_id, struct pt_regs *regs)
 {
+	unsigned long flags;
 	int irq, bits;
+	int rc = IRQ_NONE;
 
+  	spin_lock_irqsave(&pmac_pic_lock, flags);
 	for (irq = max_irqs; (irq -= 32) >= max_real_irqs; ) {
 		int i = irq >> 5;
 		bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i];
@@ -209,17 +229,20 @@ static irqreturn_t gatwick_action(int cpl, void *dev_id, struct pt_regs *regs)
 		if (bits == 0)
 			continue;
 		irq += __ilog2(bits);
+		spin_unlock_irqrestore(&pmac_pic_lock, flags);
 		__do_IRQ(irq, regs);
-		return IRQ_HANDLED;
+		spin_lock_irqsave(&pmac_pic_lock, flags);
+		rc = IRQ_HANDLED;
 	}
-	printk("gatwick irq not from gatwick pic\n");
-	return IRQ_NONE;
+  	spin_unlock_irqrestore(&pmac_pic_lock, flags);
+	return rc;
 }
 
 static int pmac_get_irq(struct pt_regs *regs)
 {
 	int irq;
 	unsigned long bits = 0;
+	unsigned long flags;
 
 #ifdef CONFIG_SMP
 	void psurge_smp_message_recv(struct pt_regs *);
@@ -230,6 +253,7 @@ static int pmac_get_irq(struct pt_regs *regs)
 		return -2;	/* ignore, already handled */
         }
 #endif /* CONFIG_SMP */
+  	spin_lock_irqsave(&pmac_pic_lock, flags);
 	for (irq = max_real_irqs; (irq -= 32) >= 0; ) {
 		int i = irq >> 5;
 		bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i];
@@ -241,6 +265,7 @@ static int pmac_get_irq(struct pt_regs *regs)
 		irq += __ilog2(bits);
 		break;
 	}
+  	spin_unlock_irqrestore(&pmac_pic_lock, flags);
 
 	return irq;
 }
@@ -389,7 +414,6 @@ static struct irqaction gatwick_cascade_action = {
 static void __init pmac_pic_probe_oldstyle(void)
 {
         int i;
-	int irq_cascade = -1;
         struct device_node *master = NULL;
 	struct device_node *slave = NULL;
 	u8 __iomem *addr;
@@ -443,9 +467,16 @@ static void __init pmac_pic_probe_oldstyle(void)
 	}
 	BUG_ON(master == NULL);
 
-	/* Set the handler for the main PIC */
-	for ( i = 0; i < max_real_irqs ; i++ )
-		irq_desc[i].chip = &pmac_pic;
+	/* Mark level interrupts and set handlers */
+	for (i = 0; i < max_irqs; i++) {
+		int level = !!(level_mask[i >> 5] & (1UL << (i & 0x1f)));
+		if (level)
+			irq_desc[i].status |= IRQ_LEVEL;
+		else
+			irq_desc[i].status |= IRQ_DELAYED_DISABLE;
+		set_irq_chip_and_handler(i, &pmac_pic, level ?
+					 handle_level_irq : handle_edge_irq);
+	}
 
 	/* Get addresses of first controller if we have a node for it */
 	BUG_ON(of_address_to_resource(master, 0, &r));
@@ -472,29 +503,22 @@ static void __init pmac_pic_probe_oldstyle(void)
 			pmac_irq_hw[i++] =
 				(volatile struct pmac_irq_hw __iomem *)
 				(addr + 0x10);
-		irq_cascade = slave->intrs[0].line;
+		pmac_irq_cascade = slave->intrs[0].line;
 
 		printk(KERN_INFO "irq: Found slave Apple PIC %s for %d irqs"
 		       " cascade: %d\n", slave->full_name,
-		       max_irqs - max_real_irqs, irq_cascade);
+		       max_irqs - max_real_irqs, pmac_irq_cascade);
 	}
 	of_node_put(slave);
 
-	/* disable all interrupts in all controllers */
+	/* Disable all interrupts in all controllers */
 	for (i = 0; i * 32 < max_irqs; ++i)
 		out_le32(&pmac_irq_hw[i]->enable, 0);
 
-	/* mark level interrupts */
-	for (i = 0; i < max_irqs; i++)
-		if (level_mask[i >> 5] & (1UL << (i & 0x1f)))
-			irq_desc[i].status = IRQ_LEVEL;
+	/* Hookup cascade irq */
+	if (slave)
+		setup_irq(pmac_irq_cascade, &gatwick_cascade_action);
 
-	/* Setup handlers for secondary controller and hook cascade irq*/
-	if (slave) {
-		for ( i = max_real_irqs ; i < max_irqs ; i++ )
-			irq_desc[i].chip = &gatwick_pic;
-		setup_irq(irq_cascade, &gatwick_cascade_action);
-	}
 	printk(KERN_INFO "irq: System has %d possible interrupts\n", max_irqs);
 #ifdef CONFIG_XMON
 	setup_irq(20, &xmon_action);
@@ -502,9 +526,20 @@ static void __init pmac_pic_probe_oldstyle(void)
 }
 #endif /* CONFIG_PPC32 */
 
-static int pmac_u3_cascade(struct pt_regs *regs, void *data)
+static void pmac_u3_cascade(unsigned int irq, struct irq_desc *desc,
+			    struct pt_regs *regs)
 {
-	return mpic_get_one_irq((struct mpic *)data, regs);
+	struct mpic *mpic = desc->handler_data;
+	unsigned int max = 100;
+
+	while(max--) {
+		int cascade_irq = mpic_get_one_irq(mpic, regs);
+		if (max == 99)
+			desc->chip->eoi(irq);
+		if (irq < 0)
+			break;
+		generic_handle_irq(cascade_irq, regs);
+	};
 }
 
 static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic)
@@ -612,7 +647,8 @@ static int __init pmac_pic_probe_mpic(void)
 		of_node_put(slave);
 		return 0;
 	}
-	mpic_setup_cascade(slave->intrs[0].line, pmac_u3_cascade, mpic2);
+	set_irq_data(slave->intrs[0].line, mpic2);
+	set_irq_chained_handler(slave->intrs[0].line, pmac_u3_cascade);
 
 	of_node_put(slave);
 	return 0;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 999509d..476b564 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -118,6 +118,21 @@ static void __init fwnmi_init(void)
 		fwnmi_active = 1;
 }
 
+void pSeries_8259_cascade(unsigned int irq, struct irq_desc *desc,
+			  struct pt_regs *regs)
+{
+	unsigned int max = 100;
+
+	while(max--) {
+		int cascade_irq = i8259_irq(regs);
+		if (max == 99)
+			desc->chip->eoi(irq);
+		if (cascade_irq < 0)
+			break;
+		generic_handle_irq(cascade_irq, regs);
+	};
+}
+
 static void __init pSeries_init_mpic(void)
 {
         unsigned int *addrp;
@@ -140,7 +155,7 @@ static void __init pSeries_init_mpic(void)
 	i8259_init(intack, 0);
 
 	/* Hook cascade to mpic */
-	mpic_setup_cascade(NUM_ISA_INTERRUPTS, i8259_irq_cascade, NULL);
+	set_irq_chained_handler(NUM_ISA_INTERRUPTS, pSeries_8259_cascade);
 }
 
 static void __init pSeries_setup_mpic(void)
@@ -201,10 +216,8 @@ static void __init pSeries_setup_arch(void)
 		/* Allocate the mpic now, so that find_and_init_phbs() can
 		 * fill the ISUs */
 		pSeries_setup_mpic();
-	} else {
+	} else
 		ppc_md.init_IRQ       = xics_init_IRQ;
-		ppc_md.get_irq        = xics_get_irq;
-	}
 
 #ifdef CONFIG_SMP
 	smp_init_pSeries();
@@ -291,10 +304,7 @@ static void pSeries_mach_cpu_die(void)
 {
 	local_irq_disable();
 	idle_task_exit();
-	/* Some hardware requires clearing the CPPR, while other hardware does not
-	 * it is safe either way
-	 */
-	pSeriesLP_cppr_info(0, 0);
+	xics_teardown_cpu(0);
 	rtas_stop_self();
 	/* Should never get here... */
 	BUG();
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 2ffebe3..c7f0442 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -31,23 +31,6 @@
 
 #include "xics.h"
 
-static unsigned int xics_startup(unsigned int irq);
-static void xics_enable_irq(unsigned int irq);
-static void xics_disable_irq(unsigned int irq);
-static void xics_mask_and_ack_irq(unsigned int irq);
-static void xics_end_irq(unsigned int irq);
-static void xics_set_affinity(unsigned int irq_nr, cpumask_t cpumask);
-
-static struct hw_interrupt_type xics_pic = {
-	.typename = " XICS     ",
-	.startup = xics_startup,
-	.enable = xics_enable_irq,
-	.disable = xics_disable_irq,
-	.ack = xics_mask_and_ack_irq,
-	.end = xics_end_irq,
-	.set_affinity = xics_set_affinity
-};
-
 /* This is used to map real irq numbers to virtual */
 static struct radix_tree_root irq_map = RADIX_TREE_INIT(GFP_ATOMIC);
 
@@ -98,47 +81,33 @@ static int ibm_set_xive;
 static int ibm_int_on;
 static int ibm_int_off;
 
-typedef struct {
-	int (*xirr_info_get)(int cpu);
-	void (*xirr_info_set)(int cpu, int val);
-	void (*cppr_info)(int cpu, u8 val);
-	void (*qirr_info)(int cpu, u8 val);
-} xics_ops;
 
+/* Direct HW low level accessors */
 
-/* SMP */
 
-static int pSeries_xirr_info_get(int n_cpu)
+static inline int direct_xirr_info_get(int n_cpu)
 {
 	return in_be32(&xics_per_cpu[n_cpu]->xirr.word);
 }
 
-static void pSeries_xirr_info_set(int n_cpu, int value)
+static inline void direct_xirr_info_set(int n_cpu, int value)
 {
 	out_be32(&xics_per_cpu[n_cpu]->xirr.word, value);
 }
 
-static void pSeries_cppr_info(int n_cpu, u8 value)
+static inline void direct_cppr_info(int n_cpu, u8 value)
 {
 	out_8(&xics_per_cpu[n_cpu]->xirr.bytes[0], value);
 }
 
-static void pSeries_qirr_info(int n_cpu, u8 value)
+static inline void direct_qirr_info(int n_cpu, u8 value)
 {
 	out_8(&xics_per_cpu[n_cpu]->qirr.bytes[0], value);
 }
 
-static xics_ops pSeries_ops = {
-	pSeries_xirr_info_get,
-	pSeries_xirr_info_set,
-	pSeries_cppr_info,
-	pSeries_qirr_info
-};
-
-static xics_ops *ops = &pSeries_ops;
 
+/* LPAR low level accessors */
 
-/* LPAR */
 
 static inline long plpar_eoi(unsigned long xirr)
 {
@@ -161,7 +130,7 @@ static inline long plpar_xirr(unsigned long *xirr_ret)
 	return plpar_hcall(H_XIRR, 0, 0, 0, 0, xirr_ret, &dummy, &dummy);
 }
 
-static int pSeriesLP_xirr_info_get(int n_cpu)
+static inline int lpar_xirr_info_get(int n_cpu)
 {
 	unsigned long lpar_rc;
 	unsigned long return_value;
@@ -172,7 +141,7 @@ static int pSeriesLP_xirr_info_get(int n_cpu)
 	return (int)return_value;
 }
 
-static void pSeriesLP_xirr_info_set(int n_cpu, int value)
+static inline void lpar_xirr_info_set(int n_cpu, int value)
 {
 	unsigned long lpar_rc;
 	unsigned long val64 = value & 0xffffffff;
@@ -183,7 +152,7 @@ static void pSeriesLP_xirr_info_set(int n_cpu, int value)
 		      val64);
 }
 
-void pSeriesLP_cppr_info(int n_cpu, u8 value)
+static inline void lpar_cppr_info(int n_cpu, u8 value)
 {
 	unsigned long lpar_rc;
 
@@ -192,7 +161,7 @@ void pSeriesLP_cppr_info(int n_cpu, u8 value)
 		panic("bad return code cppr - rc = %lx\n", lpar_rc);
 }
 
-static void pSeriesLP_qirr_info(int n_cpu , u8 value)
+static inline void lpar_qirr_info(int n_cpu , u8 value)
 {
 	unsigned long lpar_rc;
 
@@ -201,36 +170,9 @@ static void pSeriesLP_qirr_info(int n_cpu , u8 value)
 		panic("bad return code qirr - rc = %lx\n", lpar_rc);
 }
 
-xics_ops pSeriesLP_ops = {
-	pSeriesLP_xirr_info_get,
-	pSeriesLP_xirr_info_set,
-	pSeriesLP_cppr_info,
-	pSeriesLP_qirr_info
-};
 
-static unsigned int xics_startup(unsigned int virq)
-{
-	unsigned int irq;
+/* High level handlers and init code */
 
-	irq = irq_offset_down(virq);
-	if (radix_tree_insert(&irq_map, virt_irq_to_real(irq),
-			      &virt_irq_to_real_map[irq]) == -ENOMEM)
-		printk(KERN_CRIT "Out of memory creating real -> virtual"
-		       " IRQ mapping for irq %u (real 0x%x)\n",
-		       virq, virt_irq_to_real(irq));
-	xics_enable_irq(virq);
-	return 0;	/* return value is ignored */
-}
-
-static unsigned int real_irq_to_virt(unsigned int real_irq)
-{
-	unsigned int *ptr;
-
-	ptr = radix_tree_lookup(&irq_map, real_irq);
-	if (ptr == NULL)
-		return NO_IRQ;
-	return ptr - virt_irq_to_real_map;
-}
 
 #ifdef CONFIG_SMP
 static int get_irq_server(unsigned int irq)
@@ -264,17 +206,20 @@ static int get_irq_server(unsigned int irq)
 }
 #endif
 
-static void xics_enable_irq(unsigned int virq)
+
+static void xics_unmask_irq(unsigned int virq)
 {
 	unsigned int irq;
 	int call_status;
 	unsigned int server;
 
 	irq = virt_irq_to_real(irq_offset_down(virq));
-	if (irq == XICS_IPI)
+	WARN_ON(irq == NO_IRQ);
+	if (irq == XICS_IPI || irq == NO_IRQ)
 		return;
 
 	server = get_irq_server(virq);
+
 	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server,
 				DEFAULT_PRIORITY);
 	if (call_status != 0) {
@@ -293,7 +238,7 @@ static void xics_enable_irq(unsigned int virq)
 	}
 }
 
-static void xics_disable_real_irq(unsigned int irq)
+static void xics_mask_real_irq(unsigned int irq)
 {
 	int call_status;
 	unsigned int server;
@@ -318,75 +263,104 @@ static void xics_disable_real_irq(unsigned int irq)
 	}
 }
 
-static void xics_disable_irq(unsigned int virq)
+static void xics_mask_irq(unsigned int virq)
 {
 	unsigned int irq;
 
 	irq = virt_irq_to_real(irq_offset_down(virq));
-	xics_disable_real_irq(irq);
+	WARN_ON(irq == NO_IRQ);
+	if (irq != NO_IRQ)
+		xics_mask_real_irq(irq);
+}
+
+static void xics_set_irq_revmap(unsigned int virq)
+{
+	unsigned int irq;
+
+	irq = irq_offset_down(virq);
+	if (radix_tree_insert(&irq_map, virt_irq_to_real(irq),
+			      &virt_irq_to_real_map[irq]) == -ENOMEM)
+		printk(KERN_CRIT "Out of memory creating real -> virtual"
+		       " IRQ mapping for irq %u (real 0x%x)\n",
+		       virq, virt_irq_to_real(irq));
 }
 
-static void xics_end_irq(unsigned int irq)
+static unsigned int xics_startup(unsigned int virq)
+{
+	xics_set_irq_revmap(virq);
+	xics_unmask_irq(virq);
+	return 0;
+}
+
+static unsigned int real_irq_to_virt(unsigned int real_irq)
+{
+	unsigned int *ptr;
+
+	ptr = radix_tree_lookup(&irq_map, real_irq);
+	if (ptr == NULL)
+		return NO_IRQ;
+	return ptr - virt_irq_to_real_map;
+}
+
+static void xics_eoi_direct(unsigned int irq)
 {
 	int cpu = smp_processor_id();
 
 	iosync();
-	ops->xirr_info_set(cpu, ((0xff << 24) |
-				 (virt_irq_to_real(irq_offset_down(irq)))));
-
+	direct_xirr_info_set(cpu, ((0xff << 24) |
+				   (virt_irq_to_real(irq_offset_down(irq)))));
 }
 
-static void xics_mask_and_ack_irq(unsigned int irq)
+
+static void xics_eoi_lpar(unsigned int irq)
 {
 	int cpu = smp_processor_id();
 
-	if (irq < irq_offset_value()) {
-		i8259_pic.ack(irq);
-		iosync();
-		ops->xirr_info_set(cpu, ((0xff<<24) |
-					 xics_irq_8259_cascade_real));
-		iosync();
-	}
+	iosync();
+	lpar_xirr_info_set(cpu, ((0xff << 24) |
+				 (virt_irq_to_real(irq_offset_down(irq)))));
+
 }
 
-int xics_get_irq(struct pt_regs *regs)
+static inline int xics_remap_irq(int vec)
 {
-	unsigned int cpu = smp_processor_id();
-	unsigned int vec;
 	int irq;
 
-	vec = ops->xirr_info_get(cpu);
-	/*  (vec >> 24) == old priority */
 	vec &= 0x00ffffff;
 
-	/* for sanity, this had better be < NR_IRQS - 16 */
-	if (vec == xics_irq_8259_cascade_real) {
-		irq = i8259_irq(regs);
-		xics_end_irq(irq_offset_up(xics_irq_8259_cascade));
-	} else if (vec == XICS_IRQ_SPURIOUS) {
-		irq = -1;
-	} else {
-		irq = real_irq_to_virt(vec);
-		if (irq == NO_IRQ)
-			irq = real_irq_to_virt_slowpath(vec);
-		if (irq == NO_IRQ) {
-			printk(KERN_ERR "Interrupt %u (real) is invalid,"
-			       " disabling it.\n", vec);
-			xics_disable_real_irq(vec);
-		} else
-			irq = irq_offset_up(irq);
-	}
-	return irq;
+	if (vec == XICS_IRQ_SPURIOUS)
+		return NO_IRQ;
+
+	irq = real_irq_to_virt(vec);
+	if (irq == NO_IRQ)
+		irq = real_irq_to_virt_slowpath(vec);
+	if (likely(irq != NO_IRQ))
+		return irq_offset_up(irq);
+
+	printk(KERN_ERR "Interrupt %u (real) is invalid,"
+	       " disabling it.\n", vec);
+	xics_mask_real_irq(vec);
+	return NO_IRQ;
 }
 
-#ifdef CONFIG_SMP
+static int xics_get_irq_direct(struct pt_regs *regs)
+{
+	unsigned int cpu = smp_processor_id();
 
-static irqreturn_t xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
+	return xics_remap_irq(direct_xirr_info_get(cpu));
+}
+
+static int xics_get_irq_lpar(struct pt_regs *regs)
 {
-	int cpu = smp_processor_id();
+	unsigned int cpu = smp_processor_id();
 
-	ops->qirr_info(cpu, 0xff);
+	return xics_remap_irq(lpar_xirr_info_get(cpu));
+}
+
+#ifdef CONFIG_SMP
 
+static irqreturn_t xics_ipi_dispatch(int cpu, struct pt_regs *regs)
+{
 	WARN_ON(cpu_is_offline(cpu));
 
 	while (xics_ipi_message[cpu].value) {
@@ -418,18 +392,108 @@ static irqreturn_t xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t xics_ipi_action_direct(int irq, void *dev_id, struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+
+	direct_qirr_info(cpu, 0xff);
+
+	return xics_ipi_dispatch(cpu, regs);
+}
+
+static irqreturn_t xics_ipi_action_lpar(int irq, void *dev_id, struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+
+	lpar_qirr_info(cpu, 0xff);
+
+	return xics_ipi_dispatch(cpu, regs);
+}
+
 void xics_cause_IPI(int cpu)
 {
-	ops->qirr_info(cpu, IPI_PRIORITY);
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		lpar_qirr_info(cpu, IPI_PRIORITY);
+	else
+		direct_qirr_info(cpu, IPI_PRIORITY);
 }
+
 #endif /* CONFIG_SMP */
 
+static void xics_set_cpu_priority(int cpu, unsigned char cppr)
+{
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		lpar_cppr_info(cpu, cppr);
+	else
+		direct_cppr_info(cpu, cppr);
+	iosync();
+}
+
+static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
+{
+	unsigned int irq;
+	int status;
+	int xics_status[2];
+	unsigned long newmask;
+	cpumask_t tmp = CPU_MASK_NONE;
+
+	irq = virt_irq_to_real(irq_offset_down(virq));
+	if (irq == XICS_IPI || irq == NO_IRQ)
+		return;
+
+	status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
+
+	if (status) {
+		printk(KERN_ERR "xics_set_affinity: irq=%u ibm,get-xive "
+		       "returns %d\n", irq, status);
+		return;
+	}
+
+	/* For the moment only implement delivery to all cpus or one cpu */
+	if (cpus_equal(cpumask, CPU_MASK_ALL)) {
+		newmask = default_distrib_server;
+	} else {
+		cpus_and(tmp, cpu_online_map, cpumask);
+		if (cpus_empty(tmp))
+			return;
+		newmask = get_hard_smp_processor_id(first_cpu(tmp));
+	}
+
+	status = rtas_call(ibm_set_xive, 3, 1, NULL,
+				irq, newmask, xics_status[1]);
+
+	if (status) {
+		printk(KERN_ERR "xics_set_affinity: irq=%u ibm,set-xive "
+		       "returns %d\n", irq, status);
+		return;
+	}
+}
+
+static struct irq_chip xics_pic_direct = {
+	.typename = " XICS     ",
+	.startup = xics_startup,
+	.mask = xics_mask_irq,
+	.unmask = xics_unmask_irq,
+	.eoi = xics_eoi_direct,
+	.set_affinity = xics_set_affinity
+};
+
+
+static struct irq_chip xics_pic_lpar = {
+	.typename = " XICS     ",
+	.startup = xics_startup,
+	.mask = xics_mask_irq,
+	.unmask = xics_unmask_irq,
+	.eoi = xics_eoi_lpar,
+	.set_affinity = xics_set_affinity
+};
+
+
 void xics_setup_cpu(void)
 {
 	int cpu = smp_processor_id();
 
-	ops->cppr_info(cpu, 0xff);
-	iosync();
+	xics_set_cpu_priority(cpu, 0xff);
 
 	/*
 	 * Put the calling processor into the GIQ.  This is really only
@@ -453,6 +517,7 @@ void xics_init_IRQ(void)
 		unsigned long addr;
 		unsigned long size;
 	} intnodes[NR_CPUS];
+	struct irq_chip *chip;
 
 	ppc64_boot_msg(0x20, "XICS Init");
 
@@ -519,26 +584,10 @@ nextnode:
 	intr_base = intnodes[0].addr;
 	intr_size = intnodes[0].size;
 
-	np = of_find_node_by_type(NULL, "interrupt-controller");
-	if (!np) {
-		printk(KERN_DEBUG "xics: no ISA interrupt controller\n");
-		xics_irq_8259_cascade_real = -1;
-		xics_irq_8259_cascade = -1;
-	} else {
-		ireg = (uint *) get_property(np, "interrupts", NULL);
-		if (!ireg)
-			panic("xics_init_IRQ: can't find ISA interrupts property");
-
-		xics_irq_8259_cascade_real = *ireg;
-		xics_irq_8259_cascade
-			= virt_irq_create_mapping(xics_irq_8259_cascade_real);
-		i8259_init(0, 0);
-		of_node_put(np);
-	}
-
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		ops = &pSeriesLP_ops;
-	else {
+ 	if (firmware_has_feature(FW_FEATURE_LPAR)) {
+ 		ppc_md.get_irq = xics_get_irq_lpar;
+ 		chip = &xics_pic_lpar;
+  	} else {
 #ifdef CONFIG_SMP
 		for_each_possible_cpu(i) {
 			int hard_id;
@@ -554,32 +603,54 @@ nextnode:
 #else
 		xics_per_cpu[0] = ioremap(intr_base, intr_size);
 #endif /* CONFIG_SMP */
+		ppc_md.get_irq = xics_get_irq_direct;
+		chip = &xics_pic_direct;
+
 	}
 
-	for (i = irq_offset_value(); i < NR_IRQS; ++i)
-		get_irq_desc(i)->chip = &xics_pic;
+	for (i = irq_offset_value(); i < NR_IRQS; ++i) {
+		/* All IRQs on XICS are level for now. MSI code may want to modify
+		 * that for reporting purposes
+		 */
+		get_irq_desc(i)->status |= IRQ_LEVEL;
+		set_irq_chip_and_handler(i, chip, handle_fasteoi_irq);
+	}
 
 	xics_setup_cpu();
 
 	ppc64_boot_msg(0x21, "XICS Done");
 }
 
-/*
- * We cant do this in init_IRQ because we need the memory subsystem up for
- * request_irq()
- */
-static int __init xics_setup_i8259(void)
-{
-	if (ppc64_interrupt_controller == IC_PPC_XIC &&
-	    xics_irq_8259_cascade != -1) {
-		if (request_irq(irq_offset_up(xics_irq_8259_cascade),
-				no_action, 0, "8259 cascade", NULL))
-			printk(KERN_ERR "xics_setup_i8259: couldn't get 8259 "
-					"cascade\n");
+static int xics_setup_8259_cascade(void)
+{
+	struct device_node *np;
+	uint *ireg;
+
+	np = of_find_node_by_type(NULL, "interrupt-controller");
+	if (np == NULL) {
+		printk(KERN_WARNING "xics: no ISA interrupt controller\n");
+		xics_irq_8259_cascade_real = -1;
+		xics_irq_8259_cascade = -1;
+		return 0;
 	}
+
+	ireg = (uint *) get_property(np, "interrupts", NULL);
+	if (!ireg)
+		panic("xics_init_IRQ: can't find ISA interrupts property");
+
+	xics_irq_8259_cascade_real = *ireg;
+	xics_irq_8259_cascade = irq_offset_up
+		(virt_irq_create_mapping(xics_irq_8259_cascade_real));
+	i8259_init(0, 0);
+	of_node_put(np);
+
+	xics_set_irq_revmap(xics_irq_8259_cascade);
+	set_irq_chained_handler(xics_irq_8259_cascade, pSeries_8259_cascade);
+
 	return 0;
 }
-arch_initcall(xics_setup_i8259);
+arch_initcall(xics_setup_8259_cascade);
+
 
 #ifdef CONFIG_SMP
 void xics_request_IPIs(void)
@@ -590,61 +661,22 @@ void xics_request_IPIs(void)
 	 * IPIs are marked IRQF_DISABLED as they must run with irqs
 	 * disabled
 	 */
-	request_irq(irq_offset_up(XICS_IPI), xics_ipi_action,
-		    IRQF_DISABLED, "IPI", NULL);
-	get_irq_desc(irq_offset_up(XICS_IPI))->status |= IRQ_PER_CPU;
-}
-#endif
-
-static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
-{
-	unsigned int irq;
-	int status;
-	int xics_status[2];
-	unsigned long newmask;
-	cpumask_t tmp = CPU_MASK_NONE;
-
-	irq = virt_irq_to_real(irq_offset_down(virq));
-	if (irq == XICS_IPI || irq == NO_IRQ)
-		return;
-
-	status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
-
-	if (status) {
-		printk(KERN_ERR "xics_set_affinity: irq=%u ibm,get-xive "
-		       "returns %d\n", irq, status);
-		return;
-	}
-
-	/* For the moment only implement delivery to all cpus or one cpu */
-	if (cpus_equal(cpumask, CPU_MASK_ALL)) {
-		newmask = default_distrib_server;
-	} else {
-		cpus_and(tmp, cpu_online_map, cpumask);
-		if (cpus_empty(tmp))
-			return;
-		newmask = get_hard_smp_processor_id(first_cpu(tmp));
-	}
-
-	status = rtas_call(ibm_set_xive, 3, 1, NULL,
-				irq, newmask, xics_status[1]);
-
-	if (status) {
-		printk(KERN_ERR "xics_set_affinity: irq=%u ibm,set-xive "
-		       "returns %d\n", irq, status);
-		return;
-	}
+	set_irq_handler(irq_offset_up(XICS_IPI), handle_percpu_irq);
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		request_irq(irq_offset_up(XICS_IPI), xics_ipi_action_lpar,
+			    SA_INTERRUPT, "IPI", NULL);
+	else
+		request_irq(irq_offset_up(XICS_IPI), xics_ipi_action_direct,
+			    SA_INTERRUPT, "IPI", NULL);
 }
+#endif /* CONFIG_SMP */
 
 void xics_teardown_cpu(int secondary)
 {
+	struct irq_desc *desc = get_irq_desc(irq_offset_up(XICS_IPI));
 	int cpu = smp_processor_id();
 
-	ops->cppr_info(cpu, 0x00);
-	iosync();
-
-	/* Clear IPI */
-	ops->qirr_info(cpu, 0xff);
+ 	xics_set_cpu_priority(cpu, 0);
 
 	/*
 	 * we need to EOI the IPI if we got here from kexec down IPI
@@ -653,7 +685,8 @@ void xics_teardown_cpu(int secondary)
 	 * should we be flagging idle loop instead?
 	 * or creating some task to be scheduled?
 	 */
-	ops->xirr_info_set(cpu, XICS_IPI);
+	if (desc->chip && desc->chip->eoi)
+		desc->chip->eoi(XICS_IPI);
 
 	/*
 	 * Some machines need to have at least one cpu in the GIQ,
@@ -674,8 +707,7 @@ void xics_migrate_irqs_away(void)
 	unsigned int irq, virq, cpu = smp_processor_id();
 
 	/* Reject any interrupt that was queued to us... */
-	ops->cppr_info(cpu, 0);
-	iosync();
+	xics_set_cpu_priority(cpu, 0);
 
 	/* remove ourselves from the global interrupt queue */
 	status = rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
@@ -683,11 +715,10 @@ void xics_migrate_irqs_away(void)
 	WARN_ON(status < 0);
 
 	/* Allow IPIs again... */
-	ops->cppr_info(cpu, DEFAULT_PRIORITY);
-	iosync();
+	xics_set_cpu_priority(cpu, DEFAULT_PRIORITY);
 
 	for_each_irq(virq) {
-		irq_desc_t *desc;
+		struct irq_desc *desc;
 		int xics_status[2];
 		unsigned long flags;
 
diff --git a/arch/powerpc/platforms/pseries/xics.h b/arch/powerpc/platforms/pseries/xics.h
index e14c708..67dedf3 100644
--- a/arch/powerpc/platforms/pseries/xics.h
+++ b/arch/powerpc/platforms/pseries/xics.h
@@ -14,13 +14,12 @@
 
 #include <linux/cache.h>
 
-void xics_init_IRQ(void);
-int xics_get_irq(struct pt_regs *);
-void xics_setup_cpu(void);
-void xics_teardown_cpu(int secondary);
-void xics_cause_IPI(int cpu);
-void xics_request_IPIs(void);
-void xics_migrate_irqs_away(void);
+extern void xics_init_IRQ(void);
+extern void xics_setup_cpu(void);
+extern void xics_teardown_cpu(int secondary);
+extern void xics_cause_IPI(int cpu);
+extern  void xics_request_IPIs(void);
+extern void xics_migrate_irqs_away(void);
 
 /* first argument is ignored for now*/
 void pSeriesLP_cppr_info(int n_cpu, u8 value);
@@ -31,4 +30,8 @@ struct xics_ipi_struct {
 
 extern struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned;
 
+struct irq_desc;
+extern void pSeries_8259_cascade(unsigned int irq, struct irq_desc *desc,
+				 struct pt_regs *regs);
+
 #endif /* _POWERPC_KERNEL_XICS_H */
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index 1a3ef1a..c2e9465 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -69,11 +69,6 @@ int i8259_irq(struct pt_regs *regs)
 	return irq + i8259_pic_irq_offset;
 }
 
-int i8259_irq_cascade(struct pt_regs *regs, void *unused)
-{
-	return i8259_irq(regs);
-}
-
 static void i8259_mask_and_ack_irq(unsigned int irq_nr)
 {
 	unsigned long flags;
@@ -129,19 +124,11 @@ static void i8259_unmask_irq(unsigned int irq_nr)
 	spin_unlock_irqrestore(&i8259_lock, flags);
 }
 
-static void i8259_end_irq(unsigned int irq)
-{
-	if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))
-	    && irq_desc[irq].action)
-		i8259_unmask_irq(irq);
-}
-
-struct hw_interrupt_type i8259_pic = {
-	.typename = " i8259    ",
-	.enable = i8259_unmask_irq,
-	.disable = i8259_mask_irq,
-	.ack = i8259_mask_and_ack_irq,
-	.end = i8259_end_irq,
+static struct irq_chip i8259_pic = {
+	.typename	= " i8259    ",
+	.mask		= i8259_mask_irq,
+	.unmask		= i8259_unmask_irq,
+	.mask_ack	= i8259_mask_and_ack_irq,
 };
 
 static struct resource pic1_iores = {
@@ -207,8 +194,11 @@ void __init i8259_init(unsigned long intack_addr, int offset)
 
 	spin_unlock_irqrestore(&i8259_lock, flags);
 
-	for (i = 0; i < NUM_ISA_INTERRUPTS; ++i)
-		irq_desc[offset + i].chip = &i8259_pic;
+	for (i = 0; i < NUM_ISA_INTERRUPTS; ++i) {
+		set_irq_chip_and_handler(offset + i, &i8259_pic,
+					 handle_level_irq);
+		irq_desc[offset + i].status |= IRQ_LEVEL;
+	}
 
 	/* reserve our resources */
 	setup_irq(offset + 2, &i8259_irqaction);
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 7e46935..9a95f16 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -100,8 +100,8 @@ static inline u32 _mpic_cpu_read(struct mpic *mpic, unsigned int reg)
 
 	if (mpic->flags & MPIC_PRIMARY)
 		cpu = hard_smp_processor_id();
-
-	return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, mpic->cpuregs[cpu], reg);
+	return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN,
+			  mpic->cpuregs[cpu], reg);
 }
 
 static inline void _mpic_cpu_write(struct mpic *mpic, unsigned int reg, u32 value)
@@ -378,14 +378,14 @@ static inline u32 mpic_physmask(u32 cpumask)
 /* Get the mpic structure from the IPI number */
 static inline struct mpic * mpic_from_ipi(unsigned int ipi)
 {
-	return container_of(irq_desc[ipi].chip, struct mpic, hc_ipi);
+	return irq_desc[ipi].chip_data;
 }
 #endif
 
 /* Get the mpic structure from the irq number */
 static inline struct mpic * mpic_from_irq(unsigned int irq)
 {
-	return container_of(irq_desc[irq].chip, struct mpic, hc_irq);
+	return irq_desc[irq].chip_data;
 }
 
 /* Send an EOI */
@@ -410,7 +410,7 @@ static irqreturn_t mpic_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
  */
 
 
-static void mpic_enable_irq(unsigned int irq)
+static void mpic_unmask_irq(unsigned int irq)
 {
 	unsigned int loops = 100000;
 	struct mpic *mpic = mpic_from_irq(irq);
@@ -429,35 +429,9 @@ static void mpic_enable_irq(unsigned int irq)
 			break;
 		}
 	} while(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK);	
-
-#ifdef CONFIG_MPIC_BROKEN_U3
-	if (mpic->flags & MPIC_BROKEN_U3) {
-		unsigned int src = irq - mpic->irq_offset;
-		if (mpic_is_ht_interrupt(mpic, src) &&
-		    (irq_desc[irq].status & IRQ_LEVEL))
-			mpic_ht_end_irq(mpic, src);
-	}
-#endif /* CONFIG_MPIC_BROKEN_U3 */
-}
-
-static unsigned int mpic_startup_irq(unsigned int irq)
-{
-#ifdef CONFIG_MPIC_BROKEN_U3
-	struct mpic *mpic = mpic_from_irq(irq);
-	unsigned int src = irq - mpic->irq_offset;
-#endif /* CONFIG_MPIC_BROKEN_U3 */
-
-	mpic_enable_irq(irq);
-
-#ifdef CONFIG_MPIC_BROKEN_U3
-	if (mpic_is_ht_interrupt(mpic, src))
-		mpic_startup_ht_interrupt(mpic, src, irq_desc[irq].status);
-#endif /* CONFIG_MPIC_BROKEN_U3 */
-
-	return 0;
 }
 
-static void mpic_disable_irq(unsigned int irq)
+static void mpic_mask_irq(unsigned int irq)
 {
 	unsigned int loops = 100000;
 	struct mpic *mpic = mpic_from_irq(irq);
@@ -478,23 +452,58 @@ static void mpic_disable_irq(unsigned int irq)
 	} while(!(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK));
 }
 
-static void mpic_shutdown_irq(unsigned int irq)
+static void mpic_end_irq(unsigned int irq)
 {
+	struct mpic *mpic = mpic_from_irq(irq);
+
+#ifdef DEBUG_IRQ
+	DBG("%s: end_irq: %d\n", mpic->name, irq);
+#endif
+	/* We always EOI on end_irq() even for edge interrupts since that
+	 * should only lower the priority, the MPIC should have properly
+	 * latched another edge interrupt coming in anyway
+	 */
+
+	mpic_eoi(mpic);
+}
+
 #ifdef CONFIG_MPIC_BROKEN_U3
+
+static void mpic_unmask_ht_irq(unsigned int irq)
+{
 	struct mpic *mpic = mpic_from_irq(irq);
 	unsigned int src = irq - mpic->irq_offset;
 
-	if (mpic_is_ht_interrupt(mpic, src))
-		mpic_shutdown_ht_interrupt(mpic, src, irq_desc[irq].status);
+	mpic_unmask_irq(irq);
 
-#endif /* CONFIG_MPIC_BROKEN_U3 */
+	if (irq_desc[irq].status & IRQ_LEVEL)
+		mpic_ht_end_irq(mpic, src);
+}
+
+static unsigned int mpic_startup_ht_irq(unsigned int irq)
+{
+	struct mpic *mpic = mpic_from_irq(irq);
+	unsigned int src = irq - mpic->irq_offset;
 
-	mpic_disable_irq(irq);
+	mpic_unmask_irq(irq);
+	mpic_startup_ht_interrupt(mpic, src, irq_desc[irq].status);
+
+	return 0;
 }
 
-static void mpic_end_irq(unsigned int irq)
+static void mpic_shutdown_ht_irq(unsigned int irq)
+{
+	struct mpic *mpic = mpic_from_irq(irq);
+	unsigned int src = irq - mpic->irq_offset;
+
+	mpic_shutdown_ht_interrupt(mpic, src, irq_desc[irq].status);
+	mpic_mask_irq(irq);
+}
+
+static void mpic_end_ht_irq(unsigned int irq)
 {
 	struct mpic *mpic = mpic_from_irq(irq);
+	unsigned int src = irq - mpic->irq_offset;
 
 #ifdef DEBUG_IRQ
 	DBG("%s: end_irq: %d\n", mpic->name, irq);
@@ -504,21 +513,16 @@ static void mpic_end_irq(unsigned int irq)
 	 * latched another edge interrupt coming in anyway
 	 */
 
-#ifdef CONFIG_MPIC_BROKEN_U3
-	if (mpic->flags & MPIC_BROKEN_U3) {
-		unsigned int src = irq - mpic->irq_offset;
-		if (mpic_is_ht_interrupt(mpic, src) &&
-		    (irq_desc[irq].status & IRQ_LEVEL))
-			mpic_ht_end_irq(mpic, src);
-	}
-#endif /* CONFIG_MPIC_BROKEN_U3 */
-
+	if (irq_desc[irq].status & IRQ_LEVEL)
+		mpic_ht_end_irq(mpic, src);
 	mpic_eoi(mpic);
 }
 
+#endif /* CONFIG_MPIC_BROKEN_U3 */
+
 #ifdef CONFIG_SMP
 
-static void mpic_enable_ipi(unsigned int irq)
+static void mpic_unmask_ipi(unsigned int irq)
 {
 	struct mpic *mpic = mpic_from_ipi(irq);
 	unsigned int src = irq - mpic->ipi_offset;
@@ -527,7 +531,7 @@ static void mpic_enable_ipi(unsigned int irq)
 	mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK);
 }
 
-static void mpic_disable_ipi(unsigned int irq)
+static void mpic_mask_ipi(unsigned int irq)
 {
 	/* NEVER disable an IPI... that's just plain wrong! */
 }
@@ -560,6 +564,30 @@ static void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
 		       mpic_physmask(cpus_addr(tmp)[0]));	
 }
 
+static struct irq_chip mpic_irq_chip = {
+	.mask	= mpic_mask_irq,
+	.unmask	= mpic_unmask_irq,
+	.eoi	= mpic_end_irq,
+};
+
+#ifdef CONFIG_SMP
+static struct irq_chip mpic_ipi_chip = {
+	.mask	= mpic_mask_ipi,
+	.unmask	= mpic_unmask_ipi,
+	.eoi	= mpic_end_ipi,
+};
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_MPIC_BROKEN_U3
+static struct irq_chip mpic_irq_ht_chip = {
+	.startup	= mpic_startup_ht_irq,
+	.shutdown	= mpic_shutdown_ht_irq,
+	.mask		= mpic_mask_irq,
+	.unmask		= mpic_unmask_ht_irq,
+	.eoi		= mpic_end_ht_irq,
+};
+#endif /* CONFIG_MPIC_BROKEN_U3 */
+
 
 /*
  * Exported functions
@@ -589,19 +617,19 @@ struct mpic * __init mpic_alloc(unsigned long phys_addr,
 	memset(mpic, 0, sizeof(struct mpic));
 	mpic->name = name;
 
+	mpic->hc_irq = mpic_irq_chip;
 	mpic->hc_irq.typename = name;
-	mpic->hc_irq.startup = mpic_startup_irq;
-	mpic->hc_irq.shutdown = mpic_shutdown_irq;
-	mpic->hc_irq.enable = mpic_enable_irq;
-	mpic->hc_irq.disable = mpic_disable_irq;
-	mpic->hc_irq.end = mpic_end_irq;
 	if (flags & MPIC_PRIMARY)
 		mpic->hc_irq.set_affinity = mpic_set_affinity;
+#ifdef CONFIG_MPIC_BROKEN_U3
+	mpic->hc_ht_irq = mpic_irq_ht_chip;
+	mpic->hc_ht_irq.typename = name;
+	if (flags & MPIC_PRIMARY)
+		mpic->hc_ht_irq.set_affinity = mpic_set_affinity;
+#endif /* CONFIG_MPIC_BROKEN_U3 */
 #ifdef CONFIG_SMP
 	mpic->hc_ipi.typename = name;
-	mpic->hc_ipi.enable = mpic_enable_ipi;
-	mpic->hc_ipi.disable = mpic_disable_ipi;
-	mpic->hc_ipi.end = mpic_end_ipi;
+	mpic->hc_ipi = mpic_ipi_chip;
 #endif /* CONFIG_SMP */
 
 	mpic->flags = flags;
@@ -697,28 +725,6 @@ void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
 		mpic->num_sources = isu_first + mpic->isu_size;
 }
 
-void __init mpic_setup_cascade(unsigned int irq, mpic_cascade_t handler,
-			       void *data)
-{
-	struct mpic *mpic = mpic_find(irq, NULL);
-	unsigned long flags;
-
-	/* Synchronization here is a bit dodgy, so don't try to replace cascade
-	 * interrupts on the fly too often ... but normally it's set up at boot.
-	 */
-	spin_lock_irqsave(&mpic_lock, flags);
-	if (mpic->cascade)	       
-		mpic_disable_irq(mpic->cascade_vec + mpic->irq_offset);
-	mpic->cascade = NULL;
-	wmb();
-	mpic->cascade_vec = irq - mpic->irq_offset;
-	mpic->cascade_data = data;
-	wmb();
-	mpic->cascade = handler;
-	mpic_enable_irq(irq);
-	spin_unlock_irqrestore(&mpic_lock, flags);
-}
-
 void __init mpic_init(struct mpic *mpic)
 {
 	int i;
@@ -750,8 +756,10 @@ void __init mpic_init(struct mpic *mpic)
 #ifdef CONFIG_SMP
 		if (!(mpic->flags & MPIC_PRIMARY))
 			continue;
-		irq_desc[mpic->ipi_offset+i].status |= IRQ_PER_CPU;
-		irq_desc[mpic->ipi_offset+i].chip = &mpic->hc_ipi;
+		set_irq_chip_data(mpic->ipi_offset+i, mpic);
+		set_irq_chip_and_handler(mpic->ipi_offset+i,
+					 &mpic->hc_ipi,
+					 handle_percpu_irq);
 #endif /* CONFIG_SMP */
 	}
 
@@ -763,7 +771,7 @@ void __init mpic_init(struct mpic *mpic)
 	/* Do the HT PIC fixups on U3 broken mpic */
 	DBG("MPIC flags: %x\n", mpic->flags);
 	if ((mpic->flags & MPIC_BROKEN_U3) && (mpic->flags & MPIC_PRIMARY))
-		mpic_scan_ht_pics(mpic);
+ 		mpic_scan_ht_pics(mpic);
 #endif /* CONFIG_MPIC_BROKEN_U3 */
 
 	for (i = 0; i < mpic->num_sources; i++) {
@@ -811,8 +819,17 @@ void __init mpic_init(struct mpic *mpic)
 
 		/* init linux descriptors */
 		if (i < mpic->irq_count) {
-			irq_desc[mpic->irq_offset+i].status = level ? IRQ_LEVEL : 0;
-			irq_desc[mpic->irq_offset+i].chip = &mpic->hc_irq;
+			struct irq_chip *chip = &mpic->hc_irq;
+
+			irq_desc[mpic->irq_offset+i].status |=
+				level ? IRQ_LEVEL : 0;
+#ifdef CONFIG_MPIC_BROKEN_U3
+			if (mpic_is_ht_interrupt(mpic, i))
+				chip = &mpic->hc_ht_irq;
+#endif /* CONFIG_MPIC_BROKEN_U3 */
+			set_irq_chip_data(mpic->irq_offset+i, mpic);
+			set_irq_chip_and_handler(mpic->irq_offset+i, chip,
+						 handle_fasteoi_irq);
 		}
 	}
 	
@@ -986,14 +1003,6 @@ int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs)
 #ifdef DEBUG_LOW
 	DBG("%s: get_one_irq(): %d\n", mpic->name, irq);
 #endif
-	if (mpic->cascade && irq == mpic->cascade_vec) {
-#ifdef DEBUG_LOW
-		DBG("%s: cascading ...\n", mpic->name);
-#endif
-		irq = mpic->cascade(regs, mpic->cascade_data);
-		mpic_eoi(mpic);
-		return irq;
-	}
 	if (unlikely(irq == MPIC_VEC_SPURRIOUS))
 		return -1;
 	if (irq < MPIC_VEC_IPI_0) {
diff --git a/include/asm-powerpc/i8259.h b/include/asm-powerpc/i8259.h
index 0392159..ff31cb9 100644
--- a/include/asm-powerpc/i8259.h
+++ b/include/asm-powerpc/i8259.h
@@ -4,11 +4,8 @@
 
 #include <linux/irq.h>
 
-extern struct hw_interrupt_type i8259_pic;
-
 extern void i8259_init(unsigned long intack_addr, int offset);
 extern int i8259_irq(struct pt_regs *regs);
-extern int i8259_irq_cascade(struct pt_regs *regs, void *unused);
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_I8259_H */
diff --git a/include/asm-powerpc/irq.h b/include/asm-powerpc/irq.h
index eb5f33e..13fa2ef 100644
--- a/include/asm-powerpc/irq.h
+++ b/include/asm-powerpc/irq.h
@@ -514,9 +514,12 @@ extern u64 ppc64_interrupt_controller;
 
 #endif
 
+#ifndef CONFIG_PPC_MERGE
 #define NR_MASK_WORDS	((NR_IRQS + 31) / 32)
 /* pedantic: these are long because they are used with set_bit --RR */
 extern unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
+#endif
+
 extern atomic_t ppc_n_lost_interrupts;
 
 #define virt_irq_create_mapping(x)	(x)
@@ -579,9 +582,8 @@ extern struct thread_info *softirq_ctx[NR_CPUS];
 
 extern void irq_ctx_init(void);
 extern void call_do_softirq(struct thread_info *tp);
-extern int call___do_IRQ(int irq, struct pt_regs *regs,
-		struct thread_info *tp);
-
+extern int call_handle_irq(int irq, void *p1, void *p2,
+			   struct thread_info *tp, void *func);
 #else
 #define irq_ctx_init()
 
diff --git a/include/asm-powerpc/mpic.h b/include/asm-powerpc/mpic.h
index f0d22ac..a2277cb 100644
--- a/include/asm-powerpc/mpic.h
+++ b/include/asm-powerpc/mpic.h
@@ -114,9 +114,6 @@
 #define MPIC_VEC_TIMER_1	248
 #define MPIC_VEC_TIMER_0	247
 
-/* Type definition of the cascade handler */
-typedef int (*mpic_cascade_t)(struct pt_regs *regs, void *data);
-
 #ifdef CONFIG_MPIC_BROKEN_U3
 /* Fixup table entry */
 struct mpic_irq_fixup
@@ -133,9 +130,12 @@ struct mpic_irq_fixup
 struct mpic
 {
 	/* The "linux" controller struct */
-	hw_irq_controller	hc_irq;
+	struct irq_chip		hc_irq;
+#ifdef CONFIG_MPIC_BROKEN_U3
+	struct irq_chip		hc_ht_irq;
+#endif
 #ifdef CONFIG_SMP
-	hw_irq_controller	hc_ipi;
+	struct irq_chip		hc_ipi;
 #endif
 	const char		*name;
 	/* Flags */
@@ -153,10 +153,6 @@ struct mpic
 	unsigned int		num_sources;
 	/* Number of CPUs */
 	unsigned int		num_cpus;
-	/* cascade handler */
-	mpic_cascade_t		cascade;
-	void			*cascade_data;
-	unsigned int		cascade_vec;
 	/* senses array */
 	unsigned char		*senses;
 	unsigned int		senses_count;
@@ -237,17 +233,6 @@ extern void mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
  */
 extern void mpic_init(struct mpic *mpic);
 
-/* Setup a cascade. Currently, only one cascade is supported this
- * way, though you can always do a normal request_irq() and add
- * other cascades this way. You should call this _after_ having
- * added all the ISUs
- *
- * @irq_no:	"linux" irq number of the cascade (that is offset'ed vector)
- * @handler:	cascade handler function
- */
-extern void mpic_setup_cascade(unsigned int irq_no, mpic_cascade_t hanlder,
-			       void *data);
-
 /*
  * All of the following functions must only be used after the
  * ISUs have been assigned and the controller fully initialized
-- 
cgit v0.10.2


From cc9fd71c62f542233c412b5fabc1bbe0c4d5ad08 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 3 Jul 2006 19:35:17 +1000
Subject: [POWERPC] New device-tree interrupt parsing code

Adds new routines to prom_parse to walk the device-tree for interrupt
information. This includes both direct mapping of interrupts and low
level parsing functions for use with partial trees.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index 45df420..21009b1 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -38,14 +38,6 @@ static void of_dump_addr(const char *s, u32 *addr, int na)
 static void of_dump_addr(const char *s, u32 *addr, int na) { }
 #endif
 
-/* Read a big address */
-static inline u64 of_read_addr(u32 *cell, int size)
-{
-	u64 r = 0;
-	while (size--)
-		r = (r << 32) | *(cell++);
-	return r;
-}
 
 /* Callbacks for bus specific translators */
 struct of_bus {
@@ -77,9 +69,9 @@ static u64 of_bus_default_map(u32 *addr, u32 *range, int na, int ns, int pna)
 {
 	u64 cp, s, da;
 
-	cp = of_read_addr(range, na);
-	s  = of_read_addr(range + na + pna, ns);
-	da = of_read_addr(addr, na);
+	cp = of_read_number(range, na);
+	s  = of_read_number(range + na + pna, ns);
+	da = of_read_number(addr, na);
 
 	DBG("OF: default map, cp="PRu64", s="PRu64", da="PRu64"\n",
 	    cp, s, da);
@@ -91,7 +83,7 @@ static u64 of_bus_default_map(u32 *addr, u32 *range, int na, int ns, int pna)
 
 static int of_bus_default_translate(u32 *addr, u64 offset, int na)
 {
-	u64 a = of_read_addr(addr, na);
+	u64 a = of_read_number(addr, na);
 	memset(addr, 0, na * 4);
 	a += offset;
 	if (na > 1)
@@ -135,9 +127,9 @@ static u64 of_bus_pci_map(u32 *addr, u32 *range, int na, int ns, int pna)
 		return OF_BAD_ADDR;
 
 	/* Read address values, skipping high cell */
-	cp = of_read_addr(range + 1, na - 1);
-	s  = of_read_addr(range + na + pna, ns);
-	da = of_read_addr(addr + 1, na - 1);
+	cp = of_read_number(range + 1, na - 1);
+	s  = of_read_number(range + na + pna, ns);
+	da = of_read_number(addr + 1, na - 1);
 
 	DBG("OF: PCI map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da);
 
@@ -195,9 +187,9 @@ static u64 of_bus_isa_map(u32 *addr, u32 *range, int na, int ns, int pna)
 		return OF_BAD_ADDR;
 
 	/* Read address values, skipping high cell */
-	cp = of_read_addr(range + 1, na - 1);
-	s  = of_read_addr(range + na + pna, ns);
-	da = of_read_addr(addr + 1, na - 1);
+	cp = of_read_number(range + 1, na - 1);
+	s  = of_read_number(range + na + pna, ns);
+	da = of_read_number(addr + 1, na - 1);
 
 	DBG("OF: ISA map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da);
 
@@ -295,7 +287,7 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus,
 	 */
 	ranges = (u32 *)get_property(parent, "ranges", &rlen);
 	if (ranges == NULL || rlen == 0) {
-		offset = of_read_addr(addr, na);
+		offset = of_read_number(addr, na);
 		memset(addr, 0, pna * 4);
 		DBG("OF: no ranges, 1:1 translation\n");
 		goto finish;
@@ -378,7 +370,7 @@ u64 of_translate_address(struct device_node *dev, u32 *in_addr)
 		/* If root, we have finished */
 		if (parent == NULL) {
 			DBG("OF: reached root node\n");
-			result = of_read_addr(addr, na);
+			result = of_read_number(addr, na);
 			break;
 		}
 
@@ -442,7 +434,7 @@ u32 *of_get_address(struct device_node *dev, int index, u64 *size,
 	for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++)
 		if (i == index) {
 			if (size)
-				*size = of_read_addr(prop + na, ns);
+				*size = of_read_number(prop + na, ns);
 			if (flags)
 				*flags = bus->get_flags(prop);
 			return prop;
@@ -484,7 +476,7 @@ u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size,
 	for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++)
 		if ((prop[0] & 0xff) == ((bar_no * 4) + PCI_BASE_ADDRESS_0)) {
 			if (size)
-				*size = of_read_addr(prop + na, ns);
+				*size = of_read_number(prop + na, ns);
 			if (flags)
 				*flags = bus->get_flags(prop);
 			return prop;
@@ -565,11 +557,414 @@ void of_parse_dma_window(struct device_node *dn, unsigned char *dma_window_prop,
 		prop = get_property(dn, "#address-cells", NULL);
 
 	cells = prop ? *(u32 *)prop : prom_n_addr_cells(dn);
-	*phys = of_read_addr(dma_window, cells);
+	*phys = of_read_number(dma_window, cells);
 
 	dma_window += cells;
 
 	prop = get_property(dn, "ibm,#dma-size-cells", NULL);
 	cells = prop ? *(u32 *)prop : prom_n_size_cells(dn);
-	*size = of_read_addr(dma_window, cells);
+	*size = of_read_number(dma_window, cells);
+}
+
+/*
+ * Interrupt remapper
+ */
+
+static unsigned int of_irq_workarounds;
+static struct device_node *of_irq_dflt_pic;
+
+static struct device_node *of_irq_find_parent(struct device_node *child)
+{
+	struct device_node *p;
+	phandle *parp;
+
+	if (!of_node_get(child))
+		return NULL;
+
+	do {
+		parp = (phandle *)get_property(child, "interrupt-parent", NULL);
+		if (parp == NULL)
+			p = of_get_parent(child);
+		else {
+			if (of_irq_workarounds & OF_IMAP_NO_PHANDLE)
+				p = of_node_get(of_irq_dflt_pic);
+			else
+				p = of_find_node_by_phandle(*parp);
+		}
+		of_node_put(child);
+		child = p;
+	} while (p && get_property(p, "#interrupt-cells", NULL) == NULL);
+
+	return p;
+}
+
+static u8 of_irq_pci_swizzle(u8 slot, u8 pin)
+{
+	return (((pin - 1) + slot) % 4) + 1;
 }
+
+/* This doesn't need to be called if you don't have any special workaround
+ * flags to pass
+ */
+void of_irq_map_init(unsigned int flags)
+{
+	of_irq_workarounds = flags;
+
+	/* OldWorld, don't bother looking at other things */
+	if (flags & OF_IMAP_OLDWORLD_MAC)
+		return;
+
+	/* If we don't have phandles, let's try to locate a default interrupt
+	 * controller (happens when booting with BootX). We do a first match
+	 * here, hopefully, that only ever happens on machines with one
+	 * controller.
+	 */
+	if (flags & OF_IMAP_NO_PHANDLE) {
+		struct device_node *np;
+
+		for(np = NULL; (np = of_find_all_nodes(np)) != NULL;) {
+			if (get_property(np, "interrupt-controller", NULL)
+			    == NULL)
+				continue;
+			/* Skip /chosen/interrupt-controller */
+			if (strcmp(np->name, "chosen") == 0)
+				continue;
+			/* It seems like at least one person on this planet wants
+			 * to use BootX on a machine with an AppleKiwi controller
+			 * which happens to pretend to be an interrupt
+			 * controller too.
+			 */
+			if (strcmp(np->name, "AppleKiwi") == 0)
+				continue;
+			/* I think we found one ! */
+			of_irq_dflt_pic = np;
+			break;
+		}
+	}
+
+}
+
+int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr,
+		   struct of_irq *out_irq)
+{
+	struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL;
+	u32 *tmp, *imap, *imask;
+	u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0;
+	int imaplen, match, i;
+
+	ipar = of_node_get(parent);
+
+	/* First get the #interrupt-cells property of the current cursor
+	 * that tells us how to interpret the passed-in intspec. If there
+	 * is none, we are nice and just walk up the tree
+	 */
+	do {
+		tmp = (u32 *)get_property(ipar, "#interrupt-cells", NULL);
+		if (tmp != NULL) {
+			intsize = *tmp;
+			break;
+		}
+		tnode = ipar;
+		ipar = of_irq_find_parent(ipar);
+		of_node_put(tnode);
+	} while (ipar);
+	if (ipar == NULL) {
+		DBG(" -> no parent found !\n");
+		goto fail;
+	}
+
+	DBG("of_irq_map_raw: ipar=%s, size=%d\n", ipar->full_name, intsize);
+
+	/* Look for this #address-cells. We have to implement the old linux
+	 * trick of looking for the parent here as some device-trees rely on it
+	 */
+	old = of_node_get(ipar);
+	do {
+		tmp = (u32 *)get_property(old, "#address-cells", NULL);
+		tnode = of_get_parent(old);
+		of_node_put(old);
+		old = tnode;
+	} while(old && tmp == NULL);
+	of_node_put(old);
+	old = NULL;
+	addrsize = (tmp == NULL) ? 2 : *tmp;
+
+	DBG(" -> addrsize=%d\n", addrsize);
+
+	/* Now start the actual "proper" walk of the interrupt tree */
+	while (ipar != NULL) {
+		/* Now check if cursor is an interrupt-controller and if it is
+		 * then we are done
+		 */
+		if (get_property(ipar, "interrupt-controller", NULL) != NULL) {
+			DBG(" -> got it !\n");
+			memcpy(out_irq->specifier, intspec,
+			       intsize * sizeof(u32));
+			out_irq->size = intsize;
+			out_irq->controller = ipar;
+			of_node_put(old);
+			return 0;
+		}
+
+		/* Now look for an interrupt-map */
+		imap = (u32 *)get_property(ipar, "interrupt-map", &imaplen);
+		/* No interrupt map, check for an interrupt parent */
+		if (imap == NULL) {
+			DBG(" -> no map, getting parent\n");
+			newpar = of_irq_find_parent(ipar);
+			goto skiplevel;
+		}
+		imaplen /= sizeof(u32);
+
+		/* Look for a mask */
+		imask = (u32 *)get_property(ipar, "interrupt-map-mask", NULL);
+
+		/* If we were passed no "reg" property and we attempt to parse
+		 * an interrupt-map, then #address-cells must be 0.
+		 * Fail if it's not.
+		 */
+		if (addr == NULL && addrsize != 0) {
+			DBG(" -> no reg passed in when needed !\n");
+			goto fail;
+		}
+
+		/* Parse interrupt-map */
+		match = 0;
+		while (imaplen > (addrsize + intsize + 1) && !match) {
+			/* Compare specifiers */
+			match = 1;
+			for (i = 0; i < addrsize && match; ++i) {
+				u32 mask = imask ? imask[i] : 0xffffffffu;
+				match = ((addr[i] ^ imap[i]) & mask) == 0;
+			}
+			for (; i < (addrsize + intsize) && match; ++i) {
+				u32 mask = imask ? imask[i] : 0xffffffffu;
+				match =
+				   ((intspec[i-addrsize] ^ imap[i]) & mask) == 0;
+			}
+			imap += addrsize + intsize;
+			imaplen -= addrsize + intsize;
+
+			DBG(" -> match=%d (imaplen=%d)\n", match, imaplen);
+
+			/* Get the interrupt parent */
+			if (of_irq_workarounds & OF_IMAP_NO_PHANDLE)
+				newpar = of_node_get(of_irq_dflt_pic);
+			else
+				newpar = of_find_node_by_phandle((phandle)*imap);
+			imap++;
+			--imaplen;
+
+			/* Check if not found */
+			if (newpar == NULL) {
+				DBG(" -> imap parent not found !\n");
+				goto fail;
+			}
+
+			/* Get #interrupt-cells and #address-cells of new
+			 * parent
+			 */
+			tmp = (u32 *)get_property(newpar, "#interrupt-cells",
+						  NULL);
+			if (tmp == NULL) {
+				DBG(" -> parent lacks #interrupt-cells !\n");
+				goto fail;
+			}
+			newintsize = *tmp;
+			tmp = (u32 *)get_property(newpar, "#address-cells",
+						  NULL);
+			newaddrsize = (tmp == NULL) ? 0 : *tmp;
+
+			DBG(" -> newintsize=%d, newaddrsize=%d\n",
+			    newintsize, newaddrsize);
+
+			/* Check for malformed properties */
+			if (imaplen < (newaddrsize + newintsize))
+				goto fail;
+
+			imap += newaddrsize + newintsize;
+			imaplen -= newaddrsize + newintsize;
+
+			DBG(" -> imaplen=%d\n", imaplen);
+		}
+		if (!match)
+			goto fail;
+
+		of_node_put(old);
+		old = of_node_get(newpar);
+		addrsize = newaddrsize;
+		intsize = newintsize;
+		intspec = imap - intsize;
+		addr = intspec - addrsize;
+
+	skiplevel:
+		/* Iterate again with new parent */
+		DBG(" -> new parent: %s\n", newpar ? newpar->full_name : "<>");
+		of_node_put(ipar);
+		ipar = newpar;
+		newpar = NULL;
+	}
+ fail:
+	of_node_put(ipar);
+	of_node_put(old);
+	of_node_put(newpar);
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(of_irq_map_raw);
+
+#if defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32)
+static int of_irq_map_oldworld(struct device_node *device, int index,
+			       struct of_irq *out_irq)
+{
+	u32 *ints;
+	int intlen;
+
+	/*
+	 * Old machines just have a list of interrupt numbers
+	 * and no interrupt-controller nodes.
+	 */
+	ints = (u32 *) get_property(device, "AAPL,interrupts", &intlen);
+	if (ints == NULL)
+		return -EINVAL;
+	intlen /= sizeof(u32);
+
+	if (index >= intlen)
+		return -EINVAL;
+
+	out_irq->controller = NULL;
+	out_irq->specifier[0] = ints[index];
+	out_irq->size = 1;
+
+	return 0;
+}
+#else /* defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32) */
+static int of_irq_map_oldworld(struct device_node *device, int index,
+			       struct of_irq *out_irq)
+{
+	return -EINVAL;
+}
+#endif /* !(defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32)) */
+
+int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq)
+{
+	struct device_node *p;
+	u32 *intspec, *tmp, intsize, intlen, *addr;
+	int res;
+
+	DBG("of_irq_map_one: dev=%s, index=%d\n", device->full_name, index);
+
+	/* OldWorld mac stuff is "special", handle out of line */
+	if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC)
+		return of_irq_map_oldworld(device, index, out_irq);
+
+	/* Get the interrupts property */
+	intspec = (u32 *)get_property(device, "interrupts", &intlen);
+	if (intspec == NULL)
+		return -EINVAL;
+	intlen /= sizeof(u32);
+
+	/* Get the reg property (if any) */
+	addr = (u32 *)get_property(device, "reg", NULL);
+
+	/* Look for the interrupt parent. */
+	p = of_irq_find_parent(device);
+	if (p == NULL)
+		return -EINVAL;
+
+	/* Get size of interrupt specifier */
+	tmp = (u32 *)get_property(p, "#interrupt-cells", NULL);
+	if (tmp == NULL) {
+		of_node_put(p);
+		return -EINVAL;
+	}
+	intsize = *tmp;
+
+	/* Check index */
+	if (index * intsize >= intlen)
+		return -EINVAL;
+
+	/* Get new specifier and map it */
+	res = of_irq_map_raw(p, intspec + index * intsize, addr, out_irq);
+	of_node_put(p);
+	return res;
+}
+EXPORT_SYMBOL_GPL(of_irq_map_one);
+
+int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
+{
+	struct device_node *dn, *ppnode;
+	struct pci_dev *ppdev;
+	u32 lspec;
+	u32 laddr[3];
+	u8 pin;
+	int rc;
+
+	/* Check if we have a device node, if yes, fallback to standard OF
+	 * parsing
+	 */
+	dn = pci_device_to_OF_node(pdev);
+	if (dn)
+		return of_irq_map_one(dn, 0, out_irq);
+
+	/* Ok, we don't, time to have fun. Let's start by building up an
+	 * interrupt spec.  we assume #interrupt-cells is 1, which is standard
+	 * for PCI. If you do different, then don't use that routine.
+	 */
+	rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
+	if (rc != 0)
+		return rc;
+	/* No pin, exit */
+	if (pin == 0)
+		return -ENODEV;
+
+	/* Now we walk up the PCI tree */
+	lspec = pin;
+	for (;;) {
+		/* Get the pci_dev of our parent */
+		ppdev = pdev->bus->self;
+
+		/* Ouch, it's a host bridge... */
+		if (ppdev == NULL) {
+#ifdef CONFIG_PPC64
+			ppnode = pci_bus_to_OF_node(pdev->bus);
+#else
+			struct pci_controller *host;
+			host = pci_bus_to_host(pdev->bus);
+			ppnode = host ? host->arch_data : NULL;
+#endif
+			/* No node for host bridge ? give up */
+			if (ppnode == NULL)
+				return -EINVAL;
+		} else
+			/* We found a P2P bridge, check if it has a node */
+			ppnode = pci_device_to_OF_node(ppdev);
+
+		/* Ok, we have found a parent with a device-node, hand over to
+		 * the OF parsing code.
+		 * We build a unit address from the linux device to be used for
+		 * resolution. Note that we use the linux bus number which may
+		 * not match your firmware bus numbering.
+		 * Fortunately, in most cases, interrupt-map-mask doesn't include
+		 * the bus number as part of the matching.
+		 * You should still be careful about that though if you intend
+		 * to rely on this function (you ship  a firmware that doesn't
+		 * create device nodes for all PCI devices).
+		 */
+		if (ppnode)
+			break;
+
+		/* We can only get here if we hit a P2P bridge with no node,
+		 * let's do standard swizzling and try again
+		 */
+		lspec = of_irq_pci_swizzle(PCI_SLOT(pdev->devfn), lspec);
+		pdev = ppdev;
+	}
+
+	laddr[0] = (pdev->bus->number << 16)
+		| (pdev->devfn << 8);
+	laddr[1]  = laddr[2] = 0;
+	return of_irq_map_raw(ppnode, &lspec, laddr, out_irq);
+}
+EXPORT_SYMBOL_GPL(of_irq_map_pci);
+
diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h
index b0768f4..48bef40 100644
--- a/include/asm-powerpc/prom.h
+++ b/include/asm-powerpc/prom.h
@@ -204,6 +204,15 @@ extern int release_OF_resource(struct device_node* node, int index);
  */
 
 
+/* Helper to read a big number */
+static inline u64 of_read_number(u32 *cell, int size)
+{
+	u64 r = 0;
+	while (size--)
+		r = (r << 32) | *(cell++);
+	return r;
+}
+
 /* Translate an OF address block into a CPU physical address
  */
 #define OF_BAD_ADDR	((u64)-1)
@@ -240,5 +249,83 @@ extern void kdump_move_device_tree(void);
 /* CPU OF node matching */
 struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
 
+
+/*
+ * OF interrupt mapping
+ */
+
+/* This structure is returned when an interrupt is mapped. The controller
+ * field needs to be put() after use
+ */
+
+#define OF_MAX_IRQ_SPEC		 4 /* We handle specifiers of at most 4 cells */
+
+struct of_irq {
+	struct device_node *controller;	/* Interrupt controller node */
+	u32 size;			/* Specifier size */
+	u32 specifier[OF_MAX_IRQ_SPEC];	/* Specifier copy */
+};
+
+/***
+ * of_irq_map_init - Initialize the irq remapper
+ * @flags:	flags defining workarounds to enable
+ *
+ * Some machines have bugs in the device-tree which require certain workarounds
+ * to be applied. Call this before any interrupt mapping attempts to enable
+ * those workarounds.
+ */
+#define OF_IMAP_OLDWORLD_MAC	0x00000001
+#define OF_IMAP_NO_PHANDLE	0x00000002
+
+extern void of_irq_map_init(unsigned int flags);
+
+/***
+ * of_irq_map_raw - Low level interrupt tree parsing
+ * @parent:	the device interrupt parent
+ * @intspec:	interrupt specifier ("interrupts" property of the device)
+ * @addr:	address specifier (start of "reg" property of the device)
+ * @out_irq:	structure of_irq filled by this function
+ *
+ * Returns 0 on success and a negative number on error
+ *
+ * This function is a low-level interrupt tree walking function. It
+ * can be used to do a partial walk with synthetized reg and interrupts
+ * properties, for example when resolving PCI interrupts when no device
+ * node exist for the parent.
+ *
+ */
+
+extern int of_irq_map_raw(struct device_node *parent, u32 *intspec, u32 *addr,
+			  struct of_irq *out_irq);
+
+
+/***
+ * of_irq_map_one - Resolve an interrupt for a device
+ * @device:	the device whose interrupt is to be resolved
+ * @index:     	index of the interrupt to resolve
+ * @out_irq:	structure of_irq filled by this function
+ *
+ * This function resolves an interrupt, walking the tree, for a given
+ * device-tree node. It's the high level pendant to of_irq_map_raw().
+ * It also implements the workarounds for OldWolrd Macs.
+ */
+extern int of_irq_map_one(struct device_node *device, int index,
+			  struct of_irq *out_irq);
+
+/***
+ * of_irq_map_pci - Resolve the interrupt for a PCI device
+ * @pdev:	the device whose interrupt is to be resolved
+ * @out_irq:	structure of_irq filled by this function
+ *
+ * This function resolves the PCI interrupt for a given PCI device. If a
+ * device-node exists for a given pci_dev, it will use normal OF tree
+ * walking. If not, it will implement standard swizzling and walk up the
+ * PCI tree until an device-node is found, at which point it will finish
+ * resolving using the OF tree walking.
+ */
+struct pci_dev;
+extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
+
+
 #endif /* __KERNEL__ */
 #endif /* _POWERPC_PROM_H */
-- 
cgit v0.10.2


From f63e115fb50db39706b955b81e3375ef6bab2268 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 3 Jul 2006 19:36:30 +1000
Subject: [POWERPC] Copy i8259 code back to arch/ppc

This copies the i8259 interrupt controller driver from arch/powerpc
to arch/ppc. It's currently shared by both architectures, but the upcoming
arch/powerpc interrupt changes will break the arch/ppc builds. The changes
are too important to just use #ifdef's in the driver.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 054bd8b..cebfae2 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -4,7 +4,6 @@ endif
 
 obj-$(CONFIG_MPIC)		+= mpic.o
 obj-$(CONFIG_PPC_INDIRECT_PCI)	+= indirect_pci.o
-obj-$(CONFIG_PPC_I8259)		+= i8259.o
 obj-$(CONFIG_PPC_MPC106)	+= grackle.o
 obj-$(CONFIG_BOOKE)		+= dcr.o
 obj-$(CONFIG_40x)		+= dcr.o
@@ -14,3 +13,7 @@ obj-$(CONFIG_PPC_83xx)		+= ipic.o
 obj-$(CONFIG_FSL_SOC)		+= fsl_soc.o
 obj-$(CONFIG_PPC_TODC)		+= todc.o
 obj-$(CONFIG_TSI108_BRIDGE)	+= tsi108_pci.o tsi108_dev.o
+
+ifeq ($(CONFIG_PPC_MERGE),y)
+obj-$(CONFIG_PPC_I8259)		+= i8259.o
+ endif
diff --git a/arch/ppc/syslib/Makefile b/arch/ppc/syslib/Makefile
index 490749c..2497bbc 100644
--- a/arch/ppc/syslib/Makefile
+++ b/arch/ppc/syslib/Makefile
@@ -104,3 +104,5 @@ obj-$(CONFIG_PPC_MPC52xx)	+= mpc52xx_setup.o mpc52xx_pic.o \
 ifeq ($(CONFIG_PPC_MPC52xx),y)
 obj-$(CONFIG_PCI)		+= mpc52xx_pci.o
 endif
+
+obj-$(CONFIG_PPC_I8259)		+= i8259.o
-- 
cgit v0.10.2


From 7cddc397027ddf80b2d916f6e8fb15a21e9791c5 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Mon, 3 Jul 2006 12:26:02 +0100
Subject: [ARM] 3708/2: fix SMP build after section ioremap changes

Patch from Lennert Buytenhek

Commit ff0daca525dde796382b9ccd563f169df2571211 broke the SMP build,
this patch fixes it up again.

Signed-off-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 7eac87f..3e86fe7 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -369,6 +369,7 @@ void __iounmap(void __iomem *addr)
 
 	addr = (void __iomem *)(PAGE_MASK & (unsigned long)addr);
 
+#ifndef CONFIG_SMP
 	/*
 	 * If this is a section based mapping we need to handle it
 	 * specially as the VM subysystem does not know how to handle
@@ -390,6 +391,7 @@ void __iounmap(void __iomem *addr)
 		}
 	}
 	write_unlock(&vmlist_lock);
+#endif
 
 	if (!section_mapping)
 		vunmap(addr);
-- 
cgit v0.10.2


From 0ebfff1491ef85d41ddf9c633834838be144f69f Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 3 Jul 2006 21:36:01 +1000
Subject: [POWERPC] Add new interrupt mapping core and change platforms to use
 it

This adds the new irq remapper core and removes the old one.  Because
there are some fundamental conflicts with the old code, like the value
of NO_IRQ which I'm now setting to 0 (as per discussions with Linus),
etc..., this commit also changes the relevant platform and driver code
over to use the new remapper (so as not to cause difficulties later
in bisecting).

This patch removes the old pre-parsing of the open firmware interrupt
tree along with all the bogus assumptions it made to try to renumber
interrupts according to the platform. This is all to be handled by the
new code now.

For the pSeries XICS interrupt controller, a single remapper host is
created for the whole machine regardless of how many interrupt
presentation and source controllers are found, and it's set to match
any device node that isn't a 8259.  That works fine on pSeries and
avoids having to deal with some of the complexities of split source
controllers vs. presentation controllers in the pSeries device trees.

The powerpc i8259 PIC driver now always requests the legacy interrupt
range. It also has the feature of being able to match any device node
(including NULL) if passed no device node as an input. That will help
porting over platforms with broken device-trees like Pegasos who don't
have a proper interrupt tree.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index e47d40a..97ddc02 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -323,13 +323,11 @@ int ibmebus_request_irq(struct ibmebus_dev *dev,
 			unsigned long irq_flags, const char * devname,
 			void *dev_id)
 {
-	unsigned int irq = virt_irq_create_mapping(ist);
+	unsigned int irq = irq_create_mapping(NULL, ist, 0);
 	
 	if (irq == NO_IRQ)
 		return -EINVAL;
 	
-	irq = irq_offset_up(irq);
-	
 	return request_irq(irq, handler,
 			   irq_flags, devname, dev_id);
 }
@@ -337,12 +335,9 @@ EXPORT_SYMBOL(ibmebus_request_irq);
 
 void ibmebus_free_irq(struct ibmebus_dev *dev, u32 ist, void *dev_id)
 {
-	unsigned int irq = virt_irq_create_mapping(ist);
+	unsigned int irq = irq_find_mapping(NULL, ist);
 	
-	irq = irq_offset_up(irq);
 	free_irq(irq, dev_id);
-	
-	return;
 }
 EXPORT_SYMBOL(ibmebus_free_irq);
 
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 9124855..05a7009 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -29,6 +29,8 @@
  * to reduce code space and undefined function references.
  */
 
+#undef DEBUG
+
 #include <linux/module.h>
 #include <linux/threads.h>
 #include <linux/kernel_stat.h>
@@ -46,7 +48,10 @@
 #include <linux/cpumask.h>
 #include <linux/profile.h>
 #include <linux/bitops.h>
-#include <linux/pci.h>
+#include <linux/list.h>
+#include <linux/radix-tree.h>
+#include <linux/mutex.h>
+#include <linux/bootmem.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -57,6 +62,7 @@
 #include <asm/prom.h>
 #include <asm/ptrace.h>
 #include <asm/machdep.h>
+#include <asm/udbg.h>
 #ifdef CONFIG_PPC_ISERIES
 #include <asm/paca.h>
 #endif
@@ -88,7 +94,6 @@ extern atomic_t ipi_sent;
 EXPORT_SYMBOL(irq_desc);
 
 int distribute_irqs = 1;
-u64 ppc64_interrupt_controller;
 #endif /* CONFIG_PPC64 */
 
 int show_interrupts(struct seq_file *p, void *v)
@@ -181,7 +186,7 @@ void fixup_irqs(cpumask_t map)
 
 void do_IRQ(struct pt_regs *regs)
 {
-	int irq;
+	unsigned int irq;
 #ifdef CONFIG_IRQSTACKS
 	struct thread_info *curtp, *irqtp;
 #endif
@@ -212,7 +217,7 @@ void do_IRQ(struct pt_regs *regs)
 	 */
 	irq = ppc_md.get_irq(regs);
 
-	if (irq >= 0) {
+	if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) {
 #ifdef CONFIG_IRQSTACKS
 		/* Switch to the irq stack to handle this */
 		curtp = current_thread_info();
@@ -231,7 +236,7 @@ void do_IRQ(struct pt_regs *regs)
 		} else
 #endif
 			generic_handle_irq(irq, regs);
-	} else if (irq != -2)
+	} else if (irq != NO_IRQ_IGNORE)
 		/* That's not SMP safe ... but who cares ? */
 		ppc_spurious_interrupts++;
 
@@ -254,123 +259,6 @@ void __init init_IRQ(void)
 #endif
 }
 
-#ifdef CONFIG_PPC64
-/*
- * Virtual IRQ mapping code, used on systems with XICS interrupt controllers.
- */
-
-#define UNDEFINED_IRQ 0xffffffff
-unsigned int virt_irq_to_real_map[NR_IRQS];
-
-/*
- * Don't use virtual irqs 0, 1, 2 for devices.
- * The pcnet32 driver considers interrupt numbers < 2 to be invalid,
- * and 2 is the XICS IPI interrupt.
- * We limit virtual irqs to __irq_offet_value less than virt_irq_max so
- * that when we offset them we don't end up with an interrupt
- * number >= virt_irq_max.
- */
-#define MIN_VIRT_IRQ	3
-
-unsigned int virt_irq_max;
-static unsigned int max_virt_irq;
-static unsigned int nr_virt_irqs;
-
-void
-virt_irq_init(void)
-{
-	int i;
-
-	if ((virt_irq_max == 0) || (virt_irq_max > (NR_IRQS - 1)))
-		virt_irq_max = NR_IRQS - 1;
-	max_virt_irq = virt_irq_max - __irq_offset_value;
-	nr_virt_irqs = max_virt_irq - MIN_VIRT_IRQ + 1;
-
-	for (i = 0; i < NR_IRQS; i++)
-		virt_irq_to_real_map[i] = UNDEFINED_IRQ;
-}
-
-/* Create a mapping for a real_irq if it doesn't already exist.
- * Return the virtual irq as a convenience.
- */
-int virt_irq_create_mapping(unsigned int real_irq)
-{
-	unsigned int virq, first_virq;
-	static int warned;
-
-	if (ppc64_interrupt_controller == IC_OPEN_PIC)
-		return real_irq;	/* no mapping for openpic (for now) */
-
-	if (ppc64_interrupt_controller == IC_CELL_PIC)
-		return real_irq;	/* no mapping for iic either */
-
-	/* don't map interrupts < MIN_VIRT_IRQ */
-	if (real_irq < MIN_VIRT_IRQ) {
-		virt_irq_to_real_map[real_irq] = real_irq;
-		return real_irq;
-	}
-
-	/* map to a number between MIN_VIRT_IRQ and max_virt_irq */
-	virq = real_irq;
-	if (virq > max_virt_irq)
-		virq = (virq % nr_virt_irqs) + MIN_VIRT_IRQ;
-
-	/* search for this number or a free slot */
-	first_virq = virq;
-	while (virt_irq_to_real_map[virq] != UNDEFINED_IRQ) {
-		if (virt_irq_to_real_map[virq] == real_irq)
-			return virq;
-		if (++virq > max_virt_irq)
-			virq = MIN_VIRT_IRQ;
-		if (virq == first_virq)
-			goto nospace;	/* oops, no free slots */
-	}
-
-	virt_irq_to_real_map[virq] = real_irq;
-	return virq;
-
- nospace:
-	if (!warned) {
-		printk(KERN_CRIT "Interrupt table is full\n");
-		printk(KERN_CRIT "Increase virt_irq_max (currently %d) "
-		       "in your kernel sources and rebuild.\n", virt_irq_max);
-		warned = 1;
-	}
-	return NO_IRQ;
-}
-
-/*
- * In most cases will get a hit on the very first slot checked in the
- * virt_irq_to_real_map.  Only when there are a large number of
- * IRQs will this be expensive.
- */
-unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
-{
-	unsigned int virq;
-	unsigned int first_virq;
-
-	virq = real_irq;
-
-	if (virq > max_virt_irq)
-		virq = (virq % nr_virt_irqs) + MIN_VIRT_IRQ;
-
-	first_virq = virq;
-
-	do {
-		if (virt_irq_to_real_map[virq] == real_irq)
-			return virq;
-
-		virq++;
-
-		if (virq >= max_virt_irq)
-			virq = 0;
-
-	} while (first_virq != virq);
-
-	return NO_IRQ;
-
-}
-#endif /* CONFIG_PPC64 */
 
 #ifdef CONFIG_IRQSTACKS
 struct thread_info *softirq_ctx[NR_CPUS] __read_mostly;
@@ -430,6 +318,503 @@ void do_softirq(void)
 }
 EXPORT_SYMBOL(do_softirq);
 
+
+/*
+ * IRQ controller and virtual interrupts
+ */
+
+#ifdef CONFIG_PPC_MERGE
+
+static LIST_HEAD(irq_hosts);
+static spinlock_t irq_big_lock = SPIN_LOCK_UNLOCKED;
+
+struct irq_map_entry irq_map[NR_IRQS];
+static unsigned int irq_virq_count = NR_IRQS;
+static struct irq_host *irq_default_host;
+
+struct irq_host *irq_alloc_host(unsigned int revmap_type,
+				unsigned int revmap_arg,
+				struct irq_host_ops *ops,
+				irq_hw_number_t inval_irq)
+{
+	struct irq_host *host;
+	unsigned int size = sizeof(struct irq_host);
+	unsigned int i;
+	unsigned int *rmap;
+	unsigned long flags;
+
+	/* Allocate structure and revmap table if using linear mapping */
+	if (revmap_type == IRQ_HOST_MAP_LINEAR)
+		size += revmap_arg * sizeof(unsigned int);
+	if (mem_init_done)
+		host = kzalloc(size, GFP_KERNEL);
+	else {
+		host = alloc_bootmem(size);
+		if (host)
+			memset(host, 0, size);
+	}
+	if (host == NULL)
+		return NULL;
+
+	/* Fill structure */
+	host->revmap_type = revmap_type;
+	host->inval_irq = inval_irq;
+	host->ops = ops;
+
+	spin_lock_irqsave(&irq_big_lock, flags);
+
+	/* If it's a legacy controller, check for duplicates and
+	 * mark it as allocated (we use irq 0 host pointer for that
+	 */
+	if (revmap_type == IRQ_HOST_MAP_LEGACY) {
+		if (irq_map[0].host != NULL) {
+			spin_unlock_irqrestore(&irq_big_lock, flags);
+			/* If we are early boot, we can't free the structure,
+			 * too bad...
+			 * this will be fixed once slab is made available early
+			 * instead of the current cruft
+			 */
+			if (mem_init_done)
+				kfree(host);
+			return NULL;
+		}
+		irq_map[0].host = host;
+	}
+
+	list_add(&host->link, &irq_hosts);
+	spin_unlock_irqrestore(&irq_big_lock, flags);
+
+	/* Additional setups per revmap type */
+	switch(revmap_type) {
+	case IRQ_HOST_MAP_LEGACY:
+		/* 0 is always the invalid number for legacy */
+		host->inval_irq = 0;
+		/* setup us as the host for all legacy interrupts */
+		for (i = 1; i < NUM_ISA_INTERRUPTS; i++) {
+			irq_map[i].hwirq = 0;
+			smp_wmb();
+			irq_map[i].host = host;
+			smp_wmb();
+
+			/* Clear some flags */
+			get_irq_desc(i)->status
+				&= ~(IRQ_NOREQUEST | IRQ_LEVEL);
+
+			/* Legacy flags are left to default at this point,
+			 * one can then use irq_create_mapping() to
+			 * explicitely change them
+			 */
+			ops->map(host, i, i, 0);
+		}
+		break;
+	case IRQ_HOST_MAP_LINEAR:
+		rmap = (unsigned int *)(host + 1);
+		for (i = 0; i < revmap_arg; i++)
+			rmap[i] = IRQ_NONE;
+		host->revmap_data.linear.size = revmap_arg;
+		smp_wmb();
+		host->revmap_data.linear.revmap = rmap;
+		break;
+	default:
+		break;
+	}
+
+	pr_debug("irq: Allocated host of type %d @0x%p\n", revmap_type, host);
+
+	return host;
+}
+
+struct irq_host *irq_find_host(struct device_node *node)
+{
+	struct irq_host *h, *found = NULL;
+	unsigned long flags;
+
+	/* We might want to match the legacy controller last since
+	 * it might potentially be set to match all interrupts in
+	 * the absence of a device node. This isn't a problem so far
+	 * yet though...
+	 */
+	spin_lock_irqsave(&irq_big_lock, flags);
+	list_for_each_entry(h, &irq_hosts, link)
+		if (h->ops->match == NULL || h->ops->match(h, node)) {
+			found = h;
+			break;
+		}
+	spin_unlock_irqrestore(&irq_big_lock, flags);
+	return found;
+}
+EXPORT_SYMBOL_GPL(irq_find_host);
+
+void irq_set_default_host(struct irq_host *host)
+{
+	pr_debug("irq: Default host set to @0x%p\n", host);
+
+	irq_default_host = host;
+}
+
+void irq_set_virq_count(unsigned int count)
+{
+	pr_debug("irq: Trying to set virq count to %d\n", count);
+
+	BUG_ON(count < NUM_ISA_INTERRUPTS);
+	if (count < NR_IRQS)
+		irq_virq_count = count;
+}
+
+unsigned int irq_create_mapping(struct irq_host *host,
+				irq_hw_number_t hwirq,
+				unsigned int flags)
+{
+	unsigned int virq, hint;
+
+	pr_debug("irq: irq_create_mapping(0x%p, 0x%lx, 0x%x)\n",
+		 host, hwirq, flags);
+
+	/* Look for default host if nececssary */
+	if (host == NULL)
+		host = irq_default_host;
+	if (host == NULL) {
+		printk(KERN_WARNING "irq_create_mapping called for"
+		       " NULL host, hwirq=%lx\n", hwirq);
+		WARN_ON(1);
+		return NO_IRQ;
+	}
+	pr_debug("irq: -> using host @%p\n", host);
+
+	/* Check if mapping already exist, if it does, call
+	 * host->ops->map() to update the flags
+	 */
+	virq = irq_find_mapping(host, hwirq);
+	if (virq != IRQ_NONE) {
+		pr_debug("irq: -> existing mapping on virq %d\n", virq);
+		host->ops->map(host, virq, hwirq, flags);
+		return virq;
+	}
+
+	/* Get a virtual interrupt number */
+	if (host->revmap_type == IRQ_HOST_MAP_LEGACY) {
+		/* Handle legacy */
+		virq = (unsigned int)hwirq;
+		if (virq == 0 || virq >= NUM_ISA_INTERRUPTS)
+			return NO_IRQ;
+		return virq;
+	} else {
+		/* Allocate a virtual interrupt number */
+		hint = hwirq % irq_virq_count;
+		virq = irq_alloc_virt(host, 1, hint);
+		if (virq == NO_IRQ) {
+			pr_debug("irq: -> virq allocation failed\n");
+			return NO_IRQ;
+		}
+	}
+	pr_debug("irq: -> obtained virq %d\n", virq);
+
+	/* Clear some flags */
+	get_irq_desc(virq)->status &= ~(IRQ_NOREQUEST | IRQ_LEVEL);
+
+	/* map it */
+	if (host->ops->map(host, virq, hwirq, flags)) {
+		pr_debug("irq: -> mapping failed, freeing\n");
+		irq_free_virt(virq, 1);
+		return NO_IRQ;
+	}
+	smp_wmb();
+	irq_map[virq].hwirq = hwirq;
+	smp_mb();
+	return virq;
+}
+EXPORT_SYMBOL_GPL(irq_create_mapping);
+
+extern unsigned int irq_create_of_mapping(struct device_node *controller,
+					  u32 *intspec, unsigned int intsize)
+{
+	struct irq_host *host;
+	irq_hw_number_t hwirq;
+	unsigned int flags = IRQ_TYPE_NONE;
+
+	if (controller == NULL)
+		host = irq_default_host;
+	else
+		host = irq_find_host(controller);
+	if (host == NULL)
+		return NO_IRQ;
+
+	/* If host has no translation, then we assume interrupt line */
+	if (host->ops->xlate == NULL)
+		hwirq = intspec[0];
+	else {
+		if (host->ops->xlate(host, controller, intspec, intsize,
+				     &hwirq, &flags))
+			return NO_IRQ;
+	}
+
+	return irq_create_mapping(host, hwirq, flags);
+}
+EXPORT_SYMBOL_GPL(irq_create_of_mapping);
+
+unsigned int irq_of_parse_and_map(struct device_node *dev, int index)
+{
+	struct of_irq oirq;
+
+	if (of_irq_map_one(dev, index, &oirq))
+		return NO_IRQ;
+
+	return irq_create_of_mapping(oirq.controller, oirq.specifier,
+				     oirq.size);
+}
+EXPORT_SYMBOL_GPL(irq_of_parse_and_map);
+
+void irq_dispose_mapping(unsigned int virq)
+{
+	struct irq_host *host = irq_map[virq].host;
+	irq_hw_number_t hwirq;
+	unsigned long flags;
+
+	WARN_ON (host == NULL);
+	if (host == NULL)
+		return;
+
+	/* Never unmap legacy interrupts */
+	if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
+		return;
+
+	/* remove chip and handler */
+	set_irq_chip_and_handler(virq, NULL, NULL);
+
+	/* Make sure it's completed */
+	synchronize_irq(virq);
+
+	/* Tell the PIC about it */
+	if (host->ops->unmap)
+		host->ops->unmap(host, virq);
+	smp_mb();
+
+	/* Clear reverse map */
+	hwirq = irq_map[virq].hwirq;
+	switch(host->revmap_type) {
+	case IRQ_HOST_MAP_LINEAR:
+		if (hwirq < host->revmap_data.linear.size)
+			host->revmap_data.linear.revmap[hwirq] = IRQ_NONE;
+		break;
+	case IRQ_HOST_MAP_TREE:
+		/* Check if radix tree allocated yet */
+		if (host->revmap_data.tree.gfp_mask == 0)
+			break;
+		/* XXX radix tree not safe ! remove lock whem it becomes safe
+		 * and use some RCU sync to make sure everything is ok before we
+		 * can re-use that map entry
+		 */
+		spin_lock_irqsave(&irq_big_lock, flags);
+		radix_tree_delete(&host->revmap_data.tree, hwirq);
+		spin_unlock_irqrestore(&irq_big_lock, flags);
+		break;
+	}
+
+	/* Destroy map */
+	smp_mb();
+	irq_map[virq].hwirq = host->inval_irq;
+
+	/* Set some flags */
+	get_irq_desc(virq)->status |= IRQ_NOREQUEST;
+
+	/* Free it */
+	irq_free_virt(virq, 1);
+}
+EXPORT_SYMBOL_GPL(irq_dispose_mapping);
+
+unsigned int irq_find_mapping(struct irq_host *host,
+			      irq_hw_number_t hwirq)
+{
+	unsigned int i;
+	unsigned int hint = hwirq % irq_virq_count;
+
+	/* Look for default host if nececssary */
+	if (host == NULL)
+		host = irq_default_host;
+	if (host == NULL)
+		return NO_IRQ;
+
+	/* legacy -> bail early */
+	if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
+		return hwirq;
+
+	/* Slow path does a linear search of the map */
+	if (hint < NUM_ISA_INTERRUPTS)
+		hint = NUM_ISA_INTERRUPTS;
+	i = hint;
+	do  {
+		if (irq_map[i].host == host &&
+		    irq_map[i].hwirq == hwirq)
+			return i;
+		i++;
+		if (i >= irq_virq_count)
+			i = NUM_ISA_INTERRUPTS;
+	} while(i != hint);
+	return NO_IRQ;
+}
+EXPORT_SYMBOL_GPL(irq_find_mapping);
+
+
+unsigned int irq_radix_revmap(struct irq_host *host,
+			      irq_hw_number_t hwirq)
+{
+	struct radix_tree_root *tree;
+	struct irq_map_entry *ptr;
+	unsigned int virq;
+	unsigned long flags;
+
+	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
+
+	/* Check if the radix tree exist yet. We test the value of
+	 * the gfp_mask for that. Sneaky but saves another int in the
+	 * structure. If not, we fallback to slow mode
+	 */
+	tree = &host->revmap_data.tree;
+	if (tree->gfp_mask == 0)
+		return irq_find_mapping(host, hwirq);
+
+	/* XXX Current radix trees are NOT SMP safe !!! Remove that lock
+	 * when that is fixed (when Nick's patch gets in
+	 */
+	spin_lock_irqsave(&irq_big_lock, flags);
+
+	/* Now try to resolve */
+	ptr = radix_tree_lookup(tree, hwirq);
+	/* Found it, return */
+	if (ptr) {
+		virq = ptr - irq_map;
+		goto bail;
+	}
+
+	/* If not there, try to insert it */
+	virq = irq_find_mapping(host, hwirq);
+	if (virq != NO_IRQ)
+		radix_tree_insert(tree, virq, &irq_map[virq]);
+ bail:
+	spin_unlock_irqrestore(&irq_big_lock, flags);
+	return virq;
+}
+
+unsigned int irq_linear_revmap(struct irq_host *host,
+			       irq_hw_number_t hwirq)
+{
+	unsigned int *revmap;
+
+	WARN_ON(host->revmap_type != IRQ_HOST_MAP_LINEAR);
+
+	/* Check revmap bounds */
+	if (unlikely(hwirq >= host->revmap_data.linear.size))
+		return irq_find_mapping(host, hwirq);
+
+	/* Check if revmap was allocated */
+	revmap = host->revmap_data.linear.revmap;
+	if (unlikely(revmap == NULL))
+		return irq_find_mapping(host, hwirq);
+
+	/* Fill up revmap with slow path if no mapping found */
+	if (unlikely(revmap[hwirq] == NO_IRQ))
+		revmap[hwirq] = irq_find_mapping(host, hwirq);
+
+	return revmap[hwirq];
+}
+
+unsigned int irq_alloc_virt(struct irq_host *host,
+			    unsigned int count,
+			    unsigned int hint)
+{
+	unsigned long flags;
+	unsigned int i, j, found = NO_IRQ;
+	unsigned int limit = irq_virq_count - count;
+
+	if (count == 0 || count > (irq_virq_count - NUM_ISA_INTERRUPTS))
+		return NO_IRQ;
+
+	spin_lock_irqsave(&irq_big_lock, flags);
+
+	/* Use hint for 1 interrupt if any */
+	if (count == 1 && hint >= NUM_ISA_INTERRUPTS &&
+	    hint < irq_virq_count && irq_map[hint].host == NULL) {
+		found = hint;
+		goto hint_found;
+	}
+
+	/* Look for count consecutive numbers in the allocatable
+	 * (non-legacy) space
+	 */
+	for (i = NUM_ISA_INTERRUPTS; i <= limit; ) {
+		for (j = i; j < (i + count); j++)
+			if (irq_map[j].host != NULL) {
+				i = j + 1;
+				continue;
+			}
+		found = i;
+		break;
+	}
+	if (found == NO_IRQ) {
+		spin_unlock_irqrestore(&irq_big_lock, flags);
+		return NO_IRQ;
+	}
+ hint_found:
+	for (i = found; i < (found + count); i++) {
+		irq_map[i].hwirq = host->inval_irq;
+		smp_wmb();
+		irq_map[i].host = host;
+	}
+	spin_unlock_irqrestore(&irq_big_lock, flags);
+	return found;
+}
+
+void irq_free_virt(unsigned int virq, unsigned int count)
+{
+	unsigned long flags;
+	unsigned int i;
+
+	WARN_ON (virq < NUM_ISA_INTERRUPTS);
+	WARN_ON (count == 0 || (virq + count) > irq_virq_count);
+
+	spin_lock_irqsave(&irq_big_lock, flags);
+	for (i = virq; i < (virq + count); i++) {
+		struct irq_host *host;
+
+		if (i < NUM_ISA_INTERRUPTS ||
+		    (virq + count) > irq_virq_count)
+			continue;
+
+		host = irq_map[i].host;
+		irq_map[i].hwirq = host->inval_irq;
+		smp_wmb();
+		irq_map[i].host = NULL;
+	}
+	spin_unlock_irqrestore(&irq_big_lock, flags);
+}
+
+void irq_early_init(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < NR_IRQS; i++)
+		get_irq_desc(i)->status |= IRQ_NOREQUEST;
+}
+
+/* We need to create the radix trees late */
+static int irq_late_init(void)
+{
+	struct irq_host *h;
+	unsigned long flags;
+
+	spin_lock_irqsave(&irq_big_lock, flags);
+	list_for_each_entry(h, &irq_hosts, link) {
+		if (h->revmap_type == IRQ_HOST_MAP_TREE)
+			INIT_RADIX_TREE(&h->revmap_data.tree, GFP_ATOMIC);
+	}
+	spin_unlock_irqrestore(&irq_big_lock, flags);
+
+	return 0;
+}
+arch_initcall(irq_late_init);
+
+#endif /* CONFIG_PPC_MERGE */
+
 #ifdef CONFIG_PCI_MSI
 int pci_enable_msi(struct pci_dev * pdev)
 {
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index a550566..7e98e77 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -28,6 +28,7 @@ static struct legacy_serial_info {
 	struct device_node		*np;
 	unsigned int			speed;
 	unsigned int			clock;
+	int				irq_check_parent;
 	phys_addr_t			taddr;
 } legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS];
 static unsigned int legacy_serial_count;
@@ -36,7 +37,7 @@ static int legacy_serial_console = -1;
 static int __init add_legacy_port(struct device_node *np, int want_index,
 				  int iotype, phys_addr_t base,
 				  phys_addr_t taddr, unsigned long irq,
-				  upf_t flags)
+				  upf_t flags, int irq_check_parent)
 {
 	u32 *clk, *spd, clock = BASE_BAUD * 16;
 	int index;
@@ -68,7 +69,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
 	if (legacy_serial_infos[index].np != 0) {
 		/* if we still have some room, move it, else override */
 		if (legacy_serial_count < MAX_LEGACY_SERIAL_PORTS) {
-			printk(KERN_INFO "Moved legacy port %d -> %d\n",
+			printk(KERN_DEBUG "Moved legacy port %d -> %d\n",
 			       index, legacy_serial_count);
 			legacy_serial_ports[legacy_serial_count] =
 				legacy_serial_ports[index];
@@ -76,7 +77,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
 				legacy_serial_infos[index];
 			legacy_serial_count++;
 		} else {
-			printk(KERN_INFO "Replacing legacy port %d\n", index);
+			printk(KERN_DEBUG "Replacing legacy port %d\n", index);
 		}
 	}
 
@@ -95,10 +96,11 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
 	legacy_serial_infos[index].np = of_node_get(np);
 	legacy_serial_infos[index].clock = clock;
 	legacy_serial_infos[index].speed = spd ? *spd : 0;
+	legacy_serial_infos[index].irq_check_parent = irq_check_parent;
 
-	printk(KERN_INFO "Found legacy serial port %d for %s\n",
+	printk(KERN_DEBUG "Found legacy serial port %d for %s\n",
 	       index, np->full_name);
-	printk(KERN_INFO "  %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n",
+	printk(KERN_DEBUG "  %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n",
 	       (iotype == UPIO_PORT) ? "port" : "mem",
 	       (unsigned long long)base, (unsigned long long)taddr, irq,
 	       legacy_serial_ports[index].uartclk,
@@ -132,7 +134,7 @@ static int __init add_legacy_soc_port(struct device_node *np,
 	/* Add port, irq will be dealt with later. We passed a translated
 	 * IO port value. It will be fixed up later along with the irq
 	 */
-	return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags);
+	return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags, 0);
 }
 
 static int __init add_legacy_isa_port(struct device_node *np,
@@ -170,7 +172,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
 
 	/* Add port, irq will be dealt with later */
 	return add_legacy_port(np, index, UPIO_PORT, reg[1], taddr,
-			       NO_IRQ, UPF_BOOT_AUTOCONF);
+			       NO_IRQ, UPF_BOOT_AUTOCONF, 0);
 
 }
 
@@ -242,7 +244,8 @@ static int __init add_legacy_pci_port(struct device_node *np,
 	/* Add port, irq will be dealt with later. We passed a translated
 	 * IO port value. It will be fixed up later along with the irq
 	 */
-	return add_legacy_port(np, index, iotype, base, addr, NO_IRQ, UPF_BOOT_AUTOCONF);
+	return add_legacy_port(np, index, iotype, base, addr, NO_IRQ,
+			       UPF_BOOT_AUTOCONF, np != pci_dev);
 }
 #endif
 
@@ -373,27 +376,22 @@ static void __init fixup_port_irq(int index,
 				  struct device_node *np,
 				  struct plat_serial8250_port *port)
 {
+	unsigned int virq;
+
 	DBG("fixup_port_irq(%d)\n", index);
 
-	/* Check for interrupts in that node */
-	if (np->n_intrs > 0) {
-		port->irq = np->intrs[0].line;
-		DBG(" port %d (%s), irq=%d\n",
-		    index, np->full_name, port->irq);
-		return;
+	virq = irq_of_parse_and_map(np, 0);
+	if (virq == NO_IRQ && legacy_serial_infos[index].irq_check_parent) {
+		np = of_get_parent(np);
+		if (np == NULL)
+			return;
+		virq = irq_of_parse_and_map(np, 0);
+		of_node_put(np);
 	}
-
-	/* Check for interrupts in the parent */
-	np = of_get_parent(np);
-	if (np == NULL)
+	if (virq == NO_IRQ)
 		return;
 
-	if (np->n_intrs > 0) {
-		port->irq = np->intrs[0].line;
-		DBG(" port %d (%s), irq=%d\n",
-		    index, np->full_name, port->irq);
-	}
-	of_node_put(np);
+	port->irq = virq;
 }
 
 static void __init fixup_port_pio(int index,
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 1333335..898dae8 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -1404,6 +1404,43 @@ pcibios_update_irq(struct pci_dev *dev, int irq)
 	/* XXX FIXME - update OF device tree node interrupt property */
 }
 
+#ifdef CONFIG_PPC_MERGE
+/* XXX This is a copy of the ppc64 version. This is temporary until we start
+ * merging the 2 PCI layers
+ */
+/*
+ * Reads the interrupt pin to determine if interrupt is use by card.
+ * If the interrupt is used, then gets the interrupt line from the
+ * openfirmware and sets it in the pci_dev and pci_config line.
+ */
+int pci_read_irq_line(struct pci_dev *pci_dev)
+{
+	struct of_irq oirq;
+	unsigned int virq;
+
+	DBG("Try to map irq for %s...\n", pci_name(pci_dev));
+
+	if (of_irq_map_pci(pci_dev, &oirq)) {
+		DBG(" -> failed !\n");
+		return -1;
+	}
+
+	DBG(" -> got one, spec %d cells (0x%08x...) on %s\n",
+	    oirq.size, oirq.specifier[0], oirq.controller->full_name);
+
+	virq = irq_create_of_mapping(oirq.controller, oirq.specifier, oirq.size);
+	if(virq == NO_IRQ) {
+		DBG(" -> failed to map !\n");
+		return -1;
+	}
+	pci_dev->irq = virq;
+	pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, virq);
+
+	return 0;
+}
+EXPORT_SYMBOL(pci_read_irq_line);
+#endif /* CONFIG_PPC_MERGE */
+
 int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
 	u16 cmd, old_cmd;
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index bea8451..efc0b55 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -398,12 +398,8 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 	} else {
 		dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
 		dev->rom_base_reg = PCI_ROM_ADDRESS;
+		/* Maybe do a default OF mapping here */
 		dev->irq = NO_IRQ;
-		if (node->n_intrs > 0) {
-			dev->irq = node->intrs[0].line;
-			pci_write_config_byte(dev, PCI_INTERRUPT_LINE,
-					      dev->irq);
-		}
 	}
 
 	pci_parse_of_addrs(node, dev);
@@ -1288,23 +1284,26 @@ EXPORT_SYMBOL(pcibios_fixup_bus);
  */
 int pci_read_irq_line(struct pci_dev *pci_dev)
 {
-	u8 intpin;
-	struct device_node *node;
-
-    	pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &intpin);
-	if (intpin == 0)
-		return 0;
+	struct of_irq oirq;
+	unsigned int virq;
 
-	node = pci_device_to_OF_node(pci_dev);
-	if (node == NULL)
-		return -1;
+	DBG("Try to map irq for %s...\n", pci_name(pci_dev));
 
-	if (node->n_intrs == 0)
+	if (of_irq_map_pci(pci_dev, &oirq)) {
+		DBG(" -> failed !\n");
 		return -1;
+	}
 
-	pci_dev->irq = node->intrs[0].line;
+	DBG(" -> got one, spec %d cells (0x%08x...) on %s\n",
+	    oirq.size, oirq.specifier[0], oirq.controller->full_name);
 
-	pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, pci_dev->irq);
+	virq = irq_create_of_mapping(oirq.controller, oirq.specifier, oirq.size);
+	if(virq == NO_IRQ) {
+		DBG(" -> failed to map !\n");
+		return -1;
+	}
+	pci_dev->irq = virq;
+	pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, virq);
 
 	return 0;
 }
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index ef3619c..a1787ff 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -30,6 +30,7 @@
 #include <linux/module.h>
 #include <linux/kexec.h>
 #include <linux/debugfs.h>
+#include <linux/irq.h>
 
 #include <asm/prom.h>
 #include <asm/rtas.h>
@@ -86,424 +87,6 @@ static DEFINE_RWLOCK(devtree_lock);
 /* export that to outside world */
 struct device_node *of_chosen;
 
-struct device_node *dflt_interrupt_controller;
-int num_interrupt_controllers;
-
-/*
- * Wrapper for allocating memory for various data that needs to be
- * attached to device nodes as they are processed at boot or when
- * added to the device tree later (e.g. DLPAR).  At boot there is
- * already a region reserved so we just increment *mem_start by size;
- * otherwise we call kmalloc.
- */
-static void * prom_alloc(unsigned long size, unsigned long *mem_start)
-{
-	unsigned long tmp;
-
-	if (!mem_start)
-		return kmalloc(size, GFP_KERNEL);
-
-	tmp = *mem_start;
-	*mem_start += size;
-	return (void *)tmp;
-}
-
-/*
- * Find the device_node with a given phandle.
- */
-static struct device_node * find_phandle(phandle ph)
-{
-	struct device_node *np;
-
-	for (np = allnodes; np != 0; np = np->allnext)
-		if (np->linux_phandle == ph)
-			return np;
-	return NULL;
-}
-
-/*
- * Find the interrupt parent of a node.
- */
-static struct device_node * __devinit intr_parent(struct device_node *p)
-{
-	phandle *parp;
-
-	parp = (phandle *) get_property(p, "interrupt-parent", NULL);
-	if (parp == NULL)
-		return p->parent;
-	p = find_phandle(*parp);
-	if (p != NULL)
-		return p;
-	/*
-	 * On a powermac booted with BootX, we don't get to know the
-	 * phandles for any nodes, so find_phandle will return NULL.
-	 * Fortunately these machines only have one interrupt controller
-	 * so there isn't in fact any ambiguity.  -- paulus
-	 */
-	if (num_interrupt_controllers == 1)
-		p = dflt_interrupt_controller;
-	return p;
-}
-
-/*
- * Find out the size of each entry of the interrupts property
- * for a node.
- */
-int __devinit prom_n_intr_cells(struct device_node *np)
-{
-	struct device_node *p;
-	unsigned int *icp;
-
-	for (p = np; (p = intr_parent(p)) != NULL; ) {
-		icp = (unsigned int *)
-			get_property(p, "#interrupt-cells", NULL);
-		if (icp != NULL)
-			return *icp;
-		if (get_property(p, "interrupt-controller", NULL) != NULL
-		    || get_property(p, "interrupt-map", NULL) != NULL) {
-			printk("oops, node %s doesn't have #interrupt-cells\n",
-			       p->full_name);
-			return 1;
-		}
-	}
-#ifdef DEBUG_IRQ
-	printk("prom_n_intr_cells failed for %s\n", np->full_name);
-#endif
-	return 1;
-}
-
-/*
- * Map an interrupt from a device up to the platform interrupt
- * descriptor.
- */
-static int __devinit map_interrupt(unsigned int **irq, struct device_node **ictrler,
-				   struct device_node *np, unsigned int *ints,
-				   int nintrc)
-{
-	struct device_node *p, *ipar;
-	unsigned int *imap, *imask, *ip;
-	int i, imaplen, match;
-	int newintrc = 0, newaddrc = 0;
-	unsigned int *reg;
-	int naddrc;
-
-	reg = (unsigned int *) get_property(np, "reg", NULL);
-	naddrc = prom_n_addr_cells(np);
-	p = intr_parent(np);
-	while (p != NULL) {
-		if (get_property(p, "interrupt-controller", NULL) != NULL)
-			/* this node is an interrupt controller, stop here */
-			break;
-		imap = (unsigned int *)
-			get_property(p, "interrupt-map", &imaplen);
-		if (imap == NULL) {
-			p = intr_parent(p);
-			continue;
-		}
-		imask = (unsigned int *)
-			get_property(p, "interrupt-map-mask", NULL);
-		if (imask == NULL) {
-			printk("oops, %s has interrupt-map but no mask\n",
-			       p->full_name);
-			return 0;
-		}
-		imaplen /= sizeof(unsigned int);
-		match = 0;
-		ipar = NULL;
-		while (imaplen > 0 && !match) {
-			/* check the child-interrupt field */
-			match = 1;
-			for (i = 0; i < naddrc && match; ++i)
-				match = ((reg[i] ^ imap[i]) & imask[i]) == 0;
-			for (; i < naddrc + nintrc && match; ++i)
-				match = ((ints[i-naddrc] ^ imap[i]) & imask[i]) == 0;
-			imap += naddrc + nintrc;
-			imaplen -= naddrc + nintrc;
-			/* grab the interrupt parent */
-			ipar = find_phandle((phandle) *imap++);
-			--imaplen;
-			if (ipar == NULL && num_interrupt_controllers == 1)
-				/* cope with BootX not giving us phandles */
-				ipar = dflt_interrupt_controller;
-			if (ipar == NULL) {
-				printk("oops, no int parent %x in map of %s\n",
-				       imap[-1], p->full_name);
-				return 0;
-			}
-			/* find the parent's # addr and intr cells */
-			ip = (unsigned int *)
-				get_property(ipar, "#interrupt-cells", NULL);
-			if (ip == NULL) {
-				printk("oops, no #interrupt-cells on %s\n",
-				       ipar->full_name);
-				return 0;
-			}
-			newintrc = *ip;
-			ip = (unsigned int *)
-				get_property(ipar, "#address-cells", NULL);
-			newaddrc = (ip == NULL)? 0: *ip;
-			imap += newaddrc + newintrc;
-			imaplen -= newaddrc + newintrc;
-		}
-		if (imaplen < 0) {
-			printk("oops, error decoding int-map on %s, len=%d\n",
-			       p->full_name, imaplen);
-			return 0;
-		}
-		if (!match) {
-#ifdef DEBUG_IRQ
-			printk("oops, no match in %s int-map for %s\n",
-			       p->full_name, np->full_name);
-#endif
-			return 0;
-		}
-		p = ipar;
-		naddrc = newaddrc;
-		nintrc = newintrc;
-		ints = imap - nintrc;
-		reg = ints - naddrc;
-	}
-	if (p == NULL) {
-#ifdef DEBUG_IRQ
-		printk("hmmm, int tree for %s doesn't have ctrler\n",
-		       np->full_name);
-#endif
-		return 0;
-	}
-	*irq = ints;
-	*ictrler = p;
-	return nintrc;
-}
-
-static unsigned char map_isa_senses[4] = {
-	IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE,
-	IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE,
-	IRQ_SENSE_EDGE  | IRQ_POLARITY_NEGATIVE,
-	IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE
-};
-
-static unsigned char map_mpic_senses[4] = {
-	IRQ_SENSE_EDGE  | IRQ_POLARITY_POSITIVE,
-	IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE,
-	/* 2 seems to be used for the 8259 cascade... */
-	IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE,
-	IRQ_SENSE_EDGE  | IRQ_POLARITY_NEGATIVE,
-};
-
-static int __devinit finish_node_interrupts(struct device_node *np,
-					    unsigned long *mem_start,
-					    int measure_only)
-{
-	unsigned int *ints;
-	int intlen, intrcells, intrcount;
-	int i, j, n, sense;
-	unsigned int *irq, virq;
-	struct device_node *ic;
-	int trace = 0;
-
-	//#define TRACE(fmt...) do { if (trace) { printk(fmt); mdelay(1000); } } while(0)
-#define TRACE(fmt...)
-
-	if (!strcmp(np->name, "smu-doorbell"))
-		trace = 1;
-
-	TRACE("Finishing SMU doorbell ! num_interrupt_controllers = %d\n",
-	      num_interrupt_controllers);
-
-	if (num_interrupt_controllers == 0) {
-		/*
-		 * Old machines just have a list of interrupt numbers
-		 * and no interrupt-controller nodes.
-		 */
-		ints = (unsigned int *) get_property(np, "AAPL,interrupts",
-						     &intlen);
-		/* XXX old interpret_pci_props looked in parent too */
-		/* XXX old interpret_macio_props looked for interrupts
-		   before AAPL,interrupts */
-		if (ints == NULL)
-			ints = (unsigned int *) get_property(np, "interrupts",
-							     &intlen);
-		if (ints == NULL)
-			return 0;
-
-		np->n_intrs = intlen / sizeof(unsigned int);
-		np->intrs = prom_alloc(np->n_intrs * sizeof(np->intrs[0]),
-				       mem_start);
-		if (!np->intrs)
-			return -ENOMEM;
-		if (measure_only)
-			return 0;
-
-		for (i = 0; i < np->n_intrs; ++i) {
-			np->intrs[i].line = *ints++;
-			np->intrs[i].sense = IRQ_SENSE_LEVEL
-				| IRQ_POLARITY_NEGATIVE;
-		}
-		return 0;
-	}
-
-	ints = (unsigned int *) get_property(np, "interrupts", &intlen);
-	TRACE("ints=%p, intlen=%d\n", ints, intlen);
-	if (ints == NULL)
-		return 0;
-	intrcells = prom_n_intr_cells(np);
-	intlen /= intrcells * sizeof(unsigned int);
-	TRACE("intrcells=%d, new intlen=%d\n", intrcells, intlen);
-	np->intrs = prom_alloc(intlen * sizeof(*(np->intrs)), mem_start);
-	if (!np->intrs)
-		return -ENOMEM;
-
-	if (measure_only)
-		return 0;
-
-	intrcount = 0;
-	for (i = 0; i < intlen; ++i, ints += intrcells) {
-		n = map_interrupt(&irq, &ic, np, ints, intrcells);
-		TRACE("map, irq=%d, ic=%p, n=%d\n", irq, ic, n);
-		if (n <= 0)
-			continue;
-
-		/* don't map IRQ numbers under a cascaded 8259 controller */
-		if (ic && device_is_compatible(ic, "chrp,iic")) {
-			np->intrs[intrcount].line = irq[0];
-			sense = (n > 1)? (irq[1] & 3): 3;
-			np->intrs[intrcount].sense = map_isa_senses[sense];
-		} else {
-			virq = virt_irq_create_mapping(irq[0]);
-			TRACE("virq=%d\n", virq);
-#ifdef CONFIG_PPC64
-			if (virq == NO_IRQ) {
-				printk(KERN_CRIT "Could not allocate interrupt"
-				       " number for %s\n", np->full_name);
-				continue;
-			}
-#endif
-			np->intrs[intrcount].line = irq_offset_up(virq);
-			sense = (n > 1)? (irq[1] & 3): 1;
-
-			/* Apple uses bits in there in a different way, let's
-			 * only keep the real sense bit on macs
-			 */
-			if (machine_is(powermac))
-				sense &= 0x1;
-			np->intrs[intrcount].sense = map_mpic_senses[sense];
-		}
-
-#ifdef CONFIG_PPC64
-		/* We offset irq numbers for the u3 MPIC by 128 in PowerMac */
-		if (machine_is(powermac) && ic && ic->parent) {
-			char *name = get_property(ic->parent, "name", NULL);
-			if (name && !strcmp(name, "u3"))
-				np->intrs[intrcount].line += 128;
-			else if (!(name && (!strcmp(name, "mac-io") ||
-					    !strcmp(name, "u4"))))
-				/* ignore other cascaded controllers, such as
-				   the k2-sata-root */
-				break;
-		}
-#endif /* CONFIG_PPC64 */
-		if (n > 2) {
-			printk("hmmm, got %d intr cells for %s:", n,
-			       np->full_name);
-			for (j = 0; j < n; ++j)
-				printk(" %d", irq[j]);
-			printk("\n");
-		}
-		++intrcount;
-	}
-	np->n_intrs = intrcount;
-
-	return 0;
-}
-
-static int __devinit finish_node(struct device_node *np,
-				 unsigned long *mem_start,
-				 int measure_only)
-{
-	struct device_node *child;
-	int rc = 0;
-
-	rc = finish_node_interrupts(np, mem_start, measure_only);
-	if (rc)
-		goto out;
-
-	for (child = np->child; child != NULL; child = child->sibling) {
-		rc = finish_node(child, mem_start, measure_only);
-		if (rc)
-			goto out;
-	}
-out:
-	return rc;
-}
-
-static void __init scan_interrupt_controllers(void)
-{
-	struct device_node *np;
-	int n = 0;
-	char *name, *ic;
-	int iclen;
-
-	for (np = allnodes; np != NULL; np = np->allnext) {
-		ic = get_property(np, "interrupt-controller", &iclen);
-		name = get_property(np, "name", NULL);
-		/* checking iclen makes sure we don't get a false
-		   match on /chosen.interrupt_controller */
-		if ((name != NULL
-		     && strcmp(name, "interrupt-controller") == 0)
-		    || (ic != NULL && iclen == 0
-			&& strcmp(name, "AppleKiwi"))) {
-			if (n == 0)
-				dflt_interrupt_controller = np;
-			++n;
-		}
-	}
-	num_interrupt_controllers = n;
-}
-
-/**
- * finish_device_tree is called once things are running normally
- * (i.e. with text and data mapped to the address they were linked at).
- * It traverses the device tree and fills in some of the additional,
- * fields in each node like {n_}addrs and {n_}intrs, the virt interrupt
- * mapping is also initialized at this point.
- */
-void __init finish_device_tree(void)
-{
-	unsigned long start, end, size = 0;
-
-	DBG(" -> finish_device_tree\n");
-
-#ifdef CONFIG_PPC64
-	/* Initialize virtual IRQ map */
-	virt_irq_init();
-#endif
-	scan_interrupt_controllers();
-
-	/*
-	 * Finish device-tree (pre-parsing some properties etc...)
-	 * We do this in 2 passes. One with "measure_only" set, which
-	 * will only measure the amount of memory needed, then we can
-	 * allocate that memory, and call finish_node again. However,
-	 * we must be careful as most routines will fail nowadays when
-	 * prom_alloc() returns 0, so we must make sure our first pass
-	 * doesn't start at 0. We pre-initialize size to 16 for that
-	 * reason and then remove those additional 16 bytes
-	 */
-	size = 16;
-	finish_node(allnodes, &size, 1);
-	size -= 16;
-
-	if (0 == size)
-		end = start = 0;
-	else
-		end = start = (unsigned long)__va(lmb_alloc(size, 128));
-
-	finish_node(allnodes, &end, 0);
-	BUG_ON(end != start + size);
-
-	DBG(" <- finish_device_tree\n");
-}
-
 static inline char *find_flat_dt_string(u32 offset)
 {
 	return ((char *)initial_boot_params) +
@@ -1389,27 +972,6 @@ prom_n_size_cells(struct device_node* np)
 EXPORT_SYMBOL(prom_n_size_cells);
 
 /**
- * Work out the sense (active-low level / active-high edge)
- * of each interrupt from the device tree.
- */
-void __init prom_get_irq_senses(unsigned char *senses, int off, int max)
-{
-	struct device_node *np;
-	int i, j;
-
-	/* default to level-triggered */
-	memset(senses, IRQ_SENSE_LEVEL | IRQ_POLARITY_NEGATIVE, max - off);
-
-	for (np = allnodes; np != 0; np = np->allnext) {
-		for (j = 0; j < np->n_intrs; j++) {
-			i = np->intrs[j].line;
-			if (i >= off && i < max)
-				senses[i-off] = np->intrs[j].sense;
-		}
-	}
-}
-
-/**
  * Construct and return a list of the device_nodes with a given name.
  */
 struct device_node *find_devices(const char *name)
@@ -1808,7 +1370,6 @@ static void of_node_release(struct kref *kref)
 			node->deadprops = NULL;
 		}
 	}
-	kfree(node->intrs);
 	kfree(node->full_name);
 	kfree(node->data);
 	kfree(node);
@@ -1881,13 +1442,7 @@ void of_detach_node(const struct device_node *np)
 #ifdef CONFIG_PPC_PSERIES
 /*
  * Fix up the uninitialized fields in a new device node:
- * name, type, n_addrs, addrs, n_intrs, intrs, and pci-specific fields
- *
- * A lot of boot-time code is duplicated here, because functions such
- * as finish_node_interrupts, interpret_pci_props, etc. cannot use the
- * slab allocator.
- *
- * This should probably be split up into smaller chunks.
+ * name, type and pci-specific fields
  */
 
 static int of_finish_dynamic_node(struct device_node *node)
@@ -1928,8 +1483,6 @@ static int prom_reconfig_notifier(struct notifier_block *nb,
 	switch (action) {
 	case PSERIES_RECONFIG_ADD:
 		err = of_finish_dynamic_node(node);
-		if (!err)
-			finish_node(node, NULL, 0);
 		if (err < 0) {
 			printk(KERN_ERR "finish_node returned %d\n", err);
 			err = NOTIFY_BAD;
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 6eb7e49..cda0226 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -297,19 +297,9 @@ unsigned long __init find_and_init_phbs(void)
 	struct device_node *node;
 	struct pci_controller *phb;
 	unsigned int index;
-	unsigned int root_size_cells = 0;
-	unsigned int *opprop = NULL;
 	struct device_node *root = of_find_node_by_path("/");
 
-	if (ppc64_interrupt_controller == IC_OPEN_PIC) {
-		opprop = (unsigned int *)get_property(root,
-				"platform-open-pic", NULL);
-	}
-
-	root_size_cells = prom_n_size_cells(root);
-
 	index = 0;
-
 	for (node = of_get_next_child(root, NULL);
 	     node != NULL;
 	     node = of_get_next_child(root, node)) {
@@ -324,13 +314,6 @@ unsigned long __init find_and_init_phbs(void)
 		setup_phb(node, phb);
 		pci_process_bridge_OF_ranges(phb, node, 0);
 		pci_setup_phb_io(phb, index == 0);
-#ifdef CONFIG_PPC_PSERIES
-		/* XXX This code need serious fixing ... --BenH */
-		if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) {
-			int addr = root_size_cells * (index + 2) - 1;
-			mpic_assign_isu(pSeries_mpic, index, opprop[addr]);
-		}
-#endif
 		index++;
 	}
 
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 0c21de3..e0df2ba 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -239,7 +239,6 @@ void __init setup_arch(char **cmdline_p)
 		ppc_md.init_early();
 
 	find_legacy_serial_ports();
-	finish_device_tree();
 
 	smp_setup_cpu_maps();
 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index ac7276c..fd1785e 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -361,12 +361,15 @@ void __init setup_system(void)
 
 	/*
 	 * Fill the ppc64_caches & systemcfg structures with informations
-	 * retrieved from the device-tree. Need to be called before
-	 * finish_device_tree() since the later requires some of the
-	 * informations filled up here to properly parse the interrupt tree.
+ 	 * retrieved from the device-tree.
 	 */
 	initialize_cache_info();
 
+	/*
+	 * Initialize irq remapping subsystem
+	 */
+	irq_early_init();
+
 #ifdef CONFIG_PPC_RTAS
 	/*
 	 * Initialize RTAS if available
@@ -394,12 +397,6 @@ void __init setup_system(void)
 	find_legacy_serial_ports();
 
 	/*
-	 * "Finish" the device-tree, that is do the actual parsing of
-	 * some of the properties like the interrupt map
-	 */
-	finish_device_tree();
-
-	/*
 	 * Initialize xmon
 	 */
 #ifdef CONFIG_XMON_DEFAULT
@@ -427,8 +424,6 @@ void __init setup_system(void)
 
 	printk("-----------------------------------------------------\n");
 	printk("ppc64_pft_size                = 0x%lx\n", ppc64_pft_size);
-	printk("ppc64_interrupt_controller    = 0x%ld\n",
-	       ppc64_interrupt_controller);
 	printk("physicalMemorySize            = 0x%lx\n", lmb_phys_mem_size());
 	printk("ppc64_caches.dcache_line_size = 0x%x\n",
 	       ppc64_caches.dline_size);
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index cdf5867..fad8580 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -218,7 +218,6 @@ struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
 {
 	struct vio_dev *viodev;
 	unsigned int *unit_address;
-	unsigned int *irq_p;
 
 	/* we need the 'device_type' property, in order to match with drivers */
 	if (of_node->type == NULL) {
@@ -243,16 +242,7 @@ struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node)
 
 	viodev->dev.platform_data = of_node_get(of_node);
 
-	viodev->irq = NO_IRQ;
-	irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL);
-	if (irq_p) {
-		int virq = virt_irq_create_mapping(*irq_p);
-		if (virq == NO_IRQ) {
-			printk(KERN_ERR "Unable to allocate interrupt "
-			       "number for %s\n", of_node->full_name);
-		} else
-			viodev->irq = irq_offset_up(virq);
-	}
+	viodev->irq = irq_of_parse_and_map(of_node, 0);
 
 	snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address);
 	viodev->name = of_node->name;
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 97936f5..9d5da78 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -1,6 +1,9 @@
 /*
  * Cell Internal Interrupt Controller
  *
+ * Copyright (C) 2006 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *                    IBM, Corp.
+ *
  * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
  *
  * Author: Arnd Bergmann <arndb@de.ibm.com>
@@ -25,11 +28,13 @@
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/types.h>
+#include <linux/ioport.h>
 
 #include <asm/io.h>
 #include <asm/pgtable.h>
 #include <asm/prom.h>
 #include <asm/ptrace.h>
+#include <asm/machdep.h>
 
 #include "interrupt.h"
 #include "cbe_regs.h"
@@ -39,9 +44,25 @@ struct iic {
 	u8 target_id;
 	u8 eoi_stack[16];
 	int eoi_ptr;
+	struct irq_host *host;
 };
 
 static DEFINE_PER_CPU(struct iic, iic);
+#define IIC_NODE_COUNT	2
+static struct irq_host *iic_hosts[IIC_NODE_COUNT];
+
+/* Convert between "pending" bits and hw irq number */
+static irq_hw_number_t iic_pending_to_hwnum(struct cbe_iic_pending_bits bits)
+{
+	unsigned char unit = bits.source & 0xf;
+
+	if (bits.flags & CBE_IIC_IRQ_IPI)
+		return IIC_IRQ_IPI0 | (bits.prio >> 4);
+	else if (bits.class <= 3)
+		return (bits.class << 4) | unit;
+	else
+		return IIC_IRQ_INVALID;
+}
 
 static void iic_mask(unsigned int irq)
 {
@@ -65,197 +86,21 @@ static struct irq_chip iic_chip = {
 	.eoi = iic_eoi,
 };
 
-/* XXX All of this has to be reworked completely. We need to assign a real
- * interrupt numbers to the external interrupts and remove all the hard coded
- * interrupt maps (rely on the device-tree whenever possible).
- *
- * Basically, my scheme is to define the "pendings" bits to be the HW interrupt
- * number (ignoring the data and flags here). That means we can sort-of split
- * external sources based on priority, and we can use request_irq() on pretty
- * much anything.
- *
- * For spider or axon, they have their own interrupt space. spider will just have
- * local "hardward" interrupts 0...xx * node stride. The node stride is not
- * necessary (separate interrupt chips will have separate HW number space), but
- * will allow to be compatible with existing device-trees.
- *
- * All of thise little world will get a standard remapping scheme to map those HW
- * numbers into the linux flat irq number space.
-*/
-static int iic_external_get_irq(struct cbe_iic_pending_bits pending)
-{
-	int irq;
-	unsigned char node, unit;
-
-	node = pending.source >> 4;
-	unit = pending.source & 0xf;
-	irq = -1;
-
-	/*
-	 * This mapping is specific to the Cell Broadband
-	 * Engine. We might need to get the numbers
-	 * from the device tree to support future CPUs.
-	 */
-	switch (unit) {
-	case 0x00:
-	case 0x0b:
-		/*
-		 * One of these units can be connected
-		 * to an external interrupt controller.
-		 */
-		if (pending.class != 2)
-			break;
-		/* TODO: We might want to silently ignore cascade interrupts
-		 * when no cascade handler exist yet
-		 */
-		irq = IIC_EXT_CASCADE + node * IIC_NODE_STRIDE;
-		break;
-	case 0x01 ... 0x04:
-	case 0x07 ... 0x0a:
-		/*
-		 * These units are connected to the SPEs
-		 */
-		if (pending.class > 2)
-			break;
-		irq = IIC_SPE_OFFSET
-			+ pending.class * IIC_CLASS_STRIDE
-			+ node * IIC_NODE_STRIDE
-			+ unit;
-		break;
-	}
-	if (irq == -1)
-		printk(KERN_WARNING "Unexpected interrupt class %02x, "
-			"source %02x, prio %02x, cpu %02x\n", pending.class,
-			pending.source, pending.prio, smp_processor_id());
-	return irq;
-}
-
 /* Get an IRQ number from the pending state register of the IIC */
-int iic_get_irq(struct pt_regs *regs)
-{
-	struct iic *iic;
-	int irq;
-	struct cbe_iic_pending_bits pending;
-
-	iic = &__get_cpu_var(iic);
-	*(unsigned long *) &pending = 
-		in_be64((unsigned long __iomem *) &iic->regs->pending_destr);
-	iic->eoi_stack[++iic->eoi_ptr] = pending.prio;
-	BUG_ON(iic->eoi_ptr > 15);
-
-	irq = -1;
-	if (pending.flags & CBE_IIC_IRQ_VALID) {
-		if (pending.flags & CBE_IIC_IRQ_IPI) {
-			irq = IIC_IPI_OFFSET + (pending.prio >> 4);
-/*
-			if (irq > 0x80)
-				printk(KERN_WARNING "Unexpected IPI prio %02x"
-					"on CPU %02x\n", pending.prio,
-							smp_processor_id());
-*/
-		} else {
-			irq = iic_external_get_irq(pending);
-		}
-	}
-	return irq;
-}
-
-/* hardcoded part to be compatible with older firmware */
-
-static int __init setup_iic_hardcoded(void)
-{
-	struct device_node *np;
-	int nodeid, cpu;
-	unsigned long regs;
-	struct iic *iic;
-
-	for_each_possible_cpu(cpu) {
-		iic = &per_cpu(iic, cpu);
-		nodeid = cpu/2;
-
-		for (np = of_find_node_by_type(NULL, "cpu");
-		     np;
-		     np = of_find_node_by_type(np, "cpu")) {
-			if (nodeid == *(int *)get_property(np, "node-id", NULL))
-				break;
-			}
-
-		if (!np) {
-			printk(KERN_WARNING "IIC: CPU %d not found\n", cpu);
-			iic->regs = NULL;
-			iic->target_id = 0xff;
-			return -ENODEV;
-			}
-
-		regs = *(long *)get_property(np, "iic", NULL);
-
-		/* hack until we have decided on the devtree info */
-		regs += 0x400;
-		if (cpu & 1)
-			regs += 0x20;
-
-		printk(KERN_INFO "IIC for CPU %d at %lx\n", cpu, regs);
-		iic->regs = ioremap(regs, sizeof(struct cbe_iic_thread_regs));
-		iic->target_id = (nodeid << 4) + ((cpu & 1) ? 0xf : 0xe);
-		iic->eoi_stack[0] = 0xff;
-	}
-
-	return 0;
-}
-
-static int __init setup_iic(void)
+static unsigned int iic_get_irq(struct pt_regs *regs)
 {
-	struct device_node *dn;
-	unsigned long *regs;
-	char *compatible;
- 	unsigned *np, found = 0;
-	struct iic *iic = NULL;
-
-	for (dn = NULL; (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
-		compatible = (char *)get_property(dn, "compatible", NULL);
-
-		if (!compatible) {
-			printk(KERN_WARNING "no compatible property found !\n");
-			continue;
-		}
-
- 		if (strstr(compatible, "IBM,CBEA-Internal-Interrupt-Controller"))
- 			regs = (unsigned long *)get_property(dn,"reg", NULL);
- 		else
-			continue;
-
- 		if (!regs)
- 			printk(KERN_WARNING "IIC: no reg property\n");
-
- 		np = (unsigned int *)get_property(dn, "ibm,interrupt-server-ranges", NULL);
-
- 		if (!np) {
-			printk(KERN_WARNING "IIC: CPU association not found\n");
-			iic->regs = NULL;
-			iic->target_id = 0xff;
-			return -ENODEV;
-		}
-
- 		iic = &per_cpu(iic, np[0]);
- 		iic->regs = ioremap(regs[0], sizeof(struct cbe_iic_thread_regs));
-		iic->target_id = ((np[0] & 2) << 3) + ((np[0] & 1) ? 0xf : 0xe);
-		iic->eoi_stack[0] = 0xff;
- 		printk("IIC for CPU %d at %lx mapped to %p\n", np[0], regs[0], iic->regs);
-
- 		iic = &per_cpu(iic, np[1]);
- 		iic->regs = ioremap(regs[2], sizeof(struct cbe_iic_thread_regs));
-		iic->target_id = ((np[1] & 2) << 3) + ((np[1] & 1) ? 0xf : 0xe);
-		iic->eoi_stack[0] = 0xff;
-
- 		printk("IIC for CPU %d at %lx mapped to %p\n", np[1], regs[2], iic->regs);
-
-		found++;
-  	}
-
-	if (found)
-		return 0;
-	else
-		return -ENODEV;
+  	struct cbe_iic_pending_bits pending;
+ 	struct iic *iic;
+
+ 	iic = &__get_cpu_var(iic);
+ 	*(unsigned long *) &pending =
+ 		in_be64((unsigned long __iomem *) &iic->regs->pending_destr);
+ 	iic->eoi_stack[++iic->eoi_ptr] = pending.prio;
+ 	BUG_ON(iic->eoi_ptr > 15);
+	if (pending.flags & CBE_IIC_IRQ_VALID)
+		return irq_linear_revmap(iic->host,
+ 					 iic_pending_to_hwnum(pending));
+	return NO_IRQ;
 }
 
 #ifdef CONFIG_SMP
@@ -263,12 +108,12 @@ static int __init setup_iic(void)
 /* Use the highest interrupt priorities for IPI */
 static inline int iic_ipi_to_irq(int ipi)
 {
-	return IIC_IPI_OFFSET + IIC_NUM_IPIS - 1 - ipi;
+	return IIC_IRQ_IPI0 + IIC_NUM_IPIS - 1 - ipi;
 }
 
 static inline int iic_irq_to_ipi(int irq)
 {
-	return IIC_NUM_IPIS - 1 - (irq - IIC_IPI_OFFSET);
+	return IIC_NUM_IPIS - 1 - (irq - IIC_IRQ_IPI0);
 }
 
 void iic_setup_cpu(void)
@@ -287,22 +132,51 @@ u8 iic_get_target_id(int cpu)
 }
 EXPORT_SYMBOL_GPL(iic_get_target_id);
 
+struct irq_host *iic_get_irq_host(int node)
+{
+	if (node < 0 || node >= IIC_NODE_COUNT)
+		return NULL;
+	return iic_hosts[node];
+}
+EXPORT_SYMBOL_GPL(iic_get_irq_host);
+
+
 static irqreturn_t iic_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
 {
-	smp_message_recv(iic_irq_to_ipi(irq), regs);
+	int ipi = (int)(long)dev_id;
+
+	smp_message_recv(ipi, regs);
+
 	return IRQ_HANDLED;
 }
 
 static void iic_request_ipi(int ipi, const char *name)
 {
-	int irq;
+	int node, virq;
 
-	irq = iic_ipi_to_irq(ipi);
-
-	/* IPIs are marked IRQF_DISABLED as they must run with irqs
-	 * disabled */
- 	set_irq_chip_and_handler(irq, &iic_chip, handle_percpu_irq);
-	request_irq(irq, iic_ipi_action, IRQF_DISABLED, name, NULL);
+	for (node = 0; node < IIC_NODE_COUNT; node++) {
+		char *rname;
+		if (iic_hosts[node] == NULL)
+			continue;
+		virq = irq_create_mapping(iic_hosts[node],
+					  iic_ipi_to_irq(ipi), 0);
+		if (virq == NO_IRQ) {
+			printk(KERN_ERR
+			       "iic: failed to map IPI %s on node %d\n",
+			       name, node);
+			continue;
+		}
+		rname = kzalloc(strlen(name) + 16, GFP_KERNEL);
+		if (rname)
+			sprintf(rname, "%s node %d", name, node);
+		else
+			rname = (char *)name;
+		if (request_irq(virq, iic_ipi_action, IRQF_DISABLED,
+				rname, (void *)(long)ipi))
+			printk(KERN_ERR
+			       "iic: failed to request IPI %s on node %d\n",
+			       name, node);
+	}
 }
 
 void iic_request_IPIs(void)
@@ -313,41 +187,119 @@ void iic_request_IPIs(void)
 	iic_request_ipi(PPC_MSG_DEBUGGER_BREAK, "IPI-debug");
 #endif /* CONFIG_DEBUGGER */
 }
+
 #endif /* CONFIG_SMP */
 
-static void __init iic_setup_builtin_handlers(void)
+
+static int iic_host_match(struct irq_host *h, struct device_node *node)
+{
+	return h->host_data != NULL && node == h->host_data;
+}
+
+static int iic_host_map(struct irq_host *h, unsigned int virq,
+			irq_hw_number_t hw, unsigned int flags)
+{
+	if (hw < IIC_IRQ_IPI0)
+		set_irq_chip_and_handler(virq, &iic_chip, handle_fasteoi_irq);
+	else
+		set_irq_chip_and_handler(virq, &iic_chip, handle_percpu_irq);
+	return 0;
+}
+
+static int iic_host_xlate(struct irq_host *h, struct device_node *ct,
+			   u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
 {
-	int be, isrc;
+	/* Currently, we don't translate anything. That needs to be fixed as
+	 * we get better defined device-trees. iic interrupts have to be
+	 * explicitely mapped by whoever needs them
+	 */
+	return -ENODEV;
+}
+
+static struct irq_host_ops iic_host_ops = {
+	.match = iic_host_match,
+	.map = iic_host_map,
+	.xlate = iic_host_xlate,
+};
+
+static void __init init_one_iic(unsigned int hw_cpu, unsigned long addr,
+				struct irq_host *host)
+{
+	/* XXX FIXME: should locate the linux CPU number from the HW cpu
+	 * number properly. We are lucky for now
+	 */
+	struct iic *iic = &per_cpu(iic, hw_cpu);
 
-	/* XXX FIXME: Assume two threads per BE are present */
-	for (be=0; be < num_present_cpus() / 2; be++) {
-		int irq;
+	iic->regs = ioremap(addr, sizeof(struct cbe_iic_thread_regs));
+	BUG_ON(iic->regs == NULL);
 
-		/* setup SPE chip and handlers */
-		for (isrc = 0; isrc < IIC_CLASS_STRIDE * 3; isrc++) {
-			irq = IIC_NODE_STRIDE * be + IIC_SPE_OFFSET + isrc;
-			set_irq_chip_and_handler(irq, &iic_chip, handle_fasteoi_irq);
+	iic->target_id = ((hw_cpu & 2) << 3) | ((hw_cpu & 1) ? 0xf : 0xe);
+	iic->eoi_stack[0] = 0xff;
+	iic->host = host;
+	out_be64(&iic->regs->prio, 0);
+
+	printk(KERN_INFO "IIC for CPU %d at %lx mapped to %p, target id 0x%x\n",
+	       hw_cpu, addr, iic->regs, iic->target_id);
+}
+
+static int __init setup_iic(void)
+{
+	struct device_node *dn;
+	struct resource r0, r1;
+	struct irq_host *host;
+	int found = 0;
+ 	u32 *np;
+
+	for (dn = NULL;
+	     (dn = of_find_node_by_name(dn,"interrupt-controller")) != NULL;) {
+		if (!device_is_compatible(dn,
+				     "IBM,CBEA-Internal-Interrupt-Controller"))
+			continue;
+ 		np = (u32 *)get_property(dn, "ibm,interrupt-server-ranges",
+					 NULL);
+ 		if (np == NULL) {
+			printk(KERN_WARNING "IIC: CPU association not found\n");
+			of_node_put(dn);
+			return -ENODEV;
 		}
-		/* setup cascade chip */
-		irq = IIC_EXT_CASCADE + be * IIC_NODE_STRIDE;
-		set_irq_chip_and_handler(irq, &iic_chip, handle_fasteoi_irq);
+		if (of_address_to_resource(dn, 0, &r0) ||
+		    of_address_to_resource(dn, 1, &r1)) {
+			printk(KERN_WARNING "IIC: Can't resolve addresses\n");
+			of_node_put(dn);
+			return -ENODEV;
+		}
+		host = NULL;
+		if (found < IIC_NODE_COUNT) {
+			host = irq_alloc_host(IRQ_HOST_MAP_LINEAR,
+					      IIC_SOURCE_COUNT,
+					      &iic_host_ops,
+					      IIC_IRQ_INVALID);
+			iic_hosts[found] = host;
+			BUG_ON(iic_hosts[found] == NULL);
+			iic_hosts[found]->host_data = of_node_get(dn);
+			found++;
+		}
+		init_one_iic(np[0], r0.start, host);
+		init_one_iic(np[1], r1.start, host);
 	}
+
+	if (found)
+		return 0;
+	else
+		return -ENODEV;
 }
 
 void __init iic_init_IRQ(void)
 {
-	int cpu, irq_offset;
-	struct iic *iic;
-
+	/* Discover and initialize iics */
 	if (setup_iic() < 0)
-		setup_iic_hardcoded();
+		panic("IIC: Failed to initialize !\n");
 
-	irq_offset = 0;
-	for_each_possible_cpu(cpu) {
-		iic = &per_cpu(iic, cpu);
-		if (iic->regs)
-			out_be64(&iic->regs->prio, 0xff);
-	}
-	iic_setup_builtin_handlers();
+	/* Set master interrupt handling function */
+	ppc_md.get_irq = iic_get_irq;
 
+	/* Enable on current CPU */
+	iic_setup_cpu();
 }
diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h
index c74515a..5560a92 100644
--- a/arch/powerpc/platforms/cell/interrupt.h
+++ b/arch/powerpc/platforms/cell/interrupt.h
@@ -37,23 +37,22 @@
  */
 
 enum {
-	IIC_EXT_OFFSET   = 0x00, /* Start of south bridge IRQs */
-	IIC_EXT_CASCADE  = 0x20, /* There is no interrupt 32 on spider */
-	IIC_NUM_EXT      = 0x40, /* Number of south bridge IRQs */
-	IIC_SPE_OFFSET   = 0x40, /* Start of SPE interrupts */
-	IIC_CLASS_STRIDE = 0x10, /* SPE IRQs per class    */
-	IIC_IPI_OFFSET   = 0x70, /* Start of IPI IRQs */
-	IIC_NUM_IPIS     = 0x10, /* IRQs reserved for IPI */
-	IIC_NODE_STRIDE  = 0x80, /* Total IRQs per node   */
+	IIC_IRQ_INVALID		= 0xff,
+	IIC_IRQ_MAX		= 0x3f,
+	IIC_IRQ_EXT_IOIF0	= 0x20,
+	IIC_IRQ_EXT_IOIF1	= 0x2b,
+	IIC_IRQ_IPI0		= 0x40,
+	IIC_NUM_IPIS    	= 0x10, /* IRQs reserved for IPI */
+	IIC_SOURCE_COUNT	= 0x50,
 };
 
 extern void iic_init_IRQ(void);
-extern int  iic_get_irq(struct pt_regs *regs);
 extern void iic_cause_IPI(int cpu, int mesg);
 extern void iic_request_IPIs(void);
 extern void iic_setup_cpu(void);
 
 extern u8 iic_get_target_id(int cpu);
+extern struct irq_host *iic_get_irq_host(int node);
 
 extern void spider_init_IRQ(void);
 
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index 70a4e90..282987d 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -80,6 +80,14 @@ static void cell_progress(char *s, unsigned short hex)
 	printk("*** %04x : %s\n", hex, s ? s : "");
 }
 
+static void __init cell_pcibios_fixup(void)
+{
+	struct pci_dev *dev = NULL;
+
+	for_each_pci_dev(dev)
+		pci_read_irq_line(dev);
+}
+
 static void __init cell_init_irq(void)
 {
 	iic_init_IRQ();
@@ -130,8 +138,6 @@ static void __init cell_init_early(void)
 
 	cell_init_iommu();
 
-	ppc64_interrupt_controller = IC_CELL_PIC;
-
 	DBG(" <- cell_init_early()\n");
 }
 
@@ -178,8 +184,7 @@ define_machine(cell) {
 	.check_legacy_ioport	= cell_check_legacy_ioport,
 	.progress		= cell_progress,
 	.init_IRQ       	= cell_init_irq,
-	.get_irq        	= iic_get_irq,
-
+	.pcibios_fixup		= cell_pcibios_fixup,
 #ifdef CONFIG_KEXEC
 	.machine_kexec		= default_machine_kexec,
 	.machine_kexec_prepare	= default_machine_kexec_prepare,
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
index 98425ac..ae7ef88 100644
--- a/arch/powerpc/platforms/cell/spider-pic.c
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -22,6 +22,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/ioport.h>
 
 #include <asm/pgtable.h>
 #include <asm/prom.h>
@@ -56,58 +57,67 @@ enum {
 	REISWAITEN	= 0x508, /* Reissue Wait Control*/
 };
 
-static void __iomem *spider_pics[4];
+#define SPIDER_CHIP_COUNT	4
+#define SPIDER_SRC_COUNT	64
+#define SPIDER_IRQ_INVALID	63
 
-static void __iomem *spider_get_pic(int irq)
-{
-	int node = irq / IIC_NODE_STRIDE;
-	irq %= IIC_NODE_STRIDE;
-
-	if (irq >= IIC_EXT_OFFSET &&
-	    irq < IIC_EXT_OFFSET + IIC_NUM_EXT &&
-	    spider_pics)
-		return spider_pics[node];
-	return NULL;
-}
+struct spider_pic {
+	struct irq_host		*host;
+	struct device_node	*of_node;
+	void __iomem		*regs;
+	unsigned int		node_id;
+};
+static struct spider_pic spider_pics[SPIDER_CHIP_COUNT];
 
-static int spider_get_nr(unsigned int irq)
+static struct spider_pic *spider_virq_to_pic(unsigned int virq)
 {
-	return (irq % IIC_NODE_STRIDE) - IIC_EXT_OFFSET;
+	return irq_map[virq].host->host_data;
 }
 
-static void __iomem *spider_get_irq_config(int irq)
+static void __iomem *spider_get_irq_config(struct spider_pic *pic,
+					   unsigned int src)
 {
-	void __iomem *pic;
-	pic = spider_get_pic(irq);
-	return pic + TIR_CFGA + 8 * spider_get_nr(irq);
+	return pic->regs + TIR_CFGA + 8 * src;
 }
 
-static void spider_unmask_irq(unsigned int irq)
+static void spider_unmask_irq(unsigned int virq)
 {
-	int nodeid = (irq / IIC_NODE_STRIDE) * 0x10;
-	void __iomem *cfg = spider_get_irq_config(irq);
-	irq = spider_get_nr(irq);
+	struct spider_pic *pic = spider_virq_to_pic(virq);
+	void __iomem *cfg = spider_get_irq_config(pic, irq_map[virq].hwirq);
 
-	/* FIXME: Most of that is configuration and has nothing to do with enabling/disable,
-	 * besides, it's also partially bogus.
+	/* We use no locking as we should be covered by the descriptor lock
+	 * for access to invidual source configuration registers
 	 */
-	out_be32(cfg, (in_be32(cfg) & ~0xf0)| 0x3107000eu | nodeid);
-	out_be32(cfg + 4, in_be32(cfg + 4) | 0x00020000u | irq);
+	out_be32(cfg, in_be32(cfg) | 0x30000000u);
 }
 
-static void spider_mask_irq(unsigned int irq)
+static void spider_mask_irq(unsigned int virq)
 {
-	void __iomem *cfg = spider_get_irq_config(irq);
-	irq = spider_get_nr(irq);
+	struct spider_pic *pic = spider_virq_to_pic(virq);
+	void __iomem *cfg = spider_get_irq_config(pic, irq_map[virq].hwirq);
 
+	/* We use no locking as we should be covered by the descriptor lock
+	 * for access to invidual source configuration registers
+	 */
 	out_be32(cfg, in_be32(cfg) & ~0x30000000u);
 }
 
-static void spider_ack_irq(unsigned int irq)
+static void spider_ack_irq(unsigned int virq)
 {
-	/* Should reset edge detection logic but we don't configure any edge interrupt
-	 * at the moment.
+	struct spider_pic *pic = spider_virq_to_pic(virq);
+	unsigned int src = irq_map[virq].hwirq;
+
+	/* Reset edge detection logic if necessary
 	 */
+	if (get_irq_desc(virq)->status & IRQ_LEVEL)
+		return;
+
+	/* Only interrupts 47 to 50 can be set to edge */
+	if (src < 47 || src > 50)
+		return;
+
+	/* Perform the clear of the edge logic */
+	out_be32(pic->regs + TIR_EDC, 0x100 | (src & 0xf));
 }
 
 static struct irq_chip spider_pic = {
@@ -117,102 +127,243 @@ static struct irq_chip spider_pic = {
 	.ack = spider_ack_irq,
 };
 
-static int spider_get_irq(int node)
+static int spider_host_match(struct irq_host *h, struct device_node *node)
+{
+	struct spider_pic *pic = h->host_data;
+	return node == pic->of_node;
+}
+
+static int spider_host_map(struct irq_host *h, unsigned int virq,
+			irq_hw_number_t hw, unsigned int flags)
 {
-	unsigned long cs;
-	void __iomem *regs = spider_pics[node];
+	unsigned int sense = flags & IRQ_TYPE_SENSE_MASK;
+	struct spider_pic *pic = h->host_data;
+	void __iomem *cfg = spider_get_irq_config(pic, hw);
+	int level = 0;
+	u32 ic;
+
+	/* Note that only level high is supported for most interrupts */
+	if (sense != IRQ_TYPE_NONE && sense != IRQ_TYPE_LEVEL_HIGH &&
+	    (hw < 47 || hw > 50))
+		return -EINVAL;
+
+	/* Decode sense type */
+	switch(sense) {
+	case IRQ_TYPE_EDGE_RISING:
+		ic = 0x3;
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		ic = 0x2;
+		break;
+	case IRQ_TYPE_LEVEL_LOW:
+		ic = 0x0;
+		level = 1;
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+	case IRQ_TYPE_NONE:
+		ic = 0x1;
+		level = 1;
+		break;
+	default:
+		return -EINVAL;
+	}
 
-	cs = in_be32(regs + TIR_CS) >> 24;
+	/* Configure the source. One gross hack that was there before and
+	 * that I've kept around is the priority to the BE which I set to
+	 * be the same as the interrupt source number. I don't know wether
+	 * that's supposed to make any kind of sense however, we'll have to
+	 * decide that, but for now, I'm not changing the behaviour.
+	 */
+	out_be32(cfg, (ic << 24) | (0x7 << 16) | (pic->node_id << 4) | 0xe);
+	out_be32(cfg + 4, (0x2 << 16) | (hw & 0xff));
 
-	if (cs == 63)
-		return -1;
-	else
-		return cs;
+	if (level)
+		get_irq_desc(virq)->status |= IRQ_LEVEL;
+	set_irq_chip_and_handler(virq, &spider_pic, handle_level_irq);
+	return 0;
+}
+
+static int spider_host_xlate(struct irq_host *h, struct device_node *ct,
+			   u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	/* Spider interrupts have 2 cells, first is the interrupt source,
+	 * second, well, I don't know for sure yet ... We mask the top bits
+	 * because old device-trees encode a node number in there
+	 */
+	*out_hwirq = intspec[0] & 0x3f;
+	*out_flags = IRQ_TYPE_LEVEL_HIGH;
+	return 0;
 }
 
+static struct irq_host_ops spider_host_ops = {
+	.match = spider_host_match,
+	.map = spider_host_map,
+	.xlate = spider_host_xlate,
+};
+
 static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc,
 			       struct pt_regs *regs)
 {
-	int node = (int)(long)desc->handler_data;
-	int cascade_irq;
+	struct spider_pic *pic = desc->handler_data;
+	unsigned int cs, virq;
 
-	cascade_irq = spider_get_irq(node);
-	generic_handle_irq(cascade_irq, regs);
+	cs = in_be32(pic->regs + TIR_CS) >> 24;
+	if (cs == SPIDER_IRQ_INVALID)
+		virq = NO_IRQ;
+	else
+		virq = irq_linear_revmap(pic->host, cs);
+	if (virq != NO_IRQ)
+		generic_handle_irq(virq, regs);
 	desc->chip->eoi(irq);
 }
 
-/* hardcoded part to be compatible with older firmware */
+/* For hooking up the cascace we have a problem. Our device-tree is
+ * crap and we don't know on which BE iic interrupt we are hooked on at
+ * least not the "standard" way. We can reconstitute it based on two
+ * informations though: which BE node we are connected to and wether
+ * we are connected to IOIF0 or IOIF1. Right now, we really only care
+ * about the IBM cell blade and we know that its firmware gives us an
+ * interrupt-map property which is pretty strange.
+ */
+static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
+{
+	unsigned int virq;
+	u32 *imap, *tmp;
+	int imaplen, intsize, unit;
+	struct device_node *iic;
+	struct irq_host *iic_host;
+
+#if 0 /* Enable that when we have a way to retreive the node as well */
+	/* First, we check wether we have a real "interrupts" in the device
+	 * tree in case the device-tree is ever fixed
+	 */
+	struct of_irq oirq;
+	if (of_irq_map_one(pic->of_node, 0, &oirq) == 0) {
+		virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
+					     oirq.size);
+		goto bail;
+	}
+#endif
+
+	/* Now do the horrible hacks */
+	tmp = (u32 *)get_property(pic->of_node, "#interrupt-cells", NULL);
+	if (tmp == NULL)
+		return NO_IRQ;
+	intsize = *tmp;
+	imap = (u32 *)get_property(pic->of_node, "interrupt-map", &imaplen);
+	if (imap == NULL || imaplen < (intsize + 1))
+		return NO_IRQ;
+	iic = of_find_node_by_phandle(imap[intsize]);
+	if (iic == NULL)
+		return NO_IRQ;
+	imap += intsize + 1;
+	tmp = (u32 *)get_property(iic, "#interrupt-cells", NULL);
+	if (tmp == NULL)
+		return NO_IRQ;
+	intsize = *tmp;
+	/* Assume unit is last entry of interrupt specifier */
+	unit = imap[intsize - 1];
+	/* Ok, we have a unit, now let's try to get the node */
+	tmp = (u32 *)get_property(iic, "ibm,interrupt-server-ranges", NULL);
+	if (tmp == NULL) {
+		of_node_put(iic);
+		return NO_IRQ;
+	}
+	/* ugly as hell but works for now */
+	pic->node_id = (*tmp) >> 1;
+	of_node_put(iic);
+
+	/* Ok, now let's get cracking. You may ask me why I just didn't match
+	 * the iic host from the iic OF node, but that way I'm still compatible
+	 * with really really old old firmwares for which we don't have a node
+	 */
+	iic_host = iic_get_irq_host(pic->node_id);
+	if (iic_host == NULL)
+		return NO_IRQ;
+	/* Manufacture an IIC interrupt number of class 2 */
+	virq = irq_create_mapping(iic_host, 0x20 | unit, 0);
+	if (virq == NO_IRQ)
+		printk(KERN_ERR "spider_pic: failed to map cascade !");
+	return virq;
+}
+
 
-static void __init spider_init_one(int node, unsigned long addr)
+static void __init spider_init_one(struct device_node *of_node, int chip,
+				   unsigned long addr)
 {
-	int n, irq;
+	struct spider_pic *pic = &spider_pics[chip];
+	int i, virq;
 
-	spider_pics[node] = ioremap(addr, 0x800);
-	if (spider_pics[node] == NULL)
+	/* Map registers */
+	pic->regs = ioremap(addr, 0x1000);
+	if (pic->regs == NULL)
 		panic("spider_pic: can't map registers !");
 
-	printk(KERN_INFO "spider_pic: mapped for node %d, addr: 0x%lx mapped to %p\n",
-	       node, addr, spider_pics[node]);
+	/* Allocate a host */
+	pic->host = irq_alloc_host(IRQ_HOST_MAP_LINEAR, SPIDER_SRC_COUNT,
+				   &spider_host_ops, SPIDER_IRQ_INVALID);
+	if (pic->host == NULL)
+		panic("spider_pic: can't allocate irq host !");
+	pic->host->host_data = pic;
 
-	for (n = 0; n < IIC_NUM_EXT; n++) {
-		if (n == IIC_EXT_CASCADE)
-			continue;
-		irq = n + IIC_EXT_OFFSET + node * IIC_NODE_STRIDE;
-		set_irq_chip_and_handler(irq, &spider_pic, handle_level_irq);
-		get_irq_desc(irq)->status |= IRQ_LEVEL;
+	/* Fill out other bits */
+	pic->of_node = of_node_get(of_node);
+
+	/* Go through all sources and disable them */
+	for (i = 0; i < SPIDER_SRC_COUNT; i++) {
+		void __iomem *cfg = pic->regs + TIR_CFGA + 8 * i;
+		out_be32(cfg, in_be32(cfg) & ~0x30000000u);
 	}
 
 	/* do not mask any interrupts because of level */
-	out_be32(spider_pics[node] + TIR_MSK, 0x0);
-
-	/* disable edge detection clear */
-	/* out_be32(spider_pics[node] + TIR_EDC, 0x0); */
+	out_be32(pic->regs + TIR_MSK, 0x0);
 
 	/* enable interrupt packets to be output */
-	out_be32(spider_pics[node] + TIR_PIEN,
-		 in_be32(spider_pics[node] + TIR_PIEN) | 0x1);
+	out_be32(pic->regs + TIR_PIEN, in_be32(pic->regs + TIR_PIEN) | 0x1);
 
-	/* Hook up cascade */
-	irq = IIC_EXT_CASCADE + node * IIC_NODE_STRIDE;
-	set_irq_data(irq, (void *)(long)node);
-	set_irq_chained_handler(irq, spider_irq_cascade);
+	/* Hook up the cascade interrupt to the iic and nodeid */
+	virq = spider_find_cascade_and_node(pic);
+	if (virq == NO_IRQ)
+		return;
+	set_irq_data(virq, pic);
+	set_irq_chained_handler(virq, spider_irq_cascade);
+
+	printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %s\n",
+	       pic->node_id, addr, of_node->full_name);
 
 	/* Enable the interrupt detection enable bit. Do this last! */
-	out_be32(spider_pics[node] + TIR_DEN,
-		 in_be32(spider_pics[node] + TIR_DEN) | 0x1);
+	out_be32(pic->regs + TIR_DEN, in_be32(pic->regs + TIR_DEN) | 0x1);
 }
 
 void __init spider_init_IRQ(void)
 {
-	unsigned long *spider_reg;
+	struct resource r;
 	struct device_node *dn;
-	char *compatible;
-	int node = 0;
-
-	/* XXX node numbers are totally bogus. We _hope_ we get the device nodes in the right
-	 * order here but that's definitely not guaranteed, we need to get the node from the
-	 * device tree instead. There is currently no proper property for it (but our whole
-	 * device-tree is bogus anyway) so all we can do is pray or maybe test the address
-	 * and deduce the node-id
+	int chip = 0;
+
+	/* XXX node numbers are totally bogus. We _hope_ we get the device
+	 * nodes in the right order here but that's definitely not guaranteed,
+	 * we need to get the node from the device tree instead.
+	 * There is currently no proper property for it (but our whole
+	 * device-tree is bogus anyway) so all we can do is pray or maybe test
+	 * the address and deduce the node-id
 	 */
-	for (dn = NULL; (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
-		compatible = (char *)get_property(dn, "compatible", NULL);
-
-		if (!compatible)
-			continue;
-
- 		if (strstr(compatible, "CBEA,platform-spider-pic"))
-			spider_reg = (unsigned long *)get_property(dn, "reg", NULL);
-		else if (strstr(compatible, "sti,platform-spider-pic") && (node < 2)) {
-			static long hard_coded_pics[] = { 0x24000008000, 0x34000008000 };
-			spider_reg = &hard_coded_pics[node];
+	for (dn = NULL;
+	     (dn = of_find_node_by_name(dn, "interrupt-controller"));) {
+		if (device_is_compatible(dn, "CBEA,platform-spider-pic")) {
+			if (of_address_to_resource(dn, 0, &r)) {
+				printk(KERN_WARNING "spider-pic: Failed\n");
+				continue;
+			}
+		} else if (device_is_compatible(dn, "sti,platform-spider-pic")
+			   && (chip < 2)) {
+			static long hard_coded_pics[] =
+				{ 0x24000008000, 0x34000008000 };
+			r.start = hard_coded_pics[chip];
 		} else
 			continue;
-
-		if (spider_reg == NULL)
-			printk(KERN_ERR "spider_pic: No address for node %d\n", node);
-
-		spider_init_one(node, *spider_reg);
-		node++;
+		spider_init_one(dn, chip++, r.start);
 	}
 }
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 656c1ef..5d2313a 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -264,51 +264,57 @@ spu_irq_class_2(int irq, void *data, struct pt_regs *regs)
 	return stat ? IRQ_HANDLED : IRQ_NONE;
 }
 
-static int
-spu_request_irqs(struct spu *spu)
+static int spu_request_irqs(struct spu *spu)
 {
-	int ret;
-	int irq_base;
-
-	irq_base = IIC_NODE_STRIDE * spu->node + IIC_SPE_OFFSET;
-
-	snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0", spu->number);
-	ret = request_irq(irq_base + spu->isrc,
-		 spu_irq_class_0, IRQF_DISABLED, spu->irq_c0, spu);
-	if (ret)
-		goto out;
-
-	snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1", spu->number);
-	ret = request_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc,
-		 spu_irq_class_1, IRQF_DISABLED, spu->irq_c1, spu);
-	if (ret)
-		goto out1;
+	int ret = 0;
 
-	snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2", spu->number);
-	ret = request_irq(irq_base + 2*IIC_CLASS_STRIDE + spu->isrc,
-		 spu_irq_class_2, IRQF_DISABLED, spu->irq_c2, spu);
-	if (ret)
-		goto out2;
-	goto out;
+	if (spu->irqs[0] != NO_IRQ) {
+		snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0",
+			 spu->number);
+		ret = request_irq(spu->irqs[0], spu_irq_class_0,
+				  IRQF_DISABLED,
+				  spu->irq_c0, spu);
+		if (ret)
+			goto bail0;
+	}
+	if (spu->irqs[1] != NO_IRQ) {
+		snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1",
+			 spu->number);
+		ret = request_irq(spu->irqs[1], spu_irq_class_1,
+				  IRQF_DISABLED,
+				  spu->irq_c1, spu);
+		if (ret)
+			goto bail1;
+	}
+	if (spu->irqs[2] != NO_IRQ) {
+		snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2",
+			 spu->number);
+		ret = request_irq(spu->irqs[2], spu_irq_class_2,
+				  IRQF_DISABLED,
+				  spu->irq_c2, spu);
+		if (ret)
+			goto bail2;
+	}
+	return 0;
 
-out2:
-	free_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc, spu);
-out1:
-	free_irq(irq_base + spu->isrc, spu);
-out:
+bail2:
+	if (spu->irqs[1] != NO_IRQ)
+		free_irq(spu->irqs[1], spu);
+bail1:
+	if (spu->irqs[0] != NO_IRQ)
+		free_irq(spu->irqs[0], spu);
+bail0:
 	return ret;
 }
 
-static void
-spu_free_irqs(struct spu *spu)
+static void spu_free_irqs(struct spu *spu)
 {
-	int irq_base;
-
-	irq_base = IIC_NODE_STRIDE * spu->node + IIC_SPE_OFFSET;
-
-	free_irq(irq_base + spu->isrc, spu);
-	free_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc, spu);
-	free_irq(irq_base + 2*IIC_CLASS_STRIDE + spu->isrc, spu);
+	if (spu->irqs[0] != NO_IRQ)
+		free_irq(spu->irqs[0], spu);
+	if (spu->irqs[1] != NO_IRQ)
+		free_irq(spu->irqs[1], spu);
+	if (spu->irqs[2] != NO_IRQ)
+		free_irq(spu->irqs[2], spu);
 }
 
 static LIST_HEAD(spu_list);
@@ -559,17 +565,38 @@ static void spu_unmap(struct spu *spu)
 	iounmap((u8 __iomem *)spu->local_store);
 }
 
+/* This function shall be abstracted for HV platforms */
+static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
+{
+	struct irq_host *host;
+	unsigned int isrc;
+	u32 *tmp;
+
+	host = iic_get_irq_host(spu->node);
+	if (host == NULL)
+		return -ENODEV;
+
+	/* Get the interrupt source from the device-tree */
+	tmp = (u32 *)get_property(np, "isrc", NULL);
+	if (!tmp)
+		return -ENODEV;
+	spu->isrc = isrc = tmp[0];
+
+	/* Now map interrupts of all 3 classes */
+	spu->irqs[0] = irq_create_mapping(host, 0x00 | isrc, 0);
+	spu->irqs[1] = irq_create_mapping(host, 0x10 | isrc, 0);
+	spu->irqs[2] = irq_create_mapping(host, 0x20 | isrc, 0);
+
+	/* Right now, we only fail if class 2 failed */
+	return spu->irqs[2] == NO_IRQ ? -EINVAL : 0;
+}
+
 static int __init spu_map_device(struct spu *spu, struct device_node *node)
 {
 	char *prop;
 	int ret;
 
 	ret = -ENODEV;
-	prop = get_property(node, "isrc", NULL);
-	if (!prop)
-		goto out;
-	spu->isrc = *(unsigned int *)prop;
-
 	spu->name = get_property(node, "name", NULL);
 	if (!spu->name)
 		goto out;
@@ -636,7 +663,8 @@ static int spu_create_sysdev(struct spu *spu)
 		return ret;
 	}
 
-	sysdev_create_file(&spu->sysdev, &attr_isrc);
+	if (spu->isrc != 0)
+		sysdev_create_file(&spu->sysdev, &attr_isrc);
 	sysfs_add_device_to_node(&spu->sysdev, spu->nid);
 
 	return 0;
@@ -668,6 +696,9 @@ static int __init create_spu(struct device_node *spe)
 	spu->nid = of_node_to_nid(spe);
 	if (spu->nid == -1)
 		spu->nid = 0;
+	ret = spu_map_interrupts(spu, spe);
+	if (ret)
+		goto out_unmap;
 	spin_lock_init(&spu->register_lock);
 	spu_mfc_sdr_set(spu, mfspr(SPRN_SDR1));
 	spu_mfc_sr1_set(spu, 0x33);
diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c
index 66c2534..6802cdc 100644
--- a/arch/powerpc/platforms/chrp/pci.c
+++ b/arch/powerpc/platforms/chrp/pci.c
@@ -18,7 +18,6 @@
 #include <asm/machdep.h>
 #include <asm/sections.h>
 #include <asm/pci-bridge.h>
-#include <asm/open_pic.h>
 #include <asm/grackle.h>
 #include <asm/rtas.h>
 
@@ -161,15 +160,9 @@ void __init
 chrp_pcibios_fixup(void)
 {
 	struct pci_dev *dev = NULL;
-	struct device_node *np;
 
-	/* PCI interrupts are controlled by the OpenPIC */
-	for_each_pci_dev(dev) {
-		np = pci_device_to_OF_node(dev);
-		if ((np != 0) && (np->n_intrs > 0) && (np->intrs[0].line != 0))
-			dev->irq = np->intrs[0].line;
-		pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
-	}
+	for_each_pci_dev(dev)
+		pci_read_irq_line(dev);
 }
 
 #define PRG_CL_RESET_VALID 0x00010000
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index a5dffc8..bb10171 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -59,7 +59,7 @@ void rtas_indicator_progress(char *, unsigned short);
 int _chrp_type;
 EXPORT_SYMBOL(_chrp_type);
 
-struct mpic *chrp_mpic;
+static struct mpic *chrp_mpic;
 
 /* Used for doing CHRP event-scans */
 DEFINE_PER_CPU(struct timer_list, heartbeat_timer);
@@ -315,19 +315,13 @@ chrp_event_scan(unsigned long unused)
 		  jiffies + event_scan_interval);
 }
 
-void chrp_8259_cascade(unsigned int irq, struct irq_desc *desc,
-			struct pt_regs *regs)
+static void chrp_8259_cascade(unsigned int irq, struct irq_desc *desc,
+			      struct pt_regs *regs)
 {
-	unsigned int max = 100;
-
-	while(max--) {
-		int irq = i8259_irq(regs);
-		if (max == 99)
-			desc->chip->eoi(irq);
-		if (irq < 0)
-			break;
-		generic_handle_irq(irq, regs);
-	};
+	unsigned int cascade_irq = i8259_irq(regs);
+	if (cascade_irq != NO_IRQ)
+		generic_handle_irq(cascade_irq, regs);
+	desc->chip->eoi(irq);
 }
 
 /*
@@ -336,18 +330,17 @@ void chrp_8259_cascade(unsigned int irq, struct irq_desc *desc,
 static void __init chrp_find_openpic(void)
 {
 	struct device_node *np, *root;
-	int len, i, j, irq_count;
+	int len, i, j;
 	int isu_size, idu_size;
 	unsigned int *iranges, *opprop = NULL;
 	int oplen = 0;
 	unsigned long opaddr;
 	int na = 1;
-	unsigned char init_senses[NR_IRQS - NUM_8259_INTERRUPTS];
 
-	np = find_type_devices("open-pic");
+	np = of_find_node_by_type(NULL, "open-pic");
 	if (np == NULL)
 		return;
-	root = find_path_device("/");
+	root = of_find_node_by_path("/");
 	if (root) {
 		opprop = (unsigned int *) get_property
 			(root, "platform-open-pic", &oplen);
@@ -358,19 +351,15 @@ static void __init chrp_find_openpic(void)
 		oplen /= na * sizeof(unsigned int);
 	} else {
 		struct resource r;
-		if (of_address_to_resource(np, 0, &r))
-			return;
+		if (of_address_to_resource(np, 0, &r)) {
+			goto bail;
+		}
 		opaddr = r.start;
 		oplen = 0;
 	}
 
 	printk(KERN_INFO "OpenPIC at %lx\n", opaddr);
 
-	irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */
-	prom_get_irq_senses(init_senses, NUM_ISA_INTERRUPTS, NR_IRQS - 4);
-	/* i8259 cascade is always positive level */
-	init_senses[0] = IRQ_SENSE_LEVEL | IRQ_POLARITY_POSITIVE;
-
 	iranges = (unsigned int *) get_property(np, "interrupt-ranges", &len);
 	if (iranges == NULL)
 		len = 0;	/* non-distributed mpic */
@@ -397,15 +386,12 @@ static void __init chrp_find_openpic(void)
 	if (len > 1)
 		isu_size = iranges[3];
 
-	chrp_mpic = mpic_alloc(opaddr, MPIC_PRIMARY,
-			       isu_size, NUM_ISA_INTERRUPTS, irq_count,
-			       NR_IRQS - 4, init_senses, irq_count,
-			       " MPIC    ");
+	chrp_mpic = mpic_alloc(np, opaddr, MPIC_PRIMARY,
+			       isu_size, 0, " MPIC    ");
 	if (chrp_mpic == NULL) {
 		printk(KERN_ERR "Failed to allocate MPIC structure\n");
-		return;
+		goto bail;
 	}
-
 	j = na - 1;
 	for (i = 1; i < len; ++i) {
 		iranges += 2;
@@ -417,7 +403,10 @@ static void __init chrp_find_openpic(void)
 	}
 
 	mpic_init(chrp_mpic);
-	set_irq_chained_handler(NUM_ISA_INTERRUPTS, chrp_8259_cascade);
+	ppc_md.get_irq = mpic_get_irq;
+ bail:
+	of_node_put(root);
+	of_node_put(np);
 }
 
 #if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(XMON)
@@ -428,14 +417,34 @@ static struct irqaction xmon_irqaction = {
 };
 #endif
 
-void __init chrp_init_IRQ(void)
+static void __init chrp_find_8259(void)
 {
-	struct device_node *np;
+	struct device_node *np, *pic = NULL;
 	unsigned long chrp_int_ack = 0;
-#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(XMON)
-	struct device_node *kbd;
-#endif
+	unsigned int cascade_irq;
 
+	/* Look for cascade */
+	for_each_node_by_type(np, "interrupt-controller")
+		if (device_is_compatible(np, "chrp,iic")) {
+			pic = np;
+			break;
+		}
+	/* Ok, 8259 wasn't found. We need to handle the case where
+	 * we have a pegasos that claims to be chrp but doesn't have
+	 * a proper interrupt tree
+	 */
+	if (pic == NULL && chrp_mpic != NULL) {
+		printk(KERN_ERR "i8259: Not found in device-tree"
+		       " assuming no legacy interrupts\n");
+		return;
+	}
+
+	/* Look for intack. In a perfect world, we would look for it on
+	 * the ISA bus that holds the 8259 but heh... Works that way. If
+	 * we ever see a problem, we can try to re-use the pSeries code here.
+	 * Also, Pegasos-type platforms don't have a proper node to start
+	 * from anyway
+	 */
 	for (np = find_devices("pci"); np != NULL; np = np->next) {
 		unsigned int *addrp = (unsigned int *)
 			get_property(np, "8259-interrupt-acknowledge", NULL);
@@ -446,11 +455,29 @@ void __init chrp_init_IRQ(void)
 		break;
 	}
 	if (np == NULL)
-		printk(KERN_ERR "Cannot find PCI interrupt acknowledge address\n");
+		printk(KERN_WARNING "Cannot find PCI interrupt acknowledge"
+		       " address, polling\n");
+
+	i8259_init(pic, chrp_int_ack);
+	if (ppc_md.get_irq == NULL)
+		ppc_md.get_irq = i8259_irq;
+	if (chrp_mpic != NULL) {
+		cascade_irq = irq_of_parse_and_map(pic, 0);
+		if (cascade_irq == NO_IRQ)
+			printk(KERN_ERR "i8259: failed to map cascade irq\n");
+		else
+			set_irq_chained_handler(cascade_irq,
+						chrp_8259_cascade);
+	}
+}
 
+void __init chrp_init_IRQ(void)
+{
+#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(XMON)
+	struct device_node *kbd;
+#endif
 	chrp_find_openpic();
-
-	i8259_init(chrp_int_ack, 0);
+	chrp_find_8259();
 
 	if (_chrp_type == _CHRP_Pegasos)
 		ppc_md.get_irq        = i8259_irq;
@@ -535,10 +562,6 @@ static int __init chrp_probe(void)
 	DMA_MODE_READ = 0x44;
 	DMA_MODE_WRITE = 0x48;
 	isa_io_base = CHRP_ISA_IO_BASE;		/* default value */
-	ppc_do_canonicalize_irqs = 1;
-
-	/* Assume we have an 8259... */
-	__irq_offset_value = NUM_ISA_INTERRUPTS;
 
 	return 1;
 }
@@ -550,7 +573,6 @@ define_machine(chrp) {
 	.init			= chrp_init2,
 	.show_cpuinfo		= chrp_show_cpuinfo,
 	.init_IRQ		= chrp_init_IRQ,
-	.get_irq		= mpic_get_irq,
 	.pcibios_fixup		= chrp_pcibios_fixup,
 	.restart		= rtas_restart,
 	.power_off		= rtas_power_off,
diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c
index c298ca1..1d2307e 100644
--- a/arch/powerpc/platforms/chrp/smp.c
+++ b/arch/powerpc/platforms/chrp/smp.c
@@ -29,7 +29,6 @@
 #include <asm/smp.h>
 #include <asm/residual.h>
 #include <asm/time.h>
-#include <asm/open_pic.h>
 #include <asm/machdep.h>
 #include <asm/smp.h>
 #include <asm/mpic.h>
diff --git a/arch/powerpc/platforms/iseries/irq.c b/arch/powerpc/platforms/iseries/irq.c
index 7fb6a08..2275e64 100644
--- a/arch/powerpc/platforms/iseries/irq.c
+++ b/arch/powerpc/platforms/iseries/irq.c
@@ -162,27 +162,6 @@ static void pci_event_handler(struct HvLpEvent *event, struct pt_regs *regs)
 		printk(KERN_ERR "pci_event_handler: NULL event received\n");
 }
 
-/*
- * This is called by init_IRQ.  set in ppc_md.init_IRQ by iSeries_setup.c
- * It must be called before the bus walk.
- */
-void __init iSeries_init_IRQ(void)
-{
-	/* Register PCI event handler and open an event path */
-	int ret;
-
-	ret = HvLpEvent_registerHandler(HvLpEvent_Type_PciIo,
-			&pci_event_handler);
-	if (ret == 0) {
-		ret = HvLpEvent_openPath(HvLpEvent_Type_PciIo, 0);
-		if (ret != 0)
-			printk(KERN_ERR "iseries_init_IRQ: open event path "
-					"failed with rc 0x%x\n", ret);
-	} else
-		printk(KERN_ERR "iseries_init_IRQ: register handler "
-				"failed with rc 0x%x\n", ret);
-}
-
 #define REAL_IRQ_TO_SUBBUS(irq)	(((irq) >> 14) & 0xff)
 #define REAL_IRQ_TO_BUS(irq)	((((irq) >> 6) & 0xff) + 1)
 #define REAL_IRQ_TO_IDSEL(irq)	((((irq) >> 3) & 7) + 1)
@@ -196,7 +175,7 @@ static void iseries_enable_IRQ(unsigned int irq)
 {
 	u32 bus, dev_id, function, mask;
 	const u32 sub_bus = 0;
-	unsigned int rirq = virt_irq_to_real_map[irq];
+	unsigned int rirq = (unsigned int)irq_map[irq].hwirq;
 
 	/* The IRQ has already been locked by the caller */
 	bus = REAL_IRQ_TO_BUS(rirq);
@@ -213,7 +192,7 @@ static unsigned int iseries_startup_IRQ(unsigned int irq)
 {
 	u32 bus, dev_id, function, mask;
 	const u32 sub_bus = 0;
-	unsigned int rirq = virt_irq_to_real_map[irq];
+	unsigned int rirq = (unsigned int)irq_map[irq].hwirq;
 
 	bus = REAL_IRQ_TO_BUS(rirq);
 	function = REAL_IRQ_TO_FUNC(rirq);
@@ -254,7 +233,7 @@ static void iseries_shutdown_IRQ(unsigned int irq)
 {
 	u32 bus, dev_id, function, mask;
 	const u32 sub_bus = 0;
-	unsigned int rirq = virt_irq_to_real_map[irq];
+	unsigned int rirq = (unsigned int)irq_map[irq].hwirq;
 
 	/* irq should be locked by the caller */
 	bus = REAL_IRQ_TO_BUS(rirq);
@@ -277,7 +256,7 @@ static void iseries_disable_IRQ(unsigned int irq)
 {
 	u32 bus, dev_id, function, mask;
 	const u32 sub_bus = 0;
-	unsigned int rirq = virt_irq_to_real_map[irq];
+	unsigned int rirq = (unsigned int)irq_map[irq].hwirq;
 
 	/* The IRQ has already been locked by the caller */
 	bus = REAL_IRQ_TO_BUS(rirq);
@@ -291,7 +270,7 @@ static void iseries_disable_IRQ(unsigned int irq)
 
 static void iseries_end_IRQ(unsigned int irq)
 {
-	unsigned int rirq = virt_irq_to_real_map[irq];
+	unsigned int rirq = (unsigned int)irq_map[irq].hwirq;
 
 	HvCallPci_eoi(REAL_IRQ_TO_BUS(rirq), REAL_IRQ_TO_SUBBUS(rirq),
 		(REAL_IRQ_TO_IDSEL(rirq) << 4) + REAL_IRQ_TO_FUNC(rirq));
@@ -314,16 +293,14 @@ static struct irq_chip iseries_pic = {
 int __init iSeries_allocate_IRQ(HvBusNumber bus,
 		HvSubBusNumber sub_bus, u32 bsubbus)
 {
-	int virtirq;
 	unsigned int realirq;
 	u8 idsel = ISERIES_GET_DEVICE_FROM_SUBBUS(bsubbus);
 	u8 function = ISERIES_GET_FUNCTION_FROM_SUBBUS(bsubbus);
 
 	realirq = (((((sub_bus << 8) + (bus - 1)) << 3) + (idsel - 1)) << 3)
 		+ function;
-	virtirq = virt_irq_create_mapping(realirq);
-	set_irq_chip_and_handler(virtirq, &iseries_pic, handle_fasteoi_irq);
-	return virtirq;
+
+	return irq_create_mapping(NULL, realirq, IRQ_TYPE_NONE);
 }
 
 #endif /* CONFIG_PCI */
@@ -331,10 +308,9 @@ int __init iSeries_allocate_IRQ(HvBusNumber bus,
 /*
  * Get the next pending IRQ.
  */
-int iSeries_get_irq(struct pt_regs *regs)
+unsigned int iSeries_get_irq(struct pt_regs *regs)
 {
-	/* -2 means ignore this interrupt */
-	int irq = -2;
+	int irq = NO_IRQ_IGNORE;
 
 #ifdef CONFIG_SMP
 	if (get_lppaca()->int_dword.fields.ipi_cnt) {
@@ -357,9 +333,57 @@ int iSeries_get_irq(struct pt_regs *regs)
 		}
 		spin_unlock(&pending_irqs_lock);
 		if (irq >= NR_IRQS)
-			irq = -2;
+			irq = NO_IRQ_IGNORE;
 	}
 #endif
 
 	return irq;
 }
+
+static int iseries_irq_host_map(struct irq_host *h, unsigned int virq,
+				irq_hw_number_t hw, unsigned int flags)
+{
+	set_irq_chip_and_handler(virq, &iseries_pic, handle_fasteoi_irq);
+
+	return 0;
+}
+
+static struct irq_host_ops iseries_irq_host_ops = {
+	.map = iseries_irq_host_map,
+};
+
+/*
+ * This is called by init_IRQ.  set in ppc_md.init_IRQ by iSeries_setup.c
+ * It must be called before the bus walk.
+ */
+void __init iSeries_init_IRQ(void)
+{
+	/* Register PCI event handler and open an event path */
+	struct irq_host *host;
+	int ret;
+
+	/*
+	 * The Hypervisor only allows us up to 256 interrupt
+	 * sources (the irq number is passed in a u8).
+	 */
+	irq_set_virq_count(256);
+
+	/* Create irq host. No need for a revmap since HV will give us
+	 * back our virtual irq number
+	 */
+	host = irq_alloc_host(IRQ_HOST_MAP_NOMAP, 0, &iseries_irq_host_ops, 0);
+	BUG_ON(host == NULL);
+	irq_set_default_host(host);
+
+	ret = HvLpEvent_registerHandler(HvLpEvent_Type_PciIo,
+			&pci_event_handler);
+	if (ret == 0) {
+		ret = HvLpEvent_openPath(HvLpEvent_Type_PciIo, 0);
+		if (ret != 0)
+			printk(KERN_ERR "iseries_init_IRQ: open event path "
+					"failed with rc 0x%x\n", ret);
+	} else
+		printk(KERN_ERR "iseries_init_IRQ: register handler "
+				"failed with rc 0x%x\n", ret);
+}
+
diff --git a/arch/powerpc/platforms/iseries/irq.h b/arch/powerpc/platforms/iseries/irq.h
index 188aa80..1ee8985 100644
--- a/arch/powerpc/platforms/iseries/irq.h
+++ b/arch/powerpc/platforms/iseries/irq.h
@@ -4,6 +4,6 @@
 extern void iSeries_init_IRQ(void);
 extern int  iSeries_allocate_IRQ(HvBusNumber, HvSubBusNumber, u32);
 extern void iSeries_activate_IRQs(void);
-extern int iSeries_get_irq(struct pt_regs *);
+extern unsigned int iSeries_get_irq(struct pt_regs *);
 
 #endif /* _ISERIES_IRQ_H */
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index c877074..c9605d7 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -294,8 +294,6 @@ static void __init iSeries_init_early(void)
 {
 	DBG(" -> iSeries_init_early()\n");
 
-	ppc64_interrupt_controller = IC_ISERIES;
-
 #if defined(CONFIG_BLK_DEV_INITRD)
 	/*
 	 * If the init RAM disk has been configured and there is
@@ -659,12 +657,6 @@ static int __init iseries_probe(void)
 	powerpc_firmware_features |= FW_FEATURE_ISERIES;
 	powerpc_firmware_features |= FW_FEATURE_LPAR;
 
-	/*
-	 * The Hypervisor only allows us up to 256 interrupt
-	 * sources (the irq number is passed in a u8).
-	 */
-	virt_irq_max = 255;
-
 	hpte_init_iSeries();
 
 	return 1;
diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
index f7170ff..63a1670 100644
--- a/arch/powerpc/platforms/maple/pci.c
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -443,18 +443,23 @@ void __init maple_pci_init(void)
 int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel)
 {
 	struct device_node *np;
-	int irq = channel ? 15 : 14;
+	unsigned int defirq = channel ? 15 : 14;
+	unsigned int irq;
 
 	if (pdev->vendor != PCI_VENDOR_ID_AMD ||
 	    pdev->device != PCI_DEVICE_ID_AMD_8111_IDE)
-		return irq;
+		return defirq;
 
 	np = pci_device_to_OF_node(pdev);
 	if (np == NULL)
-		return irq;
-	if (np->n_intrs < 2)
-		return irq;
-	return np->intrs[channel & 0x1].line;
+		return defirq;
+	irq = irq_of_parse_and_map(np, channel & 0x1);
+	if (irq == NO_IRQ) {
+		printk("Failed to map onboard IDE interrupt for channel %d\n",
+		       channel);
+		return defirq;
+	}
+	return irq;
 }
 
 /* XXX: To remove once all firmwares are ok */
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index 611ca8e..cb528c9 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -198,50 +198,81 @@ static void __init maple_init_early(void)
 {
 	DBG(" -> maple_init_early\n");
 
-	/* Setup interrupt mapping options */
-	ppc64_interrupt_controller = IC_OPEN_PIC;
-
 	iommu_init_early_dart();
 
 	DBG(" <- maple_init_early\n");
 }
 
-
-static __init void maple_init_IRQ(void)
+/*
+ * This is almost identical to pSeries and CHRP. We need to make that
+ * code generic at one point, with appropriate bits in the device-tree to
+ * identify the presence of an HT APIC
+ */
+static void __init maple_init_IRQ(void)
 {
-	struct device_node *root;
+	struct device_node *root, *np, *mpic_node = NULL;
 	unsigned int *opprop;
-	unsigned long opic_addr;
+	unsigned long openpic_addr = 0;
+	int naddr, n, i, opplen, has_isus = 0;
 	struct mpic *mpic;
-	unsigned char senses[128];
-	int n;
+	unsigned int flags = MPIC_PRIMARY;
 
-	DBG(" -> maple_init_IRQ\n");
+	/* Locate MPIC in the device-tree. Note that there is a bug
+	 * in Maple device-tree where the type of the controller is
+	 * open-pic and not interrupt-controller
+	 */
+	for_each_node_by_type(np, "open-pic") {
+		mpic_node = np;
+		break;
+	}
+	if (mpic_node == NULL) {
+		printk(KERN_ERR
+		       "Failed to locate the MPIC interrupt controller\n");
+		return;
+	}
 
-	/* XXX: Non standard, replace that with a proper openpic/mpic node
-	 * in the device-tree. Find the Open PIC if present */
+	/* Find address list in /platform-open-pic */
 	root = of_find_node_by_path("/");
-	opprop = (unsigned int *) get_property(root,
-				"platform-open-pic", NULL);
-	if (opprop == 0)
-		panic("OpenPIC not found !\n");
-
-	n = prom_n_addr_cells(root);
-	for (opic_addr = 0; n > 0; --n)
-		opic_addr = (opic_addr << 32) + *opprop++;
+	naddr = prom_n_addr_cells(root);
+	opprop = (unsigned int *) get_property(root, "platform-open-pic",
+					       &opplen);
+	if (opprop != 0) {
+		openpic_addr = of_read_number(opprop, naddr);
+		has_isus = (opplen > naddr);
+		printk(KERN_DEBUG "OpenPIC addr: %lx, has ISUs: %d\n",
+		       openpic_addr, has_isus);
+	}
 	of_node_put(root);
 
-	/* Obtain sense values from device-tree */
-	prom_get_irq_senses(senses, 0, 128);
+	BUG_ON(openpic_addr == 0);
 
-	mpic = mpic_alloc(opic_addr,
-			  MPIC_PRIMARY | MPIC_BIG_ENDIAN |
-			  MPIC_BROKEN_U3 | MPIC_WANTS_RESET,
-			  0, 0, 128, 128, senses, 128, "U3-MPIC");
+	/* Check for a big endian MPIC */
+	if (get_property(np, "big-endian", NULL) != NULL)
+		flags |= MPIC_BIG_ENDIAN;
+
+	/* XXX Maple specific bits */
+	flags |= MPIC_BROKEN_U3 | MPIC_WANTS_RESET;
+
+	/* Setup the openpic driver. More device-tree junks, we hard code no
+	 * ISUs for now. I'll have to revisit some stuffs with the folks doing
+	 * the firmware for those
+	 */
+	mpic = mpic_alloc(mpic_node, openpic_addr, flags,
+			  /*has_isus ? 16 :*/ 0, 0, " MPIC     ");
 	BUG_ON(mpic == NULL);
-	mpic_init(mpic);
 
-	DBG(" <- maple_init_IRQ\n");
+	/* Add ISUs */
+	opplen /= sizeof(u32);
+	for (n = 0, i = naddr; i < opplen; i += naddr, n++) {
+		unsigned long isuaddr = of_read_number(opprop + i, naddr);
+		mpic_assign_isu(mpic, n, isuaddr);
+	}
+
+	/* All ISUs are setup, complete initialization */
+	mpic_init(mpic);
+	ppc_md.get_irq = mpic_get_irq;
+	of_node_put(mpic_node);
+	of_node_put(root);
 }
 
 static void __init maple_progress(char *s, unsigned short hex)
@@ -279,7 +310,6 @@ define_machine(maple_md) {
 	.setup_arch		= maple_setup_arch,
 	.init_early		= maple_init_early,
 	.init_IRQ		= maple_init_IRQ,
-	.get_irq		= mpic_get_irq,
 	.pcibios_fixup		= maple_pcibios_fixup,
 	.pci_get_legacy_ide_irq	= maple_pci_get_legacy_ide_irq,
 	.restart		= maple_restart,
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index 5685ad9..e63d52f 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -162,6 +162,8 @@ static void __init bootx_add_chosen_props(unsigned long base,
 {
 	u32 val;
 
+	bootx_dt_add_prop("linux,bootx", NULL, 0, mem_end);
+
 	if (bootx_info->kernelParamsOffset) {
 		char *args = (char *)((unsigned long)bootx_info) +
 			bootx_info->kernelParamsOffset;
@@ -228,7 +230,7 @@ static void __init bootx_scan_dt_build_strings(unsigned long base,
 
 	if (!strcmp(namep, "/chosen")) {
 		DBG(" detected /chosen ! adding properties names !\n");
-		bootx_dt_add_string("linux,platform", mem_end);
+		bootx_dt_add_string("linux,bootx", mem_end);
 		bootx_dt_add_string("linux,stdout-path", mem_end);
 		bootx_dt_add_string("linux,initrd-start", mem_end);
 		bootx_dt_add_string("linux,initrd-end", mem_end);
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index ceafaf5..8677f50 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -522,10 +522,11 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
 		host->speed = KW_I2C_MODE_25KHZ;
 		break;
 	}	
-	if (np->n_intrs > 0)
-		host->irq = np->intrs[0].line;
-	else
-		host->irq = NO_IRQ;
+	host->irq = irq_of_parse_and_map(np, 0);
+	if (host->irq == NO_IRQ)
+		printk(KERN_WARNING
+		       "low_i2c: Failed to map interrupt for %s\n",
+		       np->full_name);
 
 	host->base = ioremap((*addrp), 0x1000);
 	if (host->base == NULL) {
diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c
index 41fa240..6a36ea9 100644
--- a/arch/powerpc/platforms/powermac/nvram.c
+++ b/arch/powerpc/platforms/powermac/nvram.c
@@ -29,6 +29,8 @@
 #include <asm/machdep.h>
 #include <asm/nvram.h>
 
+#include "pmac.h"
+
 #define DEBUG
 
 #ifdef DEBUG
@@ -80,9 +82,6 @@ static int nvram_partitions[3];
 // XXX Turn that into a sem
 static DEFINE_SPINLOCK(nv_lock);
 
-extern int pmac_newworld;
-extern int system_running;
-
 static int (*core99_write_bank)(int bank, u8* datas);
 static int (*core99_erase_bank)(int bank);
 
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index d524a91..556b349 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -46,6 +46,9 @@ static int has_uninorth;
 static struct pci_controller *u3_agp;
 static struct pci_controller *u4_pcie;
 static struct pci_controller *u3_ht;
+#define has_second_ohare 0
+#else
+static int has_second_ohare;
 #endif /* CONFIG_PPC64 */
 
 extern u8 pci_cache_line_size;
@@ -647,6 +650,33 @@ static void __init init_p2pbridge(void)
 	early_write_config_word(hose, bus, devfn, PCI_BRIDGE_CONTROL, val);
 }
 
+static void __init init_second_ohare(void)
+{
+	struct device_node *np = of_find_node_by_name(NULL, "pci106b,7");
+	unsigned char bus, devfn;
+	unsigned short cmd;
+
+	if (np == NULL)
+		return;
+
+	/* This must run before we initialize the PICs since the second
+	 * ohare hosts a PIC that will be accessed there.
+	 */
+	if (pci_device_from_OF_node(np, &bus, &devfn) == 0) {
+		struct pci_controller* hose =
+			pci_find_hose_for_OF_device(np);
+		if (!hose) {
+			printk(KERN_ERR "Can't find PCI hose for OHare2 !\n");
+			return;
+		}
+		early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd);
+		cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+		cmd &= ~PCI_COMMAND_IO;
+		early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd);
+	}
+	has_second_ohare = 1;
+}
+
 /*
  * Some Apple desktop machines have a NEC PD720100A USB2 controller
  * on the motherboard. Open Firmware, on these, will disable the
@@ -688,9 +718,6 @@ static void __init fixup_nec_usb2(void)
 			       " EHCI, fixing up...\n");
 			data &= ~1UL;
 			early_write_config_dword(hose, bus, devfn, 0xe4, data);
-			early_write_config_byte(hose, bus,
-						devfn | 2, PCI_INTERRUPT_LINE,
-				nec->intrs[0].line);
 		}
 	}
 }
@@ -958,32 +985,28 @@ static int __init add_bridge(struct device_node *dev)
 	return 0;
 }
 
-static void __init pcibios_fixup_OF_interrupts(void)
+void __init pmac_pcibios_fixup(void)
 {
 	struct pci_dev* dev = NULL;
 
-	/*
-	 * Open Firmware often doesn't initialize the
-	 * PCI_INTERRUPT_LINE config register properly, so we
-	 * should find the device node and apply the interrupt
-	 * obtained from the OF device-tree
-	 */
 	for_each_pci_dev(dev) {
-		struct device_node *node;
-		node = pci_device_to_OF_node(dev);
-		/* this is the node, see if it has interrupts */
-		if (node && node->n_intrs > 0)
-			dev->irq = node->intrs[0].line;
-		pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+		/* Read interrupt from the device-tree */
+		pci_read_irq_line(dev);
+
+		/* Fixup interrupt for the modem/ethernet combo controller.
+		 * on machines with a second ohare chip.
+		 * The number in the device tree (27) is bogus (correct for
+		 * the ethernet-only board but not the combo ethernet/modem
+		 * board). The real interrupt is 28 on the second controller
+		 * -> 28+32 = 60.
+		 */
+		if (has_second_ohare &&
+		    dev->vendor == PCI_VENDOR_ID_DEC &&
+		    dev->device == PCI_DEVICE_ID_DEC_TULIP_PLUS)
+			dev->irq = irq_create_mapping(NULL, 60, 0);
 	}
 }
 
-void __init pmac_pcibios_fixup(void)
-{
-	/* Fixup interrupts according to OF tree */
-	pcibios_fixup_OF_interrupts();
-}
-
 #ifdef CONFIG_PPC64
 static void __init pmac_fixup_phb_resources(void)
 {
@@ -1071,6 +1094,7 @@ void __init pmac_pci_init(void)
 
 #else /* CONFIG_PPC64 */
 	init_p2pbridge();
+	init_second_ohare();
 	fixup_nec_usb2();
 
 	/* We are still having some issues with the Xserve G4, enabling
diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c
index d6eab8b..6d66359 100644
--- a/arch/powerpc/platforms/powermac/pfunc_base.c
+++ b/arch/powerpc/platforms/powermac/pfunc_base.c
@@ -24,19 +24,18 @@ static irqreturn_t macio_gpio_irq(int irq, void *data, struct pt_regs *regs)
 
 static int macio_do_gpio_irq_enable(struct pmf_function *func)
 {
-	if (func->node->n_intrs < 1)
+	unsigned int irq = irq_of_parse_and_map(func->node, 0);
+	if (irq == NO_IRQ)
 		return -EINVAL;
-
-	return request_irq(func->node->intrs[0].line, macio_gpio_irq, 0,
-			   func->node->name, func);
+	return request_irq(irq, macio_gpio_irq, 0, func->node->name, func);
 }
 
 static int macio_do_gpio_irq_disable(struct pmf_function *func)
 {
-	if (func->node->n_intrs < 1)
+	unsigned int irq = irq_of_parse_and_map(func->node, 0);
+	if (irq == NO_IRQ)
 		return -EINVAL;
-
-	free_irq(func->node->intrs[0].line, func);
+	free_irq(irq, func);
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 58a4c7b..3d328bc 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -65,13 +65,11 @@ static u32 level_mask[4];
 
 static DEFINE_SPINLOCK(pmac_pic_lock);
 
-#define GATWICK_IRQ_POOL_SIZE        10
-static struct interrupt_info gatwick_int_pool[GATWICK_IRQ_POOL_SIZE];
-
 #define NR_MASK_WORDS	((NR_IRQS + 31) / 32)
 static unsigned long ppc_lost_interrupts[NR_MASK_WORDS];
 static unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
 static int pmac_irq_cascade = -1;
+static struct irq_host *pmac_pic_host;
 
 static void __pmac_retrigger(unsigned int irq_nr)
 {
@@ -86,18 +84,16 @@ static void __pmac_retrigger(unsigned int irq_nr)
 	}
 }
 
-static void pmac_mask_and_ack_irq(unsigned int irq_nr)
+static void pmac_mask_and_ack_irq(unsigned int virq)
 {
-        unsigned long bit = 1UL << (irq_nr & 0x1f);
-        int i = irq_nr >> 5;
+	unsigned int src = irq_map[virq].hwirq;
+        unsigned long bit = 1UL << (virq & 0x1f);
+        int i = virq >> 5;
         unsigned long flags;
 
-        if ((unsigned)irq_nr >= max_irqs)
-                return;
-
 	spin_lock_irqsave(&pmac_pic_lock, flags);
-        __clear_bit(irq_nr, ppc_cached_irq_mask);
-        if (__test_and_clear_bit(irq_nr, ppc_lost_interrupts))
+        __clear_bit(src, ppc_cached_irq_mask);
+        if (__test_and_clear_bit(src, ppc_lost_interrupts))
                 atomic_dec(&ppc_n_lost_interrupts);
         out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]);
         out_le32(&pmac_irq_hw[i]->ack, bit);
@@ -110,17 +106,15 @@ static void pmac_mask_and_ack_irq(unsigned int irq_nr)
 	spin_unlock_irqrestore(&pmac_pic_lock, flags);
 }
 
-static void pmac_ack_irq(unsigned int irq_nr)
+static void pmac_ack_irq(unsigned int virq)
 {
-        unsigned long bit = 1UL << (irq_nr & 0x1f);
-        int i = irq_nr >> 5;
+	unsigned int src = irq_map[virq].hwirq;
+        unsigned long bit = 1UL << (src & 0x1f);
+        int i = src >> 5;
         unsigned long flags;
 
-        if ((unsigned)irq_nr >= max_irqs)
-                return;
-
   	spin_lock_irqsave(&pmac_pic_lock, flags);
-	if (__test_and_clear_bit(irq_nr, ppc_lost_interrupts))
+	if (__test_and_clear_bit(src, ppc_lost_interrupts))
                 atomic_dec(&ppc_n_lost_interrupts);
         out_le32(&pmac_irq_hw[i]->ack, bit);
         (void)in_le32(&pmac_irq_hw[i]->ack);
@@ -157,48 +151,51 @@ static void __pmac_set_irq_mask(unsigned int irq_nr, int nokicklost)
 /* When an irq gets requested for the first client, if it's an
  * edge interrupt, we clear any previous one on the controller
  */
-static unsigned int pmac_startup_irq(unsigned int irq_nr)
+static unsigned int pmac_startup_irq(unsigned int virq)
 {
 	unsigned long flags;
-        unsigned long bit = 1UL << (irq_nr & 0x1f);
-        int i = irq_nr >> 5;
+	unsigned int src = irq_map[virq].hwirq;
+        unsigned long bit = 1UL << (src & 0x1f);
+        int i = src >> 5;
 
   	spin_lock_irqsave(&pmac_pic_lock, flags);
-	if ((irq_desc[irq_nr].status & IRQ_LEVEL) == 0)
+	if ((irq_desc[virq].status & IRQ_LEVEL) == 0)
 		out_le32(&pmac_irq_hw[i]->ack, bit);
-        __set_bit(irq_nr, ppc_cached_irq_mask);
-        __pmac_set_irq_mask(irq_nr, 0);
+        __set_bit(src, ppc_cached_irq_mask);
+        __pmac_set_irq_mask(src, 0);
   	spin_unlock_irqrestore(&pmac_pic_lock, flags);
 
 	return 0;
 }
 
-static void pmac_mask_irq(unsigned int irq_nr)
+static void pmac_mask_irq(unsigned int virq)
 {
 	unsigned long flags;
+	unsigned int src = irq_map[virq].hwirq;
 
   	spin_lock_irqsave(&pmac_pic_lock, flags);
-        __clear_bit(irq_nr, ppc_cached_irq_mask);
-        __pmac_set_irq_mask(irq_nr, 0);
+        __clear_bit(src, ppc_cached_irq_mask);
+        __pmac_set_irq_mask(src, 0);
   	spin_unlock_irqrestore(&pmac_pic_lock, flags);
 }
 
-static void pmac_unmask_irq(unsigned int irq_nr)
+static void pmac_unmask_irq(unsigned int virq)
 {
 	unsigned long flags;
+	unsigned int src = irq_map[virq].hwirq;
 
 	spin_lock_irqsave(&pmac_pic_lock, flags);
-	__set_bit(irq_nr, ppc_cached_irq_mask);
-        __pmac_set_irq_mask(irq_nr, 0);
+	__set_bit(src, ppc_cached_irq_mask);
+        __pmac_set_irq_mask(src, 0);
   	spin_unlock_irqrestore(&pmac_pic_lock, flags);
 }
 
-static int pmac_retrigger(unsigned int irq_nr)
+static int pmac_retrigger(unsigned int virq)
 {
 	unsigned long flags;
 
   	spin_lock_irqsave(&pmac_pic_lock, flags);
-	__pmac_retrigger(irq_nr);
+	__pmac_retrigger(irq_map[virq].hwirq);
   	spin_unlock_irqrestore(&pmac_pic_lock, flags);
 	return 1;
 }
@@ -238,7 +235,7 @@ static irqreturn_t gatwick_action(int cpl, void *dev_id, struct pt_regs *regs)
 	return rc;
 }
 
-static int pmac_get_irq(struct pt_regs *regs)
+static unsigned int pmac_pic_get_irq(struct pt_regs *regs)
 {
 	int irq;
 	unsigned long bits = 0;
@@ -250,7 +247,7 @@ static int pmac_get_irq(struct pt_regs *regs)
        	/* IPI's are a hack on the powersurge -- Cort */
        	if ( smp_processor_id() != 0 ) {
 		psurge_smp_message_recv(regs);
-		return -2;	/* ignore, already handled */
+		return NO_IRQ_IGNORE;	/* ignore, already handled */
         }
 #endif /* CONFIG_SMP */
   	spin_lock_irqsave(&pmac_pic_lock, flags);
@@ -266,133 +263,9 @@ static int pmac_get_irq(struct pt_regs *regs)
 		break;
 	}
   	spin_unlock_irqrestore(&pmac_pic_lock, flags);
-
-	return irq;
-}
-
-/* This routine will fix some missing interrupt values in the device tree
- * on the gatwick mac-io controller used by some PowerBooks
- *
- * Walking of OF nodes could use a bit more fixing up here, but it's not
- * very important as this is all boot time code on static portions of the
- * device-tree.
- *
- * However, the modifications done to "intrs" will have to be removed and
- * replaced with proper updates of the "interrupts" properties or
- * AAPL,interrupts, yet to be decided, once the dynamic parsing is there.
- */
-static void __init pmac_fix_gatwick_interrupts(struct device_node *gw,
-					       int irq_base)
-{
-	struct device_node *node;
-	int count;
-
-	memset(gatwick_int_pool, 0, sizeof(gatwick_int_pool));
-	count = 0;
-	for (node = NULL; (node = of_get_next_child(gw, node)) != NULL;) {
-		/* Fix SCC */
-		if ((strcasecmp(node->name, "escc") == 0) && node->child) {
-			if (node->child->n_intrs < 3) {
-				node->child->intrs = &gatwick_int_pool[count];
-				count += 3;
-			}
-			node->child->n_intrs = 3;
-			node->child->intrs[0].line = 15+irq_base;
-			node->child->intrs[1].line =  4+irq_base;
-			node->child->intrs[2].line =  5+irq_base;
-			printk(KERN_INFO "irq: fixed SCC on gatwick"
-			       " (%d,%d,%d)\n",
-			       node->child->intrs[0].line,
-			       node->child->intrs[1].line,
-			       node->child->intrs[2].line);
-		}
-		/* Fix media-bay & left SWIM */
-		if (strcasecmp(node->name, "media-bay") == 0) {
-			struct device_node* ya_node;
-
-			if (node->n_intrs == 0)
-				node->intrs = &gatwick_int_pool[count++];
-			node->n_intrs = 1;
-			node->intrs[0].line = 29+irq_base;
-			printk(KERN_INFO "irq: fixed media-bay on gatwick"
-			       " (%d)\n", node->intrs[0].line);
-
-			ya_node = node->child;
-			while(ya_node) {
-				if (strcasecmp(ya_node->name, "floppy") == 0) {
-					if (ya_node->n_intrs < 2) {
-						ya_node->intrs = &gatwick_int_pool[count];
-						count += 2;
-					}
-					ya_node->n_intrs = 2;
-					ya_node->intrs[0].line = 19+irq_base;
-					ya_node->intrs[1].line =  1+irq_base;
-					printk(KERN_INFO "irq: fixed floppy on second controller (%d,%d)\n",
-						ya_node->intrs[0].line, ya_node->intrs[1].line);
-				}
-				if (strcasecmp(ya_node->name, "ata4") == 0) {
-					if (ya_node->n_intrs < 2) {
-						ya_node->intrs = &gatwick_int_pool[count];
-						count += 2;
-					}
-					ya_node->n_intrs = 2;
-					ya_node->intrs[0].line = 14+irq_base;
-					ya_node->intrs[1].line =  3+irq_base;
-					printk(KERN_INFO "irq: fixed ide on second controller (%d,%d)\n",
-						ya_node->intrs[0].line, ya_node->intrs[1].line);
-				}
-				ya_node = ya_node->sibling;
-			}
-		}
-	}
-	if (count > 10) {
-		printk("WARNING !! Gatwick interrupt pool overflow\n");
-		printk("  GATWICK_IRQ_POOL_SIZE = %d\n", GATWICK_IRQ_POOL_SIZE);
-		printk("              requested = %d\n", count);
-	}
-}
-
-/*
- * The PowerBook 3400/2400/3500 can have a combo ethernet/modem
- * card which includes an ohare chip that acts as a second interrupt
- * controller.  If we find this second ohare, set it up and fix the
- * interrupt value in the device tree for the ethernet chip.
- */
-static void __init enable_second_ohare(struct device_node *np)
-{
-	unsigned char bus, devfn;
-	unsigned short cmd;
-	struct device_node *ether;
-
-	/* This code doesn't strictly belong here, it could be part of
-	 * either the PCI initialisation or the feature code. It's kept
-	 * here for historical reasons.
-	 */
-	if (pci_device_from_OF_node(np, &bus, &devfn) == 0) {
-		struct pci_controller* hose =
-			pci_find_hose_for_OF_device(np);
-		if (!hose) {
-			printk(KERN_ERR "Can't find PCI hose for OHare2 !\n");
-			return;
-		}
-		early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd);
-		cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
-		cmd &= ~PCI_COMMAND_IO;
-		early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd);
-	}
-
-	/* Fix interrupt for the modem/ethernet combo controller. The number
-	 * in the device tree (27) is bogus (correct for the ethernet-only
-	 * board but not the combo ethernet/modem board).
-	 * The real interrupt is 28 on the second controller -> 28+32 = 60.
-	 */
-	ether = of_find_node_by_name(NULL, "pci1011,14");
-	if (ether && ether->n_intrs > 0) {
-		ether->intrs[0].line = 60;
-		printk(KERN_INFO "irq: Fixed ethernet IRQ to %d\n",
-		       ether->intrs[0].line);
-	}
-	of_node_put(ether);
+	if (unlikely(irq < 0))
+		return NO_IRQ;
+	return irq_linear_revmap(pmac_pic_host, irq);
 }
 
 #ifdef CONFIG_XMON
@@ -411,6 +284,50 @@ static struct irqaction gatwick_cascade_action = {
 	.name		= "cascade",
 };
 
+static int pmac_pic_host_match(struct irq_host *h, struct device_node *node)
+{
+	/* We match all, we don't always have a node anyway */
+	return 1;
+}
+
+static int pmac_pic_host_map(struct irq_host *h, unsigned int virq,
+			     irq_hw_number_t hw, unsigned int flags)
+{
+	struct irq_desc *desc = get_irq_desc(virq);
+	int level;
+
+	if (hw >= max_irqs)
+		return -EINVAL;
+
+	/* Mark level interrupts, set delayed disable for edge ones and set
+	 * handlers
+	 */
+	level = !!(level_mask[hw >> 5] & (1UL << (hw & 0x1f)));
+	if (level)
+		desc->status |= IRQ_LEVEL;
+	else
+		desc->status |= IRQ_DELAYED_DISABLE;
+	set_irq_chip_and_handler(virq, &pmac_pic, level ?
+				 handle_level_irq : handle_edge_irq);
+	return 0;
+}
+
+static int pmac_pic_host_xlate(struct irq_host *h, struct device_node *ct,
+			       u32 *intspec, unsigned int intsize,
+			       irq_hw_number_t *out_hwirq,
+			       unsigned int *out_flags)
+
+{
+	*out_hwirq = *intspec;
+	return 0;
+}
+
+static struct irq_host_ops pmac_pic_host_ops = {
+	.match = pmac_pic_host_match,
+	.map = pmac_pic_host_map,
+	.xlate = pmac_pic_host_xlate,
+};
+
 static void __init pmac_pic_probe_oldstyle(void)
 {
         int i;
@@ -420,7 +337,7 @@ static void __init pmac_pic_probe_oldstyle(void)
 	struct resource r;
 
 	/* Set our get_irq function */
-	ppc_md.get_irq = pmac_get_irq;
+	ppc_md.get_irq = pmac_pic_get_irq;
 
 	/*
 	 * Find the interrupt controller type & node
@@ -438,7 +355,6 @@ static void __init pmac_pic_probe_oldstyle(void)
 		if (slave) {
 			max_irqs = 64;
 			level_mask[1] = OHARE_LEVEL_MASK;
-			enable_second_ohare(slave);
 		}
 	} else if ((master = of_find_node_by_name(NULL, "mac-io")) != NULL) {
 		max_irqs = max_real_irqs = 64;
@@ -462,21 +378,18 @@ static void __init pmac_pic_probe_oldstyle(void)
 			max_irqs = 128;
 			level_mask[2] = HEATHROW_LEVEL_MASK;
 			level_mask[3] = 0;
-			pmac_fix_gatwick_interrupts(slave, max_real_irqs);
 		}
 	}
 	BUG_ON(master == NULL);
 
-	/* Mark level interrupts and set handlers */
-	for (i = 0; i < max_irqs; i++) {
-		int level = !!(level_mask[i >> 5] & (1UL << (i & 0x1f)));
-		if (level)
-			irq_desc[i].status |= IRQ_LEVEL;
-		else
-			irq_desc[i].status |= IRQ_DELAYED_DISABLE;
-		set_irq_chip_and_handler(i, &pmac_pic, level ?
-					 handle_level_irq : handle_edge_irq);
-	}
+	/*
+	 * Allocate an irq host
+	 */
+	pmac_pic_host = irq_alloc_host(IRQ_HOST_MAP_LINEAR, max_irqs,
+				       &pmac_pic_host_ops,
+				       max_irqs);
+	BUG_ON(pmac_pic_host == NULL);
+	irq_set_default_host(pmac_pic_host);
 
 	/* Get addresses of first controller if we have a node for it */
 	BUG_ON(of_address_to_resource(master, 0, &r));
@@ -503,7 +416,7 @@ static void __init pmac_pic_probe_oldstyle(void)
 			pmac_irq_hw[i++] =
 				(volatile struct pmac_irq_hw __iomem *)
 				(addr + 0x10);
-		pmac_irq_cascade = slave->intrs[0].line;
+		pmac_irq_cascade = irq_of_parse_and_map(slave, 0);
 
 		printk(KERN_INFO "irq: Found slave Apple PIC %s for %d irqs"
 		       " cascade: %d\n", slave->full_name,
@@ -516,12 +429,12 @@ static void __init pmac_pic_probe_oldstyle(void)
 		out_le32(&pmac_irq_hw[i]->enable, 0);
 
 	/* Hookup cascade irq */
-	if (slave)
+	if (slave && pmac_irq_cascade != NO_IRQ)
 		setup_irq(pmac_irq_cascade, &gatwick_cascade_action);
 
 	printk(KERN_INFO "irq: System has %d possible interrupts\n", max_irqs);
 #ifdef CONFIG_XMON
-	setup_irq(20, &xmon_action);
+	setup_irq(irq_create_mapping(NULL, 20, 0), &xmon_action);
 #endif
 }
 #endif /* CONFIG_PPC32 */
@@ -530,16 +443,11 @@ static void pmac_u3_cascade(unsigned int irq, struct irq_desc *desc,
 			    struct pt_regs *regs)
 {
 	struct mpic *mpic = desc->handler_data;
-	unsigned int max = 100;
 
-	while(max--) {
-		int cascade_irq = mpic_get_one_irq(mpic, regs);
-		if (max == 99)
-			desc->chip->eoi(irq);
-		if (irq < 0)
-			break;
+	unsigned int cascade_irq = mpic_get_one_irq(mpic, regs);
+	if (cascade_irq != NO_IRQ)
 		generic_handle_irq(cascade_irq, regs);
-	};
+	desc->chip->eoi(irq);
 }
 
 static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic)
@@ -549,21 +457,20 @@ static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic)
 	int nmi_irq;
 
 	pswitch = of_find_node_by_name(NULL, "programmer-switch");
-	if (pswitch && pswitch->n_intrs) {
-		nmi_irq = pswitch->intrs[0].line;
-		mpic_irq_set_priority(nmi_irq, 9);
-		setup_irq(nmi_irq, &xmon_action);
+	if (pswitch) {
+		nmi_irq = irq_of_parse_and_map(pswitch, 0);
+		if (nmi_irq != NO_IRQ) {
+			mpic_irq_set_priority(nmi_irq, 9);
+			setup_irq(nmi_irq, &xmon_action);
+		}
+		of_node_put(pswitch);
 	}
-	of_node_put(pswitch);
 #endif	/* defined(CONFIG_XMON) && defined(CONFIG_PPC32) */
 }
 
 static struct mpic * __init pmac_setup_one_mpic(struct device_node *np,
 						int master)
 {
-	unsigned char senses[128];
-	int offset = master ? 0 : 128;
-	int count = master ? 128 : 124;
 	const char *name = master ? " MPIC 1   " : " MPIC 2   ";
 	struct resource r;
 	struct mpic *mpic;
@@ -576,8 +483,6 @@ static struct mpic * __init pmac_setup_one_mpic(struct device_node *np,
 
 	pmac_call_feature(PMAC_FTR_ENABLE_MPIC, np, 0, 0);
 
-	prom_get_irq_senses(senses, offset, offset + count);
-
 	flags |= MPIC_WANTS_RESET;
 	if (get_property(np, "big-endian", NULL))
 		flags |= MPIC_BIG_ENDIAN;
@@ -588,8 +493,7 @@ static struct mpic * __init pmac_setup_one_mpic(struct device_node *np,
 	if (master && (flags & MPIC_BIG_ENDIAN))
 		flags |= MPIC_BROKEN_U3;
 
-	mpic = mpic_alloc(r.start, flags, 0, offset, count, master ? 252 : 0,
-			  senses, count, name);
+	mpic = mpic_alloc(np, r.start, flags, 0, 0, name);
 	if (mpic == NULL)
 		return NULL;
 
@@ -602,6 +506,7 @@ static int __init pmac_pic_probe_mpic(void)
 {
 	struct mpic *mpic1, *mpic2;
 	struct device_node *np, *master = NULL, *slave = NULL;
+	unsigned int cascade;
 
 	/* We can have up to 2 MPICs cascaded */
 	for (np = NULL; (np = of_find_node_by_type(np, "open-pic"))
@@ -638,8 +543,15 @@ static int __init pmac_pic_probe_mpic(void)
 	of_node_put(master);
 
 	/* No slave, let's go out */
-	if (slave == NULL || slave->n_intrs < 1)
+	if (slave == NULL)
+		return 0;
+
+	/* Get/Map slave interrupt */
+	cascade = irq_of_parse_and_map(slave, 0);
+	if (cascade == NO_IRQ) {
+		printk(KERN_ERR "Failed to map cascade IRQ\n");
 		return 0;
+	}
 
 	mpic2 = pmac_setup_one_mpic(slave, 0);
 	if (mpic2 == NULL) {
@@ -647,8 +559,8 @@ static int __init pmac_pic_probe_mpic(void)
 		of_node_put(slave);
 		return 0;
 	}
-	set_irq_data(slave->intrs[0].line, mpic2);
-	set_irq_chained_handler(slave->intrs[0].line, pmac_u3_cascade);
+	set_irq_data(cascade, mpic2);
+	set_irq_chained_handler(cascade, pmac_u3_cascade);
 
 	of_node_put(slave);
 	return 0;
@@ -657,6 +569,19 @@ static int __init pmac_pic_probe_mpic(void)
 
 void __init pmac_pic_init(void)
 {
+	unsigned int flags = 0;
+
+	/* We configure the OF parsing based on our oldworld vs. newworld
+	 * platform type and wether we were booted by BootX.
+	 */
+#ifdef CONFIG_PPC32
+	if (!pmac_newworld)
+		flags |= OF_IMAP_OLDWORLD_MAC;
+	if (get_property(of_chosen, "linux,bootx", NULL) != NULL)
+		flags |= OF_IMAP_NO_PHANDLE;
+	of_irq_map_init(flags);
+#endif /* CONFIG_PPC_32 */
+
 	/* We first try to detect Apple's new Core99 chipset, since mac-io
 	 * is quite different on those machines and contains an IBM MPIC2.
 	 */
@@ -679,6 +604,7 @@ unsigned long sleep_save_mask[2];
 
 /* This used to be passed by the PMU driver but that link got
  * broken with the new driver model. We use this tweak for now...
+ * We really want to do things differently though...
  */
 static int pmacpic_find_viaint(void)
 {
@@ -692,7 +618,7 @@ static int pmacpic_find_viaint(void)
 	np = of_find_node_by_name(NULL, "via-pmu");
 	if (np == NULL)
 		goto not_found;
-	viaint = np->intrs[0].line;
+	viaint = irq_of_parse_and_map(np, 0);;
 #endif /* CONFIG_ADB_PMU */
 
 not_found:
diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h
index 21c7b0f..94e7b24 100644
--- a/arch/powerpc/platforms/powermac/pmac.h
+++ b/arch/powerpc/platforms/powermac/pmac.h
@@ -12,6 +12,8 @@
 
 struct rtc_time;
 
+extern int pmac_newworld;
+
 extern long pmac_time_init(void);
 extern unsigned long pmac_get_boot_time(void);
 extern void pmac_get_rtc_time(struct rtc_time *);
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 8654b5f..31a9da7 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -613,9 +613,6 @@ static void __init pmac_init_early(void)
 	udbg_adb_init(!!strstr(cmd_line, "btextdbg"));
 
 #ifdef CONFIG_PPC64
-	/* Setup interrupt mapping options */
-	ppc64_interrupt_controller = IC_OPEN_PIC;
-
 	iommu_init_early_dart();
 #endif
 }
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 9639c66..9df7830 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -72,32 +72,62 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id,
 
 /* #define DEBUG */
 
-static void request_ras_irqs(struct device_node *np, char *propname,
+
+static void request_ras_irqs(struct device_node *np,
 			irqreturn_t (*handler)(int, void *, struct pt_regs *),
 			const char *name)
 {
-	unsigned int *ireg, len, i;
-	int virq, n_intr;
-
-	ireg = (unsigned int *)get_property(np, propname, &len);
-	if (ireg == NULL)
-		return;
-	n_intr = prom_n_intr_cells(np);
-	len /= n_intr * sizeof(*ireg);
-
-	for (i = 0; i < len; i++) {
-		virq = virt_irq_create_mapping(*ireg);
-		if (virq == NO_IRQ) {
-			printk(KERN_ERR "Unable to allocate interrupt "
-			       "number for %s\n", np->full_name);
-			return;
+	int i, index, count = 0;
+	struct of_irq oirq;
+	u32 *opicprop;
+	unsigned int opicplen;
+	unsigned int virqs[16];
+
+	/* Check for obsolete "open-pic-interrupt" property. If present, then
+	 * map those interrupts using the default interrupt host and default
+	 * trigger
+	 */
+	opicprop = (u32 *)get_property(np, "open-pic-interrupt", &opicplen);
+	if (opicprop) {
+		opicplen /= sizeof(u32);
+		for (i = 0; i < opicplen; i++) {
+			if (count > 15)
+				break;
+			virqs[count] = irq_create_mapping(NULL, *(opicprop++),
+							 IRQ_TYPE_NONE);
+			if (virqs[count] == NO_IRQ)
+				printk(KERN_ERR "Unable to allocate interrupt "
+				       "number for %s\n", np->full_name);
+			else
+				count++;
+
 		}
-		if (request_irq(irq_offset_up(virq), handler, 0, name, NULL)) {
+	}
+	/* Else use normal interrupt tree parsing */
+	else {
+		/* First try to do a proper OF tree parsing */
+		for (index = 0; of_irq_map_one(np, index, &oirq) == 0;
+		     index++) {
+			if (count > 15)
+				break;
+			virqs[count] = irq_create_of_mapping(oirq.controller,
+							    oirq.specifier,
+							    oirq.size);
+			if (virqs[count] == NO_IRQ)
+				printk(KERN_ERR "Unable to allocate interrupt "
+				       "number for %s\n", np->full_name);
+			else
+				count++;
+		}
+	}
+
+	/* Now request them */
+	for (i = 0; i < count; i++) {
+		if (request_irq(virqs[i], handler, 0, name, NULL)) {
 			printk(KERN_ERR "Unable to request interrupt %d for "
-			       "%s\n", irq_offset_up(virq), np->full_name);
+			       "%s\n", virqs[i], np->full_name);
 			return;
 		}
-		ireg += n_intr;
 	}
 }
 
@@ -115,20 +145,14 @@ static int __init init_ras_IRQ(void)
 	/* Internal Errors */
 	np = of_find_node_by_path("/event-sources/internal-errors");
 	if (np != NULL) {
-		request_ras_irqs(np, "open-pic-interrupt", ras_error_interrupt,
-				 "RAS_ERROR");
-		request_ras_irqs(np, "interrupts", ras_error_interrupt,
-				 "RAS_ERROR");
+		request_ras_irqs(np, ras_error_interrupt, "RAS_ERROR");
 		of_node_put(np);
 	}
 
 	/* EPOW Events */
 	np = of_find_node_by_path("/event-sources/epow-events");
 	if (np != NULL) {
-		request_ras_irqs(np, "open-pic-interrupt", ras_epow_interrupt,
-				 "RAS_EPOW");
-		request_ras_irqs(np, "interrupts", ras_epow_interrupt,
-				 "RAS_EPOW");
+		request_ras_irqs(np, ras_epow_interrupt, "RAS_EPOW");
 		of_node_put(np);
 	}
 
@@ -162,7 +186,7 @@ ras_epow_interrupt(int irq, void *dev_id, struct pt_regs * regs)
 
 	status = rtas_call(ras_check_exception_token, 6, 1, NULL,
 			   RAS_VECTOR_OFFSET,
-			   virt_irq_to_real(irq_offset_down(irq)),
+			   irq_map[irq].hwirq,
 			   RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS,
 			   critical, __pa(&ras_log_buf),
 				rtas_get_error_log_max());
@@ -198,7 +222,7 @@ ras_error_interrupt(int irq, void *dev_id, struct pt_regs * regs)
 
 	status = rtas_call(ras_check_exception_token, 6, 1, NULL,
 			   RAS_VECTOR_OFFSET,
-			   virt_irq_to_real(irq_offset_down(irq)),
+			   irq_map[irq].hwirq,
 			   RTAS_INTERNAL_ERROR, 1 /*Time Critical */,
 			   __pa(&ras_log_buf),
 				rtas_get_error_log_max());
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 476b564..54a5243 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -76,6 +76,9 @@
 #define DBG(fmt...)
 #endif
 
+/* move those away to a .h */
+extern void smp_init_pseries_mpic(void);
+extern void smp_init_pseries_xics(void);
 extern void find_udbg_vterm(void);
 
 int fwnmi_active;  /* TRUE if an FWNMI handler is present */
@@ -83,7 +86,7 @@ int fwnmi_active;  /* TRUE if an FWNMI handler is present */
 static void pseries_shared_idle_sleep(void);
 static void pseries_dedicated_idle_sleep(void);
 
-struct mpic *pSeries_mpic;
+static struct device_node *pSeries_mpic_node;
 
 static void pSeries_show_cpuinfo(struct seq_file *m)
 {
@@ -118,78 +121,92 @@ static void __init fwnmi_init(void)
 		fwnmi_active = 1;
 }
 
-void pSeries_8259_cascade(unsigned int irq, struct irq_desc *desc,
+void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc,
 			  struct pt_regs *regs)
 {
-	unsigned int max = 100;
-
-	while(max--) {
-		int cascade_irq = i8259_irq(regs);
-		if (max == 99)
-			desc->chip->eoi(irq);
-		if (cascade_irq < 0)
-			break;
+	unsigned int cascade_irq = i8259_irq(regs);
+	if (cascade_irq != NO_IRQ)
 		generic_handle_irq(cascade_irq, regs);
-	};
+	desc->chip->eoi(irq);
 }
 
-static void __init pSeries_init_mpic(void)
+static void __init pseries_mpic_init_IRQ(void)
 {
+	struct device_node *np, *old, *cascade = NULL;
         unsigned int *addrp;
-	struct device_node *np;
 	unsigned long intack = 0;
-
-	/* All ISUs are setup, complete initialization */
-	mpic_init(pSeries_mpic);
-
-	/* Check what kind of cascade ACK we have */
-        if (!(np = of_find_node_by_name(NULL, "pci"))
-            || !(addrp = (unsigned int *)
-                 get_property(np, "8259-interrupt-acknowledge", NULL)))
-                printk(KERN_ERR "Cannot find pci to get ack address\n");
-        else
-		intack = addrp[prom_n_addr_cells(np)-1];
-	of_node_put(np);
-
-	/* Setup the legacy interrupts & controller */
-	i8259_init(intack, 0);
-
-	/* Hook cascade to mpic */
-	set_irq_chained_handler(NUM_ISA_INTERRUPTS, pSeries_8259_cascade);
-}
-
-static void __init pSeries_setup_mpic(void)
-{
 	unsigned int *opprop;
 	unsigned long openpic_addr = 0;
-        unsigned char senses[NR_IRQS - NUM_ISA_INTERRUPTS];
-        struct device_node *root;
-	int irq_count;
+	unsigned int cascade_irq;
+	int naddr, n, i, opplen;
+	struct mpic *mpic;
 
-	/* Find the Open PIC if present */
-	root = of_find_node_by_path("/");
-	opprop = (unsigned int *) get_property(root, "platform-open-pic", NULL);
+	np = of_find_node_by_path("/");
+	naddr = prom_n_addr_cells(np);
+	opprop = (unsigned int *) get_property(np, "platform-open-pic", &opplen);
 	if (opprop != 0) {
-		int n = prom_n_addr_cells(root);
-
-		for (openpic_addr = 0; n > 0; --n)
-			openpic_addr = (openpic_addr << 32) + *opprop++;
+		openpic_addr = of_read_number(opprop, naddr);
 		printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr);
 	}
-	of_node_put(root);
+	of_node_put(np);
 
 	BUG_ON(openpic_addr == 0);
 
-	/* Get the sense values from OF */
-	prom_get_irq_senses(senses, NUM_ISA_INTERRUPTS, NR_IRQS);
-	
 	/* Setup the openpic driver */
-	irq_count = NR_IRQS - NUM_ISA_INTERRUPTS - 4; /* leave room for IPIs */
-	pSeries_mpic = mpic_alloc(openpic_addr, MPIC_PRIMARY,
-				  16, 16, irq_count, /* isu size, irq offset, irq count */ 
-				  NR_IRQS - 4, /* ipi offset */
-				  senses, irq_count, /* sense & sense size */
-				  " MPIC     ");
+	mpic = mpic_alloc(pSeries_mpic_node, openpic_addr,
+			  MPIC_PRIMARY,
+			  16, 250, /* isu size, irq count */
+			  " MPIC     ");
+	BUG_ON(mpic == NULL);
+
+	/* Add ISUs */
+	opplen /= sizeof(u32);
+	for (n = 0, i = naddr; i < opplen; i += naddr, n++) {
+		unsigned long isuaddr = of_read_number(opprop + i, naddr);
+		mpic_assign_isu(mpic, n, isuaddr);
+	}
+
+	/* All ISUs are setup, complete initialization */
+	mpic_init(mpic);
+
+	/* Look for cascade */
+	for_each_node_by_type(np, "interrupt-controller")
+		if (device_is_compatible(np, "chrp,iic")) {
+			cascade = np;
+			break;
+		}
+	if (cascade == NULL)
+		return;
+
+	cascade_irq = irq_of_parse_and_map(cascade, 0);
+	if (cascade == NO_IRQ) {
+		printk(KERN_ERR "xics: failed to map cascade interrupt");
+		return;
+	}
+
+	/* Check ACK type */
+	for (old = of_node_get(cascade); old != NULL ; old = np) {
+		np = of_get_parent(old);
+		of_node_put(old);
+		if (np == NULL)
+			break;
+		if (strcmp(np->name, "pci") != 0)
+			continue;
+		addrp = (u32 *)get_property(np, "8259-interrupt-acknowledge",
+					    NULL);
+		if (addrp == NULL)
+			continue;
+		naddr = prom_n_addr_cells(np);
+		intack = addrp[naddr-1];
+		if (naddr > 1)
+			intack |= ((unsigned long)addrp[naddr-2]) << 32;
+	}
+	if (intack)
+		printk(KERN_DEBUG "mpic: PCI 8259 intack at 0x%016lx\n",
+		       intack);
+	i8259_init(cascade, intack);
+	of_node_put(cascade);
+	set_irq_chained_handler(cascade_irq, pseries_8259_cascade);
 }
 
 static void pseries_lpar_enable_pmcs(void)
@@ -207,21 +224,67 @@ static void pseries_lpar_enable_pmcs(void)
 		get_lppaca()->pmcregs_in_use = 1;
 }
 
-static void __init pSeries_setup_arch(void)
+#ifdef CONFIG_KEXEC
+static void pseries_kexec_cpu_down_mpic(int crash_shutdown, int secondary)
 {
-	/* Fixup ppc_md depending on the type of interrupt controller */
-	if (ppc64_interrupt_controller == IC_OPEN_PIC) {
-		ppc_md.init_IRQ       = pSeries_init_mpic;
-		ppc_md.get_irq        = mpic_get_irq;
-		/* Allocate the mpic now, so that find_and_init_phbs() can
-		 * fill the ISUs */
-		pSeries_setup_mpic();
-	} else
-		ppc_md.init_IRQ       = xics_init_IRQ;
+	mpic_teardown_this_cpu(secondary);
+}
 
+static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary)
+{
+	/* Don't risk a hypervisor call if we're crashing */
+	if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) {
+		unsigned long vpa = __pa(get_lppaca());
+
+		if (unregister_vpa(hard_smp_processor_id(), vpa)) {
+			printk("VPA deregistration of cpu %u (hw_cpu_id %d) "
+					"failed\n", smp_processor_id(),
+					hard_smp_processor_id());
+		}
+	}
+	xics_teardown_cpu(secondary);
+}
+#endif /* CONFIG_KEXEC */
+
+static void __init pseries_discover_pic(void)
+{
+	struct device_node *np;
+	char *typep;
+
+	for (np = NULL; (np = of_find_node_by_name(np,
+						   "interrupt-controller"));) {
+		typep = (char *)get_property(np, "compatible", NULL);
+		if (strstr(typep, "open-pic")) {
+			pSeries_mpic_node = of_node_get(np);
+			ppc_md.init_IRQ       = pseries_mpic_init_IRQ;
+			ppc_md.get_irq        = mpic_get_irq;
+#ifdef CONFIG_KEXEC
+			ppc_md.kexec_cpu_down = pseries_kexec_cpu_down_mpic;
+#endif
 #ifdef CONFIG_SMP
-	smp_init_pSeries();
+			smp_init_pseries_mpic();
 #endif
+			return;
+		} else if (strstr(typep, "ppc-xicp")) {
+			ppc_md.init_IRQ       = xics_init_IRQ;
+#ifdef CONFIG_KEXEC
+			ppc_md.kexec_cpu_down = pseries_kexec_cpu_down_xics;
+#endif
+#ifdef CONFIG_SMP
+			smp_init_pseries_xics();
+#endif
+			return;
+		}
+	}
+	printk(KERN_ERR "pSeries_discover_pic: failed to recognize"
+	       " interrupt-controller\n");
+}
+
+static void __init pSeries_setup_arch(void)
+{
+	/* Discover PIC type and setup ppc_md accordingly */
+	pseries_discover_pic();
+
 	/* openpic global configuration register (64-bit format). */
 	/* openpic Interrupt Source Unit pointer (64-bit format). */
 	/* python0 facility area (mmio) (64-bit format) REAL address. */
@@ -273,33 +336,6 @@ static int __init pSeries_init_panel(void)
 }
 arch_initcall(pSeries_init_panel);
 
-static  void __init pSeries_discover_pic(void)
-{
-	struct device_node *np;
-	char *typep;
-
-	/*
-	 * Setup interrupt mapping options that are needed for finish_device_tree
-	 * to properly parse the OF interrupt tree & do the virtual irq mapping
-	 */
-	__irq_offset_value = NUM_ISA_INTERRUPTS;
-	ppc64_interrupt_controller = IC_INVALID;
-	for (np = NULL; (np = of_find_node_by_name(np, "interrupt-controller"));) {
-		typep = (char *)get_property(np, "compatible", NULL);
-		if (strstr(typep, "open-pic")) {
-			ppc64_interrupt_controller = IC_OPEN_PIC;
-			break;
-		} else if (strstr(typep, "ppc-xicp")) {
-			ppc64_interrupt_controller = IC_PPC_XIC;
-			break;
-		}
-	}
-	if (ppc64_interrupt_controller == IC_INVALID)
-		printk("pSeries_discover_pic: failed to recognize"
-			" interrupt-controller\n");
-
-}
-
 static void pSeries_mach_cpu_die(void)
 {
 	local_irq_disable();
@@ -342,8 +378,6 @@ static void __init pSeries_init_early(void)
 
 	iommu_init_early_pSeries();
 
-	pSeries_discover_pic();
-
 	DBG(" <- pSeries_init_early()\n");
 }
 
@@ -515,27 +549,6 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus)
 	return PCI_PROBE_NORMAL;
 }
 
-#ifdef CONFIG_KEXEC
-static void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
-{
-	/* Don't risk a hypervisor call if we're crashing */
-	if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) {
-		unsigned long vpa = __pa(get_lppaca());
-
-		if (unregister_vpa(hard_smp_processor_id(), vpa)) {
-			printk("VPA deregistration of cpu %u (hw_cpu_id %d) "
-					"failed\n", smp_processor_id(),
-					hard_smp_processor_id());
-		}
-	}
-
-	if (ppc64_interrupt_controller == IC_OPEN_PIC)
-		mpic_teardown_this_cpu(secondary);
-	else
-		xics_teardown_cpu(secondary);
-}
-#endif
-
 define_machine(pseries) {
 	.name			= "pSeries",
 	.probe			= pSeries_probe,
@@ -560,7 +573,6 @@ define_machine(pseries) {
 	.system_reset_exception = pSeries_system_reset_exception,
 	.machine_check_exception = pSeries_machine_check_exception,
 #ifdef CONFIG_KEXEC
-	.kexec_cpu_down		= pseries_kexec_cpu_down,
 	.machine_kexec		= default_machine_kexec,
 	.machine_kexec_prepare	= default_machine_kexec_prepare,
 	.machine_crash_shutdown	= default_machine_crash_shutdown,
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 4ad144d..ac61098 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -416,27 +416,12 @@ static struct smp_ops_t pSeries_xics_smp_ops = {
 #endif
 
 /* This is called very early */
-void __init smp_init_pSeries(void)
+static void __init smp_init_pseries(void)
 {
 	int i;
 
 	DBG(" -> smp_init_pSeries()\n");
 
-	switch (ppc64_interrupt_controller) {
-#ifdef CONFIG_MPIC
-	case IC_OPEN_PIC:
-		smp_ops = &pSeries_mpic_smp_ops;
-		break;
-#endif
-#ifdef CONFIG_XICS
-	case IC_PPC_XIC:
-		smp_ops = &pSeries_xics_smp_ops;
-		break;
-#endif
-	default:
-		panic("Invalid interrupt controller");
-	}
-
 #ifdef CONFIG_HOTPLUG_CPU
 	smp_ops->cpu_disable = pSeries_cpu_disable;
 	smp_ops->cpu_die = pSeries_cpu_die;
@@ -471,3 +456,18 @@ void __init smp_init_pSeries(void)
 	DBG(" <- smp_init_pSeries()\n");
 }
 
+#ifdef CONFIG_MPIC
+void __init smp_init_pseries_mpic(void)
+{
+	smp_ops = &pSeries_mpic_smp_ops;
+
+	smp_init_pseries();
+}
+#endif
+
+void __init smp_init_pseries_xics(void)
+{
+	smp_ops = &pSeries_xics_smp_ops;
+
+	smp_init_pseries();
+}
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index c7f0442..716972a 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -8,6 +8,9 @@
  *  as published by the Free Software Foundation; either version
  *  2 of the License, or (at your option) any later version.
  */
+
+#undef DEBUG
+
 #include <linux/types.h>
 #include <linux/threads.h>
 #include <linux/kernel.h>
@@ -19,6 +22,7 @@
 #include <linux/gfp.h>
 #include <linux/radix-tree.h>
 #include <linux/cpu.h>
+
 #include <asm/firmware.h>
 #include <asm/prom.h>
 #include <asm/io.h>
@@ -31,9 +35,6 @@
 
 #include "xics.h"
 
-/* This is used to map real irq numbers to virtual */
-static struct radix_tree_root irq_map = RADIX_TREE_INIT(GFP_ATOMIC);
-
 #define XICS_IPI		2
 #define XICS_IRQ_SPURIOUS	0
 
@@ -64,12 +65,12 @@ struct xics_ipl {
 
 static struct xics_ipl __iomem *xics_per_cpu[NR_CPUS];
 
-static int xics_irq_8259_cascade = 0;
-static int xics_irq_8259_cascade_real = 0;
 static unsigned int default_server = 0xFF;
 static unsigned int default_distrib_server = 0;
 static unsigned int interrupt_server_size = 8;
 
+static struct irq_host *xics_host;
+
 /*
  * XICS only has a single IPI, so encode the messages per CPU
  */
@@ -85,7 +86,7 @@ static int ibm_int_off;
 /* Direct HW low level accessors */
 
 
-static inline int direct_xirr_info_get(int n_cpu)
+static inline unsigned int direct_xirr_info_get(int n_cpu)
 {
 	return in_be32(&xics_per_cpu[n_cpu]->xirr.word);
 }
@@ -130,7 +131,7 @@ static inline long plpar_xirr(unsigned long *xirr_ret)
 	return plpar_hcall(H_XIRR, 0, 0, 0, 0, xirr_ret, &dummy, &dummy);
 }
 
-static inline int lpar_xirr_info_get(int n_cpu)
+static inline unsigned int lpar_xirr_info_get(int n_cpu)
 {
 	unsigned long lpar_rc;
 	unsigned long return_value;
@@ -138,7 +139,7 @@ static inline int lpar_xirr_info_get(int n_cpu)
 	lpar_rc = plpar_xirr(&return_value);
 	if (lpar_rc != H_SUCCESS)
 		panic(" bad return code xirr - rc = %lx \n", lpar_rc);
-	return (int)return_value;
+	return (unsigned int)return_value;
 }
 
 static inline void lpar_xirr_info_set(int n_cpu, int value)
@@ -175,11 +176,11 @@ static inline void lpar_qirr_info(int n_cpu , u8 value)
 
 
 #ifdef CONFIG_SMP
-static int get_irq_server(unsigned int irq)
+static int get_irq_server(unsigned int virq)
 {
 	unsigned int server;
 	/* For the moment only implement delivery to all cpus or one cpu */
-	cpumask_t cpumask = irq_desc[irq].affinity;
+	cpumask_t cpumask = irq_desc[virq].affinity;
 	cpumask_t tmp = CPU_MASK_NONE;
 
 	if (!distribute_irqs)
@@ -200,7 +201,7 @@ static int get_irq_server(unsigned int irq)
 
 }
 #else
-static int get_irq_server(unsigned int irq)
+static int get_irq_server(unsigned int virq)
 {
 	return default_server;
 }
@@ -213,9 +214,11 @@ static void xics_unmask_irq(unsigned int virq)
 	int call_status;
 	unsigned int server;
 
-	irq = virt_irq_to_real(irq_offset_down(virq));
-	WARN_ON(irq == NO_IRQ);
-	if (irq == XICS_IPI || irq == NO_IRQ)
+	pr_debug("xics: unmask virq %d\n", virq);
+
+	irq = (unsigned int)irq_map[virq].hwirq;
+	pr_debug(" -> map to hwirq 0x%x\n", irq);
+	if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
 		return;
 
 	server = get_irq_server(virq);
@@ -267,75 +270,57 @@ static void xics_mask_irq(unsigned int virq)
 {
 	unsigned int irq;
 
-	irq = virt_irq_to_real(irq_offset_down(virq));
-	WARN_ON(irq == NO_IRQ);
-	if (irq != NO_IRQ)
-		xics_mask_real_irq(irq);
+	pr_debug("xics: mask virq %d\n", virq);
+
+	irq = (unsigned int)irq_map[virq].hwirq;
+	if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
+		return;
+	xics_mask_real_irq(irq);
 }
 
-static void xics_set_irq_revmap(unsigned int virq)
+static unsigned int xics_startup(unsigned int virq)
 {
 	unsigned int irq;
 
-	irq = irq_offset_down(virq);
-	if (radix_tree_insert(&irq_map, virt_irq_to_real(irq),
-			      &virt_irq_to_real_map[irq]) == -ENOMEM)
-		printk(KERN_CRIT "Out of memory creating real -> virtual"
-		       " IRQ mapping for irq %u (real 0x%x)\n",
-		       virq, virt_irq_to_real(irq));
-}
+	/* force a reverse mapping of the interrupt so it gets in the cache */
+	irq = (unsigned int)irq_map[virq].hwirq;
+	irq_radix_revmap(xics_host, irq);
 
-static unsigned int xics_startup(unsigned int virq)
-{
-	xics_set_irq_revmap(virq);
+	/* unmask it */
 	xics_unmask_irq(virq);
 	return 0;
 }
 
-static unsigned int real_irq_to_virt(unsigned int real_irq)
-{
-	unsigned int *ptr;
-
-	ptr = radix_tree_lookup(&irq_map, real_irq);
-	if (ptr == NULL)
-		return NO_IRQ;
-	return ptr - virt_irq_to_real_map;
-}
-
-static void xics_eoi_direct(unsigned int irq)
+static void xics_eoi_direct(unsigned int virq)
 {
 	int cpu = smp_processor_id();
+	unsigned int irq = (unsigned int)irq_map[virq].hwirq;
 
 	iosync();
-	direct_xirr_info_set(cpu, ((0xff << 24) |
-				   (virt_irq_to_real(irq_offset_down(irq)))));
+	direct_xirr_info_set(cpu, (0xff << 24) | irq);
 }
 
 
-static void xics_eoi_lpar(unsigned int irq)
+static void xics_eoi_lpar(unsigned int virq)
 {
 	int cpu = smp_processor_id();
+	unsigned int irq = (unsigned int)irq_map[virq].hwirq;
 
 	iosync();
-	lpar_xirr_info_set(cpu, ((0xff << 24) |
-				 (virt_irq_to_real(irq_offset_down(irq)))));
-
+	lpar_xirr_info_set(cpu, (0xff << 24) | irq);
 }
 
-static inline int xics_remap_irq(int vec)
+static inline unsigned int xics_remap_irq(unsigned int vec)
 {
-	int irq;
+	unsigned int irq;
 
 	vec &= 0x00ffffff;
 
 	if (vec == XICS_IRQ_SPURIOUS)
 		return NO_IRQ;
-
-	irq = real_irq_to_virt(vec);
-	if (irq == NO_IRQ)
-		irq = real_irq_to_virt_slowpath(vec);
+	irq = irq_radix_revmap(xics_host, vec);
 	if (likely(irq != NO_IRQ))
-		return irq_offset_up(irq);
+		return irq;
 
 	printk(KERN_ERR "Interrupt %u (real) is invalid,"
 	       " disabling it.\n", vec);
@@ -343,14 +328,14 @@ static inline int xics_remap_irq(int vec)
 	return NO_IRQ;
 }
 
-static int xics_get_irq_direct(struct pt_regs *regs)
+static unsigned int xics_get_irq_direct(struct pt_regs *regs)
 {
 	unsigned int cpu = smp_processor_id();
 
 	return xics_remap_irq(direct_xirr_info_get(cpu));
 }
 
-static int xics_get_irq_lpar(struct pt_regs *regs)
+static unsigned int xics_get_irq_lpar(struct pt_regs *regs)
 {
 	unsigned int cpu = smp_processor_id();
 
@@ -437,8 +422,8 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
 	unsigned long newmask;
 	cpumask_t tmp = CPU_MASK_NONE;
 
-	irq = virt_irq_to_real(irq_offset_down(virq));
-	if (irq == XICS_IPI || irq == NO_IRQ)
+	irq = (unsigned int)irq_map[virq].hwirq;
+	if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
 		return;
 
 	status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
@@ -469,6 +454,24 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
 	}
 }
 
+void xics_setup_cpu(void)
+{
+	int cpu = smp_processor_id();
+
+	xics_set_cpu_priority(cpu, 0xff);
+
+	/*
+	 * Put the calling processor into the GIQ.  This is really only
+	 * necessary from a secondary thread as the OF start-cpu interface
+	 * performs this function for us on primary threads.
+	 *
+	 * XXX: undo of teardown on kexec needs this too, as may hotplug
+	 */
+	rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
+		(1UL << interrupt_server_size) - 1 - default_distrib_server, 1);
+}
+
+
 static struct irq_chip xics_pic_direct = {
 	.typename = " XICS     ",
 	.startup = xics_startup,
@@ -489,90 +492,245 @@ static struct irq_chip xics_pic_lpar = {
 };
 
 
-void xics_setup_cpu(void)
+static int xics_host_match(struct irq_host *h, struct device_node *node)
 {
-	int cpu = smp_processor_id();
+	/* IBM machines have interrupt parents of various funky types for things
+	 * like vdevices, events, etc... The trick we use here is to match
+	 * everything here except the legacy 8259 which is compatible "chrp,iic"
+	 */
+	return !device_is_compatible(node, "chrp,iic");
+}
 
-	xics_set_cpu_priority(cpu, 0xff);
+static int xics_host_map_direct(struct irq_host *h, unsigned int virq,
+				irq_hw_number_t hw, unsigned int flags)
+{
+	unsigned int sense = flags & IRQ_TYPE_SENSE_MASK;
 
-	/*
-	 * Put the calling processor into the GIQ.  This is really only
-	 * necessary from a secondary thread as the OF start-cpu interface
-	 * performs this function for us on primary threads.
-	 *
-	 * XXX: undo of teardown on kexec needs this too, as may hotplug
+	pr_debug("xics: map_direct virq %d, hwirq 0x%lx, flags: 0x%x\n",
+		 virq, hw, flags);
+
+	if (sense && sense != IRQ_TYPE_LEVEL_LOW)
+		printk(KERN_WARNING "xics: using unsupported sense 0x%x"
+		       " for irq %d (h: 0x%lx)\n", flags, virq, hw);
+
+	get_irq_desc(virq)->status |= IRQ_LEVEL;
+	set_irq_chip_and_handler(virq, &xics_pic_direct, handle_fasteoi_irq);
+	return 0;
+}
+
+static int xics_host_map_lpar(struct irq_host *h, unsigned int virq,
+			      irq_hw_number_t hw, unsigned int flags)
+{
+	unsigned int sense = flags & IRQ_TYPE_SENSE_MASK;
+
+	pr_debug("xics: map_lpar virq %d, hwirq 0x%lx, flags: 0x%x\n",
+		 virq, hw, flags);
+
+	if (sense && sense != IRQ_TYPE_LEVEL_LOW)
+		printk(KERN_WARNING "xics: using unsupported sense 0x%x"
+		       " for irq %d (h: 0x%lx)\n", flags, virq, hw);
+
+	get_irq_desc(virq)->status |= IRQ_LEVEL;
+	set_irq_chip_and_handler(virq, &xics_pic_lpar, handle_fasteoi_irq);
+	return 0;
+}
+
+static int xics_host_xlate(struct irq_host *h, struct device_node *ct,
+			   u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	/* Current xics implementation translates everything
+	 * to level. It is not technically right for MSIs but this
+	 * is irrelevant at this point. We might get smarter in the future
 	 */
-	rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
-		(1UL << interrupt_server_size) - 1 - default_distrib_server, 1);
+	*out_hwirq = intspec[0];
+	*out_flags = IRQ_TYPE_LEVEL_LOW;
+
+	return 0;
+}
+
+static struct irq_host_ops xics_host_direct_ops = {
+	.match = xics_host_match,
+	.map = xics_host_map_direct,
+	.xlate = xics_host_xlate,
+};
+
+static struct irq_host_ops xics_host_lpar_ops = {
+	.match = xics_host_match,
+	.map = xics_host_map_lpar,
+	.xlate = xics_host_xlate,
+};
+
+static void __init xics_init_host(void)
+{
+	struct irq_host_ops *ops;
+
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		ops = &xics_host_lpar_ops;
+	else
+		ops = &xics_host_direct_ops;
+	xics_host = irq_alloc_host(IRQ_HOST_MAP_TREE, 0, ops,
+				   XICS_IRQ_SPURIOUS);
+	BUG_ON(xics_host == NULL);
+	irq_set_default_host(xics_host);
 }
 
-void xics_init_IRQ(void)
+static void __init xics_map_one_cpu(int hw_id, unsigned long addr,
+				     unsigned long size)
 {
+#ifdef CONFIG_SMP
 	int i;
-	unsigned long intr_size = 0;
-	struct device_node *np;
-	uint *ireg, ilen, indx = 0;
-	unsigned long intr_base = 0;
-	struct xics_interrupt_node {
-		unsigned long addr;
-		unsigned long size;
-	} intnodes[NR_CPUS];
-	struct irq_chip *chip;
 
-	ppc64_boot_msg(0x20, "XICS Init");
+	/* This may look gross but it's good enough for now, we don't quite
+	 * have a hard -> linux processor id matching.
+	 */
+	for_each_possible_cpu(i) {
+		if (!cpu_present(i))
+			continue;
+		if (hw_id == get_hard_smp_processor_id(i)) {
+			xics_per_cpu[i] = ioremap(addr, size);
+			return;
+		}
+	}
+#else
+	if (hw_id != 0)
+		return;
+	xics_per_cpu[0] = ioremap(addr, size);
+#endif /* CONFIG_SMP */
+}
 
-	ibm_get_xive = rtas_token("ibm,get-xive");
-	ibm_set_xive = rtas_token("ibm,set-xive");
-	ibm_int_on  = rtas_token("ibm,int-on");
-	ibm_int_off = rtas_token("ibm,int-off");
+static void __init xics_init_one_node(struct device_node *np,
+				      unsigned int *indx)
+{
+	unsigned int ilen;
+	u32 *ireg;
 
-	np = of_find_node_by_type(NULL, "PowerPC-External-Interrupt-Presentation");
-	if (!np)
-		panic("xics_init_IRQ: can't find interrupt presentation");
+	/* This code does the theorically broken assumption that the interrupt
+	 * server numbers are the same as the hard CPU numbers.
+	 * This happens to be the case so far but we are playing with fire...
+	 * should be fixed one of these days. -BenH.
+	 */
+	ireg = (u32 *)get_property(np, "ibm,interrupt-server-ranges", NULL);
 
-nextnode:
-	ireg = (uint *)get_property(np, "ibm,interrupt-server-ranges", NULL);
+	/* Do that ever happen ? we'll know soon enough... but even good'old
+	 * f80 does have that property ..
+	 */
+	WARN_ON(ireg == NULL);
 	if (ireg) {
 		/*
 		 * set node starting index for this node
 		 */
-		indx = *ireg;
+		*indx = *ireg;
 	}
-
-	ireg = (uint *)get_property(np, "reg", &ilen);
+	ireg = (u32 *)get_property(np, "reg", &ilen);
 	if (!ireg)
 		panic("xics_init_IRQ: can't find interrupt reg property");
 
-	while (ilen) {
-		intnodes[indx].addr = (unsigned long)*ireg++ << 32;
-		ilen -= sizeof(uint);
-		intnodes[indx].addr |= *ireg++;
-		ilen -= sizeof(uint);
-		intnodes[indx].size = (unsigned long)*ireg++ << 32;
-		ilen -= sizeof(uint);
-		intnodes[indx].size |= *ireg++;
-		ilen -= sizeof(uint);
-		indx++;
-		if (indx >= NR_CPUS) break;
+	while (ilen >= (4 * sizeof(u32))) {
+		unsigned long addr, size;
+
+		/* XXX Use proper OF parsing code here !!! */
+		addr = (unsigned long)*ireg++ << 32;
+		ilen -= sizeof(u32);
+		addr |= *ireg++;
+		ilen -= sizeof(u32);
+		size = (unsigned long)*ireg++ << 32;
+		ilen -= sizeof(u32);
+		size |= *ireg++;
+		ilen -= sizeof(u32);
+		xics_map_one_cpu(*indx, addr, size);
+		(*indx)++;
+	}
+}
+
+
+static void __init xics_setup_8259_cascade(void)
+{
+	struct device_node *np, *old, *found = NULL;
+	int cascade, naddr;
+	u32 *addrp;
+	unsigned long intack = 0;
+
+	for_each_node_by_type(np, "interrupt-controller")
+		if (device_is_compatible(np, "chrp,iic")) {
+			found = np;
+			break;
+		}
+	if (found == NULL) {
+		printk(KERN_DEBUG "xics: no ISA interrupt controller\n");
+		return;
 	}
+	cascade = irq_of_parse_and_map(found, 0);
+	if (cascade == NO_IRQ) {
+		printk(KERN_ERR "xics: failed to map cascade interrupt");
+		return;
+	}
+	pr_debug("xics: cascade mapped to irq %d\n", cascade);
+
+	for (old = of_node_get(found); old != NULL ; old = np) {
+		np = of_get_parent(old);
+		of_node_put(old);
+		if (np == NULL)
+			break;
+		if (strcmp(np->name, "pci") != 0)
+			continue;
+		addrp = (u32 *)get_property(np, "8259-interrupt-acknowledge", NULL);
+		if (addrp == NULL)
+			continue;
+		naddr = prom_n_addr_cells(np);
+		intack = addrp[naddr-1];
+		if (naddr > 1)
+			intack |= ((unsigned long)addrp[naddr-2]) << 32;
+	}
+	if (intack)
+		printk(KERN_DEBUG "xics: PCI 8259 intack at 0x%016lx\n", intack);
+	i8259_init(found, intack);
+	of_node_put(found);
+	set_irq_chained_handler(cascade, pseries_8259_cascade);
+}
 
-	np = of_find_node_by_type(np, "PowerPC-External-Interrupt-Presentation");
-	if ((indx < NR_CPUS) && np) goto nextnode;
+void __init xics_init_IRQ(void)
+{
+	int i;
+	struct device_node *np;
+	u32 *ireg, ilen, indx = 0;
+	int found = 0;
+
+	ppc64_boot_msg(0x20, "XICS Init");
+
+	ibm_get_xive = rtas_token("ibm,get-xive");
+	ibm_set_xive = rtas_token("ibm,set-xive");
+	ibm_int_on  = rtas_token("ibm,int-on");
+	ibm_int_off = rtas_token("ibm,int-off");
+
+	for_each_node_by_type(np, "PowerPC-External-Interrupt-Presentation") {
+		found = 1;
+		if (firmware_has_feature(FW_FEATURE_LPAR))
+			break;
+		xics_init_one_node(np, &indx);
+	}
+	if (found == 0)
+		return;
+
+	xics_init_host();
 
 	/* Find the server numbers for the boot cpu. */
 	for (np = of_find_node_by_type(NULL, "cpu");
 	     np;
 	     np = of_find_node_by_type(np, "cpu")) {
-		ireg = (uint *)get_property(np, "reg", &ilen);
+		ireg = (u32 *)get_property(np, "reg", &ilen);
 		if (ireg && ireg[0] == get_hard_smp_processor_id(boot_cpuid)) {
-			ireg = (uint *)get_property(np, "ibm,ppc-interrupt-gserver#s",
-						    &ilen);
+			ireg = (u32 *)get_property(np,
+						  "ibm,ppc-interrupt-gserver#s",
+						   &ilen);
 			i = ilen / sizeof(int);
 			if (ireg && i > 0) {
 				default_server = ireg[0];
-				default_distrib_server = ireg[i-1]; /* take last element */
+				/* take last element */
+				default_distrib_server = ireg[i-1];
 			}
-			ireg = (uint *)get_property(np,
+			ireg = (u32 *)get_property(np,
 					"ibm,interrupt-server#-size", NULL);
 			if (ireg)
 				interrupt_server_size = *ireg;
@@ -581,102 +739,48 @@ nextnode:
 	}
 	of_node_put(np);
 
-	intr_base = intnodes[0].addr;
-	intr_size = intnodes[0].size;
-
- 	if (firmware_has_feature(FW_FEATURE_LPAR)) {
- 		ppc_md.get_irq = xics_get_irq_lpar;
- 		chip = &xics_pic_lpar;
-  	} else {
-#ifdef CONFIG_SMP
-		for_each_possible_cpu(i) {
-			int hard_id;
-
-			/* FIXME: Do this dynamically! --RR */
-			if (!cpu_present(i))
-				continue;
-
-			hard_id = get_hard_smp_processor_id(i);
-			xics_per_cpu[i] = ioremap(intnodes[hard_id].addr,
-						  intnodes[hard_id].size);
-		}
-#else
-		xics_per_cpu[0] = ioremap(intr_base, intr_size);
-#endif /* CONFIG_SMP */
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		ppc_md.get_irq = xics_get_irq_lpar;
+	else
 		ppc_md.get_irq = xics_get_irq_direct;
-		chip = &xics_pic_direct;
-
-	}
-
-	for (i = irq_offset_value(); i < NR_IRQS; ++i) {
-		/* All IRQs on XICS are level for now. MSI code may want to modify
-		 * that for reporting purposes
-		 */
-		get_irq_desc(i)->status |= IRQ_LEVEL;
-		set_irq_chip_and_handler(i, chip, handle_fasteoi_irq);
-	}
 
 	xics_setup_cpu();
 
-	ppc64_boot_msg(0x21, "XICS Done");
-}
+	xics_setup_8259_cascade();
 
-static int xics_setup_8259_cascade(void)
-{
-	struct device_node *np;
-	uint *ireg;
-
-	np = of_find_node_by_type(NULL, "interrupt-controller");
-	if (np == NULL) {
-		printk(KERN_WARNING "xics: no ISA interrupt controller\n");
-		xics_irq_8259_cascade_real = -1;
-		xics_irq_8259_cascade = -1;
-		return 0;
-	}
-
-	ireg = (uint *) get_property(np, "interrupts", NULL);
-	if (!ireg)
-		panic("xics_init_IRQ: can't find ISA interrupts property");
-
-	xics_irq_8259_cascade_real = *ireg;
-	xics_irq_8259_cascade = irq_offset_up
-		(virt_irq_create_mapping(xics_irq_8259_cascade_real));
-	i8259_init(0, 0);
-	of_node_put(np);
-
-	xics_set_irq_revmap(xics_irq_8259_cascade);
-	set_irq_chained_handler(xics_irq_8259_cascade, pSeries_8259_cascade);
-
-	return 0;
+	ppc64_boot_msg(0x21, "XICS Done");
 }
-arch_initcall(xics_setup_8259_cascade);
 
 
 #ifdef CONFIG_SMP
 void xics_request_IPIs(void)
 {
-	virt_irq_to_real_map[XICS_IPI] = XICS_IPI;
+	unsigned int ipi;
+
+	ipi = irq_create_mapping(xics_host, XICS_IPI, 0);
+	BUG_ON(ipi == NO_IRQ);
 
 	/*
 	 * IPIs are marked IRQF_DISABLED as they must run with irqs
 	 * disabled
 	 */
-	set_irq_handler(irq_offset_up(XICS_IPI), handle_percpu_irq);
+	set_irq_handler(ipi, handle_percpu_irq);
 	if (firmware_has_feature(FW_FEATURE_LPAR))
-		request_irq(irq_offset_up(XICS_IPI), xics_ipi_action_lpar,
-			    SA_INTERRUPT, "IPI", NULL);
+		request_irq(ipi, xics_ipi_action_lpar, IRQF_DISABLED,
+			    "IPI", NULL);
 	else
-		request_irq(irq_offset_up(XICS_IPI), xics_ipi_action_direct,
-			    SA_INTERRUPT, "IPI", NULL);
+		request_irq(ipi, xics_ipi_action_direct, IRQF_DISABLED,
+			    "IPI", NULL);
 }
 #endif /* CONFIG_SMP */
 
 void xics_teardown_cpu(int secondary)
 {
-	struct irq_desc *desc = get_irq_desc(irq_offset_up(XICS_IPI));
 	int cpu = smp_processor_id();
+	unsigned int ipi;
+	struct irq_desc *desc;
 
- 	xics_set_cpu_priority(cpu, 0);
+	xics_set_cpu_priority(cpu, 0);
 
 	/*
 	 * we need to EOI the IPI if we got here from kexec down IPI
@@ -685,6 +789,11 @@ void xics_teardown_cpu(int secondary)
 	 * should we be flagging idle loop instead?
 	 * or creating some task to be scheduled?
 	 */
+
+	ipi = irq_find_mapping(xics_host, XICS_IPI);
+	if (ipi == XICS_IRQ_SPURIOUS)
+		return;
+	desc = get_irq_desc(ipi);
 	if (desc->chip && desc->chip->eoi)
 		desc->chip->eoi(XICS_IPI);
 
@@ -694,8 +803,8 @@ void xics_teardown_cpu(int secondary)
 	 */
 	if (secondary)
 		rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
-			(1UL << interrupt_server_size) - 1 -
-			default_distrib_server, 0);
+				   (1UL << interrupt_server_size) - 1 -
+				   default_distrib_server, 0);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -723,15 +832,15 @@ void xics_migrate_irqs_away(void)
 		unsigned long flags;
 
 		/* We cant set affinity on ISA interrupts */
-		if (virq < irq_offset_value())
+		if (virq < NUM_ISA_INTERRUPTS)
 			continue;
-
-		desc = get_irq_desc(virq);
-		irq = virt_irq_to_real(irq_offset_down(virq));
-
+		if (irq_map[virq].host != xics_host)
+			continue;
+		irq = (unsigned int)irq_map[virq].hwirq;
 		/* We need to get IPIs still. */
-		if (irq == XICS_IPI || irq == NO_IRQ)
+		if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
 			continue;
+		desc = get_irq_desc(virq);
 
 		/* We only need to migrate enabled IRQS */
 		if (desc == NULL || desc->chip == NULL
diff --git a/arch/powerpc/platforms/pseries/xics.h b/arch/powerpc/platforms/pseries/xics.h
index 67dedf3..6ee1055 100644
--- a/arch/powerpc/platforms/pseries/xics.h
+++ b/arch/powerpc/platforms/pseries/xics.h
@@ -31,7 +31,7 @@ struct xics_ipi_struct {
 extern struct xics_ipi_struct xics_ipi_message[NR_CPUS] __cacheline_aligned;
 
 struct irq_desc;
-extern void pSeries_8259_cascade(unsigned int irq, struct irq_desc *desc,
+extern void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc,
 				 struct pt_regs *regs);
 
 #endif /* _POWERPC_KERNEL_XICS_H */
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index c2e9465..72c73a6 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -6,11 +6,16 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+#undef DEBUG
+
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
 #include <asm/io.h>
 #include <asm/i8259.h>
+#include <asm/prom.h>
 
 static volatile void __iomem *pci_intack; /* RO, gives us the irq vector */
 
@@ -20,7 +25,8 @@ static unsigned char cached_8259[2] = { 0xff, 0xff };
 
 static DEFINE_SPINLOCK(i8259_lock);
 
-static int i8259_pic_irq_offset;
+static struct device_node *i8259_node;
+static struct irq_host *i8259_host;
 
 /*
  * Acknowledge the IRQ using either the PCI host bridge's interrupt
@@ -28,16 +34,18 @@ static int i8259_pic_irq_offset;
  * which is called.  It should be noted that polling is broken on some
  * IBM and Motorola PReP boxes so we must use the int-ack feature on them.
  */
-int i8259_irq(struct pt_regs *regs)
+unsigned int i8259_irq(struct pt_regs *regs)
 {
 	int irq;
-
-	spin_lock(&i8259_lock);
+	int lock = 0;
 
 	/* Either int-ack or poll for the IRQ */
 	if (pci_intack)
 		irq = readb(pci_intack);
 	else {
+		spin_lock(&i8259_lock);
+		lock = 1;
+
 		/* Perform an interrupt acknowledge cycle on controller 1. */
 		outb(0x0C, 0x20);		/* prepare for poll */
 		irq = inb(0x20) & 7;
@@ -62,11 +70,13 @@ int i8259_irq(struct pt_regs *regs)
 		if (!pci_intack)
 			outb(0x0B, 0x20);	/* ISR register */
 		if(~inb(0x20) & 0x80)
-			irq = -1;
-	}
+			irq = NO_IRQ;
+	} else if (irq == 0xff)
+		irq = NO_IRQ;
 
-	spin_unlock(&i8259_lock);
-	return irq + i8259_pic_irq_offset;
+	if (lock)
+		spin_unlock(&i8259_lock);
+	return irq;
 }
 
 static void i8259_mask_and_ack_irq(unsigned int irq_nr)
@@ -74,7 +84,6 @@ static void i8259_mask_and_ack_irq(unsigned int irq_nr)
 	unsigned long flags;
 
 	spin_lock_irqsave(&i8259_lock, flags);
-	irq_nr -= i8259_pic_irq_offset;
 	if (irq_nr > 7) {
 		cached_A1 |= 1 << (irq_nr-8);
 		inb(0xA1); 	/* DUMMY */
@@ -100,8 +109,9 @@ static void i8259_mask_irq(unsigned int irq_nr)
 {
 	unsigned long flags;
 
+	pr_debug("i8259_mask_irq(%d)\n", irq_nr);
+
 	spin_lock_irqsave(&i8259_lock, flags);
-	irq_nr -= i8259_pic_irq_offset;
 	if (irq_nr < 8)
 		cached_21 |= 1 << irq_nr;
 	else
@@ -114,8 +124,9 @@ static void i8259_unmask_irq(unsigned int irq_nr)
 {
 	unsigned long flags;
 
+	pr_debug("i8259_unmask_irq(%d)\n", irq_nr);
+
 	spin_lock_irqsave(&i8259_lock, flags);
-	irq_nr -= i8259_pic_irq_offset;
 	if (irq_nr < 8)
 		cached_21 &= ~(1 << irq_nr);
 	else
@@ -152,25 +163,84 @@ static struct resource pic_edgectrl_iores = {
 	.flags = IORESOURCE_BUSY,
 };
 
-static struct irqaction i8259_irqaction = {
-	.handler = no_action,
-	.flags = IRQF_DISABLED,
-	.mask = CPU_MASK_NONE,
-	.name = "82c59 secondary cascade",
+static int i8259_host_match(struct irq_host *h, struct device_node *node)
+{
+	return i8259_node == NULL || i8259_node == node;
+}
+
+static int i8259_host_map(struct irq_host *h, unsigned int virq,
+			  irq_hw_number_t hw, unsigned int flags)
+{
+	pr_debug("i8259_host_map(%d, 0x%lx)\n", virq, hw);
+
+	/* We block the internal cascade */
+	if (hw == 2)
+		get_irq_desc(virq)->status |= IRQ_NOREQUEST;
+
+	/* We use the level stuff only for now, we might want to
+	 * be more cautious here but that works for now
+	 */
+	get_irq_desc(virq)->status |= IRQ_LEVEL;
+	set_irq_chip_and_handler(virq, &i8259_pic, handle_level_irq);
+	return 0;
+}
+
+static void i8259_host_unmap(struct irq_host *h, unsigned int virq)
+{
+	/* Make sure irq is masked in hardware */
+	i8259_mask_irq(virq);
+
+	/* remove chip and handler */
+	set_irq_chip_and_handler(virq, NULL, NULL);
+
+	/* Make sure it's completed */
+	synchronize_irq(virq);
+}
+
+static int i8259_host_xlate(struct irq_host *h, struct device_node *ct,
+			    u32 *intspec, unsigned int intsize,
+			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+	static unsigned char map_isa_senses[4] = {
+		IRQ_TYPE_LEVEL_LOW,
+		IRQ_TYPE_LEVEL_HIGH,
+		IRQ_TYPE_EDGE_FALLING,
+		IRQ_TYPE_EDGE_RISING,
+	};
+
+	*out_hwirq = intspec[0];
+	if (intsize > 1 && intspec[1] < 4)
+		*out_flags = map_isa_senses[intspec[1]];
+	else
+		*out_flags = IRQ_TYPE_NONE;
+
+	return 0;
+}
+
+static struct irq_host_ops i8259_host_ops = {
+	.match = i8259_host_match,
+	.map = i8259_host_map,
+	.unmap = i8259_host_unmap,
+	.xlate = i8259_host_xlate,
 };
 
-/*
- * i8259_init()
- * intack_addr - PCI interrupt acknowledge (real) address which will return
- *               the active irq from the 8259
+/****
+ * i8259_init - Initialize the legacy controller
+ * @node: device node of the legacy PIC (can be NULL, but then, it will match
+ *        all interrupts, so beware)
+ * @intack_addr: PCI interrupt acknowledge (real) address which will return
+ *             	 the active irq from the 8259
  */
-void __init i8259_init(unsigned long intack_addr, int offset)
+void i8259_init(struct device_node *node, unsigned long intack_addr)
 {
 	unsigned long flags;
-	int i;
 
+	/* initialize the controller */
 	spin_lock_irqsave(&i8259_lock, flags);
-	i8259_pic_irq_offset = offset;
+
+	/* Mask all first */
+	outb(0xff, 0xA1);
+	outb(0xff, 0x21);
 
 	/* init master interrupt controller */
 	outb(0x11, 0x20); /* Start init sequence */
@@ -184,24 +254,36 @@ void __init i8259_init(unsigned long intack_addr, int offset)
 	outb(0x02, 0xA1); /* edge triggered, Cascade (slave) on IRQ2 */
 	outb(0x01, 0xA1); /* Select 8086 mode */
 
+	/* That thing is slow */
+	udelay(100);
+
 	/* always read ISR */
 	outb(0x0B, 0x20);
 	outb(0x0B, 0xA0);
 
-	/* Mask all interrupts */
+	/* Unmask the internal cascade */
+	cached_21 &= ~(1 << 2);
+
+	/* Set interrupt masks */
 	outb(cached_A1, 0xA1);
 	outb(cached_21, 0x21);
 
 	spin_unlock_irqrestore(&i8259_lock, flags);
 
-	for (i = 0; i < NUM_ISA_INTERRUPTS; ++i) {
-		set_irq_chip_and_handler(offset + i, &i8259_pic,
-					 handle_level_irq);
-		irq_desc[offset + i].status |= IRQ_LEVEL;
+	/* create a legacy host */
+	if (node)
+		i8259_node = of_node_get(node);
+	i8259_host = irq_alloc_host(IRQ_HOST_MAP_LEGACY, 0, &i8259_host_ops, 0);
+	if (i8259_host == NULL) {
+		printk(KERN_ERR "i8259: failed to allocate irq host !\n");
+		return;
 	}
 
 	/* reserve our resources */
-	setup_irq(offset + 2, &i8259_irqaction);
+	/* XXX should we continue doing that ? it seems to cause problems
+	 * with further requesting of PCI IO resources for that range...
+	 * need to look into it.
+	 */
 	request_resource(&ioport_resource, &pic1_iores);
 	request_resource(&ioport_resource, &pic2_iores);
 	request_resource(&ioport_resource, &pic_edgectrl_iores);
@@ -209,4 +291,5 @@ void __init i8259_init(unsigned long intack_addr, int offset)
 	if (intack_addr != 0)
 		pci_intack = ioremap(intack_addr, 1);
 
+	printk(KERN_INFO "i8259 legacy interrupt controller initialized\n");
 }
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 9a95f16..7d31d7c 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -340,27 +340,19 @@ static void __init mpic_scan_ht_pics(struct mpic *mpic)
 #endif /* CONFIG_MPIC_BROKEN_U3 */
 
 
+#define mpic_irq_to_hw(virq)	((unsigned int)irq_map[virq].hwirq)
+
 /* Find an mpic associated with a given linux interrupt */
 static struct mpic *mpic_find(unsigned int irq, unsigned int *is_ipi)
 {
-	struct mpic *mpic = mpics;
-
-	while(mpic) {
-		/* search IPIs first since they may override the main interrupts */
-		if (irq >= mpic->ipi_offset && irq < (mpic->ipi_offset + 4)) {
-			if (is_ipi)
-				*is_ipi = 1;
-			return mpic;
-		}
-		if (irq >= mpic->irq_offset &&
-		    irq < (mpic->irq_offset + mpic->irq_count)) {
-			if (is_ipi)
-				*is_ipi = 0;
-			return mpic;
-		}
-		mpic = mpic -> next;
-	}
-	return NULL;
+	unsigned int src = mpic_irq_to_hw(irq);
+
+	if (irq < NUM_ISA_INTERRUPTS)
+		return NULL;
+	if (is_ipi)
+		*is_ipi = (src >= MPIC_VEC_IPI_0 && src <= MPIC_VEC_IPI_3);
+
+	return irq_desc[irq].chip_data;
 }
 
 /* Convert a cpu mask from logical to physical cpu numbers. */
@@ -398,9 +390,7 @@ static inline void mpic_eoi(struct mpic *mpic)
 #ifdef CONFIG_SMP
 static irqreturn_t mpic_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
 {
-	struct mpic *mpic = dev_id;
-
-	smp_message_recv(irq - mpic->ipi_offset, regs);
+	smp_message_recv(mpic_irq_to_hw(irq) - MPIC_VEC_IPI_0, regs);
 	return IRQ_HANDLED;
 }
 #endif /* CONFIG_SMP */
@@ -414,7 +404,7 @@ static void mpic_unmask_irq(unsigned int irq)
 {
 	unsigned int loops = 100000;
 	struct mpic *mpic = mpic_from_irq(irq);
-	unsigned int src = irq - mpic->irq_offset;
+	unsigned int src = mpic_irq_to_hw(irq);
 
 	DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, irq, src);
 
@@ -435,7 +425,7 @@ static void mpic_mask_irq(unsigned int irq)
 {
 	unsigned int loops = 100000;
 	struct mpic *mpic = mpic_from_irq(irq);
-	unsigned int src = irq - mpic->irq_offset;
+	unsigned int src = mpic_irq_to_hw(irq);
 
 	DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src);
 
@@ -472,7 +462,7 @@ static void mpic_end_irq(unsigned int irq)
 static void mpic_unmask_ht_irq(unsigned int irq)
 {
 	struct mpic *mpic = mpic_from_irq(irq);
-	unsigned int src = irq - mpic->irq_offset;
+	unsigned int src = mpic_irq_to_hw(irq);
 
 	mpic_unmask_irq(irq);
 
@@ -483,7 +473,7 @@ static void mpic_unmask_ht_irq(unsigned int irq)
 static unsigned int mpic_startup_ht_irq(unsigned int irq)
 {
 	struct mpic *mpic = mpic_from_irq(irq);
-	unsigned int src = irq - mpic->irq_offset;
+	unsigned int src = mpic_irq_to_hw(irq);
 
 	mpic_unmask_irq(irq);
 	mpic_startup_ht_interrupt(mpic, src, irq_desc[irq].status);
@@ -494,7 +484,7 @@ static unsigned int mpic_startup_ht_irq(unsigned int irq)
 static void mpic_shutdown_ht_irq(unsigned int irq)
 {
 	struct mpic *mpic = mpic_from_irq(irq);
-	unsigned int src = irq - mpic->irq_offset;
+	unsigned int src = mpic_irq_to_hw(irq);
 
 	mpic_shutdown_ht_interrupt(mpic, src, irq_desc[irq].status);
 	mpic_mask_irq(irq);
@@ -503,7 +493,7 @@ static void mpic_shutdown_ht_irq(unsigned int irq)
 static void mpic_end_ht_irq(unsigned int irq)
 {
 	struct mpic *mpic = mpic_from_irq(irq);
-	unsigned int src = irq - mpic->irq_offset;
+	unsigned int src = mpic_irq_to_hw(irq);
 
 #ifdef DEBUG_IRQ
 	DBG("%s: end_irq: %d\n", mpic->name, irq);
@@ -525,7 +515,7 @@ static void mpic_end_ht_irq(unsigned int irq)
 static void mpic_unmask_ipi(unsigned int irq)
 {
 	struct mpic *mpic = mpic_from_ipi(irq);
-	unsigned int src = irq - mpic->ipi_offset;
+	unsigned int src = mpic_irq_to_hw(irq) - MPIC_VEC_IPI_0;
 
 	DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, irq, src);
 	mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK);
@@ -555,15 +545,46 @@ static void mpic_end_ipi(unsigned int irq)
 static void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
 {
 	struct mpic *mpic = mpic_from_irq(irq);
+	unsigned int src = mpic_irq_to_hw(irq);
 
 	cpumask_t tmp;
 
 	cpus_and(tmp, cpumask, cpu_online_map);
 
-	mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_DESTINATION,
+	mpic_irq_write(src, MPIC_IRQ_DESTINATION,
 		       mpic_physmask(cpus_addr(tmp)[0]));	
 }
 
+static unsigned int mpic_flags_to_vecpri(unsigned int flags, int *level)
+{
+	unsigned int vecpri;
+
+	/* Now convert sense value */
+	switch(flags & IRQ_TYPE_SENSE_MASK) {
+	case IRQ_TYPE_EDGE_RISING:
+		vecpri = MPIC_VECPRI_SENSE_EDGE |
+			MPIC_VECPRI_POLARITY_POSITIVE;
+		*level = 0;
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		vecpri = MPIC_VECPRI_SENSE_EDGE |
+			MPIC_VECPRI_POLARITY_NEGATIVE;
+		*level = 0;
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+		vecpri = MPIC_VECPRI_SENSE_LEVEL |
+			MPIC_VECPRI_POLARITY_POSITIVE;
+		*level = 1;
+		break;
+	case IRQ_TYPE_LEVEL_LOW:
+	default:
+		vecpri = MPIC_VECPRI_SENSE_LEVEL |
+			MPIC_VECPRI_POLARITY_NEGATIVE;
+		*level = 1;
+	}
+	return vecpri;
+}
+
 static struct irq_chip mpic_irq_chip = {
 	.mask	= mpic_mask_irq,
 	.unmask	= mpic_unmask_irq,
@@ -589,19 +610,111 @@ static struct irq_chip mpic_irq_ht_chip = {
 #endif /* CONFIG_MPIC_BROKEN_U3 */
 
 
+static int mpic_host_match(struct irq_host *h, struct device_node *node)
+{
+	struct mpic *mpic = h->host_data;
+
+	/* Exact match, unless mpic node is NULL */
+	return mpic->of_node == NULL || mpic->of_node == node;
+}
+
+static int mpic_host_map(struct irq_host *h, unsigned int virq,
+			 irq_hw_number_t hw, unsigned int flags)
+{
+	struct irq_desc *desc = get_irq_desc(virq);
+	struct irq_chip *chip;
+	struct mpic *mpic = h->host_data;
+	unsigned int vecpri = MPIC_VECPRI_SENSE_LEVEL |
+		MPIC_VECPRI_POLARITY_NEGATIVE;
+	int level;
+
+	pr_debug("mpic: map virq %d, hwirq 0x%lx, flags: 0x%x\n",
+		 virq, hw, flags);
+
+	if (hw == MPIC_VEC_SPURRIOUS)
+		return -EINVAL;
+#ifdef CONFIG_SMP
+	else if (hw >= MPIC_VEC_IPI_0) {
+		WARN_ON(!(mpic->flags & MPIC_PRIMARY));
+
+		pr_debug("mpic: mapping as IPI\n");
+		set_irq_chip_data(virq, mpic);
+		set_irq_chip_and_handler(virq, &mpic->hc_ipi,
+					 handle_percpu_irq);
+		return 0;
+	}
+#endif /* CONFIG_SMP */
+
+	if (hw >= mpic->irq_count)
+		return -EINVAL;
+
+	/* If no sense provided, check default sense array */
+	if (((flags & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_NONE) &&
+	    mpic->senses && hw < mpic->senses_count)
+		flags |= mpic->senses[hw];
+
+	vecpri = mpic_flags_to_vecpri(flags, &level);
+	if (level)
+		desc->status |= IRQ_LEVEL;
+	chip = &mpic->hc_irq;
+
+#ifdef CONFIG_MPIC_BROKEN_U3
+	/* Check for HT interrupts, override vecpri */
+	if (mpic_is_ht_interrupt(mpic, hw)) {
+		vecpri &= ~(MPIC_VECPRI_SENSE_MASK |
+			    MPIC_VECPRI_POLARITY_MASK);
+		vecpri |= MPIC_VECPRI_POLARITY_POSITIVE;
+		chip = &mpic->hc_ht_irq;
+	}
+#endif
+
+	/* Reconfigure irq */
+	vecpri |= MPIC_VECPRI_MASK | hw | (8 << MPIC_VECPRI_PRIORITY_SHIFT);
+	mpic_irq_write(hw, MPIC_IRQ_VECTOR_PRI, vecpri);
+
+	pr_debug("mpic: mapping as IRQ\n");
+
+	set_irq_chip_data(virq, mpic);
+	set_irq_chip_and_handler(virq, chip, handle_fasteoi_irq);
+	return 0;
+}
+
+static int mpic_host_xlate(struct irq_host *h, struct device_node *ct,
+			   u32 *intspec, unsigned int intsize,
+			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	static unsigned char map_mpic_senses[4] = {
+		IRQ_TYPE_EDGE_RISING,
+		IRQ_TYPE_LEVEL_LOW,
+		IRQ_TYPE_LEVEL_HIGH,
+		IRQ_TYPE_EDGE_FALLING,
+	};
+
+	*out_hwirq = intspec[0];
+	if (intsize > 1 && intspec[1] < 4)
+		*out_flags = map_mpic_senses[intspec[1]];
+	else
+		*out_flags = IRQ_TYPE_NONE;
+
+	return 0;
+}
+
+static struct irq_host_ops mpic_host_ops = {
+	.match = mpic_host_match,
+	.map = mpic_host_map,
+	.xlate = mpic_host_xlate,
+};
+
 /*
  * Exported functions
  */
 
-
-struct mpic * __init mpic_alloc(unsigned long phys_addr,
+struct mpic * __init mpic_alloc(struct device_node *node,
+				unsigned long phys_addr,
 				unsigned int flags,
 				unsigned int isu_size,
-				unsigned int irq_offset,
 				unsigned int irq_count,
-				unsigned int ipi_offset,
-				unsigned char *senses,
-				unsigned int senses_count,
 				const char *name)
 {
 	struct mpic	*mpic;
@@ -613,10 +726,19 @@ struct mpic * __init mpic_alloc(unsigned long phys_addr,
 	if (mpic == NULL)
 		return NULL;
 	
-
 	memset(mpic, 0, sizeof(struct mpic));
 	mpic->name = name;
+	mpic->of_node = node ? of_node_get(node) : NULL;
 
+	mpic->irqhost = irq_alloc_host(IRQ_HOST_MAP_LINEAR, 256,
+				       &mpic_host_ops,
+				       MPIC_VEC_SPURRIOUS);
+	if (mpic->irqhost == NULL) {
+		of_node_put(node);
+		return NULL;
+	}
+
+	mpic->irqhost->host_data = mpic;
 	mpic->hc_irq = mpic_irq_chip;
 	mpic->hc_irq.typename = name;
 	if (flags & MPIC_PRIMARY)
@@ -628,18 +750,14 @@ struct mpic * __init mpic_alloc(unsigned long phys_addr,
 		mpic->hc_ht_irq.set_affinity = mpic_set_affinity;
 #endif /* CONFIG_MPIC_BROKEN_U3 */
 #ifdef CONFIG_SMP
-	mpic->hc_ipi.typename = name;
 	mpic->hc_ipi = mpic_ipi_chip;
+	mpic->hc_ipi.typename = name;
 #endif /* CONFIG_SMP */
 
 	mpic->flags = flags;
 	mpic->isu_size = isu_size;
-	mpic->irq_offset = irq_offset;
 	mpic->irq_count = irq_count;
-	mpic->ipi_offset = ipi_offset;
 	mpic->num_sources = 0; /* so far */
-	mpic->senses = senses;
-	mpic->senses_count = senses_count;
 
 	/* Map the global registers */
 	mpic->gregs = ioremap(phys_addr + MPIC_GREG_BASE, 0x1000);
@@ -707,8 +825,10 @@ struct mpic * __init mpic_alloc(unsigned long phys_addr,
 	mpic->next = mpics;
 	mpics = mpic;
 
-	if (flags & MPIC_PRIMARY)
+	if (flags & MPIC_PRIMARY) {
 		mpic_primary = mpic;
+		irq_set_default_host(mpic->irqhost);
+	}
 
 	return mpic;
 }
@@ -725,11 +845,22 @@ void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
 		mpic->num_sources = isu_first + mpic->isu_size;
 }
 
+void __init mpic_set_default_senses(struct mpic *mpic, u8 *senses, int count)
+{
+	mpic->senses = senses;
+	mpic->senses_count = count;
+}
+
 void __init mpic_init(struct mpic *mpic)
 {
 	int i;
 
 	BUG_ON(mpic->num_sources == 0);
+	WARN_ON(mpic->num_sources > MPIC_VEC_IPI_0);
+
+	/* Sanitize source count */
+	if (mpic->num_sources > MPIC_VEC_IPI_0)
+		mpic->num_sources = MPIC_VEC_IPI_0;
 
 	printk(KERN_INFO "mpic: Initializing for %d sources\n", mpic->num_sources);
 
@@ -753,14 +884,6 @@ void __init mpic_init(struct mpic *mpic)
 			       MPIC_VECPRI_MASK |
 			       (10 << MPIC_VECPRI_PRIORITY_SHIFT) |
 			       (MPIC_VEC_IPI_0 + i));
-#ifdef CONFIG_SMP
-		if (!(mpic->flags & MPIC_PRIMARY))
-			continue;
-		set_irq_chip_data(mpic->ipi_offset+i, mpic);
-		set_irq_chip_and_handler(mpic->ipi_offset+i,
-					 &mpic->hc_ipi,
-					 handle_percpu_irq);
-#endif /* CONFIG_SMP */
 	}
 
 	/* Initialize interrupt sources */
@@ -777,25 +900,15 @@ void __init mpic_init(struct mpic *mpic)
 	for (i = 0; i < mpic->num_sources; i++) {
 		/* start with vector = source number, and masked */
 		u32 vecpri = MPIC_VECPRI_MASK | i | (8 << MPIC_VECPRI_PRIORITY_SHIFT);
-		int level = 0;
+		int level = 1;
 		
-		/* if it's an IPI, we skip it */
-		if ((mpic->irq_offset + i) >= (mpic->ipi_offset + i) &&
-		    (mpic->irq_offset + i) <  (mpic->ipi_offset + i + 4))
-			continue;
-
 		/* do senses munging */
-		if (mpic->senses && i < mpic->senses_count) {
-			if (mpic->senses[i] & IRQ_SENSE_LEVEL)
-				vecpri |= MPIC_VECPRI_SENSE_LEVEL;
-			if (mpic->senses[i] & IRQ_POLARITY_POSITIVE)
-				vecpri |= MPIC_VECPRI_POLARITY_POSITIVE;
-		} else
+		if (mpic->senses && i < mpic->senses_count)
+			vecpri = mpic_flags_to_vecpri(mpic->senses[i],
+						      &level);
+		else
 			vecpri |= MPIC_VECPRI_SENSE_LEVEL;
 
-		/* remember if it was a level interrupts */
-		level = (vecpri & MPIC_VECPRI_SENSE_LEVEL);
-
 		/* deal with broken U3 */
 		if (mpic->flags & MPIC_BROKEN_U3) {
 #ifdef CONFIG_MPIC_BROKEN_U3
@@ -816,21 +929,6 @@ void __init mpic_init(struct mpic *mpic)
 		mpic_irq_write(i, MPIC_IRQ_VECTOR_PRI, vecpri);
 		mpic_irq_write(i, MPIC_IRQ_DESTINATION,
 			       1 << hard_smp_processor_id());
-
-		/* init linux descriptors */
-		if (i < mpic->irq_count) {
-			struct irq_chip *chip = &mpic->hc_irq;
-
-			irq_desc[mpic->irq_offset+i].status |=
-				level ? IRQ_LEVEL : 0;
-#ifdef CONFIG_MPIC_BROKEN_U3
-			if (mpic_is_ht_interrupt(mpic, i))
-				chip = &mpic->hc_ht_irq;
-#endif /* CONFIG_MPIC_BROKEN_U3 */
-			set_irq_chip_data(mpic->irq_offset+i, mpic);
-			set_irq_chip_and_handler(mpic->irq_offset+i, chip,
-						 handle_fasteoi_irq);
-		}
 	}
 	
 	/* Init spurrious vector */
@@ -871,19 +969,20 @@ void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
 {
 	int is_ipi;
 	struct mpic *mpic = mpic_find(irq, &is_ipi);
+	unsigned int src = mpic_irq_to_hw(irq);
 	unsigned long flags;
 	u32 reg;
 
 	spin_lock_irqsave(&mpic_lock, flags);
 	if (is_ipi) {
-		reg = mpic_ipi_read(irq - mpic->ipi_offset) &
+		reg = mpic_ipi_read(src - MPIC_VEC_IPI_0) &
 			~MPIC_VECPRI_PRIORITY_MASK;
-		mpic_ipi_write(irq - mpic->ipi_offset,
+		mpic_ipi_write(src - MPIC_VEC_IPI_0,
 			       reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
 	} else {
-		reg = mpic_irq_read(irq - mpic->irq_offset,MPIC_IRQ_VECTOR_PRI)
+		reg = mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI)
 			& ~MPIC_VECPRI_PRIORITY_MASK;
-		mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI,
+		mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI,
 			       reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
 	}
 	spin_unlock_irqrestore(&mpic_lock, flags);
@@ -893,14 +992,15 @@ unsigned int mpic_irq_get_priority(unsigned int irq)
 {
 	int is_ipi;
 	struct mpic *mpic = mpic_find(irq, &is_ipi);
+	unsigned int src = mpic_irq_to_hw(irq);
 	unsigned long flags;
 	u32 reg;
 
 	spin_lock_irqsave(&mpic_lock, flags);
 	if (is_ipi)
-		reg = mpic_ipi_read(irq - mpic->ipi_offset);
+		reg = mpic_ipi_read(src = MPIC_VEC_IPI_0);
 	else
-		reg = mpic_irq_read(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI);
+		reg = mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI);
 	spin_unlock_irqrestore(&mpic_lock, flags);
 	return (reg & MPIC_VECPRI_PRIORITY_MASK) >> MPIC_VECPRI_PRIORITY_SHIFT;
 }
@@ -995,29 +1095,20 @@ void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask)
 		       mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0]));
 }
 
-int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs)
+unsigned int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs)
 {
-	u32 irq;
+	u32 src;
 
-	irq = mpic_cpu_read(MPIC_CPU_INTACK) & MPIC_VECPRI_VECTOR_MASK;
+	src = mpic_cpu_read(MPIC_CPU_INTACK) & MPIC_VECPRI_VECTOR_MASK;
 #ifdef DEBUG_LOW
-	DBG("%s: get_one_irq(): %d\n", mpic->name, irq);
-#endif
-	if (unlikely(irq == MPIC_VEC_SPURRIOUS))
-		return -1;
-	if (irq < MPIC_VEC_IPI_0) {
-#ifdef DEBUG_IRQ
-		DBG("%s: irq %d\n", mpic->name, irq + mpic->irq_offset);
+	DBG("%s: get_one_irq(): %d\n", mpic->name, src);
 #endif
-		return irq + mpic->irq_offset;
-	}
-#ifdef DEBUG_IPI
-       	DBG("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0);
-#endif
-	return irq - MPIC_VEC_IPI_0 + mpic->ipi_offset;
+	if (unlikely(src == MPIC_VEC_SPURRIOUS))
+		return NO_IRQ;
+	return irq_linear_revmap(mpic->irqhost, src);
 }
 
-int mpic_get_irq(struct pt_regs *regs)
+unsigned int mpic_get_irq(struct pt_regs *regs)
 {
 	struct mpic *mpic = mpic_primary;
 
@@ -1031,25 +1122,27 @@ int mpic_get_irq(struct pt_regs *regs)
 void mpic_request_ipis(void)
 {
 	struct mpic *mpic = mpic_primary;
-
+	int i;
+	static char *ipi_names[] = {
+		"IPI0 (call function)",
+		"IPI1 (reschedule)",
+		"IPI2 (unused)",
+		"IPI3 (debugger break)",
+	};
 	BUG_ON(mpic == NULL);
-	
-	printk("requesting IPIs ... \n");
 
-	/*
-	 * IPIs are marked IRQF_DISABLED as they must run with irqs
-	 * disabled
-	 */
-	request_irq(mpic->ipi_offset+0, mpic_ipi_action, IRQF_DISABLED,
-		    "IPI0 (call function)", mpic);
-	request_irq(mpic->ipi_offset+1, mpic_ipi_action, IRQF_DISABLED,
-		   "IPI1 (reschedule)", mpic);
-	request_irq(mpic->ipi_offset+2, mpic_ipi_action, IRQF_DISABLED,
-		   "IPI2 (unused)", mpic);
-	request_irq(mpic->ipi_offset+3, mpic_ipi_action, IRQF_DISABLED,
-		   "IPI3 (debugger break)", mpic);
-
-	printk("IPIs requested... \n");
+	printk(KERN_INFO "mpic: requesting IPIs ... \n");
+
+	for (i = 0; i < 4; i++) {
+		unsigned int vipi = irq_create_mapping(mpic->irqhost,
+						       MPIC_VEC_IPI_0 + i, 0);
+		if (vipi == NO_IRQ) {
+			printk(KERN_ERR "Failed to map IPI %d\n", i);
+			break;
+		}
+		request_irq(vipi, mpic_ipi_action, IRQF_DISABLED,
+			    ipi_names[i], mpic);
+	}
 }
 
 void smp_mpic_message_pass(int target, int msg)
diff --git a/drivers/char/hvsi.c b/drivers/char/hvsi.c
index 8dc205b..56612a2 100644
--- a/drivers/char/hvsi.c
+++ b/drivers/char/hvsi.c
@@ -1299,13 +1299,12 @@ static int __init hvsi_console_init(void)
 		hp->inbuf_end = hp->inbuf;
 		hp->state = HVSI_CLOSED;
 		hp->vtermno = *vtermno;
-		hp->virq = virt_irq_create_mapping(irq[0]);
+		hp->virq = irq_create_mapping(NULL, irq[0], 0);
 		if (hp->virq == NO_IRQ) {
 			printk(KERN_ERR "%s: couldn't create irq mapping for 0x%x\n",
-				__FUNCTION__, hp->virq);
+				__FUNCTION__, irq[0]);
 			continue;
-		} else
-			hp->virq = irq_offset_up(hp->virq);
+		}
 
 		hvsi_count++;
 	}
diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c
index 314fc08..4b08852 100644
--- a/drivers/macintosh/macio-adb.c
+++ b/drivers/macintosh/macio-adb.c
@@ -90,22 +90,12 @@ int macio_init(void)
 {
 	struct device_node *adbs;
 	struct resource r;
+	unsigned int irq;
 
 	adbs = find_compatible_devices("adb", "chrp,adb0");
 	if (adbs == 0)
 		return -ENXIO;
 
-#if 0
-	{ int i = 0;
-
-	printk("macio_adb_init: node = %p, addrs =", adbs->node);
-	while(!of_address_to_resource(adbs, i, &r))
-		printk(" %x(%x)", r.start, r.end - r.start);
-	printk(", intrs =");
-	for (i = 0; i < adbs->n_intrs; ++i)
-		printk(" %x", adbs->intrs[i].line);
-	printk("\n"); }
-#endif
 	if (of_address_to_resource(adbs, 0, &r))
 		return -ENXIO;
 	adb = ioremap(r.start, sizeof(struct adb_regs));
@@ -117,10 +107,9 @@ int macio_init(void)
 	out_8(&adb->active_lo.r, 0xff);
 	out_8(&adb->autopoll.r, APE);
 
-	if (request_irq(adbs->intrs[0].line, macio_adb_interrupt,
-			0, "ADB", (void *)0)) {
-		printk(KERN_ERR "ADB: can't get irq %d\n",
-		       adbs->intrs[0].line);
+	irq = irq_of_parse_and_map(adbs, 0);
+	if (request_irq(irq, macio_adb_interrupt, 0, "ADB", (void *)0)) {
+		printk(KERN_ERR "ADB: can't get irq %d\n", irq);
 		return -EAGAIN;
 	}
 	out_8(&adb->intr_enb.r, DFB | TAG);
diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c
index 40ae7b6..80c0c66 100644
--- a/drivers/macintosh/macio_asic.c
+++ b/drivers/macintosh/macio_asic.c
@@ -280,75 +280,128 @@ static void macio_release_dev(struct device *dev)
 static int macio_resource_quirks(struct device_node *np, struct resource *res,
 				 int index)
 {
-	if (res->flags & IORESOURCE_MEM) {
-		/* Grand Central has too large resource 0 on some machines */
-		if (index == 0 && !strcmp(np->name, "gc"))
-			res->end = res->start + 0x1ffff;
+	/* Only quirks for memory resources for now */
+	if ((res->flags & IORESOURCE_MEM) == 0)
+		return 0;
+
+	/* Grand Central has too large resource 0 on some machines */
+	if (index == 0 && !strcmp(np->name, "gc"))
+		res->end = res->start + 0x1ffff;
 
-		/* Airport has bogus resource 2 */
-		if (index >= 2 && !strcmp(np->name, "radio"))
-			return 1;
+	/* Airport has bogus resource 2 */
+	if (index >= 2 && !strcmp(np->name, "radio"))
+		return 1;
 
 #ifndef CONFIG_PPC64
-		/* DBDMAs may have bogus sizes */
-		if ((res->start & 0x0001f000) == 0x00008000)
-			res->end = res->start + 0xff;
+	/* DBDMAs may have bogus sizes */
+	if ((res->start & 0x0001f000) == 0x00008000)
+		res->end = res->start + 0xff;
 #endif /* CONFIG_PPC64 */
 
-		/* ESCC parent eats child resources. We could have added a
-		 * level of hierarchy, but I don't really feel the need
-		 * for it
-		 */
-		if (!strcmp(np->name, "escc"))
-			return 1;
-
-		/* ESCC has bogus resources >= 3 */
-		if (index >= 3 && !(strcmp(np->name, "ch-a") &&
-				    strcmp(np->name, "ch-b")))
-			return 1;
-
-		/* Media bay has too many resources, keep only first one */
-		if (index > 0 && !strcmp(np->name, "media-bay"))
-			return 1;
-
-		/* Some older IDE resources have bogus sizes */
-		if (!(strcmp(np->name, "IDE") && strcmp(np->name, "ATA") &&
-		      strcmp(np->type, "ide") && strcmp(np->type, "ata"))) {
-			if (index == 0 && (res->end - res->start) > 0xfff)
-				res->end = res->start + 0xfff;
-			if (index == 1 && (res->end - res->start) > 0xff)
-				res->end = res->start + 0xff;
-		}
+	/* ESCC parent eats child resources. We could have added a
+	 * level of hierarchy, but I don't really feel the need
+	 * for it
+	 */
+	if (!strcmp(np->name, "escc"))
+		return 1;
+
+	/* ESCC has bogus resources >= 3 */
+	if (index >= 3 && !(strcmp(np->name, "ch-a") &&
+			    strcmp(np->name, "ch-b")))
+		return 1;
+
+	/* Media bay has too many resources, keep only first one */
+	if (index > 0 && !strcmp(np->name, "media-bay"))
+		return 1;
+
+	/* Some older IDE resources have bogus sizes */
+	if (!(strcmp(np->name, "IDE") && strcmp(np->name, "ATA") &&
+	      strcmp(np->type, "ide") && strcmp(np->type, "ata"))) {
+		if (index == 0 && (res->end - res->start) > 0xfff)
+			res->end = res->start + 0xfff;
+		if (index == 1 && (res->end - res->start) > 0xff)
+			res->end = res->start + 0xff;
 	}
 	return 0;
 }
 
+static void macio_create_fixup_irq(struct macio_dev *dev, int index,
+				   unsigned int line)
+{
+	unsigned int irq;
 
-static void macio_setup_interrupts(struct macio_dev *dev)
+	irq = irq_create_mapping(NULL, line, 0);
+	if (irq != NO_IRQ) {
+		dev->interrupt[index].start = irq;
+		dev->interrupt[index].flags = IORESOURCE_IRQ;
+		dev->interrupt[index].name = dev->ofdev.dev.bus_id;
+	}
+	if (dev->n_interrupts <= index)
+		dev->n_interrupts = index + 1;
+}
+
+static void macio_add_missing_resources(struct macio_dev *dev)
 {
 	struct device_node *np = dev->ofdev.node;
-	int i,j;
+	unsigned int irq_base;
+
+	/* Gatwick has some missing interrupts on child nodes */
+	if (dev->bus->chip->type != macio_gatwick)
+		return;
 
-	/* For now, we use pre-parsed entries in the device-tree for
-	 * interrupt routing and addresses, but we should change that
-	 * to dynamically parsed entries and so get rid of most of the
-	 * clutter in struct device_node
+	/* irq_base is always 64 on gatwick. I have no cleaner way to get
+	 * that value from here at this point
 	 */
-	for (i = j = 0; i < np->n_intrs; i++) {
+	irq_base = 64;
+
+	/* Fix SCC */
+	if (strcmp(np->name, "ch-a") == 0) {
+		macio_create_fixup_irq(dev, 0, 15 + irq_base);
+		macio_create_fixup_irq(dev, 1,  4 + irq_base);
+		macio_create_fixup_irq(dev, 2,  5 + irq_base);
+		printk(KERN_INFO "macio: fixed SCC irqs on gatwick\n");
+	}
+
+	/* Fix media-bay */
+	if (strcmp(np->name, "media-bay") == 0) {
+		macio_create_fixup_irq(dev, 0, 29 + irq_base);
+		printk(KERN_INFO "macio: fixed media-bay irq on gatwick\n");
+	}
+
+	/* Fix left media bay childs */
+	if (dev->media_bay != NULL && strcmp(np->name, "floppy") == 0) {
+		macio_create_fixup_irq(dev, 0, 19 + irq_base);
+		macio_create_fixup_irq(dev, 1,  1 + irq_base);
+		printk(KERN_INFO "macio: fixed left floppy irqs\n");
+	}
+	if (dev->media_bay != NULL && strcasecmp(np->name, "ata4") == 0) {
+		macio_create_fixup_irq(dev, 0, 14 + irq_base);
+		macio_create_fixup_irq(dev, 0,  3 + irq_base);
+		printk(KERN_INFO "macio: fixed left ide irqs\n");
+	}
+}
+
+static void macio_setup_interrupts(struct macio_dev *dev)
+{
+	struct device_node *np = dev->ofdev.node;
+	unsigned int irq;
+	int i = 0, j = 0;
+
+	for (;;) {
 		struct resource *res = &dev->interrupt[j];
 
 		if (j >= MACIO_DEV_COUNT_IRQS)
 			break;
-		res->start = np->intrs[i].line;
-		res->flags = IORESOURCE_IO;
-		if (np->intrs[j].sense)
-			res->flags |= IORESOURCE_IRQ_LOWLEVEL;
-		else
-			res->flags |= IORESOURCE_IRQ_HIGHEDGE;
+		irq = irq_of_parse_and_map(np, i++);
+		if (irq == NO_IRQ)
+			break;
+		res->start = irq;
+		res->flags = IORESOURCE_IRQ;
 		res->name = dev->ofdev.dev.bus_id;
-		if (macio_resource_quirks(np, res, i))
+		if (macio_resource_quirks(np, res, i - 1)) {
 			memset(res, 0, sizeof(struct resource));
-		else
+			continue;
+		} else
 			j++;
 	}
 	dev->n_interrupts = j;
@@ -445,6 +498,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
 	/* Setup interrupts & resources */
 	macio_setup_interrupts(dev);
 	macio_setup_resources(dev, parent_res);
+	macio_add_missing_resources(dev);
 
 	/* Register with core */
 	if (of_device_register(&dev->ofdev) != 0) {
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index ff6d9bf..f139a74 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -497,8 +497,7 @@ int __init smu_init (void)
 	smu->doorbell = *data;
 	if (smu->doorbell < 0x50)
 		smu->doorbell += 0x50;
-	if (np->n_intrs > 0)
-		smu->db_irq = np->intrs[0].line;
+	smu->db_irq = irq_of_parse_and_map(np, 0);
 
 	of_node_put(np);
 
@@ -515,8 +514,7 @@ int __init smu_init (void)
 		smu->msg = *data;
 		if (smu->msg < 0x50)
 			smu->msg += 0x50;
-		if (np->n_intrs > 0)
-			smu->msg_irq = np->intrs[0].line;
+		smu->msg_irq = irq_of_parse_and_map(np, 0);
 		of_node_put(np);
 	} while(0);
 
diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c
index 6501db5..69d5452 100644
--- a/drivers/macintosh/via-cuda.c
+++ b/drivers/macintosh/via-cuda.c
@@ -34,13 +34,6 @@
 static volatile unsigned char __iomem *via;
 static DEFINE_SPINLOCK(cuda_lock);
 
-#ifdef CONFIG_MAC
-#define CUDA_IRQ IRQ_MAC_ADB
-#define eieio()
-#else
-#define CUDA_IRQ vias->intrs[0].line
-#endif
-
 /* VIA registers - spaced 0x200 bytes apart */
 #define RS		0x200		/* skip between registers */
 #define B		0		/* B-side data */
@@ -189,11 +182,24 @@ int __init find_via_cuda(void)
 
 static int __init via_cuda_start(void)
 {
+    unsigned int irq;
+
     if (via == NULL)
 	return -ENODEV;
 
-    if (request_irq(CUDA_IRQ, cuda_interrupt, 0, "ADB", cuda_interrupt)) {
-	printk(KERN_ERR "cuda_init: can't get irq %d\n", CUDA_IRQ);
+#ifdef CONFIG_MAC
+    irq = IRQ_MAC_ADB;
+#else /* CONFIG_MAC */
+    irq = irq_of_parse_and_map(vias, 0);
+    if (irq == NO_IRQ) {
+	printk(KERN_ERR "via-cuda: can't map interrupts for %s\n",
+	       vias->full_name);
+	return -ENODEV;
+    }
+#endif /* CONFIG_MAP */
+
+    if (request_irq(irq, cuda_interrupt, 0, "ADB", cuda_interrupt)) {
+	printk(KERN_ERR "via-cuda: can't request irq %d\n", irq);
 	return -EAGAIN;
     }
 
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index c1193d3..06ca80b 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -64,10 +64,6 @@
 #include <asm/backlight.h>
 #endif
 
-#ifdef CONFIG_PPC32
-#include <asm/open_pic.h>
-#endif
-
 #include "via-pmu-event.h"
 
 /* Some compile options */
@@ -151,7 +147,7 @@ static int pmu_fully_inited = 0;
 static int pmu_has_adb;
 static struct device_node *gpio_node;
 static unsigned char __iomem *gpio_reg = NULL;
-static int gpio_irq = -1;
+static int gpio_irq = NO_IRQ;
 static int gpio_irq_enabled = -1;
 static volatile int pmu_suspended = 0;
 static spinlock_t pmu_lock;
@@ -403,22 +399,21 @@ static int __init pmu_init(void)
  */
 static int __init via_pmu_start(void)
 {
+	unsigned int irq;
+
 	if (vias == NULL)
 		return -ENODEV;
 
 	batt_req.complete = 1;
 
-#ifndef CONFIG_PPC_MERGE
-	if (pmu_kind == PMU_KEYLARGO_BASED)
-		openpic_set_irq_priority(vias->intrs[0].line,
-					 OPENPIC_PRIORITY_DEFAULT + 1);
-#endif
-
-	if (request_irq(vias->intrs[0].line, via_pmu_interrupt, 0, "VIA-PMU",
-			(void *)0)) {
-		printk(KERN_ERR "VIA-PMU: can't get irq %d\n",
-		       vias->intrs[0].line);
-		return -EAGAIN;
+	irq = irq_of_parse_and_map(vias, 0);
+	if (irq == NO_IRQ) {
+		printk(KERN_ERR "via-pmu: can't map interruptn");
+		return -ENODEV;
+	}
+	if (request_irq(irq, via_pmu_interrupt, 0, "VIA-PMU", (void *)0)) {
+		printk(KERN_ERR "via-pmu: can't request irq %d\n", irq);
+		return -ENODEV;
 	}
 
 	if (pmu_kind == PMU_KEYLARGO_BASED) {
@@ -426,10 +421,10 @@ static int __init via_pmu_start(void)
 		if (gpio_node == NULL)
 			gpio_node = of_find_node_by_name(NULL,
 							 "pmu-interrupt");
-		if (gpio_node && gpio_node->n_intrs > 0)
-			gpio_irq = gpio_node->intrs[0].line;
+		if (gpio_node)
+			gpio_irq = irq_of_parse_and_map(gpio_node, 0);
 
-		if (gpio_irq != -1) {
+		if (gpio_irq != NO_IRQ) {
 			if (request_irq(gpio_irq, gpio1_interrupt, 0,
 					"GPIO1 ADB", (void *)0))
 				printk(KERN_ERR "pmu: can't get irq %d"
diff --git a/drivers/net/mace.c b/drivers/net/mace.c
index f2c0bf8..29e4b5a 100644
--- a/drivers/net/mace.c
+++ b/drivers/net/mace.c
@@ -242,12 +242,12 @@ static int __devinit mace_probe(struct macio_dev *mdev, const struct of_device_i
 	}
 	rc = request_irq(mp->tx_dma_intr, mace_txdma_intr, 0, "MACE-txdma", dev);
 	if (rc) {
-		printk(KERN_ERR "MACE: can't get irq %d\n", mace->intrs[1].line);
+		printk(KERN_ERR "MACE: can't get irq %d\n", mp->tx_dma_intr);
 		goto err_free_irq;
 	}
 	rc = request_irq(mp->rx_dma_intr, mace_rxdma_intr, 0, "MACE-rxdma", dev);
 	if (rc) {
-		printk(KERN_ERR "MACE: can't get irq %d\n", mace->intrs[2].line);
+		printk(KERN_ERR "MACE: can't get irq %d\n", mp->rx_dma_intr);
 		goto err_free_tx_irq;
 	}
 
diff --git a/drivers/serial/pmac_zilog.c b/drivers/serial/pmac_zilog.c
index 459c023..bfd2a22 100644
--- a/drivers/serial/pmac_zilog.c
+++ b/drivers/serial/pmac_zilog.c
@@ -1443,8 +1443,8 @@ static int __init pmz_init_port(struct uart_pmac_port *uap)
 			uap->flags &= ~PMACZILOG_FLAG_HAS_DMA;
 			goto no_dma;
 		}
-		uap->tx_dma_irq = np->intrs[1].line;
-		uap->rx_dma_irq = np->intrs[2].line;
+		uap->tx_dma_irq = irq_of_parse_and_map(np, 1);
+		uap->rx_dma_irq = irq_of_parse_and_map(np, 2);
 	}
 no_dma:
 
@@ -1491,7 +1491,7 @@ no_dma:
 	 * Init remaining bits of "port" structure
 	 */
 	uap->port.iotype = UPIO_MEM;
-	uap->port.irq = np->intrs[0].line;
+	uap->port.irq = irq_of_parse_and_map(np, 0);
 	uap->port.uartclk = ZS_CLOCK;
 	uap->port.fifosize = 1;
 	uap->port.ops = &pmz_pops;
diff --git a/include/asm-powerpc/i8259.h b/include/asm-powerpc/i8259.h
index ff31cb9..c80e113 100644
--- a/include/asm-powerpc/i8259.h
+++ b/include/asm-powerpc/i8259.h
@@ -4,8 +4,13 @@
 
 #include <linux/irq.h>
 
+#ifdef CONFIG_PPC_MERGE
+extern void i8259_init(struct device_node *node, unsigned long intack_addr);
+extern unsigned int i8259_irq(struct pt_regs *regs);
+#else
 extern void i8259_init(unsigned long intack_addr, int offset);
 extern int i8259_irq(struct pt_regs *regs);
+#endif
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_I8259_H */
diff --git a/include/asm-powerpc/irq.h b/include/asm-powerpc/irq.h
index 13fa2ef..e057547 100644
--- a/include/asm-powerpc/irq.h
+++ b/include/asm-powerpc/irq.h
@@ -9,26 +9,14 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/config.h>
 #include <linux/threads.h>
+#include <linux/list.h>
+#include <linux/radix-tree.h>
 
 #include <asm/types.h>
 #include <asm/atomic.h>
 
-/* this number is used when no interrupt has been assigned */
-#define NO_IRQ			(-1)
-
-/*
- * These constants are used for passing information about interrupt
- * signal polarity and level/edge sensing to the low-level PIC chip
- * drivers.
- */
-#define IRQ_SENSE_MASK		0x1
-#define IRQ_SENSE_LEVEL		0x1	/* interrupt on active level */
-#define IRQ_SENSE_EDGE		0x0	/* interrupt triggered by edge */
-
-#define IRQ_POLARITY_MASK	0x2
-#define IRQ_POLARITY_POSITIVE	0x2	/* high level or low->high edge */
-#define IRQ_POLARITY_NEGATIVE	0x0	/* low level or high->low edge */
 
 #define get_irq_desc(irq) (&irq_desc[(irq)])
 
@@ -36,50 +24,325 @@
 #define for_each_irq(i) \
 	for ((i) = 0; (i) < NR_IRQS; ++(i))
 
-#ifdef CONFIG_PPC64
+extern atomic_t ppc_n_lost_interrupts;
 
-/*
- * Maximum number of interrupt sources that we can handle.
+#ifdef CONFIG_PPC_MERGE
+
+/* This number is used when no interrupt has been assigned */
+#define NO_IRQ			(0)
+
+/* This is a special irq number to return from get_irq() to tell that
+ * no interrupt happened _and_ ignore it (don't count it as bad). Some
+ * platforms like iSeries rely on that.
  */
+#define NO_IRQ_IGNORE		((unsigned int)-1)
+
+/* Total number of virq in the platform (make it a CONFIG_* option ? */
 #define NR_IRQS		512
 
-/* Interrupt numbers are virtual in case they are sparsely
- * distributed by the hardware.
+/* Number of irqs reserved for the legacy controller */
+#define NUM_ISA_INTERRUPTS	16
+
+/* This type is the placeholder for a hardware interrupt number. It has to
+ * be big enough to enclose whatever representation is used by a given
+ * platform.
+ */
+typedef unsigned long irq_hw_number_t;
+
+/* Interrupt controller "host" data structure. This could be defined as a
+ * irq domain controller. That is, it handles the mapping between hardware
+ * and virtual interrupt numbers for a given interrupt domain. The host
+ * structure is generally created by the PIC code for a given PIC instance
+ * (though a host can cover more than one PIC if they have a flat number
+ * model). It's the host callbacks that are responsible for setting the
+ * irq_chip on a given irq_desc after it's been mapped.
+ *
+ * The host code and data structures are fairly agnostic to the fact that
+ * we use an open firmware device-tree. We do have references to struct
+ * device_node in two places: in irq_find_host() to find the host matching
+ * a given interrupt controller node, and of course as an argument to its
+ * counterpart host->ops->match() callback. However, those are treated as
+ * generic pointers by the core and the fact that it's actually a device-node
+ * pointer is purely a convention between callers and implementation. This
+ * code could thus be used on other architectures by replacing those two
+ * by some sort of arch-specific void * "token" used to identify interrupt
+ * controllers.
+ */
+struct irq_host;
+struct radix_tree_root;
+
+/* Functions below are provided by the host and called whenever a new mapping
+ * is created or an old mapping is disposed. The host can then proceed to
+ * whatever internal data structures management is required. It also needs
+ * to setup the irq_desc when returning from map().
+ */
+struct irq_host_ops {
+	/* Match an interrupt controller device node to a host, returns
+	 * 1 on a match
+	 */
+	int (*match)(struct irq_host *h, struct device_node *node);
+
+	/* Create or update a mapping between a virtual irq number and a hw
+	 * irq number. This can be called several times for the same mapping
+	 * but with different flags, though unmap shall always be called
+	 * before the virq->hw mapping is changed.
+	 */
+	int (*map)(struct irq_host *h, unsigned int virq,
+		   irq_hw_number_t hw, unsigned int flags);
+
+	/* Dispose of such a mapping */
+	void (*unmap)(struct irq_host *h, unsigned int virq);
+
+	/* Translate device-tree interrupt specifier from raw format coming
+	 * from the firmware to a irq_hw_number_t (interrupt line number) and
+	 * trigger flags that can be passed to irq_create_mapping().
+	 * If no translation is provided, raw format is assumed to be one cell
+	 * for interrupt line and default sense.
+	 */
+	int (*xlate)(struct irq_host *h, struct device_node *ctrler,
+		     u32 *intspec, unsigned int intsize,
+		     irq_hw_number_t *out_hwirq, unsigned int *out_flags);
+};
+
+struct irq_host {
+	struct list_head	link;
+
+	/* type of reverse mapping technique */
+	unsigned int		revmap_type;
+#define IRQ_HOST_MAP_LEGACY     0 /* legacy 8259, gets irqs 1..15 */
+#define IRQ_HOST_MAP_NOMAP	1 /* no fast reverse mapping */
+#define IRQ_HOST_MAP_LINEAR	2 /* linear map of interrupts */
+#define IRQ_HOST_MAP_TREE	3 /* radix tree */
+	union {
+		struct {
+			unsigned int size;
+			unsigned int *revmap;
+		} linear;
+		struct radix_tree_root tree;
+	} revmap_data;
+	struct irq_host_ops	*ops;
+	void			*host_data;
+	irq_hw_number_t		inval_irq;
+};
+
+/* The main irq map itself is an array of NR_IRQ entries containing the
+ * associate host and irq number. An entry with a host of NULL is free.
+ * An entry can be allocated if it's free, the allocator always then sets
+ * hwirq first to the host's invalid irq number and then fills ops.
+ */
+struct irq_map_entry {
+	irq_hw_number_t	hwirq;
+	struct irq_host	*host;
+};
+
+extern struct irq_map_entry irq_map[NR_IRQS];
+
+
+/***
+ * irq_alloc_host - Allocate a new irq_host data structure
+ * @node: device-tree node of the interrupt controller
+ * @revmap_type: type of reverse mapping to use
+ * @revmap_arg: for IRQ_HOST_MAP_LINEAR linear only: size of the map
+ * @ops: map/unmap host callbacks
+ * @inval_irq: provide a hw number in that host space that is always invalid
+ *
+ * Allocates and initialize and irq_host structure. Note that in the case of
+ * IRQ_HOST_MAP_LEGACY, the map() callback will be called before this returns
+ * for all legacy interrupts except 0 (which is always the invalid irq for
+ * a legacy controller). For a IRQ_HOST_MAP_LINEAR, the map is allocated by
+ * this call as well. For a IRQ_HOST_MAP_TREE, the radix tree will be allocated
+ * later during boot automatically (the reverse mapping will use the slow path
+ * until that happens).
+ */
+extern struct irq_host *irq_alloc_host(unsigned int revmap_type,
+				       unsigned int revmap_arg,
+				       struct irq_host_ops *ops,
+				       irq_hw_number_t inval_irq);
+
+
+/***
+ * irq_find_host - Locates a host for a given device node
+ * @node: device-tree node of the interrupt controller
+ */
+extern struct irq_host *irq_find_host(struct device_node *node);
+
+
+/***
+ * irq_set_default_host - Set a "default" host
+ * @host: default host pointer
+ *
+ * For convenience, it's possible to set a "default" host that will be used
+ * whenever NULL is passed to irq_create_mapping(). It makes life easier for
+ * platforms that want to manipulate a few hard coded interrupt numbers that
+ * aren't properly represented in the device-tree.
+ */
+extern void irq_set_default_host(struct irq_host *host);
+
+
+/***
+ * irq_set_virq_count - Set the maximum number of virt irqs
+ * @count: number of linux virtual irqs, capped with NR_IRQS
+ *
+ * This is mainly for use by platforms like iSeries who want to program
+ * the virtual irq number in the controller to avoid the reverse mapping
+ */
+extern void irq_set_virq_count(unsigned int count);
+
+
+/***
+ * irq_create_mapping - Map a hardware interrupt into linux virq space
+ * @host: host owning this hardware interrupt or NULL for default host
+ * @hwirq: hardware irq number in that host space
+ * @flags: flags passed to the controller. contains the trigger type among
+ *         others. Use IRQ_TYPE_* defined in include/linux/irq.h
+ *
+ * Only one mapping per hardware interrupt is permitted. Returns a linux
+ * virq number. The flags can be used to provide sense information to the
+ * controller (typically extracted from the device-tree). If no information
+ * is passed, the controller defaults will apply (for example, xics can only
+ * do edge so flags are irrelevant for some pseries specific irqs).
+ *
+ * The device-tree generally contains the trigger info in an encoding that is
+ * specific to a given type of controller. In that case, you can directly use
+ * host->ops->trigger_xlate() to translate that.
+ *
+ * It is recommended that new PICs that don't have existing OF bindings chose
+ * to use a representation of triggers identical to linux.
+ */
+extern unsigned int irq_create_mapping(struct irq_host *host,
+				       irq_hw_number_t hwirq,
+				       unsigned int flags);
+
+
+/***
+ * irq_dispose_mapping - Unmap an interrupt
+ * @virq: linux virq number of the interrupt to unmap
  */
-extern unsigned int virt_irq_to_real_map[NR_IRQS];
+extern void irq_dispose_mapping(unsigned int virq);
 
-/* The maximum virtual IRQ number that we support.  This
- * can be set by the platform and will be reduced by the
- * value of __irq_offset_value.  It defaults to and is
- * capped by (NR_IRQS - 1).
+/***
+ * irq_find_mapping - Find a linux virq from an hw irq number.
+ * @host: host owning this hardware interrupt
+ * @hwirq: hardware irq number in that host space
+ *
+ * This is a slow path, for use by generic code. It's expected that an
+ * irq controller implementation directly calls the appropriate low level
+ * mapping function.
  */
-extern unsigned int virt_irq_max;
+extern unsigned int irq_find_mapping(struct irq_host *host,
+				     irq_hw_number_t hwirq);
 
-/* Create a mapping for a real_irq if it doesn't already exist.
- * Return the virtual irq as a convenience.
+
+/***
+ * irq_radix_revmap - Find a linux virq from a hw irq number.
+ * @host: host owning this hardware interrupt
+ * @hwirq: hardware irq number in that host space
+ *
+ * This is a fast path, for use by irq controller code that uses radix tree
+ * revmaps
+ */
+extern unsigned int irq_radix_revmap(struct irq_host *host,
+				     irq_hw_number_t hwirq);
+
+/***
+ * irq_linear_revmap - Find a linux virq from a hw irq number.
+ * @host: host owning this hardware interrupt
+ * @hwirq: hardware irq number in that host space
+ *
+ * This is a fast path, for use by irq controller code that uses linear
+ * revmaps. It does fallback to the slow path if the revmap doesn't exist
+ * yet and will create the revmap entry with appropriate locking
+ */
+
+extern unsigned int irq_linear_revmap(struct irq_host *host,
+				      irq_hw_number_t hwirq);
+
+
+
+/***
+ * irq_alloc_virt - Allocate virtual irq numbers
+ * @host: host owning these new virtual irqs
+ * @count: number of consecutive numbers to allocate
+ * @hint: pass a hint number, the allocator will try to use a 1:1 mapping
+ *
+ * This is a low level function that is used internally by irq_create_mapping()
+ * and that can be used by some irq controllers implementations for things
+ * like allocating ranges of numbers for MSIs. The revmaps are left untouched.
  */
-int virt_irq_create_mapping(unsigned int real_irq);
-void virt_irq_init(void);
+extern unsigned int irq_alloc_virt(struct irq_host *host,
+				   unsigned int count,
+				   unsigned int hint);
+
+/***
+ * irq_free_virt - Free virtual irq numbers
+ * @virq: virtual irq number of the first interrupt to free
+ * @count: number of interrupts to free
+ *
+ * This function is the opposite of irq_alloc_virt. It will not clear reverse
+ * maps, this should be done previously by unmap'ing the interrupt. In fact,
+ * all interrupts covered by the range being freed should have been unmapped
+ * prior to calling this.
+ */
+extern void irq_free_virt(unsigned int virq, unsigned int count);
+
+
+/* -- OF helpers -- */
+
+/* irq_create_of_mapping - Map a hardware interrupt into linux virq space
+ * @controller: Device node of the interrupt controller
+ * @inspec: Interrupt specifier from the device-tree
+ * @intsize: Size of the interrupt specifier from the device-tree
+ *
+ * This function is identical to irq_create_mapping except that it takes
+ * as input informations straight from the device-tree (typically the results
+ * of the of_irq_map_*() functions
+ */
+extern unsigned int irq_create_of_mapping(struct device_node *controller,
+					  u32 *intspec, unsigned int intsize);
+
+
+/* irq_of_parse_and_map - Parse nad Map an interrupt into linux virq space
+ * @device: Device node of the device whose interrupt is to be mapped
+ * @index: Index of the interrupt to map
+ *
+ * This function is a wrapper that chains of_irq_map_one() and
+ * irq_create_of_mapping() to make things easier to callers
+ */
+extern unsigned int irq_of_parse_and_map(struct device_node *dev, int index);
+
+/* -- End OF helpers -- */
 
-static inline unsigned int virt_irq_to_real(unsigned int virt_irq)
+/***
+ * irq_early_init - Init irq remapping subsystem
+ */
+extern void irq_early_init(void);
+
+static __inline__ int irq_canonicalize(int irq)
 {
-	return virt_irq_to_real_map[virt_irq];
+	return irq;
 }
 
-extern unsigned int real_irq_to_virt_slowpath(unsigned int real_irq);
+
+#else /* CONFIG_PPC_MERGE */
+
+/* This number is used when no interrupt has been assigned */
+#define NO_IRQ			(-1)
+#define NO_IRQ_IGNORE		(-2)
+
 
 /*
- * List of interrupt controllers.
+ * These constants are used for passing information about interrupt
+ * signal polarity and level/edge sensing to the low-level PIC chip
+ * drivers.
  */
-#define IC_INVALID    0
-#define IC_OPEN_PIC   1
-#define IC_PPC_XIC    2
-#define IC_CELL_PIC   3
-#define IC_ISERIES    4
+#define IRQ_SENSE_MASK		0x1
+#define IRQ_SENSE_LEVEL		0x1	/* interrupt on active level */
+#define IRQ_SENSE_EDGE		0x0	/* interrupt triggered by edge */
 
-extern u64 ppc64_interrupt_controller;
+#define IRQ_POLARITY_MASK	0x2
+#define IRQ_POLARITY_POSITIVE	0x2	/* high level or low->high edge */
+#define IRQ_POLARITY_NEGATIVE	0x0	/* low level or high->low edge */
 
-#else /* 32-bit */
 
 #if defined(CONFIG_40x)
 #include <asm/ibm4xx.h>
@@ -512,19 +775,11 @@ extern u64 ppc64_interrupt_controller;
 
 #endif /* CONFIG_8260 */
 
-#endif
+#endif /* Whatever way too big #ifdef */
 
-#ifndef CONFIG_PPC_MERGE
 #define NR_MASK_WORDS	((NR_IRQS + 31) / 32)
 /* pedantic: these are long because they are used with set_bit --RR */
 extern unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
-#endif
-
-extern atomic_t ppc_n_lost_interrupts;
-
-#define virt_irq_create_mapping(x)	(x)
-
-#endif
 
 /*
  * Because many systems have two overlapping names spaces for
@@ -563,6 +818,7 @@ static __inline__ int irq_canonicalize(int irq)
 		irq = 9;
 	return irq;
 }
+#endif /* CONFIG_PPC_MERGE */
 
 extern int distribute_irqs;
 
diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h
index eba133d..c17c137 100644
--- a/include/asm-powerpc/machdep.h
+++ b/include/asm-powerpc/machdep.h
@@ -97,7 +97,7 @@ struct machdep_calls {
 	void		(*show_percpuinfo)(struct seq_file *m, int i);
 
 	void		(*init_IRQ)(void);
-	int		(*get_irq)(struct pt_regs *);
+	unsigned int	(*get_irq)(struct pt_regs *);
 #ifdef CONFIG_KEXEC
 	void		(*kexec_cpu_down)(int crash_shutdown, int secondary);
 #endif
diff --git a/include/asm-powerpc/mpic.h b/include/asm-powerpc/mpic.h
index a2277cb..eb241c9 100644
--- a/include/asm-powerpc/mpic.h
+++ b/include/asm-powerpc/mpic.h
@@ -129,6 +129,12 @@ struct mpic_irq_fixup
 /* The instance data of a given MPIC */
 struct mpic
 {
+	/* The device node of the interrupt controller */
+	struct device_node	*of_node;
+
+	/* The remapper for this MPIC */
+	struct irq_host		*irqhost;
+
 	/* The "linux" controller struct */
 	struct irq_chip		hc_irq;
 #ifdef CONFIG_MPIC_BROKEN_U3
@@ -144,16 +150,12 @@ struct mpic
 	unsigned int		isu_size;
 	unsigned int		isu_shift;
 	unsigned int		isu_mask;
-	/* Offset of irq vector numbers */
-	unsigned int		irq_offset;	
 	unsigned int		irq_count;
-	/* Offset of ipi vector numbers */
-	unsigned int		ipi_offset;
 	/* Number of sources */
 	unsigned int		num_sources;
 	/* Number of CPUs */
 	unsigned int		num_cpus;
-	/* senses array */
+	/* default senses array */
 	unsigned char		*senses;
 	unsigned int		senses_count;
 
@@ -209,14 +211,11 @@ struct mpic
  * The values in the array start at the first source of the MPIC,
  * that is senses[0] correspond to linux irq "irq_offset".
  */
-extern struct mpic *mpic_alloc(unsigned long phys_addr,
+extern struct mpic *mpic_alloc(struct device_node *node,
+			       unsigned long phys_addr,
 			       unsigned int flags,
 			       unsigned int isu_size,
-			       unsigned int irq_offset,
 			       unsigned int irq_count,
-			       unsigned int ipi_offset,
-			       unsigned char *senses,
-			       unsigned int senses_num,
 			       const char *name);
 
 /* Assign ISUs, to call before mpic_init()
@@ -228,6 +227,22 @@ extern struct mpic *mpic_alloc(unsigned long phys_addr,
 extern void mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
 			    unsigned long phys_addr);
 
+/* Set default sense codes
+ *
+ * @mpic:	controller
+ * @senses:	array of sense codes
+ * @count:	size of above array
+ *
+ * Optionally provide an array (indexed on hardware interrupt numbers
+ * for this MPIC) of default sense codes for the chip. Those are linux
+ * sense codes IRQ_TYPE_*
+ *
+ * The driver gets ownership of the pointer, don't dispose of it or
+ * anything like that. __init only.
+ */
+extern void mpic_set_default_senses(struct mpic *mpic, u8 *senses, int count);
+
+
 /* Initialize the controller. After this has been called, none of the above
  * should be called again for this mpic
  */
@@ -269,9 +284,9 @@ extern void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask);
 void smp_mpic_message_pass(int target, int msg);
 
 /* Fetch interrupt from a given mpic */
-extern int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs);
+extern unsigned int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs);
 /* This one gets to the primary mpic */
-extern int mpic_get_irq(struct pt_regs *regs);
+extern unsigned int mpic_get_irq(struct pt_regs *regs);
 
 /* Set the EPIC clock ratio */
 void mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio);
@@ -279,8 +294,5 @@ void mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio);
 /* Enable/Disable EPIC serial interrupt mode */
 void mpic_set_serial_int(struct mpic *mpic, int enable);
 
-/* global mpic for pSeries */
-extern struct mpic *pSeries_mpic;
-
 #endif /* __KERNEL__ */
 #endif	/* _ASM_POWERPC_MPIC_H */
diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h
index 48bef40..b095a28 100644
--- a/include/asm-powerpc/prom.h
+++ b/include/asm-powerpc/prom.h
@@ -64,11 +64,6 @@ struct boot_param_header
 typedef u32 phandle;
 typedef u32 ihandle;
 
-struct interrupt_info {
-	int	line;
-	int	sense;		/* +ve/-ve logic, edge or level, etc. */
-};
-
 struct property {
 	char	*name;
 	int	length;
@@ -81,8 +76,6 @@ struct device_node {
 	char	*type;
 	phandle	node;
 	phandle linux_phandle;
-	int	n_intrs;
-	struct	interrupt_info *intrs;
 	char	*full_name;
 
 	struct	property *properties;
diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h
index 9609d3e..c02d105 100644
--- a/include/asm-powerpc/spu.h
+++ b/include/asm-powerpc/spu.h
@@ -117,6 +117,7 @@ struct spu {
 	struct list_head sched_list;
 	int number;
 	int nid;
+	unsigned int irqs[3];
 	u32 isrc;
 	u32 node;
 	u64 flags;
diff --git a/sound/aoa/core/snd-aoa-gpio-feature.c b/sound/aoa/core/snd-aoa-gpio-feature.c
index bab9754..7ae0c0b 100644
--- a/sound/aoa/core/snd-aoa-gpio-feature.c
+++ b/sound/aoa/core/snd-aoa-gpio-feature.c
@@ -112,12 +112,7 @@ static struct device_node *get_gpio(char *name,
 
 static void get_irq(struct device_node * np, int *irqptr)
 {
-	*irqptr = -1;
-	if (!np)
-		return;
-	if (np->n_intrs != 1)
-		return;
-	*irqptr = np->intrs[0].line;
+	*irqptr = irq_of_parse_and_map(np, 0);
 }
 
 /* 0x4 is outenable, 0x1 is out, thus 4 or 5 */
diff --git a/sound/aoa/soundbus/i2sbus/i2sbus-core.c b/sound/aoa/soundbus/i2sbus/i2sbus-core.c
index f268dac..01c0724 100644
--- a/sound/aoa/soundbus/i2sbus/i2sbus-core.c
+++ b/sound/aoa/soundbus/i2sbus/i2sbus-core.c
@@ -129,7 +129,7 @@ static int i2sbus_add_dev(struct macio_dev *macio,
 	if (strncmp(np->name, "i2s-", 4))
 		return 0;
 
-	if (np->n_intrs != 3)
+	if (macio_irq_count(macio) != 3)
 		return 0;
 
 	dev = kzalloc(sizeof(struct i2sbus_dev), GFP_KERNEL);
@@ -183,9 +183,10 @@ static int i2sbus_add_dev(struct macio_dev *macio,
 		snprintf(dev->rnames[i], sizeof(dev->rnames[i]), rnames[i], np->name);
 	}
 	for (i=0;i<3;i++) {
-		if (request_irq(np->intrs[i].line, ints[i], 0, dev->rnames[i], dev))
+		if (request_irq(macio_irq(macio, i), ints[i], 0,
+				dev->rnames[i], dev))
 			goto err;
-		dev->interrupts[i] = np->intrs[i].line;
+		dev->interrupts[i] = macio_irq(macio, i);
 	}
 
 	for (i=0;i<3;i++) {
diff --git a/sound/oss/dmasound/dmasound_awacs.c b/sound/oss/dmasound/dmasound_awacs.c
index de454ca..4359903 100644
--- a/sound/oss/dmasound/dmasound_awacs.c
+++ b/sound/oss/dmasound/dmasound_awacs.c
@@ -374,10 +374,7 @@ setup_audio_gpio(const char *name, const char* compatible, int *gpio_addr, int*
 		*gpio_pol = *pp;
 	else
 		*gpio_pol = 1;
-	if (np->n_intrs > 0)
-		return np->intrs[0].line;
-	
-	return 0;
+	return irq_of_parse_and_map(np, 0);
 }
 
 static inline void
@@ -2864,14 +2861,13 @@ printk("dmasound_pmac: couldn't find a Codec we can handle\n");
 	 * other info if necessary (early AWACS we want to read chip ids)
 	 */
 
-	if (of_get_address(io, 2, NULL, NULL) == NULL || io->n_intrs < 3) {
+	if (of_get_address(io, 2, NULL, NULL) == NULL) {
 		/* OK - maybe we need to use the 'awacs' node (on earlier
 		 * machines).
 		 */
 		if (awacs_node) {
 			io = awacs_node ;
-			if (of_get_address(io, 2, NULL, NULL) == NULL ||
-			    io->n_intrs < 3) {
+			if (of_get_address(io, 2, NULL, NULL) == NULL) {
 				printk("dmasound_pmac: can't use %s\n",
 				       io->full_name);
 				return -ENODEV;
@@ -2940,9 +2936,9 @@ printk("dmasound_pmac: couldn't find a Codec we can handle\n");
 	if (awacs_revision == AWACS_SCREAMER && awacs)
 		awacs_recalibrate();
 
-	awacs_irq = io->intrs[0].line;
-	awacs_tx_irq = io->intrs[1].line;
-	awacs_rx_irq = io->intrs[2].line;
+	awacs_irq = irq_of_parse_and_map(io, 0);
+	awacs_tx_irq = irq_of_parse_and_map(io, 1);
+	awacs_rx_irq = irq_of_parse_and_map(io, 2);
 
 	/* Hack for legacy crap that will be killed someday */
 	awacs_node = io;
diff --git a/sound/ppc/pmac.c b/sound/ppc/pmac.c
index 90db9a1..6414306 100644
--- a/sound/ppc/pmac.c
+++ b/sound/ppc/pmac.c
@@ -1120,6 +1120,7 @@ int __init snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return)
 	struct snd_pmac *chip;
 	struct device_node *np;
 	int i, err;
+	unsigned int irq;
 	unsigned long ctrl_addr, txdma_addr, rxdma_addr;
 	static struct snd_device_ops ops = {
 		.dev_free =	snd_pmac_dev_free,
@@ -1153,10 +1154,6 @@ int __init snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return)
 	if (chip->is_k2) {
 		static char *rnames[] = {
 			"Sound Control", "Sound DMA" };
-		if (np->n_intrs < 3) {
-			err = -ENODEV;
-			goto __error;
-		}
 		for (i = 0; i < 2; i ++) {
 			if (of_address_to_resource(np->parent, i,
 						   &chip->rsrc[i])) {
@@ -1185,10 +1182,6 @@ int __init snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return)
 	} else {
 		static char *rnames[] = {
 			"Sound Control", "Sound Tx DMA", "Sound Rx DMA" };
-		if (np->n_intrs < 3) {
-			err = -ENODEV;
-			goto __error;
-		}
 		for (i = 0; i < 3; i ++) {
 			if (of_address_to_resource(np, i,
 						   &chip->rsrc[i])) {
@@ -1220,28 +1213,30 @@ int __init snd_pmac_new(struct snd_card *card, struct snd_pmac **chip_return)
 	chip->playback.dma = ioremap(txdma_addr, 0x100);
 	chip->capture.dma = ioremap(rxdma_addr, 0x100);
 	if (chip->model <= PMAC_BURGUNDY) {
-		if (request_irq(np->intrs[0].line, snd_pmac_ctrl_intr, 0,
+		irq = irq_of_parse_and_map(np, 0);
+		if (request_irq(irq, snd_pmac_ctrl_intr, 0,
 				"PMac", (void*)chip)) {
-			snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n", np->intrs[0].line);
+			snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n",
+				   irq);
 			err = -EBUSY;
 			goto __error;
 		}
-		chip->irq = np->intrs[0].line;
+		chip->irq = irq;
 	}
-	if (request_irq(np->intrs[1].line, snd_pmac_tx_intr, 0,
-			"PMac Output", (void*)chip)) {
-		snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n", np->intrs[1].line);
+	irq = irq_of_parse_and_map(np, 1);
+	if (request_irq(irq, snd_pmac_tx_intr, 0, "PMac Output", (void*)chip)){
+		snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n", irq);
 		err = -EBUSY;
 		goto __error;
 	}
-	chip->tx_irq = np->intrs[1].line;
-	if (request_irq(np->intrs[2].line, snd_pmac_rx_intr, 0,
-			"PMac Input", (void*)chip)) {
-		snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n", np->intrs[2].line);
+	chip->tx_irq = irq;
+	irq = irq_of_parse_and_map(np, 2);
+	if (request_irq(irq, snd_pmac_rx_intr, 0, "PMac Input", (void*)chip)) {
+		snd_printk(KERN_ERR "pmac: unable to grab IRQ %d\n", irq);
 		err = -EBUSY;
 		goto __error;
 	}
-	chip->rx_irq = np->intrs[2].line;
+	chip->rx_irq = irq;
 
 	snd_pmac_sound_feature(chip, 1);
 
diff --git a/sound/ppc/tumbler.c b/sound/ppc/tumbler.c
index 70e4ebc..692c611 100644
--- a/sound/ppc/tumbler.c
+++ b/sound/ppc/tumbler.c
@@ -1121,7 +1121,7 @@ static long tumbler_find_device(const char *device, const char *platform,
 	DBG("(I) GPIO device %s found, offset: %x, active state: %d !\n",
 	    device, gp->addr, gp->active_state);
 
-	return (node->n_intrs > 0) ? node->intrs[0].line : 0;
+	return irq_of_parse_and_map(node, 0);
 }
 
 /* reset audio */
@@ -1264,16 +1264,16 @@ static int __init tumbler_init(struct snd_pmac *chip)
 				    &mix->line_mute, 1);
 	irq = tumbler_find_device("headphone-detect",
 				  NULL, &mix->hp_detect, 0);
-	if (irq < 0)
+	if (irq <= NO_IRQ)
 		irq = tumbler_find_device("headphone-detect",
 					  NULL, &mix->hp_detect, 1);
-	if (irq < 0)
+	if (irq <= NO_IRQ)
 		irq = tumbler_find_device("keywest-gpio15",
 					  NULL, &mix->hp_detect, 1);
 	mix->headphone_irq = irq;
  	irq = tumbler_find_device("line-output-detect",
 				  NULL, &mix->line_detect, 0);
- 	if (irq < 0)
+ 	if (irq <= NO_IRQ)
 		irq = tumbler_find_device("line-output-detect",
 					  NULL, &mix->line_detect, 1);
 	mix->lineout_irq = irq;
-- 
cgit v0.10.2


From 00eb0f6b6595107dd8be80d1af2220cedb71130d Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Mon, 3 Jul 2006 12:36:07 +0100
Subject: [ARM] More missing proc-macros.S includes

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index 700297a..1d8316f 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -34,6 +34,8 @@
 #include <asm/procinfo.h>
 #include <asm/ptrace.h>
 
+#include "proc-macros.S"
+
 /*
  * This is the maximum size of an area which will be invalidated
  * using the single invalidate entry instructions.  Anything larger
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index 0c33a5e..89b1d6d 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -34,6 +34,8 @@
 #include <asm/procinfo.h>
 #include <asm/ptrace.h>
 
+#include "proc-macros.S"
+
 /*
  * This is the maximum size of an area which will be invalidated
  * using the single invalidate entry instructions.  Anything larger
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index 566a556..a089528 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -23,6 +23,8 @@
 #include <asm/procinfo.h>
 #include <asm/ptrace.h>
 
+#include "proc-macros.S"
+
 /*
  * This is the maximum size of an area which will be invalidated
  * using the single invalidate entry instructions.  Anything larger
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index 6ea7632..d6d84d9 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -23,6 +23,8 @@
 #include <asm/procinfo.h>
 #include <asm/ptrace.h>
 
+#include "proc-macros.S"
+
 /*
  * This is the maximum size of an area which will be invalidated
  * using the single invalidate entry instructions.  Anything larger
-- 
cgit v0.10.2


From 906243d07b8bfebeaa05b5aeaa0e5ab2d4034954 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Mon, 3 Jul 2006 12:44:30 +0100
Subject: [ARM] Fix bad asm instruction in proc-arm925.S

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index ad15f85..8d9a9f9 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -454,7 +454,8 @@ __arm925_setup:
 	mcr	p15, 7, r0, c15, c0, 0
 #endif
 
-	adr	r5, {r5, r6}
+	adr	r5, arm925_crval
+	ldmia	r5, {r5, r6}
 	mrc	p15, 0, r0, c1, c0		@ get control register v4
 	bic	r0, r0, r5
 	orr	r0, r0, r6
-- 
cgit v0.10.2


From 020732ad9add86b71e9d90d5fa7687d51f58ef49 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Mon, 3 Jul 2006 13:18:04 +0100
Subject: [ARM] Fix ISA IRQ resources

The ISA IRQ code was not using named initialisers, so merging the
64-bit resource code (which re-ordered the struct members) broke
this.  Fix it up to use named initialisers.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mach-footbridge/isa-irq.c b/arch/arm/mach-footbridge/isa-irq.c
index e1c43b3..87448c2 100644
--- a/arch/arm/mach-footbridge/isa-irq.c
+++ b/arch/arm/mach-footbridge/isa-irq.c
@@ -98,9 +98,22 @@ isa_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
 	desc_handle_irq(isa_irq, desc, regs);
 }
 
-static struct irqaction irq_cascade = { .handler = no_action, .name = "cascade", };
-static struct resource pic1_resource = { "pic1", 0x20, 0x3f };
-static struct resource pic2_resource = { "pic2", 0xa0, 0xbf };
+static struct irqaction irq_cascade = {
+	.handler = no_action,
+	.name = "cascade",
+};
+
+static struct resource pic1_resource = {
+	.name	= "pic1",
+	.start	= 0x20,
+	.end	= 0x3f,
+};
+
+static struct resource pic2_resource = {
+	.name	= "pic2",
+	.start	= 0xa0,
+	.end	= 0xbf,
+};
 
 void __init isa_init_irq(unsigned int host_irq)
 {
-- 
cgit v0.10.2


From c9e4143c4df08f7b54ba2099480266a6a1303f17 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Mon, 3 Jul 2006 13:29:03 +0100
Subject: [ARM] Fix ecard.c resource warnings.

Platforms which use ecard.c always have 32-bit resources, so
might as well lose the "long" format strings.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/kernel/ecard.c b/arch/arm/kernel/ecard.c
index ab4ad95..b9a74a7 100644
--- a/arch/arm/kernel/ecard.c
+++ b/arch/arm/kernel/ecard.c
@@ -883,7 +883,7 @@ static ssize_t ecard_show_resources(struct device *dev, struct device_attribute
 	int i;
 
 	for (i = 0; i < ECARD_NUM_RESOURCES; i++)
-		str += sprintf(str, "%08lx %08lx %08lx\n",
+		str += sprintf(str, "%08x %08x %08lx\n",
 				ec->resource[i].start,
 				ec->resource[i].end,
 				ec->resource[i].flags);
-- 
cgit v0.10.2


From 4e19025bc7be18e4d1dc8d1fde06c2d23927eb4d Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Mon, 3 Jul 2006 13:29:38 +0100
Subject: [ARM] Fix warnings in arch/arm/kernel/setup.c

cr_alignment is unsigned long, so should be the format string.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 7d6a516..ed1c4d6 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -344,7 +344,7 @@ static void __init setup_processor(void)
 	cpu_cache = *list->cache;
 #endif
 
-	printk("CPU: %s [%08x] revision %d (ARMv%s), cr=%08x\n",
+	printk("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n",
 	       cpu_name, processor_id, (int)processor_id & 15,
 	       proc_arch[cpu_architecture()], cr_alignment);
 
-- 
cgit v0.10.2


From 67f3a58856b6a41a46e9256a79a8ca3809f47cc6 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Mon, 3 Jul 2006 13:30:52 +0100
Subject: [ARM] Fix warning in consistent.c

No need for 'cr' to be a local variable, which is unused in the
SMP case, and only used once in the UP case.  Just call get_cr()
directly.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 3e86fe7..dba7ddd 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -303,7 +303,6 @@ __ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size,
 	int err;
 	unsigned long addr;
  	struct vm_struct * area;
-	unsigned int cr = get_cr();
 
 	/*
 	 * High mappings must be supersection aligned
@@ -317,7 +316,7 @@ __ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size,
  	addr = (unsigned long)area->addr;
 
 #ifndef CONFIG_SMP
-	if ((((cpu_architecture() >= CPU_ARCH_ARMv6) && (cr & CR_XP)) ||
+	if ((((cpu_architecture() >= CPU_ARCH_ARMv6) && (get_cr() & CR_XP)) ||
 	       cpu_is_xsc3()) &&
 	       !((__pfn_to_phys(pfn) | size | addr) & ~SUPERSECTION_MASK)) {
 		area->flags |= VM_ARM_SECTION_MAPPING;
-- 
cgit v0.10.2


From 61a116ef29e712147d7ee7a6738886e6f91df5f5 Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Mon, 3 Jul 2006 15:22:35 +0100
Subject: [SERIAL] Ensure 8250_pci quirks are not marked __devinit

The 8250_pci quirks must not be marked __devinit since they may
be used from parport_serial.  We only really need to mark those
which might be used by cards recognised by parport_serial, but
that wouldn't allow static checking.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 94886c0..950560f 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -134,7 +134,7 @@ afavlab_setup(struct serial_private *priv, struct pciserial_board *board,
  * and Keystone have one Diva chip with 3 UARTs.  Some later machines have
  * one Diva chip, but it has been expanded to 5 UARTs.
  */
-static int __devinit pci_hp_diva_init(struct pci_dev *dev)
+static int pci_hp_diva_init(struct pci_dev *dev)
 {
 	int rc = 0;
 
@@ -194,7 +194,7 @@ pci_hp_diva_setup(struct serial_private *priv, struct pciserial_board *board,
 /*
  * Added for EKF Intel i960 serial boards
  */
-static int __devinit pci_inteli960ni_init(struct pci_dev *dev)
+static int pci_inteli960ni_init(struct pci_dev *dev)
 {
 	unsigned long oldval;
 
@@ -216,7 +216,7 @@ static int __devinit pci_inteli960ni_init(struct pci_dev *dev)
  * seems to be mainly needed on card using the PLX which also use I/O
  * mapped memory.
  */
-static int __devinit pci_plx9050_init(struct pci_dev *dev)
+static int pci_plx9050_init(struct pci_dev *dev)
 {
 	u8 irq_config;
 	void __iomem *p;
@@ -314,7 +314,7 @@ sbs_setup(struct serial_private *priv, struct pciserial_board *board,
 /* global control register offset for SBS PMC-OctalPro */
 #define OCT_REG_CR_OFF		0x500
 
-static int __devinit sbs_init(struct pci_dev *dev)
+static int sbs_init(struct pci_dev *dev)
 {
 	u8 __iomem *p;
 
@@ -493,7 +493,7 @@ static const struct timedia_struct {
 	{ 0, NULL }
 };
 
-static int __devinit pci_timedia_init(struct pci_dev *dev)
+static int pci_timedia_init(struct pci_dev *dev)
 {
 	unsigned short *ids;
 	int i, j;
@@ -566,13 +566,13 @@ titan_400l_800l_setup(struct serial_private *priv,
 	return setup_port(priv, port, bar, offset, board->reg_shift);
 }
 
-static int __devinit pci_xircom_init(struct pci_dev *dev)
+static int pci_xircom_init(struct pci_dev *dev)
 {
 	msleep(100);
 	return 0;
 }
 
-static int __devinit pci_netmos_init(struct pci_dev *dev)
+static int pci_netmos_init(struct pci_dev *dev)
 {
 	/* subdevice 0x00PS means <P> parallel, <S> serial */
 	unsigned int num_serial = dev->subsystem_device & 0xf;
@@ -622,7 +622,7 @@ pci_default_setup(struct serial_private *priv, struct pciserial_board *board,
  */
 static struct pci_serial_quirk pci_serial_quirks[] = {
 	/*
-	 * AFAVLAB cards.
+	 * AFAVLAB cards - these may be called via parport_serial
 	 *  It is not clear whether this applies to all products.
 	 */
 	{
@@ -754,7 +754,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = {
 		.exit		= __devexit_p(sbs_exit),
 	},
 	/*
-	 * SIIG cards.
+	 * SIIG cards - these may be called via parport_serial
 	 */
 	{
 		.vendor		= PCI_VENDOR_ID_SIIG,
@@ -811,7 +811,7 @@ static struct pci_serial_quirk pci_serial_quirks[] = {
 		.setup		= pci_default_setup,
 	},
 	/*
-	 * Netmos cards
+	 * Netmos cards - these may be called via parport_serial
 	 */
 	{
 		.vendor		= PCI_VENDOR_ID_NETMOS,
-- 
cgit v0.10.2


From 0f1482fdd7e5efc473335b92f350027b8f1519fb Mon Sep 17 00:00:00 2001
From: Russell King <rmk@dyn-67.arm.linux.org.uk>
Date: Mon, 3 Jul 2006 15:32:47 +0100
Subject: [ARM] Fix lh7a40x_udc.c

In 3ae5eaec1d2d9c0cf53745352e7d4b152810ba24, I broke this driver
by missing a comma.  Replace the missing comma.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

diff --git a/drivers/usb/gadget/lh7a40x_udc.c b/drivers/usb/gadget/lh7a40x_udc.c
index bb22b7e..36db725 100644
--- a/drivers/usb/gadget/lh7a40x_udc.c
+++ b/drivers/usb/gadget/lh7a40x_udc.c
@@ -2143,7 +2143,7 @@ static int lh7a40x_udc_remove(struct platform_device *pdev)
 
 static struct platform_driver udc_driver = {
 	.probe = lh7a40x_udc_probe,
-	.remove = lh7a40x_udc_remove
+	.remove = lh7a40x_udc_remove,
 	    /* FIXME power management support */
 	    /* .suspend = ... disable UDC */
 	    /* .resume = ... re-enable UDC */
-- 
cgit v0.10.2


From cbdb54d3ca986e30d0b8c1f755c3910b5573baae Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@freescale.com>
Date: Mon, 3 Jul 2006 15:10:14 -0500
Subject: Documentation: correct values in MPC8548E SEC example node

Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>

diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index 217e517..3c62e66 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -1436,9 +1436,9 @@ platforms are moved over to use the flattened-device-tree model.
                interrupts = <1d 3>;
                interrupt-parent = <40000>;
                num-channels = <4>;
-               channel-fifo-len = <24>;
+               channel-fifo-len = <18>;
                exec-units-mask = <000000fe>;
-               descriptor-types-mask = <073f1127>;
+               descriptor-types-mask = <012b0ebf>;
        };
 
 
-- 
cgit v0.10.2


From 00280166993af8469dbfee24b779b61d3dd326c3 Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@freescale.com>
Date: Fri, 30 Jun 2006 18:41:20 -0500
Subject: powerpc: Add base support for the Freescale MPC8349E-mITX eval board

Added support for the Freescale MPC8343e-mITX board.  Currently based on the
8343 SYS code.  The 2nd PHY (5-port switch) and SATA are untested (work in
progress).

Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>

diff --git a/arch/powerpc/platforms/83xx/Kconfig b/arch/powerpc/platforms/83xx/Kconfig
index 7675e67..5fe7b7f 100644
--- a/arch/powerpc/platforms/83xx/Kconfig
+++ b/arch/powerpc/platforms/83xx/Kconfig
@@ -16,12 +16,21 @@ config MPC834x_SYS
 	  3 PCI slots.  The PIBs PCI initialization is the bootloader's
 	  responsiblilty.
 
+config MPC834x_ITX
+	bool "Freescale MPC834x ITX"
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the MPC 834x ITX evaluation board.
+
+	  Be aware that PCI initialization is the bootloader's
+	  responsiblilty.
+
 endchoice
 
 config MPC834x
 	bool
 	select PPC_UDBG_16550
 	select PPC_INDIRECT_PCI
-	default y if MPC834x_SYS
+	default y if MPC834x_SYS || MPC834x_ITX
 
 endmenu
diff --git a/arch/powerpc/platforms/83xx/Makefile b/arch/powerpc/platforms/83xx/Makefile
index 5c72367..9387a11 100644
--- a/arch/powerpc/platforms/83xx/Makefile
+++ b/arch/powerpc/platforms/83xx/Makefile
@@ -4,3 +4,4 @@
 obj-y				:= misc.o
 obj-$(CONFIG_PCI)		+= pci.o
 obj-$(CONFIG_MPC834x_SYS)	+= mpc834x_sys.o
+obj-$(CONFIG_MPC834x_ITX)	+= mpc834x_itx.o
diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c
new file mode 100644
index 0000000..b463056
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c
@@ -0,0 +1,156 @@
+/*
+ * arch/powerpc/platforms/83xx/mpc834x_itx.c
+ *
+ * MPC834x ITX board specific routines
+ *
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/reboot.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/major.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+
+#include <asm/system.h>
+#include <asm/atomic.h>
+#include <asm/time.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/ipic.h>
+#include <asm/bootinfo.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <sysdev/fsl_soc.h>
+
+#include "mpc83xx.h"
+
+#include <platforms/83xx/mpc834x_sys.h>
+
+#ifndef CONFIG_PCI
+unsigned long isa_io_base = 0;
+unsigned long isa_mem_base = 0;
+#endif
+
+#ifdef CONFIG_PCI
+static int
+mpc83xx_map_irq(struct pci_dev *dev, unsigned char idsel, unsigned char pin)
+{
+	static char pci_irq_table[][4] =
+	    /*
+	     *      PCI IDSEL/INTPIN->INTLINE
+	     *       A      B      C      D
+	     */
+	{
+		{PIRQB, PIRQC, PIRQD, PIRQA},	/* idsel 0x0e */
+		{PIRQA, PIRQB, PIRQC, PIRQD},	/* idsel 0x0f */
+		{PIRQC, PIRQD, PIRQA, PIRQB},	/* idsel 0x10 */
+	};
+
+	const long min_idsel = 0x0e, max_idsel = 0x10, irqs_per_slot = 4;
+	return PCI_IRQ_TABLE_LOOKUP;
+}
+#endif				/* CONFIG_PCI */
+
+/* ************************************************************************
+ *
+ * Setup the architecture
+ *
+ */
+static void __init mpc834x_itx_setup_arch(void)
+{
+	struct device_node *np;
+
+	if (ppc_md.progress)
+		ppc_md.progress("mpc834x_itx_setup_arch()", 0);
+
+	np = of_find_node_by_type(NULL, "cpu");
+	if (np != 0) {
+		unsigned int *fp =
+		    (int *)get_property(np, "clock-frequency", NULL);
+		if (fp != 0)
+			loops_per_jiffy = *fp / HZ;
+		else
+			loops_per_jiffy = 50000000 / HZ;
+		of_node_put(np);
+	}
+#ifdef CONFIG_PCI
+	for (np = NULL; (np = of_find_node_by_type(np, "pci")) != NULL;)
+		add_bridge(np);
+
+	ppc_md.pci_swizzle = common_swizzle;
+	ppc_md.pci_map_irq = mpc83xx_map_irq;
+	ppc_md.pci_exclude_device = mpc83xx_exclude_device;
+#endif
+
+#ifdef  CONFIG_ROOT_NFS
+	ROOT_DEV = Root_NFS;
+#else
+	ROOT_DEV = Root_HDA1;
+#endif
+}
+
+void __init mpc834x_itx_init_IRQ(void)
+{
+	u8 senses[8] = {
+		0,			/* EXT 0 */
+		IRQ_SENSE_LEVEL,	/* EXT 1 */
+		IRQ_SENSE_LEVEL,	/* EXT 2 */
+		0,			/* EXT 3 */
+#ifdef CONFIG_PCI
+		IRQ_SENSE_LEVEL,	/* EXT 4 */
+		IRQ_SENSE_LEVEL,	/* EXT 5 */
+		IRQ_SENSE_LEVEL,	/* EXT 6 */
+		IRQ_SENSE_LEVEL,	/* EXT 7 */
+#else
+		0,			/* EXT 4 */
+		0,			/* EXT 5 */
+		0,			/* EXT 6 */
+		0,			/* EXT 7 */
+#endif
+	};
+
+	ipic_init(get_immrbase() + 0x00700, 0, 0, senses, 8);
+
+	/* Initialize the default interrupt mapping priorities,
+	 * in case the boot rom changed something on us.
+	 */
+	ipic_set_default_priority();
+}
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init mpc834x_itx_probe(void)
+{
+	/* We always match for now, eventually we should look at the flat
+	   dev tree to ensure this is the board we are suppose to run on
+	*/
+	return 1;
+}
+
+define_machine(mpc834x_itx) {
+	.name			= "MPC834x ITX",
+	.probe			= mpc834x_itx_probe,
+	.setup_arch		= mpc834x_itx_setup_arch,
+	.init_IRQ		= mpc834x_itx_init_IRQ,
+	.get_irq		= ipic_get_irq,
+	.restart		= mpc83xx_restart,
+	.time_init		= mpc83xx_time_init,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.h b/arch/powerpc/platforms/83xx/mpc834x_itx.h
new file mode 100644
index 0000000..174ca4e
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc834x_itx.h
@@ -0,0 +1,23 @@
+/*
+ * arch/powerpc/platforms/83xx/mpc834x_itx.h
+ *
+ * MPC834X ITX common board definitions
+ *
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef __MACH_MPC83XX_ITX_H__
+#define __MACH_MPC83XX_ITX_H__
+
+#define PIRQA	MPC83xx_IRQ_EXT4
+#define PIRQB	MPC83xx_IRQ_EXT5
+#define PIRQC	MPC83xx_IRQ_EXT6
+#define PIRQD	MPC83xx_IRQ_EXT7
+
+#endif				/* __MACH_MPC83XX_ITX_H__ */
-- 
cgit v0.10.2


From 913b83944b9e13c60ca4ef95cf262547ff6bf93b Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@freescale.com>
Date: Fri, 30 Jun 2006 18:41:29 -0500
Subject: powerpc: add defconfig for Freescale MPC8349E-mITX board

Provide default configuration for the Freescale MPC8349E-mITX board, including
the on-board 16MB flash.

Signed-off-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Timur Tabi <timur@freescale.com>
Signed-off-by: Kumar Gala <galak@kernel.crashing.org>

diff --git a/arch/powerpc/configs/mpc834x_itx_defconfig b/arch/powerpc/configs/mpc834x_itx_defconfig
new file mode 100644
index 0000000..fc2d978
--- /dev/null
+++ b/arch/powerpc/configs/mpc834x_itx_defconfig
@@ -0,0 +1,1336 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.17
+# Fri Jun 30 17:53:25 2006
+#
+# CONFIG_PPC64 is not set
+CONFIG_PPC32=y
+CONFIG_PPC_MERGE=y
+CONFIG_MMU=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_IRQ_PER_CPU=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+CONFIG_PPC=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_GENERIC_NVRAM=y
+CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_PPC_OF=y
+CONFIG_PPC_UDBG_16550=y
+# CONFIG_GENERIC_TBSYNC is not set
+CONFIG_DEFAULT_UIMAGE=y
+
+#
+# Processor support
+#
+# CONFIG_CLASSIC32 is not set
+# CONFIG_PPC_52xx is not set
+# CONFIG_PPC_82xx is not set
+CONFIG_PPC_83xx=y
+# CONFIG_PPC_85xx is not set
+# CONFIG_PPC_86xx is not set
+# CONFIG_40x is not set
+# CONFIG_44x is not set
+# CONFIG_8xx is not set
+# CONFIG_E200 is not set
+CONFIG_6xx=y
+CONFIG_83xx=y
+CONFIG_PPC_FPU=y
+CONFIG_PPC_STD_MMU=y
+CONFIG_PPC_STD_MMU_32=y
+# CONFIG_SMP is not set
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+
+#
+# General setup
+#
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+# CONFIG_AUDIT is not set
+# CONFIG_IKCONFIG is not set
+# CONFIG_RELAY is not set
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_EMBEDDED=y
+# CONFIG_KALLSYMS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_RT_MUTEXES=y
+CONFIG_FUTEX=y
+# CONFIG_EPOLL is not set
+CONFIG_SHMEM=y
+CONFIG_SLAB=y
+# CONFIG_TINY_SHMEM is not set
+CONFIG_BASE_SMALL=0
+# CONFIG_SLOB is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+# CONFIG_KMOD is not set
+
+#
+# Block layer
+#
+# CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+CONFIG_PPC_GEN550=y
+# CONFIG_WANT_EARLY_SERIAL is not set
+
+#
+# Platform support
+#
+# CONFIG_MPC834x_SYS is not set
+CONFIG_MPC834x_ITX=y
+CONFIG_MPC834x=y
+
+#
+# Kernel options
+#
+# CONFIG_HIGHMEM is not set
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=250
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_RESOURCES_64BIT is not set
+CONFIG_PROC_DEVICETREE=y
+# CONFIG_CMDLINE_BOOL is not set
+# CONFIG_PM is not set
+# CONFIG_SOFTWARE_SUSPEND is not set
+CONFIG_SECCOMP=y
+CONFIG_ISA_DMA_API=y
+
+#
+# Bus options
+#
+CONFIG_GENERIC_ISA_DMA=y
+# CONFIG_PPC_I8259 is not set
+CONFIG_PPC_INDIRECT_PCI=y
+CONFIG_FSL_SOC=y
+CONFIG_PCI=y
+CONFIG_PCI_DOMAINS=y
+# CONFIG_PCIEPORTBUS is not set
+# CONFIG_PCI_DEBUG is not set
+
+#
+# PCCARD (PCMCIA/CardBus) support
+#
+# CONFIG_PCCARD is not set
+
+#
+# PCI Hotplug Support
+#
+# CONFIG_HOTPLUG_PCI is not set
+
+#
+# Advanced setup
+#
+# CONFIG_ADVANCED_OPTIONS is not set
+
+#
+# Default settings for advanced configuration options are used
+#
+CONFIG_HIGHMEM_START=0xfe000000
+CONFIG_LOWMEM_SIZE=0x30000000
+CONFIG_KERNEL_START=0xc0000000
+CONFIG_TASK_SIZE=0x80000000
+CONFIG_BOOT_LOAD=0x00800000
+
+#
+# Networking
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+# CONFIG_NETDEBUG is not set
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+CONFIG_XFRM=y
+# CONFIG_XFRM_USER is not set
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+CONFIG_INET_XFRM_MODE_TRANSPORT=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_BIC=y
+# CONFIG_IPV6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+
+#
+# DCCP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_DCCP is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+# CONFIG_IP_SCTP is not set
+
+#
+# TIPC Configuration (EXPERIMENTAL)
+#
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_IEEE80211 is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_SYS_HYPERVISOR is not set
+
+#
+# Connector - unified userspace <-> kernelspace linker
+#
+# CONFIG_CONNECTOR is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+# CONFIG_MTD_CONCAT is not set
+# CONFIG_MTD_PARTITIONS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+# CONFIG_MTD_BLOCK is not set
+# CONFIG_MTD_BLOCK_RO is not set
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=y
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_GEN_PROBE=y
+# CONFIG_MTD_CFI_ADV_OPTIONS is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+# CONFIG_MTD_CFI_INTELEXT is not set
+CONFIG_MTD_CFI_AMDSTD=y
+# CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+# CONFIG_MTD_OBSOLETE_CHIPS is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_PHYSMAP_START=0xfe000000
+CONFIG_MTD_PHYSMAP_LEN=0x1000000
+CONFIG_MTD_PHYSMAP_BANKWIDTH=2
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_PMC551 is not set
+# CONFIG_MTD_DATAFLASH is not set
+# CONFIG_MTD_M25P80 is not set
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+
+#
+# NAND Flash Device Drivers
+#
+# CONFIG_MTD_NAND is not set
+
+#
+# OneNAND Flash Device Drivers
+#
+# CONFIG_MTD_ONENAND is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play support
+#
+
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_SX8 is not set
+# CONFIG_BLK_DEV_UB is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+# CONFIG_BLK_DEV_IDE is not set
+# CONFIG_BLK_DEV_HD_ONLY is not set
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+CONFIG_SCSI=y
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=y
+# CONFIG_CHR_DEV_ST is not set
+# CONFIG_CHR_DEV_OSST is not set
+# CONFIG_BLK_DEV_SR is not set
+CONFIG_CHR_DEV_SG=y
+# CONFIG_CHR_DEV_SCH is not set
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+# CONFIG_SCSI_MULTI_LUN is not set
+# CONFIG_SCSI_CONSTANTS is not set
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=y
+# CONFIG_SCSI_FC_ATTRS is not set
+# CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_ATTRS is not set
+
+#
+# SCSI low-level drivers
+#
+# CONFIG_ISCSI_TCP is not set
+# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+# CONFIG_SCSI_3W_9XXX is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AACRAID is not set
+# CONFIG_SCSI_AIC7XXX is not set
+# CONFIG_SCSI_AIC7XXX_OLD is not set
+# CONFIG_SCSI_AIC79XX is not set
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_MEGARAID_NEWGEN is not set
+# CONFIG_MEGARAID_LEGACY is not set
+# CONFIG_MEGARAID_SAS is not set
+CONFIG_SCSI_SATA=y
+# CONFIG_SCSI_SATA_AHCI is not set
+# CONFIG_SCSI_SATA_SVW is not set
+# CONFIG_SCSI_ATA_PIIX is not set
+# CONFIG_SCSI_SATA_MV is not set
+# CONFIG_SCSI_SATA_NV is not set
+# CONFIG_SCSI_PDC_ADMA is not set
+# CONFIG_SCSI_HPTIOP is not set
+# CONFIG_SCSI_SATA_QSTOR is not set
+# CONFIG_SCSI_SATA_PROMISE is not set
+# CONFIG_SCSI_SATA_SX4 is not set
+CONFIG_SCSI_SATA_SIL=y
+# CONFIG_SCSI_SATA_SIL24 is not set
+# CONFIG_SCSI_SATA_SIS is not set
+# CONFIG_SCSI_SATA_ULI is not set
+# CONFIG_SCSI_SATA_VIA is not set
+# CONFIG_SCSI_SATA_VITESSE is not set
+# CONFIG_SCSI_BUSLOGIC is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+# CONFIG_SCSI_GDTH is not set
+# CONFIG_SCSI_IPS is not set
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_SYM53C8XX_2 is not set
+# CONFIG_SCSI_IPR is not set
+# CONFIG_SCSI_QLOGIC_1280 is not set
+# CONFIG_SCSI_QLA_FC is not set
+# CONFIG_SCSI_LPFC is not set
+# CONFIG_SCSI_DC395x is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_NSP32 is not set
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=y
+CONFIG_MD_RAID0=y
+CONFIG_MD_RAID1=y
+# CONFIG_MD_RAID10 is not set
+# CONFIG_MD_RAID456 is not set
+# CONFIG_MD_MULTIPATH is not set
+# CONFIG_MD_FAULTY is not set
+# CONFIG_BLK_DEV_DM is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+# CONFIG_FUSION_SPI is not set
+# CONFIG_FUSION_FC is not set
+# CONFIG_FUSION_SAS is not set
+
+#
+# IEEE 1394 (FireWire) support
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# I2O device support
+#
+# CONFIG_I2O is not set
+
+#
+# Macintosh device drivers
+#
+# CONFIG_WINDFARM is not set
+
+#
+# Network device support
+#
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+
+#
+# PHY device support
+#
+CONFIG_PHYLIB=y
+
+#
+# MII PHY device drivers
+#
+# CONFIG_MARVELL_PHY is not set
+# CONFIG_DAVICOM_PHY is not set
+# CONFIG_QSEMI_PHY is not set
+# CONFIG_LXT_PHY is not set
+CONFIG_CICADA_PHY=y
+# CONFIG_VITESSE_PHY is not set
+# CONFIG_SMSC_PHY is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_HAPPYMEAL is not set
+# CONFIG_SUNGEM is not set
+# CONFIG_CASSINI is not set
+# CONFIG_NET_VENDOR_3COM is not set
+
+#
+# Tulip family network device support
+#
+# CONFIG_NET_TULIP is not set
+# CONFIG_HP100 is not set
+CONFIG_NET_PCI=y
+# CONFIG_PCNET32 is not set
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_B44 is not set
+# CONFIG_FORCEDETH is not set
+# CONFIG_DGRS is not set
+# CONFIG_EEPRO100 is not set
+CONFIG_E100=y
+# CONFIG_FEALNX is not set
+# CONFIG_NATSEMI is not set
+# CONFIG_NE2K_PCI is not set
+# CONFIG_8139CP is not set
+# CONFIG_8139TOO is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_TLAN is not set
+# CONFIG_VIA_RHINE is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+# CONFIG_ACENIC is not set
+# CONFIG_DL2K is not set
+# CONFIG_E1000 is not set
+# CONFIG_NS83820 is not set
+# CONFIG_HAMACHI is not set
+# CONFIG_YELLOWFIN is not set
+# CONFIG_R8169 is not set
+# CONFIG_SIS190 is not set
+# CONFIG_SKGE is not set
+# CONFIG_SKY2 is not set
+# CONFIG_SK98LIN is not set
+# CONFIG_VIA_VELOCITY is not set
+# CONFIG_TIGON3 is not set
+# CONFIG_BNX2 is not set
+CONFIG_GIANFAR=y
+CONFIG_GFAR_NAPI=y
+
+#
+# Ethernet (10000 Mbit)
+#
+# CONFIG_CHELSIO_T1 is not set
+# CONFIG_IXGB is not set
+# CONFIG_S2IO is not set
+# CONFIG_MYRI10GE is not set
+
+#
+# Token Ring devices
+#
+# CONFIG_TR is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+# CONFIG_FDDI is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NET_FC is not set
+# CONFIG_SHAPER is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_PCI=y
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+# CONFIG_SERIAL_8250_EXTENDED is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+
+#
+# IPMI
+#
+# CONFIG_IPMI_HANDLER is not set
+
+#
+# Watchdog Cards
+#
+CONFIG_WATCHDOG=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
+
+#
+# Watchdog Device Drivers
+#
+# CONFIG_SOFT_WATCHDOG is not set
+CONFIG_83xx_WDT=y
+
+#
+# PCI-based Watchdog Cards
+#
+# CONFIG_PCIPCWATCHDOG is not set
+# CONFIG_WDTPCI is not set
+
+#
+# USB-based Watchdog Cards
+#
+# CONFIG_USBPCWATCHDOG is not set
+CONFIG_HW_RANDOM=y
+# CONFIG_NVRAM is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_AGP is not set
+# CONFIG_DRM is not set
+# CONFIG_RAW_DRIVER is not set
+
+#
+# TPM devices
+#
+# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
+
+#
+# I2C support
+#
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+
+#
+# I2C Algorithms
+#
+# CONFIG_I2C_ALGOBIT is not set
+# CONFIG_I2C_ALGOPCF is not set
+# CONFIG_I2C_ALGOPCA is not set
+
+#
+# I2C Hardware Bus support
+#
+# CONFIG_I2C_ALI1535 is not set
+# CONFIG_I2C_ALI1563 is not set
+# CONFIG_I2C_ALI15X3 is not set
+# CONFIG_I2C_AMD756 is not set
+# CONFIG_I2C_AMD8111 is not set
+# CONFIG_I2C_I801 is not set
+# CONFIG_I2C_I810 is not set
+# CONFIG_I2C_PIIX4 is not set
+CONFIG_I2C_MPC=y
+# CONFIG_I2C_NFORCE2 is not set
+# CONFIG_I2C_OCORES is not set
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_PROSAVAGE is not set
+# CONFIG_I2C_SAVAGE4 is not set
+# CONFIG_I2C_SIS5595 is not set
+# CONFIG_I2C_SIS630 is not set
+# CONFIG_I2C_SIS96X is not set
+# CONFIG_I2C_STUB is not set
+# CONFIG_I2C_VIA is not set
+# CONFIG_I2C_VIAPRO is not set
+# CONFIG_I2C_VOODOO3 is not set
+# CONFIG_I2C_PCA_ISA is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_SENSORS_DS1337 is not set
+# CONFIG_SENSORS_DS1374 is not set
+# CONFIG_SENSORS_EEPROM is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_SENSORS_M41T00 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+
+#
+# SPI support
+#
+CONFIG_SPI=y
+# CONFIG_SPI_DEBUG is not set
+CONFIG_SPI_MASTER=y
+
+#
+# SPI Master Controller Drivers
+#
+CONFIG_SPI_BITBANG=y
+CONFIG_SPI_MPC83xx=y
+
+#
+# SPI Protocol Masters
+#
+
+#
+# Dallas's 1-wire bus
+#
+
+#
+# Hardware Monitoring support
+#
+CONFIG_HWMON=y
+# CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_ABITUGURU is not set
+# CONFIG_SENSORS_ADM1021 is not set
+# CONFIG_SENSORS_ADM1025 is not set
+# CONFIG_SENSORS_ADM1026 is not set
+# CONFIG_SENSORS_ADM1031 is not set
+# CONFIG_SENSORS_ADM9240 is not set
+# CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ATXP1 is not set
+# CONFIG_SENSORS_DS1621 is not set
+# CONFIG_SENSORS_F71805F is not set
+# CONFIG_SENSORS_FSCHER is not set
+# CONFIG_SENSORS_FSCPOS is not set
+# CONFIG_SENSORS_GL518SM is not set
+# CONFIG_SENSORS_GL520SM is not set
+# CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_LM63 is not set
+# CONFIG_SENSORS_LM70 is not set
+# CONFIG_SENSORS_LM75 is not set
+# CONFIG_SENSORS_LM77 is not set
+# CONFIG_SENSORS_LM78 is not set
+# CONFIG_SENSORS_LM80 is not set
+# CONFIG_SENSORS_LM83 is not set
+# CONFIG_SENSORS_LM85 is not set
+# CONFIG_SENSORS_LM87 is not set
+# CONFIG_SENSORS_LM90 is not set
+# CONFIG_SENSORS_LM92 is not set
+# CONFIG_SENSORS_MAX1619 is not set
+# CONFIG_SENSORS_PC87360 is not set
+# CONFIG_SENSORS_SIS5595 is not set
+# CONFIG_SENSORS_SMSC47M1 is not set
+# CONFIG_SENSORS_SMSC47M192 is not set
+# CONFIG_SENSORS_SMSC47B397 is not set
+# CONFIG_SENSORS_VIA686A is not set
+# CONFIG_SENSORS_VT8231 is not set
+# CONFIG_SENSORS_W83781D is not set
+# CONFIG_SENSORS_W83791D is not set
+# CONFIG_SENSORS_W83792D is not set
+# CONFIG_SENSORS_W83L785TS is not set
+# CONFIG_SENSORS_W83627HF is not set
+# CONFIG_SENSORS_W83627EHF is not set
+# CONFIG_HWMON_DEBUG_CHIP is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+CONFIG_VIDEO_V4L2=y
+
+#
+# Digital Video Broadcasting Devices
+#
+# CONFIG_DVB is not set
+# CONFIG_USB_DABUSB is not set
+
+#
+# Graphics support
+#
+CONFIG_FIRMWARE_EDID=y
+# CONFIG_FB is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+CONFIG_USB_ARCH_HAS_HCD=y
+CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB_ARCH_HAS_EHCI=y
+CONFIG_USB=y
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+# CONFIG_USB_OTG is not set
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_SPLIT_ISO is not set
+# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+# CONFIG_USB_ISP116X_HCD is not set
+CONFIG_USB_OHCI_HCD=y
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
+CONFIG_USB_UHCI_HCD=y
+# CONFIG_USB_SL811_HCD is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+
+#
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
+#
+CONFIG_USB_STORAGE=y
+# CONFIG_USB_STORAGE_DEBUG is not set
+# CONFIG_USB_STORAGE_DATAFAB is not set
+# CONFIG_USB_STORAGE_FREECOM is not set
+# CONFIG_USB_STORAGE_DPCM is not set
+# CONFIG_USB_STORAGE_USBAT is not set
+# CONFIG_USB_STORAGE_SDDR09 is not set
+# CONFIG_USB_STORAGE_SDDR55 is not set
+# CONFIG_USB_STORAGE_JUMPSHOT is not set
+# CONFIG_USB_STORAGE_ALAUDA is not set
+# CONFIG_USB_LIBUSUAL is not set
+
+#
+# USB Input Devices
+#
+# CONFIG_USB_HID is not set
+
+#
+# USB HID Boot Protocol drivers
+#
+# CONFIG_USB_KBD is not set
+# CONFIG_USB_MOUSE is not set
+# CONFIG_USB_AIPTEK is not set
+# CONFIG_USB_WACOM is not set
+# CONFIG_USB_ACECAD is not set
+# CONFIG_USB_KBTAB is not set
+# CONFIG_USB_POWERMATE is not set
+# CONFIG_USB_TOUCHSCREEN is not set
+# CONFIG_USB_YEALINK is not set
+# CONFIG_USB_XPAD is not set
+# CONFIG_USB_ATI_REMOTE is not set
+# CONFIG_USB_ATI_REMOTE2 is not set
+# CONFIG_USB_KEYSPAN_REMOTE is not set
+# CONFIG_USB_APPLETOUCH is not set
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_MICROTEK is not set
+
+#
+# USB Network Adapters
+#
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_RTL8150 is not set
+# CONFIG_USB_USBNET is not set
+CONFIG_USB_MON=y
+
+#
+# USB port drivers
+#
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_EMI62 is not set
+# CONFIG_USB_EMI26 is not set
+# CONFIG_USB_AUERSWALD is not set
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_LEGOTOWER is not set
+# CONFIG_USB_LCD is not set
+# CONFIG_USB_LED is not set
+# CONFIG_USB_CY7C63 is not set
+# CONFIG_USB_CYTHERM is not set
+# CONFIG_USB_PHIDGETKIT is not set
+# CONFIG_USB_PHIDGETSERVO is not set
+# CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_APPLEDISPLAY is not set
+# CONFIG_USB_SISUSBVGA is not set
+# CONFIG_USB_LD is not set
+# CONFIG_USB_TEST is not set
+
+#
+# USB DSL modem support
+#
+
+#
+# USB Gadget Support
+#
+CONFIG_USB_GADGET=y
+# CONFIG_USB_GADGET_DEBUG_FILES is not set
+CONFIG_USB_GADGET_SELECTED=y
+CONFIG_USB_GADGET_NET2280=y
+CONFIG_USB_NET2280=y
+# CONFIG_USB_GADGET_PXA2XX is not set
+# CONFIG_USB_GADGET_GOKU is not set
+# CONFIG_USB_GADGET_LH7A40X is not set
+# CONFIG_USB_GADGET_OMAP is not set
+# CONFIG_USB_GADGET_AT91 is not set
+# CONFIG_USB_GADGET_DUMMY_HCD is not set
+CONFIG_USB_GADGET_DUALSPEED=y
+# CONFIG_USB_ZERO is not set
+CONFIG_USB_ETH=y
+CONFIG_USB_ETH_RNDIS=y
+# CONFIG_USB_GADGETFS is not set
+# CONFIG_USB_FILE_STORAGE is not set
+# CONFIG_USB_G_SERIAL is not set
+
+#
+# MMC/SD Card support
+#
+# CONFIG_MMC is not set
+
+#
+# LED devices
+#
+# CONFIG_NEW_LEDS is not set
+
+#
+# LED drivers
+#
+
+#
+# LED Triggers
+#
+
+#
+# InfiniBand support
+#
+# CONFIG_INFINIBAND is not set
+
+#
+# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
+#
+
+#
+# Real Time Clock
+#
+CONFIG_RTC_LIB=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_HCTOSYS_DEVICE="rtc0"
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_SYSFS=y
+CONFIG_RTC_INTF_PROC=y
+CONFIG_RTC_INTF_DEV=y
+CONFIG_RTC_INTF_DEV_UIE_EMUL=y
+
+#
+# RTC drivers
+#
+# CONFIG_RTC_DRV_X1205 is not set
+CONFIG_RTC_DRV_DS1307=y
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_DS1672 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_PCF8563 is not set
+# CONFIG_RTC_DRV_PCF8583 is not set
+# CONFIG_RTC_DRV_RS5C348 is not set
+# CONFIG_RTC_DRV_RS5C372 is not set
+# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_TEST is not set
+# CONFIG_RTC_DRV_MAX6902 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+
+#
+# DMA Engine support
+#
+CONFIG_DMA_ENGINE=y
+
+#
+# DMA Clients
+#
+CONFIG_NET_DMA=y
+
+#
+# DMA Devices
+#
+CONFIG_INTEL_IOATDMA=y
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+# CONFIG_QUOTA is not set
+CONFIG_DNOTIFY=y
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_HUGETLB_PAGE is not set
+CONFIG_RAMFS=y
+# CONFIG_CONFIGFS_FS is not set
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+CONFIG_NFS_V4=y
+# CONFIG_NFS_DIRECTIO is not set
+# CONFIG_NFSD is not set
+CONFIG_ROOT_NFS=y
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+CONFIG_SUNRPC_GSS=y
+CONFIG_RPCSEC_GSS_KRB5=y
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_CIFS_DEBUG2 is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+# CONFIG_9P_FS is not set
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_KARMA_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+
+#
+# Native Language Support
+#
+# CONFIG_NLS is not set
+
+#
+# Library routines
+#
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+CONFIG_CRC32=y
+# CONFIG_LIBCRC32C is not set
+CONFIG_PLIST=y
+
+#
+# Instrumentation Support
+#
+# CONFIG_PROFILING is not set
+
+#
+# Kernel hacking
+#
+CONFIG_PRINTK_TIME=y
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_LOG_BUF_SHIFT=17
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_INFO=y
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
+CONFIG_FORCED_INLINING=y
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_DEBUGGER is not set
+# CONFIG_BDI_SWITCH is not set
+CONFIG_BOOTX_TEXT=y
+CONFIG_SERIAL_TEXT_DEBUG=y
+# CONFIG_PPC_EARLY_DEBUG is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_WP512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Hardware crypto devices
+#
-- 
cgit v0.10.2


From 63104eec234bdecb55fd9c15467ae00d0a3f42ac Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@mars.ravnborg.org>
Date: Mon, 3 Jul 2006 23:30:54 +0200
Subject: kbuild: introduce utsrelease.h

include/linux/version.h contained both actual KERNEL version
and UTS_RELEASE that contains a subset from git SHA1 for when
kernel was compiled as part of a git repository.
This had the unfortunate side-effect that all files including version.h
would be recompiled when some git changes was made due to changes SHA1.
Split it out so we keep independent parts in separate files.

Also update checkversion.pl script to no longer check for UTS_RELEASE.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>

diff --git a/Makefile b/Makefile
index 1cc793f..b36aeb6 100644
--- a/Makefile
+++ b/Makefile
@@ -812,8 +812,8 @@ endif
 # prepare2 creates a makefile if using a separate output directory
 prepare2: prepare3 outputmakefile
 
-prepare1: prepare2 include/linux/version.h include/asm \
-                   include/config/auto.conf
+prepare1: prepare2 include/linux/version.h include/linux/utsrelease.h \
+                   include/asm include/config/auto.conf
 ifneq ($(KBUILD_MODULES),)
 	$(Q)mkdir -p $(MODVERDIR)
 	$(Q)rm -f $(MODVERDIR)/*
@@ -848,21 +848,26 @@ include/asm:
 # needs to be updated, so this check is forced on all builds
 
 uts_len := 64
+define filechk_utsrelease.h
+	if [ `echo -n "$(KERNELRELEASE)" | wc -c ` -gt $(uts_len) ]; then \
+	  echo '"$(KERNELRELEASE)" exceeds $(uts_len) characters' >&2;    \
+	  exit 1;                                                         \
+	fi;                                                               \
+	(echo \#define UTS_RELEASE \"$(KERNELRELEASE)\";)
+endef
 
 define filechk_version.h
-	if [ `echo -n "$(KERNELRELEASE)" | wc -c ` -gt $(uts_len) ]; then \
-	  echo '"$(KERNELRELEASE)" exceeds $(uts_len) characters' >&2; \
-	  exit 1; \
-	fi; \
-	(echo \#define UTS_RELEASE \"$(KERNELRELEASE)\"; \
-	  echo \#define LINUX_VERSION_CODE `expr $(VERSION) \\* 65536 + $(PATCHLEVEL) \\* 256 + $(SUBLEVEL)`; \
-	 echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))'; \
-	)
+	(echo \#define LINUX_VERSION_CODE $(shell                             \
+	expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + $(SUBLEVEL));     \
+	echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))';)
 endef
 
-include/linux/version.h: $(srctree)/Makefile include/config/kernel.release FORCE
+include/linux/version.h: $(srctree)/Makefile FORCE
 	$(call filechk,version.h)
 
+include/linux/utsrelease.h: include/config/kernel.release FORCE
+	$(call filechk,utsrelease.h)
+
 # ---------------------------------------------------------------------------
 
 PHONY += depend dep
@@ -955,7 +960,8 @@ CLEAN_FILES +=	vmlinux System.map \
 # Directories & files removed with 'make mrproper'
 MRPROPER_DIRS  += include/config include2
 MRPROPER_FILES += .config .config.old include/asm .version .old_version \
-                  include/linux/autoconf.h include/linux/version.h \
+                  include/linux/autoconf.h include/linux/version.h      \
+                  include/linux/utsrelease.h                            \
 		  Module.symvers tags TAGS cscope*
 
 # clean - Delete most, but leave enough to build external modules
diff --git a/arch/alpha/boot/bootp.c b/arch/alpha/boot/bootp.c
index ec53c28..3af21c7 100644
--- a/arch/alpha/boot/bootp.c
+++ b/arch/alpha/boot/bootp.c
@@ -9,7 +9,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/version.h>
+#include <linux/utsrelease.h>
 #include <linux/mm.h>
 
 #include <asm/system.h>
diff --git a/arch/alpha/boot/bootpz.c b/arch/alpha/boot/bootpz.c
index a6657f2..4307bde 100644
--- a/arch/alpha/boot/bootpz.c
+++ b/arch/alpha/boot/bootpz.c
@@ -11,7 +11,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/version.h>
+#include <linux/utsrelease.h>
 #include <linux/mm.h>
 
 #include <asm/system.h>
diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c
index 78c9b0b..90ed55b 100644
--- a/arch/alpha/boot/main.c
+++ b/arch/alpha/boot/main.c
@@ -7,7 +7,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/version.h>
+#include <linux/utsrelease.h>
 #include <linux/mm.h>
 
 #include <asm/system.h>
diff --git a/arch/frv/kernel/setup.c b/arch/frv/kernel/setup.c
index 5db3d4e..af08ccd 100644
--- a/arch/frv/kernel/setup.c
+++ b/arch/frv/kernel/setup.c
@@ -10,7 +10,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/version.h>
+#include <linux/utsrelease.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/delay.h>
diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S
index 0a5a3be..d2b684c 100644
--- a/arch/i386/boot/setup.S
+++ b/arch/i386/boot/setup.S
@@ -47,7 +47,7 @@
  */
 
 #include <asm/segment.h>
-#include <linux/version.h>
+#include <linux/utsrelease.h>
 #include <linux/compile.h>
 #include <asm/boot.h>
 #include <asm/e820.h>
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index 1f1771b..9df9f20 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -24,7 +24,7 @@
 #include <linux/reboot.h>
 #include <linux/init.h>
 #include <linux/pci.h>
-#include <linux/version.h>
+#include <linux/utsrelease.h>
 #include <linux/adb.h>
 #include <linux/module.h>
 #include <linux/delay.h>
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index cb257ae..24f09e2 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -12,7 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <linux/version.h>
+#include <linux/utsrelease.h>
 #include <asm/sections.h>
 #include <asm/prom.h>
 #include <asm/page.h>
diff --git a/arch/ppc/syslib/btext.c b/arch/ppc/syslib/btext.c
index 51ab6e9..d116670 100644
--- a/arch/ppc/syslib/btext.c
+++ b/arch/ppc/syslib/btext.c
@@ -6,7 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <linux/version.h>
+#include <linux/utsrelease.h>
 
 #include <asm/sections.h>
 #include <asm/bootx.h>
diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S
index 7de8b8f..a50b631 100644
--- a/arch/x86_64/boot/setup.S
+++ b/arch/x86_64/boot/setup.S
@@ -46,7 +46,7 @@
  */
 
 #include <asm/segment.h>
-#include <linux/version.h>
+#include <linux/utsrelease.h>
 #include <linux/compile.h>
 #include <asm/boot.h>
 #include <asm/e820.h>
diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_ethtool.c b/drivers/net/wireless/bcm43xx/bcm43xx_ethtool.c
index b3ffcf5..e386dcc 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx_ethtool.c
+++ b/drivers/net/wireless/bcm43xx/bcm43xx_ethtool.c
@@ -32,7 +32,7 @@
 #include <linux/netdevice.h>
 #include <linux/pci.h>
 #include <linux/string.h>
-#include <linux/version.h>
+#include <linux/utsrelease.h>
 
 
 static void bcm43xx_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index dc7c621..46919f9 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -1,4 +1,4 @@
-#include <linux/version.h>
+#include <linux/utsrelease.h>
 #include <linux/module.h>
 
 /* Simply sanity version stamp for modules. */
diff --git a/init/version.c b/init/version.c
index 3ddc3ce..e290802 100644
--- a/init/version.c
+++ b/init/version.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/uts.h>
 #include <linux/utsname.h>
+#include <linux/utsrelease.h>
 #include <linux/version.h>
 
 #define version(a) Version_ ## a
diff --git a/scripts/checkversion.pl b/scripts/checkversion.pl
index 9f84e56..ec7d211 100755
--- a/scripts/checkversion.pl
+++ b/scripts/checkversion.pl
@@ -1,7 +1,7 @@
 #! /usr/bin/perl
 #
-# checkversion find uses of LINUX_VERSION_CODE, KERNEL_VERSION, or
-# UTS_RELEASE without including <linux/version.h>, or cases of
+# checkversion find uses of LINUX_VERSION_CODE or KERNEL_VERSION
+# without including <linux/version.h>, or cases of
 # including <linux/version.h> that don't need it.
 # Copyright (C) 2003, Randy Dunlap <rdunlap@xenotime.net>
 
@@ -41,8 +41,7 @@ foreach $file (@ARGV)
 	}
 
 	# Look for uses: LINUX_VERSION_CODE, KERNEL_VERSION, UTS_RELEASE
-	if (($_ =~ /LINUX_VERSION_CODE/) || ($_ =~ /\WKERNEL_VERSION/) ||
-		($_ =~ /UTS_RELEASE/)) {
+	if (($_ =~ /LINUX_VERSION_CODE/) || ($_ =~ /\WKERNEL_VERSION/)) {
 	    $fUseVersion = 1;
 	    last LINE if $iLinuxVersion;
 	}
-- 
cgit v0.10.2


From 0fbfb036adee72cbfff170075e84ffd9e54f1938 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 4 Jul 2006 07:31:37 +1000
Subject: [POWERPC] Actually copy over i8259.c to arch/ppc/syslib this time

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>

diff --git a/arch/ppc/syslib/i8259.c b/arch/ppc/syslib/i8259.c
new file mode 100644
index 0000000..eb35353
--- /dev/null
+++ b/arch/ppc/syslib/i8259.c
@@ -0,0 +1,212 @@
+/*
+ * i8259 interrupt controller driver.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <asm/io.h>
+#include <asm/i8259.h>
+
+static volatile void __iomem *pci_intack; /* RO, gives us the irq vector */
+
+static unsigned char cached_8259[2] = { 0xff, 0xff };
+#define cached_A1 (cached_8259[0])
+#define cached_21 (cached_8259[1])
+
+static DEFINE_SPINLOCK(i8259_lock);
+
+static int i8259_pic_irq_offset;
+
+/*
+ * Acknowledge the IRQ using either the PCI host bridge's interrupt
+ * acknowledge feature or poll.  How i8259_init() is called determines
+ * which is called.  It should be noted that polling is broken on some
+ * IBM and Motorola PReP boxes so we must use the int-ack feature on them.
+ */
+int i8259_irq(struct pt_regs *regs)
+{
+	int irq;
+
+	spin_lock(&i8259_lock);
+
+	/* Either int-ack or poll for the IRQ */
+	if (pci_intack)
+		irq = readb(pci_intack);
+	else {
+		/* Perform an interrupt acknowledge cycle on controller 1. */
+		outb(0x0C, 0x20);		/* prepare for poll */
+		irq = inb(0x20) & 7;
+		if (irq == 2 ) {
+			/*
+			 * Interrupt is cascaded so perform interrupt
+			 * acknowledge on controller 2.
+			 */
+			outb(0x0C, 0xA0);	/* prepare for poll */
+			irq = (inb(0xA0) & 7) + 8;
+		}
+	}
+
+	if (irq == 7) {
+		/*
+		 * This may be a spurious interrupt.
+		 *
+		 * Read the interrupt status register (ISR). If the most
+		 * significant bit is not set then there is no valid
+		 * interrupt.
+		 */
+		if (!pci_intack)
+			outb(0x0B, 0x20);	/* ISR register */
+		if(~inb(0x20) & 0x80)
+			irq = -1;
+	}
+
+	spin_unlock(&i8259_lock);
+	return irq + i8259_pic_irq_offset;
+}
+
+static void i8259_mask_and_ack_irq(unsigned int irq_nr)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259_lock, flags);
+	irq_nr -= i8259_pic_irq_offset;
+	if (irq_nr > 7) {
+		cached_A1 |= 1 << (irq_nr-8);
+		inb(0xA1); 	/* DUMMY */
+		outb(cached_A1, 0xA1);
+		outb(0x20, 0xA0);	/* Non-specific EOI */
+		outb(0x20, 0x20);	/* Non-specific EOI to cascade */
+	} else {
+		cached_21 |= 1 << irq_nr;
+		inb(0x21); 	/* DUMMY */
+		outb(cached_21, 0x21);
+		outb(0x20, 0x20);	/* Non-specific EOI */
+	}
+	spin_unlock_irqrestore(&i8259_lock, flags);
+}
+
+static void i8259_set_irq_mask(int irq_nr)
+{
+	outb(cached_A1,0xA1);
+	outb(cached_21,0x21);
+}
+
+static void i8259_mask_irq(unsigned int irq_nr)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259_lock, flags);
+	irq_nr -= i8259_pic_irq_offset;
+	if (irq_nr < 8)
+		cached_21 |= 1 << irq_nr;
+	else
+		cached_A1 |= 1 << (irq_nr-8);
+	i8259_set_irq_mask(irq_nr);
+	spin_unlock_irqrestore(&i8259_lock, flags);
+}
+
+static void i8259_unmask_irq(unsigned int irq_nr)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259_lock, flags);
+	irq_nr -= i8259_pic_irq_offset;
+	if (irq_nr < 8)
+		cached_21 &= ~(1 << irq_nr);
+	else
+		cached_A1 &= ~(1 << (irq_nr-8));
+	i8259_set_irq_mask(irq_nr);
+	spin_unlock_irqrestore(&i8259_lock, flags);
+}
+
+static struct irq_chip i8259_pic = {
+	.typename	= " i8259    ",
+	.mask		= i8259_mask_irq,
+	.unmask		= i8259_unmask_irq,
+	.mask_ack	= i8259_mask_and_ack_irq,
+};
+
+static struct resource pic1_iores = {
+	.name = "8259 (master)",
+	.start = 0x20,
+	.end = 0x21,
+	.flags = IORESOURCE_BUSY,
+};
+
+static struct resource pic2_iores = {
+	.name = "8259 (slave)",
+	.start = 0xa0,
+	.end = 0xa1,
+	.flags = IORESOURCE_BUSY,
+};
+
+static struct resource pic_edgectrl_iores = {
+	.name = "8259 edge control",
+	.start = 0x4d0,
+	.end = 0x4d1,
+	.flags = IORESOURCE_BUSY,
+};
+
+static struct irqaction i8259_irqaction = {
+	.handler = no_action,
+	.flags = SA_INTERRUPT,
+	.mask = CPU_MASK_NONE,
+	.name = "82c59 secondary cascade",
+};
+
+/*
+ * i8259_init()
+ * intack_addr - PCI interrupt acknowledge (real) address which will return
+ *               the active irq from the 8259
+ */
+void __init i8259_init(unsigned long intack_addr, int offset)
+{
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&i8259_lock, flags);
+	i8259_pic_irq_offset = offset;
+
+	/* init master interrupt controller */
+	outb(0x11, 0x20); /* Start init sequence */
+	outb(0x00, 0x21); /* Vector base */
+	outb(0x04, 0x21); /* edge tiggered, Cascade (slave) on IRQ2 */
+	outb(0x01, 0x21); /* Select 8086 mode */
+
+	/* init slave interrupt controller */
+	outb(0x11, 0xA0); /* Start init sequence */
+	outb(0x08, 0xA1); /* Vector base */
+	outb(0x02, 0xA1); /* edge triggered, Cascade (slave) on IRQ2 */
+	outb(0x01, 0xA1); /* Select 8086 mode */
+
+	/* always read ISR */
+	outb(0x0B, 0x20);
+	outb(0x0B, 0xA0);
+
+	/* Mask all interrupts */
+	outb(cached_A1, 0xA1);
+	outb(cached_21, 0x21);
+
+	spin_unlock_irqrestore(&i8259_lock, flags);
+
+	for (i = 0; i < NUM_ISA_INTERRUPTS; ++i) {
+		set_irq_chip_and_handler(offset + i, &i8259_pic,
+					 handle_level_irq);
+		irq_desc[offset + i].status |= IRQ_LEVEL;
+	}
+
+	/* reserve our resources */
+	setup_irq(offset + 2, &i8259_irqaction);
+	request_resource(&ioport_resource, &pic1_iores);
+	request_resource(&ioport_resource, &pic2_iores);
+	request_resource(&ioport_resource, &pic_edgectrl_iores);
+
+	if (intack_addr != 0)
+		pci_intack = ioremap(intack_addr, 1);
+
+}
-- 
cgit v0.10.2


From 88fecaa27f398d95db6c405a1908292c6f0bc3ef Mon Sep 17 00:00:00 2001
From: john stultz <johnstul@us.ibm.com>
Date: Mon, 3 Jul 2006 00:24:04 -0700
Subject: [PATCH] time initialisation fix

We're not reay to take a timer interrupt until timekeeping_init() has run.
But time_init() will start the time interrupt and if it is called with
local interrupts enabled we'll immediately take an interrupt and die.

Fix that by running timekeeping_init() prior to time_init().

We don't know _why_ local interrupts got enabled on Jesse Brandeburg's
machine.  That's a separate as-yet-unsolved problem.  THe patch adds a little
bit of debugging to detect that.

This whole requirement that local interrupts be held off during early boot
keeps on biting us.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Cc: Jesse Brandeburg <jesse.brandeburg@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/init/main.c b/init/main.c
index b2f3b56..c8960b9 100644
--- a/init/main.c
+++ b/init/main.c
@@ -496,8 +496,8 @@ asmlinkage void __init start_kernel(void)
 	init_timers();
 	hrtimers_init();
 	softirq_init();
-	time_init();
 	timekeeping_init();
+	time_init();
 
 	/*
 	 * HACK ALERT! This is early. We're enabling the console before
@@ -508,6 +508,8 @@ asmlinkage void __init start_kernel(void)
 	if (panic_later)
 		panic(panic_later, panic_param);
 	profile_init();
+	if (!irqs_disabled())
+		printk("start_kernel(): bug: interrupts were enabled early\n");
 	local_irq_enable();
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (initrd_start && !initrd_below_start_ok &&
-- 
cgit v0.10.2


From dada0769b95602ae791b9c4cd1cbecfc367f00a9 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 3 Jul 2006 00:24:05 -0700
Subject: [PATCH] genirq ia64 cleanup

Remove duplicate/redundant/wrong  IRQF_PERCPU definition.

Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-ia64/irq.h b/include/asm-ia64/irq.h
index 8acb001..79479e2 100644
--- a/include/asm-ia64/irq.h
+++ b/include/asm-ia64/irq.h
@@ -14,8 +14,6 @@
 #define NR_IRQS		256
 #define NR_IRQ_VECTORS	NR_IRQS
 
-#define IRQF_PERCPU	0x02000000
-
 static __inline__ int
 irq_canonicalize (int irq)
 {
-- 
cgit v0.10.2


From b02454f43578b24bc8b8ab54a239156841f56f6d Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 3 Jul 2006 00:24:06 -0700
Subject: [PATCH] lockdep: special s390 print_symbol() version

Have a special version of print_symbol() for s390 which clears the most
significant bit of addr before calling __print_symbol().  This seems to be
better than checking/changing each place in the kernel that saves an
instruction pointer.

Without this the output would look like:

hardirqs last  enabled at (30907): [<80018c6a>] 0x80018c6a
hardirqs last disabled at (30908): [<8001e48c>] 0x8001e48c
softirqs last  enabled at (30904): [<8001dc96>] 0x8001dc96
softirqs last disabled at (30897): [<8001dc50>] 0x8001dc50

instead of this:

hardirqs last  enabled at (19421): [<80018c72>] cpu_idle+0x176/0x1c4
hardirqs last disabled at (19422): [<8001e494>] io_no_vtime+0xa/0x1a
softirqs last  enabled at (19418): [<8001dc9e>] do_softirq+0xa6/0xe8
softirqs last disabled at (19411): [<8001dc58>] do_softirq+0x60/0xe8

Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 54e2549..ad71ac0 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -57,10 +57,11 @@ do {						\
 #define print_fn_descriptor_symbol(fmt, addr) print_symbol(fmt, addr)
 #endif
 
-#define print_symbol(fmt, addr)			\
-do {						\
-	__check_printsym_format(fmt, "");	\
-	__print_symbol(fmt, addr);		\
-} while(0)
+static inline void print_symbol(const char *fmt, unsigned long addr)
+{
+	__check_printsym_format(fmt, "");
+	__print_symbol(fmt, (unsigned long)
+		       __builtin_extract_return_addr((void *)addr));
+}
 
 #endif /*_LINUX_KALLSYMS_H*/
-- 
cgit v0.10.2


From 6abd219c6eab92172038e4719e02905acb7b68d4 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Mon, 3 Jul 2006 00:24:07 -0700
Subject: [PATCH] bcm43xx: netlink deadlock fix

reported by Jure Repinc:

> > http://bugzilla.kernel.org/show_bug.cgi?id=6773

> > checked out dmesg output and found the message
> >
> > ======================================================
> > [ BUG: hard-safe -> hard-unsafe lock order detected! ]
> > ------------------------------------------------------
> >
> > starting at line 660 of the dmesg.txt that I will attach.

The patch below should fix the deadlock, albeit I suspect it's not the
"right" fix; the right fix may well be to move the rx processing in bcm43xx
to softirq context.  [it's debatable, ipw2200 hit this exact same bug; at
some point it's better to bite the bullet and move this to the common layer
as my patch below does]

Make the nl_table_lock irq-safe; it's taken for read in various netlink
functions, including functions that several wireless drivers (ipw2200,
bcm43xx) want to call from hardirq context.

The deadlock was found by the lock validator.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Michael Buesch <mb@bu3sch.de>
Cc: "John W. Linville" <linville@tuxdriver.com>
Cc: Jeff Garzik <jeff@garzik.org>
Acked-by: "David S. Miller" <davem@davemloft.net>
Cc: jamal <hadi@cyberus.ca>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 70cee82..55c0adc 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -156,7 +156,7 @@ static void netlink_sock_destruct(struct sock *sk)
 
 static void netlink_table_grab(void)
 {
-	write_lock_bh(&nl_table_lock);
+	write_lock_irq(&nl_table_lock);
 
 	if (atomic_read(&nl_table_users)) {
 		DECLARE_WAITQUEUE(wait, current);
@@ -166,9 +166,9 @@ static void netlink_table_grab(void)
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			if (atomic_read(&nl_table_users) == 0)
 				break;
-			write_unlock_bh(&nl_table_lock);
+			write_unlock_irq(&nl_table_lock);
 			schedule();
-			write_lock_bh(&nl_table_lock);
+			write_lock_irq(&nl_table_lock);
 		}
 
 		__set_current_state(TASK_RUNNING);
@@ -178,7 +178,7 @@ static void netlink_table_grab(void)
 
 static __inline__ void netlink_table_ungrab(void)
 {
-	write_unlock_bh(&nl_table_lock);
+	write_unlock_irq(&nl_table_lock);
 	wake_up(&nl_table_wait);
 }
 
-- 
cgit v0.10.2


From c61a8416ed3db84481651270032696a4781cac17 Mon Sep 17 00:00:00 2001
From: Theodore Tso <tytso@mit.edu>
Date: Mon, 3 Jul 2006 00:24:09 -0700
Subject: [PATCH] uml build fix

This is needed to fix UML compilation given that alternatives_smp_module_add
and alternatives_smp_module_del are null inline functions if !CONFIG_SMP.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 7d51dd7..37cfe77 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -495,6 +495,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 {
 }
 
+#ifdef CONFIG_SMP
 void alternatives_smp_module_add(struct module *mod, char *name,
 				 void *locks, void *locks_end,
 				 void *text,  void *text_end)
@@ -504,3 +505,4 @@ void alternatives_smp_module_add(struct module *mod, char *name,
 void alternatives_smp_module_del(struct module *mod)
 {
 }
+#endif
-- 
cgit v0.10.2


From c32928c579d88acd43981b59e86900da65f40762 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 3 Jul 2006 00:24:10 -0700
Subject: [PATCH] PNPACPI: support shareable interrupts

ACPI supplies a "shareable" indication, but PNPACPI ignores it.  If a PNP
device uses a shared interrupt, request_irq() fails because the PNP driver
can't tell whether to supply SA_SHIRQ.

This patch allows PNP drivers to test
    (pnp_irq_flags(dev, 0) & IORESOURCE_IRQ_SHAREABLE)

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Cc: Matthieu Castet <castet.matthieu@free.fr>
Cc: Li Shaohua <shaohua.li@intel.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c
index 3a4a644..2122688 100644
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -74,7 +74,7 @@ static void decode_irq_flags(int flag, int *triggering, int *polarity)
 
 static void
 pnpacpi_parse_allocated_irqresource(struct pnp_resource_table *res, u32 gsi,
-	int triggering, int polarity)
+	int triggering, int polarity, int shareable)
 {
 	int i = 0;
 	int irq;
@@ -95,6 +95,9 @@ pnpacpi_parse_allocated_irqresource(struct pnp_resource_table *res, u32 gsi,
 		return;
 	}
 
+	if (shareable)
+		res->irq_resource[i].flags |= IORESOURCE_IRQ_SHAREABLE;
+
 	res->irq_resource[i].start = irq;
 	res->irq_resource[i].end = irq;
 	pcibios_penalize_isa_irq(irq, 1);
@@ -194,7 +197,8 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
 			pnpacpi_parse_allocated_irqresource(res_table,
 				res->data.irq.interrupts[i],
 				res->data.irq.triggering,
-				res->data.irq.polarity);
+				res->data.irq.polarity,
+				res->data.irq.sharable);
 		}
 		break;
 
@@ -255,7 +259,8 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
 			pnpacpi_parse_allocated_irqresource(res_table,
 				res->data.extended_irq.interrupts[i],
 				res->data.extended_irq.triggering,
-				res->data.extended_irq.polarity);
+				res->data.extended_irq.polarity,
+				res->data.extended_irq.sharable);
 		}
 		break;
 
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 87a9fc0..5612dfe 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -55,6 +55,7 @@ struct resource_list {
 #define IORESOURCE_IRQ_LOWEDGE		(1<<1)
 #define IORESOURCE_IRQ_HIGHLEVEL	(1<<2)
 #define IORESOURCE_IRQ_LOWLEVEL		(1<<3)
+#define IORESOURCE_IRQ_SHAREABLE	(1<<4)
 
 /* ISA PnP DMA specific bits (IORESOURCE_BITS) */
 #define IORESOURCE_DMA_TYPE_MASK	(3<<0)
-- 
cgit v0.10.2


From cb6358eb69d9854f65f2979c0ce9280eee041828 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Mon, 3 Jul 2006 00:24:12 -0700
Subject: [PATCH] SERIAL: allow shared 8250_pnp interrupts

PNP devices can use shared interrupts, so check to see whether we'll need
SA_SHIRQ for request_irq().

The builtin PDH UART on the HP rx8640 is an example of an ACPI/PNP device
that uses a shareable level-triggered, active-low interrupt.  The interrupt
can be shared in very large I/O configurations or by artificially lowering
IA64_DEF_LAST_DEVICE_VECTOR.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Adam Belay <ambx1@neo.rr.com>
Cc: Matthieu Castet <castet.matthieu@free.fr>
Cc: Li Shaohua <shaohua.li@intel.com>
Cc: Len Brown <len.brown@intel.com>
Acked-by: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c
index 739bc84..632f62d 100644
--- a/drivers/serial/8250_pnp.c
+++ b/drivers/serial/8250_pnp.c
@@ -431,6 +431,8 @@ serial_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
 #endif
 
 	port.flags |= UPF_SKIP_TEST | UPF_BOOT_AUTOCONF;
+	if (pnp_irq_flags(dev, 0) & IORESOURCE_IRQ_SHAREABLE)
+		port.flags |= UPF_SHARE_IRQ;
 	port.uartclk = 1843200;
 	port.dev = &dev->dev;
 
-- 
cgit v0.10.2


From 9614634fe6a138fd8ae044950700d2af8d203f97 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Mon, 3 Jul 2006 00:24:13 -0700
Subject: [PATCH] ZVC/zone_reclaim: Leave 1% of unmapped pagecache pages for
 file I/O

It turns out that it is advantageous to leave a small portion of unmapped file
backed pages if all of a zone's pages (or almost all pages) are allocated and
so the page allocator has to go off-node.

This allows recently used file I/O buffers to stay on the node and
reduces the times that zone reclaim is invoked if file I/O occurs
when we run out of memory in a zone.

The problem is that zone reclaim runs too frequently when the page cache is
used for file I/O (read write and therefore unmapped pages!) alone and we have
almost all pages of the zone allocated.  Zone reclaim may remove 32 unmapped
pages.  File I/O will use these pages for the next read/write requests and the
unmapped pages increase.  After the zone has filled up again zone reclaim will
remove it again after only 32 pages.  This cycle is too inefficient and there
are potentially too many zone reclaim cycles.

With the 1% boundary we may still remove all unmapped pages for file I/O in
zone reclaim pass.  However.  it will take a large number of read and writes
to get back to 1% again where we trigger zone reclaim again.

The zone reclaim 2.6.16/17 does not show this behavior because we have a 30
second timeout.

[akpm@osdl.org: rename the /proc file and the variable]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 86754eb..7cee902 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -28,6 +28,7 @@ Currently, these files are in /proc/sys/vm:
 - block_dump
 - drop-caches
 - zone_reclaim_mode
+- min_unmapped_ratio
 - panic_on_oom
 
 ==============================================================
@@ -168,6 +169,19 @@ in all nodes of the system.
 
 =============================================================
 
+min_unmapped_ratio:
+
+This is available only on NUMA kernels.
+
+A percentage of the file backed pages in each zone.  Zone reclaim will only
+occur if more than this percentage of pages are file backed and unmapped.
+This is to insure that a minimal amount of local pages is still available for
+file I/O even if the node is overallocated.
+
+The default is 1 percent.
+
+=============================================================
+
 panic_on_oom
 
 This enables or disables panic on out-of-memory feature.  If this is set to 1,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 27e748e..656b588 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -150,6 +150,10 @@ struct zone {
 	unsigned long		lowmem_reserve[MAX_NR_ZONES];
 
 #ifdef CONFIG_NUMA
+	/*
+	 * zone reclaim becomes active if more unmapped pages exist.
+	 */
+	unsigned long		min_unmapped_ratio;
 	struct per_cpu_pageset	*pageset[NR_CPUS];
 #else
 	struct per_cpu_pageset	pageset[NR_CPUS];
@@ -414,6 +418,8 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
 					void __user *, size_t *, loff_t *);
 int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *,
 					void __user *, size_t *, loff_t *);
+int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
+			struct file *, void __user *, size_t *, loff_t *);
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index cf6ca6e..5e59184 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -189,6 +189,7 @@ extern long vm_total_pages;
 
 #ifdef CONFIG_NUMA
 extern int zone_reclaim_mode;
+extern int sysctl_min_unmapped_ratio;
 extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
 #else
 #define zone_reclaim_mode 0
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 46e4d8f..e4b1a4d 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -188,7 +188,7 @@ enum
 	VM_DROP_PAGECACHE=29,	/* int: nuke lots of pagecache */
 	VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
 	VM_ZONE_RECLAIM_MODE=31, /* reclaim local zone memory before going off node */
-	VM_ZONE_RECLAIM_INTERVAL=32, /* time period to wait after reclaim failure */
+	VM_MIN_UNMAPPED=32,	/* Set min percent of unmapped pages */
 	VM_PANIC_ON_OOM=33,	/* panic at out-of-memory */
 	VM_VDSO_ENABLED=34,	/* map VDSO into new processes? */
 };
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 99a58f2..362a0cc 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -932,6 +932,17 @@ static ctl_table vm_table[] = {
 		.strategy	= &sysctl_intvec,
 		.extra1		= &zero,
 	},
+	{
+		.ctl_name	= VM_MIN_UNMAPPED,
+		.procname	= "min_unmapped_ratio",
+		.data		= &sysctl_min_unmapped_ratio,
+		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
+		.mode		= 0644,
+		.proc_handler	= &sysctl_min_unmapped_ratio_sysctl_handler,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+		.extra2		= &one_hundred,
+	},
 #endif
 #ifdef CONFIG_X86_32
 	{
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3e792a5..54a4f53 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2005,6 +2005,10 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 
 		zone->spanned_pages = size;
 		zone->present_pages = realsize;
+#ifdef CONFIG_NUMA
+		zone->min_unmapped_ratio = (realsize*sysctl_min_unmapped_ratio)
+						/ 100;
+#endif
 		zone->name = zone_names[j];
 		spin_lock_init(&zone->lock);
 		spin_lock_init(&zone->lru_lock);
@@ -2298,6 +2302,24 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
 	return 0;
 }
 
+#ifdef CONFIG_NUMA
+int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
+	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+	struct zone *zone;
+	int rc;
+
+	rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	if (rc)
+		return rc;
+
+	for_each_zone(zone)
+		zone->min_unmapped_ratio = (zone->present_pages *
+				sysctl_min_unmapped_ratio) / 100;
+	return 0;
+}
+#endif
+
 /*
  * lowmem_reserve_ratio_sysctl_handler - just a wrapper around
  *	proc_dointvec() so that we can call setup_per_zone_lowmem_reserve()
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ff2ebe9..5d4c4d0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1503,10 +1503,6 @@ module_init(kswapd_init)
  *
  * If non-zero call zone_reclaim when the number of free pages falls below
  * the watermarks.
- *
- * In the future we may add flags to the mode. However, the page allocator
- * should only have to check that zone_reclaim_mode != 0 before calling
- * zone_reclaim().
  */
 int zone_reclaim_mode __read_mostly;
 
@@ -1524,6 +1520,12 @@ int zone_reclaim_mode __read_mostly;
 #define ZONE_RECLAIM_PRIORITY 4
 
 /*
+ * Percentage of pages in a zone that must be unmapped for zone_reclaim to
+ * occur.
+ */
+int sysctl_min_unmapped_ratio = 1;
+
+/*
  * Try to free up some pages from this zone through reclaim.
  */
 static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
@@ -1590,18 +1592,17 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	int node_id;
 
 	/*
-	 * Do not reclaim if there are not enough reclaimable pages in this
-	 * zone that would satify this allocations.
+	 * Zone reclaim reclaims unmapped file backed pages.
 	 *
-	 * All unmapped pagecache pages are reclaimable.
-	 *
-	 * Both counters may be temporarily off a bit so we use
-	 * SWAP_CLUSTER_MAX as the boundary. It may also be good to
-	 * leave a few frequently used unmapped pagecache pages around.
+	 * A small portion of unmapped file backed pages is needed for
+	 * file I/O otherwise pages read by file I/O will be immediately
+	 * thrown out if the zone is overallocated. So we do not reclaim
+	 * if less than a specified percentage of the zone is used by
+	 * unmapped file backed pages.
 	 */
 	if (zone_page_state(zone, NR_FILE_PAGES) -
-		zone_page_state(zone, NR_FILE_MAPPED) < SWAP_CLUSTER_MAX)
-			return 0;
+	    zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio)
+		return 0;
 
 	/*
 	 * Avoid concurrent zone reclaims, do not reclaim in a zone that does
-- 
cgit v0.10.2


From ce51059be56f63762089412b3ece348067afda85 Mon Sep 17 00:00:00 2001
From: Chuck Ebbert <76306.1226@compuserve.com>
Date: Mon, 3 Jul 2006 00:24:14 -0700
Subject: [PATCH] binfmt_elf: fix checks for bad address

Fix check for bad address; use macro instead of open-coding two checks.

Taken from RHEL4 kernel update.

From: Ernie Petrides <petrides@redhat.com>

  For background, the BAD_ADDR() macro should return TRUE if the address is
  TASK_SIZE, because that's the lowest address that is *not* valid for
  user-space mappings.  The macro was correct in binfmt_aout.c but was wrong
  for the "equal to" case in binfmt_elf.c.  There were two in-line validations
  of user-space addresses in binfmt_elf.c, which have been appropriately
  converted to use the corrected BAD_ADDR() macro in the patch you posted
  yesterday.  Note that the size checks against TASK_SIZE are okay as coded.

  The additional changes that I propose are below.  These are in the error
  paths for bad ELF entry addresses once load_elf_binary() has already
  committed to exec'ing the new image (following the tearing down of the
  task's original address space).

  The 1st hunk deals with the interp-side of the outer "if".  There were two
  problems here.  The printk() should be removed because this path can be
  triggered at will by a bogus interpreter image created and used by a
  malicious user.  Further, the error code should not be ENOEXEC, because that
  causes the loop in search_binary_handler() to continue trying other exec
  handlers (twice, in fact).  But it's too late for this to work correctly,
  because the user address space has already been torn down, and an exec()
  failure cannot be returned to the user code because the code no longer
  exists.  The only recovery is to force a SIGSEGV, but it's best to terminate
  the search loop immediately.  I somewhat arbitrarily chose EINVAL as a
  fallback error code, but any error returned by load_elf_interp() will
  override that (but this value will never be seen by user-space).

  The 2nd hunk deals with the non-interp-side of the outer "if".  There were
  two problems here as well.  The SIGSEGV needs to be forced, because a prior
  sigaction() syscall might have set the associated disposition to SIG_IGN.
  And the ENOEXEC should be changed to EINVAL as described above.

Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com>
Signed-off-by: Ernie Petrides <petrides@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d043440..f42e642 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -84,7 +84,7 @@ static struct linux_binfmt elf_format = {
 		.min_coredump	= ELF_EXEC_PAGESIZE
 };
 
-#define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE)
+#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
 
 static int set_brk(unsigned long start, unsigned long end)
 {
@@ -394,7 +394,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 			 * <= p_memsize so it's only necessary to check p_memsz.
 			 */
 			k = load_addr + eppnt->p_vaddr;
-			if (k > TASK_SIZE ||
+			if (BAD_ADDR(k) ||
 			    eppnt->p_filesz > eppnt->p_memsz ||
 			    eppnt->p_memsz > TASK_SIZE ||
 			    TASK_SIZE - eppnt->p_memsz < k) {
@@ -887,7 +887,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		 * allowed task size. Note that p_filesz must always be
 		 * <= p_memsz so it is only necessary to check p_memsz.
 		 */
-		if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
+		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
 		    elf_ppnt->p_memsz > TASK_SIZE ||
 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
 			/* set_brk can never work. Avoid overflows. */
@@ -941,10 +941,9 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 						    interpreter,
 						    &interp_load_addr);
 		if (BAD_ADDR(elf_entry)) {
-			printk(KERN_ERR "Unable to load interpreter %.128s\n",
-				elf_interpreter);
 			force_sig(SIGSEGV, current);
-			retval = -ENOEXEC; /* Nobody gets to see this, but.. */
+			retval = IS_ERR((void *)elf_entry) ?
+					(int)elf_entry : -EINVAL;
 			goto out_free_dentry;
 		}
 		reloc_func_desc = interp_load_addr;
@@ -955,8 +954,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	} else {
 		elf_entry = loc->elf_ex.e_entry;
 		if (BAD_ADDR(elf_entry)) {
-			send_sig(SIGSEGV, current, 0);
-			retval = -ENOEXEC; /* Nobody gets to see this, but.. */
+			force_sig(SIGSEGV, current);
+			retval = -EINVAL;
 			goto out_free_dentry;
 		}
 	}
-- 
cgit v0.10.2


From 0f40efbdf49913cad50edce5aa9756896c30424f Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Mon, 3 Jul 2006 00:24:15 -0700
Subject: [PATCH] kernel-doc MAINTAINERS

Martin says that I can add self to MAINTAINERS.

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc: Martin Waitz <tali@admingilde.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index 42be131..387dff7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -861,6 +861,8 @@ S:	Maintained
 DOCBOOK FOR DOCUMENTATION
 P:	Martin Waitz
 M:	tali@admingilde.org
+P:	Randy Dunlap
+M:	rdunlap@xenotime.net
 T:	git http://tali.admingilde.org/git/linux-docbook.git
 S:	Maintained
 
-- 
cgit v0.10.2


From 8320204ab495c858da7b6501f49a71b53d828e74 Mon Sep 17 00:00:00 2001
From: Michael Krufky <mkrufky@linuxtv.org>
Date: Mon, 3 Jul 2006 00:24:18 -0700
Subject: [PATCH] add Mike Isely as pvrusb2 maintainer

Update MAINTAINERS with contact info for Mike Isely, the PVRUSB2
maintainer, while also adding the pvrusb2 mailing list and web site.

Signed-off-by: Mike Isely <isely@pobox.com>
Signed-off-by: Michael Krufky <mkrufky@linuxtv.org>
Cc: Mauro Carvalho Chehab <mchehab@infradead.org>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index 387dff7..5f76a4f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2300,6 +2300,14 @@ M:	promise@pnd-pc.demon.co.uk
 W:	http://www.pnd-pc.demon.co.uk/promise/
 S:	Maintained
 
+PVRUSB2 VIDEO4LINUX DRIVER
+P:	Mike Isely
+M:	isely@pobox.com
+L:	pvrusb2@isely.net
+L:	video4linux-list@redhat.com
+W:	http://www.isely.net/pvrusb2/
+S:	Maintained
+
 PXA2xx SUPPORT
 P:	Nicolas Pitre
 M:	nico@cam.org
-- 
cgit v0.10.2


From 36c9366efd63e4bab82d46e166140bddf3acc4cf Mon Sep 17 00:00:00 2001
From: Vitaly Wool <vitalywool@gmail.com>
Date: Mon, 3 Jul 2006 00:24:19 -0700
Subject: [PATCH] fbdev: Add framebuffer and display update module support for
 pnx4008

Add support for Display Update Module and RGB framebuffer device on Philips
PNX4008 ARM board.

Signed-off-by: Grigory Tolstolytkin <gtolstolytkin@ru.mvista.com>
Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 17de4c8..3badb48 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1557,6 +1557,21 @@ config FB_S3C2410_DEBUG
 	  Turn on debugging messages. Note that you can set/unset at run time
 	  through sysfs
 
+config FB_PNX4008_DUM
+	tristate "Display Update Module support on Philips PNX4008 board"
+	depends on FB && ARCH_PNX4008
+	---help---
+	  Say Y here to enable support for PNX4008 Display Update Module (DUM)
+
+config FB_PNX4008_DUM_RGB
+	tristate "RGB Framebuffer support on Philips PNX4008 board"
+	depends on FB_PNX4008_DUM
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	---help---
+	  Say Y here to enable support for PNX4008 RGB Framebuffer
+
 config FB_VIRTUAL
 	tristate "Virtual Frame Buffer support (ONLY FOR TESTING!)"
 	depends on FB
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index c335e9b..6283d01 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -94,6 +94,8 @@ obj-$(CONFIG_FB_TX3912)		  += tx3912fb.o
 obj-$(CONFIG_FB_S1D13XXX)	  += s1d13xxxfb.o
 obj-$(CONFIG_FB_IMX)              += imxfb.o
 obj-$(CONFIG_FB_S3C2410)	  += s3c2410fb.o
+obj-$(CONFIG_FB_PNX4008_DUM)	  += pnx4008/
+obj-$(CONFIG_FB_PNX4008_DUM_RGB)  += pnx4008/
 
 # Platform or fallback drivers go here
 obj-$(CONFIG_FB_VESA)             += vesafb.o
diff --git a/drivers/video/pnx4008/Makefile b/drivers/video/pnx4008/Makefile
new file mode 100644
index 0000000..636aacc
--- /dev/null
+++ b/drivers/video/pnx4008/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the new PNX4008 framebuffer device driver
+#
+
+obj-$(CONFIG_FB_PNX4008_DUM) += sdum.o
+obj-$(CONFIG_FB_PNX4008_DUM_RGB) += pnxrgbfb.o
+
diff --git a/drivers/video/pnx4008/dum.h b/drivers/video/pnx4008/dum.h
new file mode 100644
index 0000000..d80a614
--- /dev/null
+++ b/drivers/video/pnx4008/dum.h
@@ -0,0 +1,211 @@
+/*
+ * linux/drivers/video/pnx4008/dum.h
+ *
+ * Internal header for SDUM
+ *
+ * 2005 (c) Koninklijke Philips N.V. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+
+#ifndef __PNX008_DUM_H__
+#define __PNX008_DUM_H__
+
+#include <asm/arch/platform.h>
+
+#define PNX4008_DUMCONF_VA_BASE		IO_ADDRESS(PNX4008_DUMCONF_BASE)
+#define PNX4008_DUM_MAIN_VA_BASE	IO_ADDRESS(PNX4008_DUM_MAINCFG_BASE)
+
+/* DUM CFG ADDRESSES */
+#define DUM_CH_BASE_ADR		(PNX4008_DUMCONF_VA_BASE + 0x00)
+#define DUM_CH_MIN_ADR		(PNX4008_DUMCONF_VA_BASE + 0x00)
+#define DUM_CH_MAX_ADR		(PNX4008_DUMCONF_VA_BASE + 0x04)
+#define DUM_CH_CONF_ADR		(PNX4008_DUMCONF_VA_BASE + 0x08)
+#define DUM_CH_STAT_ADR		(PNX4008_DUMCONF_VA_BASE + 0x0C)
+#define DUM_CH_CTRL_ADR		(PNX4008_DUMCONF_VA_BASE + 0x10)
+
+#define CH_MARG		(0x100 / sizeof(u32))
+#define DUM_CH_MIN(i)	(*((volatile u32 *)DUM_CH_MIN_ADR + (i) * CH_MARG))
+#define DUM_CH_MAX(i)	(*((volatile u32 *)DUM_CH_MAX_ADR + (i) * CH_MARG))
+#define DUM_CH_CONF(i)	(*((volatile u32 *)DUM_CH_CONF_ADR + (i) * CH_MARG))
+#define DUM_CH_STAT(i)	(*((volatile u32 *)DUM_CH_STAT_ADR + (i) * CH_MARG))
+#define DUM_CH_CTRL(i)	(*((volatile u32 *)DUM_CH_CTRL_ADR + (i) * CH_MARG))
+
+#define DUM_CONF_ADR          (PNX4008_DUM_MAIN_VA_BASE + 0x00)
+#define DUM_CTRL_ADR          (PNX4008_DUM_MAIN_VA_BASE + 0x04)
+#define DUM_STAT_ADR          (PNX4008_DUM_MAIN_VA_BASE + 0x08)
+#define DUM_DECODE_ADR        (PNX4008_DUM_MAIN_VA_BASE + 0x0C)
+#define DUM_COM_BASE_ADR      (PNX4008_DUM_MAIN_VA_BASE + 0x10)
+#define DUM_SYNC_C_ADR        (PNX4008_DUM_MAIN_VA_BASE + 0x14)
+#define DUM_CLK_DIV_ADR       (PNX4008_DUM_MAIN_VA_BASE + 0x18)
+#define DUM_DIRTY_LOW_ADR     (PNX4008_DUM_MAIN_VA_BASE + 0x20)
+#define DUM_DIRTY_HIGH_ADR    (PNX4008_DUM_MAIN_VA_BASE + 0x24)
+#define DUM_FORMAT_ADR        (PNX4008_DUM_MAIN_VA_BASE + 0x28)
+#define DUM_WTCFG1_ADR        (PNX4008_DUM_MAIN_VA_BASE + 0x30)
+#define DUM_RTCFG1_ADR        (PNX4008_DUM_MAIN_VA_BASE + 0x34)
+#define DUM_WTCFG2_ADR        (PNX4008_DUM_MAIN_VA_BASE + 0x38)
+#define DUM_RTCFG2_ADR        (PNX4008_DUM_MAIN_VA_BASE + 0x3C)
+#define DUM_TCFG_ADR          (PNX4008_DUM_MAIN_VA_BASE + 0x40)
+#define DUM_OUTP_FORMAT1_ADR  (PNX4008_DUM_MAIN_VA_BASE + 0x44)
+#define DUM_OUTP_FORMAT2_ADR  (PNX4008_DUM_MAIN_VA_BASE + 0x48)
+#define DUM_SYNC_MODE_ADR     (PNX4008_DUM_MAIN_VA_BASE + 0x4C)
+#define DUM_SYNC_OUT_C_ADR    (PNX4008_DUM_MAIN_VA_BASE + 0x50)
+
+#define DUM_CONF              (*(volatile u32 *)(DUM_CONF_ADR))
+#define DUM_CTRL              (*(volatile u32 *)(DUM_CTRL_ADR))
+#define DUM_STAT              (*(volatile u32 *)(DUM_STAT_ADR))
+#define DUM_DECODE            (*(volatile u32 *)(DUM_DECODE_ADR))
+#define DUM_COM_BASE          (*(volatile u32 *)(DUM_COM_BASE_ADR))
+#define DUM_SYNC_C            (*(volatile u32 *)(DUM_SYNC_C_ADR))
+#define DUM_CLK_DIV           (*(volatile u32 *)(DUM_CLK_DIV_ADR))
+#define DUM_DIRTY_LOW         (*(volatile u32 *)(DUM_DIRTY_LOW_ADR))
+#define DUM_DIRTY_HIGH        (*(volatile u32 *)(DUM_DIRTY_HIGH_ADR))
+#define DUM_FORMAT            (*(volatile u32 *)(DUM_FORMAT_ADR))
+#define DUM_WTCFG1            (*(volatile u32 *)(DUM_WTCFG1_ADR))
+#define DUM_RTCFG1            (*(volatile u32 *)(DUM_RTCFG1_ADR))
+#define DUM_WTCFG2            (*(volatile u32 *)(DUM_WTCFG2_ADR))
+#define DUM_RTCFG2            (*(volatile u32 *)(DUM_RTCFG2_ADR))
+#define DUM_TCFG              (*(volatile u32 *)(DUM_TCFG_ADR))
+#define DUM_OUTP_FORMAT1      (*(volatile u32 *)(DUM_OUTP_FORMAT1_ADR))
+#define DUM_OUTP_FORMAT2      (*(volatile u32 *)(DUM_OUTP_FORMAT2_ADR))
+#define DUM_SYNC_MODE         (*(volatile u32 *)(DUM_SYNC_MODE_ADR))
+#define DUM_SYNC_OUT_C        (*(volatile u32 *)(DUM_SYNC_OUT_C_ADR))
+
+/* DUM SLAVE ADDRESSES */
+#define DUM_SLAVE_WRITE_ADR      (PNX4008_DUM_MAINCFG_BASE + 0x0000000)
+#define DUM_SLAVE_READ1_I_ADR    (PNX4008_DUM_MAINCFG_BASE + 0x1000000)
+#define DUM_SLAVE_READ1_R_ADR    (PNX4008_DUM_MAINCFG_BASE + 0x1000004)
+#define DUM_SLAVE_READ2_I_ADR    (PNX4008_DUM_MAINCFG_BASE + 0x1000008)
+#define DUM_SLAVE_READ2_R_ADR    (PNX4008_DUM_MAINCFG_BASE + 0x100000C)
+
+#define DUM_SLAVE_WRITE_W  ((volatile u32 *)(DUM_SLAVE_WRITE_ADR))
+#define DUM_SLAVE_WRITE_HW ((volatile u16 *)(DUM_SLAVE_WRITE_ADR))
+#define DUM_SLAVE_READ1_I  ((volatile u8 *)(DUM_SLAVE_READ1_I_ADR))
+#define DUM_SLAVE_READ1_R  ((volatile u16 *)(DUM_SLAVE_READ1_R_ADR))
+#define DUM_SLAVE_READ2_I  ((volatile u8 *)(DUM_SLAVE_READ2_I_ADR))
+#define DUM_SLAVE_READ2_R  ((volatile u16 *)(DUM_SLAVE_READ2_R_ADR))
+
+/* Sony display register addresses */
+#define DISP_0_REG            (0x00)
+#define DISP_1_REG            (0x01)
+#define DISP_CAL_REG          (0x20)
+#define DISP_ID_REG           (0x2A)
+#define DISP_XMIN_L_REG       (0x30)
+#define DISP_XMIN_H_REG       (0x31)
+#define DISP_YMIN_REG         (0x32)
+#define DISP_XMAX_L_REG       (0x34)
+#define DISP_XMAX_H_REG       (0x35)
+#define DISP_YMAX_REG         (0x36)
+#define DISP_SYNC_EN_REG      (0x38)
+#define DISP_SYNC_RISE_L_REG  (0x3C)
+#define DISP_SYNC_RISE_H_REG  (0x3D)
+#define DISP_SYNC_FALL_L_REG  (0x3E)
+#define DISP_SYNC_FALL_H_REG  (0x3F)
+#define DISP_PIXEL_REG        (0x0B)
+#define DISP_DUMMY1_REG       (0x28)
+#define DISP_DUMMY2_REG       (0x29)
+#define DISP_TIMING_REG       (0x98)
+#define DISP_DUMP_REG         (0x99)
+
+/* Sony display constants */
+#define SONY_ID1              (0x22)
+#define SONY_ID2              (0x23)
+
+/* Philips display register addresses */
+#define PH_DISP_ORIENT_REG    (0x003)
+#define PH_DISP_YPOINT_REG    (0x200)
+#define PH_DISP_XPOINT_REG    (0x201)
+#define PH_DISP_PIXEL_REG     (0x202)
+#define PH_DISP_YMIN_REG      (0x406)
+#define PH_DISP_YMAX_REG      (0x407)
+#define PH_DISP_XMIN_REG      (0x408)
+#define PH_DISP_XMAX_REG      (0x409)
+
+/* Misc constants */
+#define NO_VALID_DISPLAY_FOUND      (0)
+#define DISPLAY2_IS_NOT_CONNECTED   (0)
+
+/* register values */
+#define V_BAC_ENABLE		(BIT(0))
+#define V_BAC_DISABLE_IDLE	(BIT(1))
+#define V_BAC_DISABLE_TRIG	(BIT(2))
+#define V_DUM_RESET		(BIT(3))
+#define V_MUX_RESET		(BIT(4))
+#define BAC_ENABLED		(BIT(0))
+#define BAC_DISABLED		0
+
+/* Sony LCD commands */
+#define V_LCD_STANDBY_OFF	((BIT(25)) | (0 << 16) | DISP_0_REG)
+#define V_LCD_USE_9BIT_BUS	((BIT(25)) | (2 << 16) | DISP_1_REG)
+#define V_LCD_SYNC_RISE_L	((BIT(25)) | (0 << 16) | DISP_SYNC_RISE_L_REG)
+#define V_LCD_SYNC_RISE_H	((BIT(25)) | (0 << 16) | DISP_SYNC_RISE_H_REG)
+#define V_LCD_SYNC_FALL_L	((BIT(25)) | (160 << 16) | DISP_SYNC_FALL_L_REG)
+#define V_LCD_SYNC_FALL_H	((BIT(25)) | (0 << 16) | DISP_SYNC_FALL_H_REG)
+#define V_LCD_SYNC_ENABLE	((BIT(25)) | (128 << 16) | DISP_SYNC_EN_REG)
+#define V_LCD_DISPLAY_ON	((BIT(25)) | (64 << 16) | DISP_0_REG)
+
+enum {
+	PAD_NONE,
+	PAD_512,
+	PAD_1024
+};
+
+enum {
+	RGB888,
+	RGB666,
+	RGB565,
+	BGR565,
+	ARGB1555,
+	ABGR1555,
+	ARGB4444,
+	ABGR4444
+};
+
+struct dum_setup {
+	int sync_neg_edge;
+	int round_robin;
+	int mux_int;
+	int synced_dirty_flag_int;
+	int dirty_flag_int;
+	int error_int;
+	int pf_empty_int;
+	int sf_empty_int;
+	int bac_dis_int;
+	u32 dirty_base_adr;
+	u32 command_base_adr;
+	u32 sync_clk_div;
+	int sync_output;
+	u32 sync_restart_val;
+	u32 set_sync_high;
+	u32 set_sync_low;
+};
+
+struct dum_ch_setup {
+	int disp_no;
+	u32 xmin;
+	u32 ymin;
+	u32 xmax;
+	u32 ymax;
+	int xmirror;
+	int ymirror;
+	int rotate;
+	u32 minadr;
+	u32 maxadr;
+	u32 dirtybuffer;
+	int pad;
+	int format;
+	int hwdirty;
+	int slave_trans;
+};
+
+struct disp_window {
+	u32 xmin_l;
+	u32 xmin_h;
+	u32 ymin;
+	u32 xmax_l;
+	u32 xmax_h;
+	u32 ymax;
+};
+
+#endif				/* #ifndef __PNX008_DUM_H__ */
diff --git a/drivers/video/pnx4008/fbcommon.h b/drivers/video/pnx4008/fbcommon.h
new file mode 100644
index 0000000..4ebc87d
--- /dev/null
+++ b/drivers/video/pnx4008/fbcommon.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2005 Philips Semiconductors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA, or http://www.gnu.org/licenses/gpl.html
+*/
+
+#define QCIF_W  (176)
+#define QCIF_H  (144)
+
+#define CIF_W   (352)
+#define CIF_H   (288)
+
+#define LCD_X_RES	208
+#define LCD_Y_RES	320
+#define LCD_X_PAD	256
+#define LCD_BBP		4	/* Bytes Per Pixel */
+
+#define DISP_MAX_X_SIZE     (320)
+#define DISP_MAX_Y_SIZE     (208)
+
+#define RETURNVAL_BASE (0x400)
+
+enum fb_ioctl_returntype {
+	ENORESOURCESLEFT = RETURNVAL_BASE,
+	ERESOURCESNOTFREED,
+	EPROCNOTOWNER,
+	EFBNOTOWNER,
+	ECOPYFAILED,
+	EIOREMAPFAILED,
+};
diff --git a/drivers/video/pnx4008/pnxrgbfb.c b/drivers/video/pnx4008/pnxrgbfb.c
new file mode 100644
index 0000000..7d9453c
--- /dev/null
+++ b/drivers/video/pnx4008/pnxrgbfb.c
@@ -0,0 +1,213 @@
+/*
+ * drivers/video/pnx4008/pnxrgbfb.c
+ *
+ * PNX4008's framebuffer support
+ *
+ * Author: Grigory Tolstolytkin <gtolstolytkin@ru.mvista.com>
+ * Based on Philips Semiconductors's code
+ *
+ * Copyrght (c) 2005 MontaVista Software, Inc.
+ * Copyright (c) 2005 Philips Semiconductors
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/fb.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+
+#include <asm/uaccess.h>
+#include "sdum.h"
+#include "fbcommon.h"
+
+static u32 colreg[16];
+
+static struct fb_var_screeninfo rgbfb_var __initdata = {
+	.xres = LCD_X_RES,
+	.yres = LCD_Y_RES,
+	.xres_virtual = LCD_X_RES,
+	.yres_virtual = LCD_Y_RES,
+	.bits_per_pixel = 32,
+	.red.offset = 16,
+	.red.length = 8,
+	.green.offset = 8,
+	.green.length = 8,
+	.blue.offset = 0,
+	.blue.length = 8,
+	.left_margin = 0,
+	.right_margin = 0,
+	.upper_margin = 0,
+	.lower_margin = 0,
+	.vmode = FB_VMODE_NONINTERLACED,
+};
+static struct fb_fix_screeninfo rgbfb_fix __initdata = {
+	.id = "RGBFB",
+	.line_length = LCD_X_RES * LCD_BBP,
+	.type = FB_TYPE_PACKED_PIXELS,
+	.visual = FB_VISUAL_TRUECOLOR,
+	.xpanstep = 0,
+	.ypanstep = 0,
+	.ywrapstep = 0,
+	.accel = FB_ACCEL_NONE,
+};
+
+static int channel_owned;
+
+static int no_cursor(struct fb_info *info, struct fb_cursor *cursor)
+{
+	return 0;
+}
+
+static int rgbfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
+			   u_int transp, struct fb_info *info)
+{
+	if (regno > 15)
+		return 1;
+
+	colreg[regno] = ((red & 0xff00) << 8) | (green & 0xff00) |
+	    ((blue & 0xff00) >> 8);
+	return 0;
+}
+
+static int rgbfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
+{
+	return pnx4008_sdum_mmap(info, vma, NULL);
+}
+
+static struct fb_ops rgbfb_ops = {
+	.fb_mmap = rgbfb_mmap,
+	.fb_setcolreg = rgbfb_setcolreg,
+	.fb_fillrect = cfb_fillrect,
+	.fb_copyarea = cfb_copyarea,
+	.fb_imageblit = cfb_imageblit,
+};
+
+static int rgbfb_remove(struct platform_device *pdev)
+{
+	struct fb_info *info = platform_get_drvdata(pdev);
+
+	if (info) {
+		unregister_framebuffer(info);
+		fb_dealloc_cmap(&info->cmap);
+		framebuffer_release(info);
+		platform_set_drvdata(pdev, NULL);
+		kfree(info);
+	}
+
+	pnx4008_free_dum_channel(channel_owned, pdev->id);
+	pnx4008_set_dum_exit_notification(pdev->id);
+
+	return 0;
+}
+
+static int __devinit rgbfb_probe(struct platform_device *pdev)
+{
+	struct fb_info *info;
+	struct dumchannel_uf chan_uf;
+	int ret;
+	char *option;
+
+	info = framebuffer_alloc(sizeof(u32) * 16, &pdev->dev);
+	if (!info) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	pnx4008_get_fb_addresses(FB_TYPE_RGB, (void **)&info->screen_base,
+				 (dma_addr_t *) &rgbfb_fix.smem_start,
+				 &rgbfb_fix.smem_len);
+
+	if ((ret = pnx4008_alloc_dum_channel(pdev->id)) < 0)
+		goto err0;
+	else {
+		channel_owned = ret;
+		chan_uf.channelnr = channel_owned;
+		chan_uf.dirty = (u32 *) NULL;
+		chan_uf.source = (u32 *) rgbfb_fix.smem_start;
+		chan_uf.x_offset = 0;
+		chan_uf.y_offset = 0;
+		chan_uf.width = LCD_X_RES;
+		chan_uf.height = LCD_Y_RES;
+
+		if ((ret = pnx4008_put_dum_channel_uf(chan_uf, pdev->id))< 0)
+			goto err1;
+
+		if ((ret =
+		     pnx4008_set_dum_channel_sync(channel_owned, CONF_SYNC_ON,
+						  pdev->id)) < 0)
+			goto err1;
+
+		if ((ret =
+		     pnx4008_set_dum_channel_dirty_detect(channel_owned,
+							 CONF_DIRTYDETECTION_ON,
+							 pdev->id)) < 0)
+			goto err1;
+	}
+
+	if (!fb_get_options("pnxrgbfb", &option) && !strcmp(option, "nocursor"))
+		rgbfb_ops.fb_cursor = no_cursor;
+
+	info->node = -1;
+	info->flags = FBINFO_FLAG_DEFAULT;
+	info->fbops = &rgbfb_ops;
+	info->fix = rgbfb_fix;
+	info->var = rgbfb_var;
+	info->screen_size = rgbfb_fix.smem_len;
+	info->pseudo_palette = info->par;
+	info->par = NULL;
+
+	ret = fb_alloc_cmap(&info->cmap, 256, 0);
+	if (ret < 0)
+		goto err2;
+
+	ret = register_framebuffer(info);
+	if (ret < 0)
+		goto err3;
+	platform_set_drvdata(pdev, info);
+
+	return 0;
+
+err3:
+	fb_dealloc_cmap(&info->cmap);
+err2:
+	framebuffer_release(info);
+err1:
+	pnx4008_free_dum_channel(channel_owned, pdev->id);
+err0:
+	kfree(info);
+err:
+	return ret;
+}
+
+static struct platform_driver rgbfb_driver = {
+	.driver = {
+		.name = "rgbfb",
+	},
+	.probe = rgbfb_probe,
+	.remove = rgbfb_remove,
+};
+
+static int __init rgbfb_init(void)
+{
+	return platform_driver_register(&rgbfb_driver);
+}
+
+static void __exit rgbfb_exit(void)
+{
+	platform_driver_unregister(&rgbfb_driver);
+}
+
+module_init(rgbfb_init);
+module_exit(rgbfb_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/video/pnx4008/sdum.c b/drivers/video/pnx4008/sdum.c
new file mode 100644
index 0000000..51f0ecc
--- /dev/null
+++ b/drivers/video/pnx4008/sdum.c
@@ -0,0 +1,872 @@
+/*
+ * drivers/video/pnx4008/sdum.c
+ *
+ * Display Update Master support
+ *
+ * Authors: Grigory Tolstolytkin <gtolstolytkin@ru.mvista.com>
+ *          Vitaly Wool <vitalywool@gmail.com>
+ * Based on Philips Semiconductors's code
+ *
+ * Copyrght (c) 2005-2006 MontaVista Software, Inc.
+ * Copyright (c) 2005 Philips Semiconductors
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/tty.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/fb.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+#include <linux/clk.h>
+#include <asm/uaccess.h>
+#include <asm/arch/gpio.h>
+
+#include "sdum.h"
+#include "fbcommon.h"
+#include "dum.h"
+
+/* Framebuffers we have */
+
+static struct pnx4008_fb_addr {
+	int fb_type;
+	long addr_offset;
+	long fb_length;
+} fb_addr[] = {
+	[0] = {
+		FB_TYPE_YUV, 0, 0xB0000
+	},
+	[1] = {
+		FB_TYPE_RGB, 0xB0000, 0x50000
+	},
+};
+
+static struct dum_data {
+	u32 lcd_phys_start;
+	u32 lcd_virt_start;
+	u32 slave_phys_base;
+	u32 *slave_virt_base;
+	int fb_owning_channel[MAX_DUM_CHANNELS];
+	struct dumchannel_uf chan_uf_store[MAX_DUM_CHANNELS];
+} dum_data;
+
+/* Different local helper functions */
+
+static u32 nof_pixels_dx(struct dum_ch_setup *ch_setup)
+{
+	return (ch_setup->xmax - ch_setup->xmin + 1);
+}
+
+static u32 nof_pixels_dy(struct dum_ch_setup *ch_setup)
+{
+	return (ch_setup->ymax - ch_setup->ymin + 1);
+}
+
+static u32 nof_pixels_dxy(struct dum_ch_setup *ch_setup)
+{
+	return (nof_pixels_dx(ch_setup) * nof_pixels_dy(ch_setup));
+}
+
+static u32 nof_bytes(struct dum_ch_setup *ch_setup)
+{
+	u32 r = nof_pixels_dxy(ch_setup);
+	switch (ch_setup->format) {
+	case RGB888:
+	case RGB666:
+		r *= 4;
+		break;
+
+	default:
+		r *= 2;
+		break;
+	}
+	return r;
+}
+
+static u32 build_command(int disp_no, u32 reg, u32 val)
+{
+	return ((disp_no << 26) | BIT(25) | (val << 16) | (disp_no << 10) |
+		(reg << 0));
+}
+
+static u32 build_double_index(int disp_no, u32 val)
+{
+	return ((disp_no << 26) | (val << 16) | (disp_no << 10) | (val << 0));
+}
+
+static void build_disp_window(struct dum_ch_setup * ch_setup, struct disp_window * dw)
+{
+	dw->ymin = ch_setup->ymin;
+	dw->ymax = ch_setup->ymax;
+	dw->xmin_l = ch_setup->xmin & 0xFF;
+	dw->xmin_h = (ch_setup->xmin & BIT(8)) >> 8;
+	dw->xmax_l = ch_setup->xmax & 0xFF;
+	dw->xmax_h = (ch_setup->xmax & BIT(8)) >> 8;
+}
+
+static int put_channel(struct dumchannel chan)
+{
+	int i = chan.channelnr;
+
+	if (i < 0 || i > MAX_DUM_CHANNELS)
+		return -EINVAL;
+	else {
+		DUM_CH_MIN(i) = chan.dum_ch_min;
+		DUM_CH_MAX(i) = chan.dum_ch_max;
+		DUM_CH_CONF(i) = chan.dum_ch_conf;
+		DUM_CH_CTRL(i) = chan.dum_ch_ctrl;
+	}
+
+	return 0;
+}
+
+static void clear_channel(int channr)
+{
+	struct dumchannel chan;
+
+	chan.channelnr = channr;
+	chan.dum_ch_min = 0;
+	chan.dum_ch_max = 0;
+	chan.dum_ch_conf = 0;
+	chan.dum_ch_ctrl = 0;
+
+	put_channel(chan);
+}
+
+static int put_cmd_string(struct cmdstring cmds)
+{
+	u16 *cmd_str_virtaddr;
+	u32 *cmd_ptr0_virtaddr;
+	u32 cmd_str_physaddr;
+
+	int i = cmds.channelnr;
+
+	if (i < 0 || i > MAX_DUM_CHANNELS)
+		return -EINVAL;
+	else if ((cmd_ptr0_virtaddr =
+		  (int *)ioremap_nocache(DUM_COM_BASE,
+					 sizeof(int) * MAX_DUM_CHANNELS)) ==
+		 NULL)
+		return -EIOREMAPFAILED;
+	else {
+		cmd_str_physaddr = ioread32(&cmd_ptr0_virtaddr[cmds.channelnr]);
+		if ((cmd_str_virtaddr =
+		     (u16 *) ioremap_nocache(cmd_str_physaddr,
+					     sizeof(cmds))) == NULL) {
+			iounmap(cmd_ptr0_virtaddr);
+			return -EIOREMAPFAILED;
+		} else {
+			int t;
+			for (t = 0; t < 8; t++)
+				iowrite16(*((u16 *)&cmds.prestringlen + t),
+					  cmd_str_virtaddr + t);
+
+			for (t = 0; t < cmds.prestringlen / 2; t++)
+				 iowrite16(*((u16 *)&cmds.precmd + t),
+					   cmd_str_virtaddr + t + 8);
+
+			for (t = 0; t < cmds.poststringlen / 2; t++)
+				iowrite16(*((u16 *)&cmds.postcmd + t),
+					  cmd_str_virtaddr + t + 8 +
+					  	cmds.prestringlen / 2);
+
+			iounmap(cmd_ptr0_virtaddr);
+			iounmap(cmd_str_virtaddr);
+		}
+	}
+
+	return 0;
+}
+
+static u32 dum_ch_setup(int ch_no, struct dum_ch_setup * ch_setup)
+{
+	struct cmdstring cmds_c;
+	struct cmdstring *cmds = &cmds_c;
+	struct disp_window dw;
+	int standard;
+	u32 orientation = 0;
+	struct dumchannel chan = { 0 };
+	int ret;
+
+	if ((ch_setup->xmirror) || (ch_setup->ymirror) || (ch_setup->rotate)) {
+		standard = 0;
+
+		orientation = BIT(1);	/* always set 9-bit-bus */
+		if (ch_setup->xmirror)
+			orientation |= BIT(4);
+		if (ch_setup->ymirror)
+			orientation |= BIT(3);
+		if (ch_setup->rotate)
+			orientation |= BIT(0);
+	} else
+		standard = 1;
+
+	cmds->channelnr = ch_no;
+
+	/* build command string header */
+	if (standard) {
+		cmds->prestringlen = 32;
+		cmds->poststringlen = 0;
+	} else {
+		cmds->prestringlen = 48;
+		cmds->poststringlen = 16;
+	}
+
+	cmds->format =
+	    (u16) ((ch_setup->disp_no << 4) | (BIT(3)) | (ch_setup->format));
+	cmds->reserved = 0x0;
+	cmds->startaddr_low = (ch_setup->minadr & 0xFFFF);
+	cmds->startaddr_high = (ch_setup->minadr >> 16);
+
+	if ((ch_setup->minadr == 0) && (ch_setup->maxadr == 0)
+	    && (ch_setup->xmin == 0)
+	    && (ch_setup->ymin == 0) && (ch_setup->xmax == 0)
+	    && (ch_setup->ymax == 0)) {
+		cmds->pixdatlen_low = 0;
+		cmds->pixdatlen_high = 0;
+	} else {
+		u32 nbytes = nof_bytes(ch_setup);
+		cmds->pixdatlen_low = (nbytes & 0xFFFF);
+		cmds->pixdatlen_high = (nbytes >> 16);
+	}
+
+	if (ch_setup->slave_trans)
+		cmds->pixdatlen_high |= BIT(15);
+
+	/* build pre-string */
+	build_disp_window(ch_setup, &dw);
+
+	if (standard) {
+		cmds->precmd[0] =
+		    build_command(ch_setup->disp_no, DISP_XMIN_L_REG, 0x99);
+		cmds->precmd[1] =
+		    build_command(ch_setup->disp_no, DISP_XMIN_L_REG,
+				  dw.xmin_l);
+		cmds->precmd[2] =
+		    build_command(ch_setup->disp_no, DISP_XMIN_H_REG,
+				  dw.xmin_h);
+		cmds->precmd[3] =
+		    build_command(ch_setup->disp_no, DISP_YMIN_REG, dw.ymin);
+		cmds->precmd[4] =
+		    build_command(ch_setup->disp_no, DISP_XMAX_L_REG,
+				  dw.xmax_l);
+		cmds->precmd[5] =
+		    build_command(ch_setup->disp_no, DISP_XMAX_H_REG,
+				  dw.xmax_h);
+		cmds->precmd[6] =
+		    build_command(ch_setup->disp_no, DISP_YMAX_REG, dw.ymax);
+		cmds->precmd[7] =
+		    build_double_index(ch_setup->disp_no, DISP_PIXEL_REG);
+	} else {
+		if (dw.xmin_l == ch_no)
+			cmds->precmd[0] =
+			    build_command(ch_setup->disp_no, DISP_XMIN_L_REG,
+					  0x99);
+		else
+			cmds->precmd[0] =
+			    build_command(ch_setup->disp_no, DISP_XMIN_L_REG,
+					  ch_no);
+
+		cmds->precmd[1] =
+		    build_command(ch_setup->disp_no, DISP_XMIN_L_REG,
+				  dw.xmin_l);
+		cmds->precmd[2] =
+		    build_command(ch_setup->disp_no, DISP_XMIN_H_REG,
+				  dw.xmin_h);
+		cmds->precmd[3] =
+		    build_command(ch_setup->disp_no, DISP_YMIN_REG, dw.ymin);
+		cmds->precmd[4] =
+		    build_command(ch_setup->disp_no, DISP_XMAX_L_REG,
+				  dw.xmax_l);
+		cmds->precmd[5] =
+		    build_command(ch_setup->disp_no, DISP_XMAX_H_REG,
+				  dw.xmax_h);
+		cmds->precmd[6] =
+		    build_command(ch_setup->disp_no, DISP_YMAX_REG, dw.ymax);
+		cmds->precmd[7] =
+		    build_command(ch_setup->disp_no, DISP_1_REG, orientation);
+		cmds->precmd[8] =
+		    build_double_index(ch_setup->disp_no, DISP_PIXEL_REG);
+		cmds->precmd[9] =
+		    build_double_index(ch_setup->disp_no, DISP_PIXEL_REG);
+		cmds->precmd[0xA] =
+		    build_double_index(ch_setup->disp_no, DISP_PIXEL_REG);
+		cmds->precmd[0xB] =
+		    build_double_index(ch_setup->disp_no, DISP_PIXEL_REG);
+		cmds->postcmd[0] =
+		    build_command(ch_setup->disp_no, DISP_1_REG, BIT(1));
+		cmds->postcmd[1] =
+		    build_command(ch_setup->disp_no, DISP_DUMMY1_REG, 1);
+		cmds->postcmd[2] =
+		    build_command(ch_setup->disp_no, DISP_DUMMY1_REG, 2);
+		cmds->postcmd[3] =
+		    build_command(ch_setup->disp_no, DISP_DUMMY1_REG, 3);
+	}
+
+	if ((ret = put_cmd_string(cmds_c)) != 0) {
+		return ret;
+	}
+
+	chan.channelnr = cmds->channelnr;
+	chan.dum_ch_min = ch_setup->dirtybuffer + ch_setup->minadr;
+	chan.dum_ch_max = ch_setup->dirtybuffer + ch_setup->maxadr;
+	chan.dum_ch_conf = 0x002;
+	chan.dum_ch_ctrl = 0x04;
+
+	put_channel(chan);
+
+	return 0;
+}
+
+static u32 display_open(int ch_no, int auto_update, u32 * dirty_buffer,
+			u32 * frame_buffer, u32 xpos, u32 ypos, u32 w, u32 h)
+{
+
+	struct dum_ch_setup k;
+	int ret;
+
+	/* keep width & height within display area */
+	if ((xpos + w) > DISP_MAX_X_SIZE)
+		w = DISP_MAX_X_SIZE - xpos;
+
+	if ((ypos + h) > DISP_MAX_Y_SIZE)
+		h = DISP_MAX_Y_SIZE - ypos;
+
+	/* assume 1 display only */
+	k.disp_no = 0;
+	k.xmin = xpos;
+	k.ymin = ypos;
+	k.xmax = xpos + (w - 1);
+	k.ymax = ypos + (h - 1);
+
+	/* adjust min and max values if necessary */
+	if (k.xmin > DISP_MAX_X_SIZE - 1)
+		k.xmin = DISP_MAX_X_SIZE - 1;
+	if (k.ymin > DISP_MAX_Y_SIZE - 1)
+		k.ymin = DISP_MAX_Y_SIZE - 1;
+
+	if (k.xmax > DISP_MAX_X_SIZE - 1)
+		k.xmax = DISP_MAX_X_SIZE - 1;
+	if (k.ymax > DISP_MAX_Y_SIZE - 1)
+		k.ymax = DISP_MAX_Y_SIZE - 1;
+
+	k.xmirror = 0;
+	k.ymirror = 0;
+	k.rotate = 0;
+	k.minadr = (u32) frame_buffer;
+	k.maxadr = (u32) frame_buffer + (((w - 1) << 10) | ((h << 2) - 2));
+	k.pad = PAD_1024;
+	k.dirtybuffer = (u32) dirty_buffer;
+	k.format = RGB888;
+	k.hwdirty = 0;
+	k.slave_trans = 0;
+
+	ret = dum_ch_setup(ch_no, &k);
+
+	return ret;
+}
+
+static void lcd_reset(void)
+{
+	u32 *dum_pio_base = (u32 *)IO_ADDRESS(PNX4008_PIO_BASE);
+
+	udelay(1);
+	iowrite32(BIT(19), &dum_pio_base[2]);
+	udelay(1);
+	iowrite32(BIT(19), &dum_pio_base[1]);
+	udelay(1);
+}
+
+static int dum_init(struct platform_device *pdev)
+{
+	struct clk *clk;
+
+	/* enable DUM clock */
+	clk = clk_get(&pdev->dev, "dum_ck");
+	if (IS_ERR(clk)) {
+		printk(KERN_ERR "pnx4008_dum: Unable to access DUM clock\n");
+		return PTR_ERR(clk);
+	}
+
+	clk_set_rate(clk, 1);
+	clk_put(clk);
+
+	DUM_CTRL = V_DUM_RESET;
+
+	/* set priority to "round-robin". All other params to "false" */
+	DUM_CONF = BIT(9);
+
+	/* Display 1 */
+	DUM_WTCFG1 = PNX4008_DUM_WT_CFG;
+	DUM_RTCFG1 = PNX4008_DUM_RT_CFG;
+	DUM_TCFG = PNX4008_DUM_T_CFG;
+
+	return 0;
+}
+
+static void dum_chan_init(void)
+{
+	int i = 0, ch = 0;
+	u32 *cmdptrs;
+	u32 *cmdstrings;
+
+	DUM_COM_BASE =
+		CMDSTRING_BASEADDR + BYTES_PER_CMDSTRING * NR_OF_CMDSTRINGS;
+
+	if ((cmdptrs =
+	     (u32 *) ioremap_nocache(DUM_COM_BASE,
+				     sizeof(u32) * NR_OF_CMDSTRINGS)) == NULL)
+		return;
+
+	for (ch = 0; ch < NR_OF_CMDSTRINGS; ch++)
+		iowrite32(CMDSTRING_BASEADDR + BYTES_PER_CMDSTRING * ch,
+			  cmdptrs + ch);
+
+	for (ch = 0; ch < MAX_DUM_CHANNELS; ch++)
+		clear_channel(ch);
+
+	/* Clear the cmdstrings */
+	cmdstrings =
+	    (u32 *)ioremap_nocache(*cmdptrs,
+				   BYTES_PER_CMDSTRING * NR_OF_CMDSTRINGS);
+
+	if (!cmdstrings)
+		goto out;
+
+	for (i = 0; i < NR_OF_CMDSTRINGS * BYTES_PER_CMDSTRING / sizeof(u32);
+	     i++)
+		iowrite32(0, cmdstrings + i);
+
+	iounmap((u32 *)cmdstrings);
+
+out:
+	iounmap((u32 *)cmdptrs);
+}
+
+static void lcd_init(void)
+{
+	lcd_reset();
+
+	DUM_OUTP_FORMAT1 = 0; /* RGB666 */
+
+	udelay(1);
+	iowrite32(V_LCD_STANDBY_OFF, dum_data.slave_virt_base);
+	udelay(1);
+	iowrite32(V_LCD_USE_9BIT_BUS, dum_data.slave_virt_base);
+	udelay(1);
+	iowrite32(V_LCD_SYNC_RISE_L, dum_data.slave_virt_base);
+	udelay(1);
+	iowrite32(V_LCD_SYNC_RISE_H, dum_data.slave_virt_base);
+	udelay(1);
+	iowrite32(V_LCD_SYNC_FALL_L, dum_data.slave_virt_base);
+	udelay(1);
+	iowrite32(V_LCD_SYNC_FALL_H, dum_data.slave_virt_base);
+	udelay(1);
+	iowrite32(V_LCD_SYNC_ENABLE, dum_data.slave_virt_base);
+	udelay(1);
+	iowrite32(V_LCD_DISPLAY_ON, dum_data.slave_virt_base);
+	udelay(1);
+}
+
+/* Interface exported to framebuffer drivers */
+
+int pnx4008_get_fb_addresses(int fb_type, void **virt_addr,
+			     dma_addr_t *phys_addr, int *fb_length)
+{
+	int i;
+	int ret = -1;
+	for (i = 0; i < ARRAY_SIZE(fb_addr); i++)
+		if (fb_addr[i].fb_type == fb_type) {
+			*virt_addr = (void *)(dum_data.lcd_virt_start +
+					fb_addr[i].addr_offset);
+			*phys_addr =
+			    dum_data.lcd_phys_start + fb_addr[i].addr_offset;
+			*fb_length = fb_addr[i].fb_length;
+			ret = 0;
+			break;
+		}
+
+	return ret;
+}
+
+EXPORT_SYMBOL(pnx4008_get_fb_addresses);
+
+int pnx4008_alloc_dum_channel(int dev_id)
+{
+	int i = 0;
+
+	while ((i < MAX_DUM_CHANNELS) && (dum_data.fb_owning_channel[i] != -1))
+		i++;
+
+	if (i == MAX_DUM_CHANNELS)
+		return -ENORESOURCESLEFT;
+	else {
+		dum_data.fb_owning_channel[i] = dev_id;
+		return i;
+	}
+}
+
+EXPORT_SYMBOL(pnx4008_alloc_dum_channel);
+
+int pnx4008_free_dum_channel(int channr, int dev_id)
+{
+	if (channr < 0 || channr > MAX_DUM_CHANNELS)
+		return -EINVAL;
+	else if (dum_data.fb_owning_channel[channr] != dev_id)
+		return -EFBNOTOWNER;
+	else {
+		clear_channel(channr);
+		dum_data.fb_owning_channel[channr] = -1;
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL(pnx4008_free_dum_channel);
+
+int pnx4008_put_dum_channel_uf(struct dumchannel_uf chan_uf, int dev_id)
+{
+	int i = chan_uf.channelnr;
+	int ret;
+
+	if (i < 0 || i > MAX_DUM_CHANNELS)
+		return -EINVAL;
+	else if (dum_data.fb_owning_channel[i] != dev_id)
+		return -EFBNOTOWNER;
+	else if ((ret =
+		  display_open(chan_uf.channelnr, 0, chan_uf.dirty,
+			       chan_uf.source, chan_uf.y_offset,
+			       chan_uf.x_offset, chan_uf.height,
+			       chan_uf.width)) != 0)
+		return ret;
+	else {
+		dum_data.chan_uf_store[i].dirty = chan_uf.dirty;
+		dum_data.chan_uf_store[i].source = chan_uf.source;
+		dum_data.chan_uf_store[i].x_offset = chan_uf.x_offset;
+		dum_data.chan_uf_store[i].y_offset = chan_uf.y_offset;
+		dum_data.chan_uf_store[i].width = chan_uf.width;
+		dum_data.chan_uf_store[i].height = chan_uf.height;
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL(pnx4008_put_dum_channel_uf);
+
+int pnx4008_set_dum_channel_sync(int channr, int val, int dev_id)
+{
+	if (channr < 0 || channr > MAX_DUM_CHANNELS)
+		return -EINVAL;
+	else if (dum_data.fb_owning_channel[channr] != dev_id)
+		return -EFBNOTOWNER;
+	else {
+		if (val == CONF_SYNC_ON) {
+			DUM_CH_CONF(channr) |= CONF_SYNCENABLE;
+			DUM_CH_CONF(channr) |= DUM_CHANNEL_CFG_SYNC_MASK |
+				DUM_CHANNEL_CFG_SYNC_MASK_SET;
+		} else if (val == CONF_SYNC_OFF)
+			DUM_CH_CONF(channr) &= ~CONF_SYNCENABLE;
+		else
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL(pnx4008_set_dum_channel_sync);
+
+int pnx4008_set_dum_channel_dirty_detect(int channr, int val, int dev_id)
+{
+	if (channr < 0 || channr > MAX_DUM_CHANNELS)
+		return -EINVAL;
+	else if (dum_data.fb_owning_channel[channr] != dev_id)
+		return -EFBNOTOWNER;
+	else {
+		if (val == CONF_DIRTYDETECTION_ON)
+			DUM_CH_CONF(channr) |= CONF_DIRTYENABLE;
+		else if (val == CONF_DIRTYDETECTION_OFF)
+			DUM_CH_CONF(channr) &= ~CONF_DIRTYENABLE;
+		else
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL(pnx4008_set_dum_channel_dirty_detect);
+
+#if 0 /* Functions not used currently, but likely to be used in future */
+
+static int get_channel(struct dumchannel *p_chan)
+{
+	int i = p_chan->channelnr;
+
+	if (i < 0 || i > MAX_DUM_CHANNELS)
+		return -EINVAL;
+	else {
+		p_chan->dum_ch_min = DUM_CH_MIN(i);
+		p_chan->dum_ch_max = DUM_CH_MAX(i);
+		p_chan->dum_ch_conf = DUM_CH_CONF(i);
+		p_chan->dum_ch_stat = DUM_CH_STAT(i);
+		p_chan->dum_ch_ctrl = 0;	/* WriteOnly control register */
+	}
+
+	return 0;
+}
+
+int pnx4008_get_dum_channel_uf(struct dumchannel_uf *p_chan_uf, int dev_id)
+{
+	int i = p_chan_uf->channelnr;
+
+	if (i < 0 || i > MAX_DUM_CHANNELS)
+		return -EINVAL;
+	else if (dum_data.fb_owning_channel[i] != dev_id)
+		return -EFBNOTOWNER;
+	else {
+		p_chan_uf->dirty = dum_data.chan_uf_store[i].dirty;
+		p_chan_uf->source = dum_data.chan_uf_store[i].source;
+		p_chan_uf->x_offset = dum_data.chan_uf_store[i].x_offset;
+		p_chan_uf->y_offset = dum_data.chan_uf_store[i].y_offset;
+		p_chan_uf->width = dum_data.chan_uf_store[i].width;
+		p_chan_uf->height = dum_data.chan_uf_store[i].height;
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL(pnx4008_get_dum_channel_uf);
+
+int pnx4008_get_dum_channel_config(int channr, int dev_id)
+{
+	int ret;
+	struct dumchannel chan;
+
+	if (channr < 0 || channr > MAX_DUM_CHANNELS)
+		return -EINVAL;
+	else if (dum_data.fb_owning_channel[channr] != dev_id)
+		return -EFBNOTOWNER;
+	else {
+		chan.channelnr = channr;
+		if ((ret = get_channel(&chan)) != 0)
+			return ret;
+	}
+
+	return (chan.dum_ch_conf & DUM_CHANNEL_CFG_MASK);
+}
+
+EXPORT_SYMBOL(pnx4008_get_dum_channel_config);
+
+int pnx4008_force_update_dum_channel(int channr, int dev_id)
+{
+	if (channr < 0 || channr > MAX_DUM_CHANNELS)
+		return -EINVAL;
+
+	else if (dum_data.fb_owning_channel[channr] != dev_id)
+		return -EFBNOTOWNER;
+	else
+		DUM_CH_CTRL(channr) = CTRL_SETDIRTY;
+
+	return 0;
+}
+
+EXPORT_SYMBOL(pnx4008_force_update_dum_channel);
+
+#endif
+
+int pnx4008_sdum_mmap(struct fb_info *info, struct vm_area_struct *vma,
+		      struct device *dev)
+{
+	unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
+
+	if (off < info->fix.smem_len) {
+		vma->vm_pgoff += 1;
+		return dma_mmap_writecombine(dev, vma,
+				(void *)dum_data.lcd_virt_start,
+				dum_data.lcd_phys_start,
+				FB_DMA_SIZE);
+	}
+	return -EINVAL;
+}
+
+EXPORT_SYMBOL(pnx4008_sdum_mmap);
+
+int pnx4008_set_dum_exit_notification(int dev_id)
+{
+	int i;
+
+	for (i = 0; i < MAX_DUM_CHANNELS; i++)
+		if (dum_data.fb_owning_channel[i] == dev_id)
+			return -ERESOURCESNOTFREED;
+
+	return 0;
+}
+
+EXPORT_SYMBOL(pnx4008_set_dum_exit_notification);
+
+/* Platform device driver for DUM */
+
+static int sdum_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	int retval = 0;
+	struct clk *clk;
+
+	clk = clk_get(0, "dum_ck");
+	if (!IS_ERR(clk)) {
+		clk_set_rate(clk, 0);
+		clk_put(clk);
+	} else
+		retval = PTR_ERR(clk);
+
+	/* disable BAC */
+	DUM_CTRL = V_BAC_DISABLE_IDLE;
+
+	/* LCD standby & turn off display */
+	lcd_reset();
+
+	return retval;
+}
+
+static int sdum_resume(struct platform_device *pdev)
+{
+	int retval = 0;
+	struct clk *clk;
+
+	clk = clk_get(0, "dum_ck");
+	if (!IS_ERR(clk)) {
+		clk_set_rate(clk, 1);
+		clk_put(clk);
+	} else
+		retval = PTR_ERR(clk);
+
+	/* wait for BAC disable */
+	DUM_CTRL = V_BAC_DISABLE_TRIG;
+
+	while (DUM_CTRL & BAC_ENABLED)
+		udelay(10);
+
+	/* re-init LCD */
+	lcd_init();
+
+	/* enable BAC and reset MUX */
+	DUM_CTRL = V_BAC_ENABLE;
+	udelay(1);
+	DUM_CTRL = V_MUX_RESET;
+	return 0;
+}
+
+static int __devinit sdum_probe(struct platform_device *pdev)
+{
+	int ret = 0, i = 0;
+
+	/* map frame buffer */
+	dum_data.lcd_virt_start = (u32) dma_alloc_writecombine(&pdev->dev,
+						       FB_DMA_SIZE,
+						       &dum_data.lcd_phys_start,
+						       GFP_KERNEL);
+
+	if (!dum_data.lcd_virt_start) {
+		ret = -ENOMEM;
+		goto out_3;
+	}
+
+	/* map slave registers */
+	dum_data.slave_phys_base = PNX4008_DUM_SLAVE_BASE;
+	dum_data.slave_virt_base =
+	    (u32 *) ioremap_nocache(dum_data.slave_phys_base, sizeof(u32));
+
+	if (dum_data.slave_virt_base == NULL) {
+		ret = -ENOMEM;
+		goto out_2;
+	}
+
+	/* initialize DUM and LCD display */
+	ret = dum_init(pdev);
+	if (ret)
+		goto out_1;
+
+	dum_chan_init();
+	lcd_init();
+
+	DUM_CTRL = V_BAC_ENABLE;
+	udelay(1);
+	DUM_CTRL = V_MUX_RESET;
+
+	/* set decode address and sync clock divider */
+	DUM_DECODE = dum_data.lcd_phys_start & DUM_DECODE_MASK;
+	DUM_CLK_DIV = PNX4008_DUM_CLK_DIV;
+
+	for (i = 0; i < MAX_DUM_CHANNELS; i++)
+		dum_data.fb_owning_channel[i] = -1;
+
+	/*setup wakeup interrupt */
+	start_int_set_rising_edge(SE_DISP_SYNC_INT);
+	start_int_ack(SE_DISP_SYNC_INT);
+	start_int_umask(SE_DISP_SYNC_INT);
+
+	return 0;
+
+out_1:
+	iounmap((void *)dum_data.slave_virt_base);
+out_2:
+	dma_free_writecombine(&pdev->dev, FB_DMA_SIZE,
+			(void *)dum_data.lcd_virt_start,
+			dum_data.lcd_phys_start);
+out_3:
+	return ret;
+}
+
+static int sdum_remove(struct platform_device *pdev)
+{
+	struct clk *clk;
+
+	start_int_mask(SE_DISP_SYNC_INT);
+
+	clk = clk_get(0, "dum_ck");
+	if (!IS_ERR(clk)) {
+		clk_set_rate(clk, 0);
+		clk_put(clk);
+	}
+
+	iounmap((void *)dum_data.slave_virt_base);
+
+	dma_free_writecombine(&pdev->dev, FB_DMA_SIZE,
+			(void *)dum_data.lcd_virt_start,
+			dum_data.lcd_phys_start);
+
+	return 0;
+}
+
+static struct platform_driver sdum_driver = {
+	.driver = {
+		.name = "sdum",
+	},
+	.probe = sdum_probe,
+	.remove = sdum_remove,
+	.suspend = sdum_suspend,
+	.resume = sdum_resume,
+};
+
+int __init sdum_init(void)
+{
+	return platform_driver_register(&sdum_driver);
+}
+
+static void __exit sdum_exit(void)
+{
+	platform_driver_unregister(&sdum_driver);
+};
+
+module_init(sdum_init);
+module_exit(sdum_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/video/pnx4008/sdum.h b/drivers/video/pnx4008/sdum.h
new file mode 100644
index 0000000..e8c5dcd
--- /dev/null
+++ b/drivers/video/pnx4008/sdum.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2005 Philips Semiconductors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA, or http://www.gnu.org/licenses/gpl.html
+*/
+
+#define MAX_DUM_CHANNELS	64
+
+#define RGB_MEM_WINDOW(x) (0x10000000 + (x)*0x00100000)
+
+#define QCIF_OFFSET(x) (((x) == 0) ? 0x00000: ((x) == 1) ? 0x30000: -1)
+#define CIF_OFFSET(x)  (((x) == 0) ? 0x00000: ((x) == 1) ? 0x60000: -1)
+
+#define CTRL_SETDIRTY	 	(0x00000001)
+#define CONF_DIRTYENABLE	(0x00000020)
+#define CONF_SYNCENABLE		(0x00000004)
+
+#define DIRTY_ENABLED(conf)	((conf) & 0x0020)
+#define SYNC_ENABLED(conf) 	((conf) & 0x0004)
+
+/* Display 1 & 2 Write Timing Configuration */
+#define PNX4008_DUM_WT_CFG		0x00372000
+
+/* Display 1 & 2 Read Timing Configuration */
+#define PNX4008_DUM_RT_CFG		0x00003A47
+
+/* DUM Transit State Timing Configuration */
+#define PNX4008_DUM_T_CFG		0x1D	/* 29 HCLK cycles */
+
+/* DUM Sync count clock divider */
+#define PNX4008_DUM_CLK_DIV		0x02DD
+
+/* Memory size for framebuffer, allocated through dma_alloc_writecombine().
+ * Must be PAGE aligned
+ */
+#define FB_DMA_SIZE (PAGE_ALIGN(SZ_1M + PAGE_SIZE))
+
+#define OFFSET_RGBBUFFER (0xB0000)
+#define OFFSET_YUVBUFFER (0x00000)
+
+#define YUVBUFFER (lcd_video_start + OFFSET_YUVBUFFER)
+#define RGBBUFFER (lcd_video_start + OFFSET_RGBBUFFER)
+
+#define CMDSTRING_BASEADDR	(0x00C000)	/* iram */
+#define BYTES_PER_CMDSTRING	(0x80)
+#define NR_OF_CMDSTRINGS	(64)
+
+#define MAX_NR_PRESTRINGS (0x40)
+#define MAX_NR_POSTSTRINGS (0x40)
+
+/* various mask definitions */
+#define DUM_CLK_ENABLE 0x01
+#define DUM_CLK_DISABLE 0
+#define DUM_DECODE_MASK 0x1FFFFFFF
+#define DUM_CHANNEL_CFG_MASK 0x01FF
+#define DUM_CHANNEL_CFG_SYNC_MASK 0xFFFE00FF
+#define DUM_CHANNEL_CFG_SYNC_MASK_SET 0x0CA00
+
+#define SDUM_RETURNVAL_BASE (0x500)
+
+#define CONF_SYNC_OFF		(0x602)
+#define CONF_SYNC_ON		(0x603)
+
+#define CONF_DIRTYDETECTION_OFF	(0x600)
+#define CONF_DIRTYDETECTION_ON	(0x601)
+
+/* Set the corresponding bit. */
+#define BIT(n) (0x1U << (n))
+
+struct dumchannel_uf {
+	int channelnr;
+	u32 *dirty;
+	u32 *source;
+	u32 x_offset;
+	u32 y_offset;
+	u32 width;
+	u32 height;
+};
+
+enum {
+	FB_TYPE_YUV,
+	FB_TYPE_RGB
+};
+
+struct cmdstring {
+	int channelnr;
+	uint16_t prestringlen;
+	uint16_t poststringlen;
+	uint16_t format;
+	uint16_t reserved;
+	uint16_t startaddr_low;
+	uint16_t startaddr_high;
+	uint16_t pixdatlen_low;
+	uint16_t pixdatlen_high;
+	u32 precmd[MAX_NR_PRESTRINGS];
+	u32 postcmd[MAX_NR_POSTSTRINGS];
+
+};
+
+struct dumchannel {
+	int channelnr;
+	int dum_ch_min;
+	int dum_ch_max;
+	int dum_ch_conf;
+	int dum_ch_stat;
+	int dum_ch_ctrl;
+};
+
+int pnx4008_alloc_dum_channel(int dev_id);
+int pnx4008_free_dum_channel(int channr, int dev_id);
+
+int pnx4008_get_dum_channel_uf(struct dumchannel_uf *pChan_uf, int dev_id);
+int pnx4008_put_dum_channel_uf(struct dumchannel_uf chan_uf, int dev_id);
+
+int pnx4008_set_dum_channel_sync(int channr, int val, int dev_id);
+int pnx4008_set_dum_channel_dirty_detect(int channr, int val, int dev_id);
+
+int pnx4008_force_dum_update_channel(int channr, int dev_id);
+
+int pnx4008_get_dum_channel_config(int channr, int dev_id);
+
+int pnx4008_sdum_mmap(struct fb_info *info, struct vm_area_struct *vma, struct device *dev);
+int pnx4008_set_dum_exit_notification(int dev_id);
+
+int pnx4008_get_fb_addresses(int fb_type, void **virt_addr,
+			     dma_addr_t * phys_addr, int *fb_length);
-- 
cgit v0.10.2


From d459ec0baa5d22e111dfb139c46d6d788a9eac20 Mon Sep 17 00:00:00 2001
From: "Antonino A. Daplas" <adaplas@gmail.com>
Date: Mon, 3 Jul 2006 00:24:20 -0700
Subject: [PATCH] vt: Decrement ref count of the VT backend on deallocation

When a VT is newly allocated, the module reference count of the backend
will be incremented.  This should be balanced by a module_put() when this
VT is deallocated.

Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 3ef823d..da7e66a 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -886,6 +886,7 @@ void vc_disallocate(unsigned int currcons)
 	if (vc_cons_allocated(currcons)) {
 		struct vc_data *vc = vc_cons[currcons].d;
 		vc->vc_sw->con_deinit(vc);
+		module_put(vc->vc_sw->owner);
 		if (vc->vc_kmalloced)
 			kfree(vc->vc_screenbuf);
 		if (currcons >= MIN_NR_CONSOLES)
-- 
cgit v0.10.2


From 62322d2554d2f9680c8ace7bbf1f97d8fa84ad1a Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Mon, 3 Jul 2006 00:24:21 -0700
Subject: [PATCH] make more file_operation structs static

Mark the static struct file_operations in drivers/char as const.  Making
them const prevents accidental bugs, and moves them to the .rodata section
so that they no longer do any false sharing; in addition with the proper
debug option they are then protected against corruption..

[akpm@osdl.org: build fix]
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c
index ffcf15c..d9c5a91 100644
--- a/drivers/char/agp/frontend.c
+++ b/drivers/char/agp/frontend.c
@@ -1059,7 +1059,7 @@ ioctl_out:
 	return ret_val;
 }
 
-static struct file_operations agp_fops =
+static const struct file_operations agp_fops =
 {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c
index bcc4668..10a389d 100644
--- a/drivers/char/applicom.c
+++ b/drivers/char/applicom.c
@@ -112,7 +112,7 @@ static int ac_ioctl(struct inode *, struct file *, unsigned int,
 		    unsigned long);
 static irqreturn_t ac_interrupt(int, void *, struct pt_regs *);
 
-static struct file_operations ac_fops = {
+static const struct file_operations ac_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.read = ac_read,
diff --git a/drivers/char/cs5535_gpio.c b/drivers/char/cs5535_gpio.c
index 46d6603..8ce3f34 100644
--- a/drivers/char/cs5535_gpio.c
+++ b/drivers/char/cs5535_gpio.c
@@ -158,7 +158,7 @@ static int cs5535_gpio_open(struct inode *inode, struct file *file)
 	return nonseekable_open(inode, file);
 }
 
-static struct file_operations cs5535_gpio_fops = {
+static const struct file_operations cs5535_gpio_fops = {
 	.owner	= THIS_MODULE,
 	.write	= cs5535_gpio_write,
 	.read	= cs5535_gpio_read,
diff --git a/drivers/char/ds1286.c b/drivers/char/ds1286.c
index d755cac..21c8229 100644
--- a/drivers/char/ds1286.c
+++ b/drivers/char/ds1286.c
@@ -281,7 +281,7 @@ static unsigned int ds1286_poll(struct file *file, poll_table *wait)
  *	The various file operations we support.
  */
 
-static struct file_operations ds1286_fops = {
+static const struct file_operations ds1286_fops = {
 	.llseek		= no_llseek,
 	.read		= ds1286_read,
 	.poll		= ds1286_poll,
diff --git a/drivers/char/ds1302.c b/drivers/char/ds1302.c
index 625e8b5..bcdb107 100644
--- a/drivers/char/ds1302.c
+++ b/drivers/char/ds1302.c
@@ -282,7 +282,7 @@ get_rtc_status(char *buf)
 
 /* The various file operations we support. */
 
-static struct file_operations rtc_fops = {
+static const struct file_operations rtc_fops = {
 	.owner		= THIS_MODULE,
 	.ioctl		= rtc_ioctl,
 };
diff --git a/drivers/char/ds1620.c b/drivers/char/ds1620.c
index 953e670..48cb8f0 100644
--- a/drivers/char/ds1620.c
+++ b/drivers/char/ds1620.c
@@ -336,7 +336,7 @@ proc_therm_ds1620_read(char *buf, char **start, off_t offset,
 static struct proc_dir_entry *proc_therm_ds1620;
 #endif
 
-static struct file_operations ds1620_fops = {
+static const struct file_operations ds1620_fops = {
 	.owner		= THIS_MODULE,
 	.open		= nonseekable_open,
 	.read		= ds1620_read,
diff --git a/drivers/char/dsp56k.c b/drivers/char/dsp56k.c
index 09b4136..9b1bf60 100644
--- a/drivers/char/dsp56k.c
+++ b/drivers/char/dsp56k.c
@@ -483,7 +483,7 @@ static int dsp56k_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static struct file_operations dsp56k_fops = {
+static const struct file_operations dsp56k_fops = {
 	.owner		= THIS_MODULE,
 	.read		= dsp56k_read,
 	.write		= dsp56k_write,
diff --git a/drivers/char/dtlk.c b/drivers/char/dtlk.c
index da2c89f..5e82c3b 100644
--- a/drivers/char/dtlk.c
+++ b/drivers/char/dtlk.c
@@ -94,7 +94,7 @@ static int dtlk_release(struct inode *, struct file *);
 static int dtlk_ioctl(struct inode *inode, struct file *file,
 		      unsigned int cmd, unsigned long arg);
 
-static struct file_operations dtlk_fops =
+static const struct file_operations dtlk_fops =
 {
 	.owner		= THIS_MODULE,
 	.read		= dtlk_read,
diff --git a/drivers/char/efirtc.c b/drivers/char/efirtc.c
index 0090e7a..004141d 100644
--- a/drivers/char/efirtc.c
+++ b/drivers/char/efirtc.c
@@ -285,7 +285,7 @@ efi_rtc_close(struct inode *inode, struct file *file)
  *	The various file operations we support.
  */
 
-static struct file_operations efi_rtc_fops = {
+static const struct file_operations efi_rtc_fops = {
 	.owner		= THIS_MODULE,
 	.ioctl		= efi_rtc_ioctl,
 	.open		= efi_rtc_open,
diff --git a/drivers/char/ftape/zftape/zftape-init.c b/drivers/char/ftape/zftape/zftape-init.c
index 5527256..164a1aa 100644
--- a/drivers/char/ftape/zftape/zftape-init.c
+++ b/drivers/char/ftape/zftape/zftape-init.c
@@ -86,7 +86,7 @@ static ssize_t zft_read (struct file *fp, char __user *buff,
 static ssize_t zft_write(struct file *fp, const char __user *buff,
 			 size_t req_len, loff_t *ppos);
 
-static struct file_operations zft_cdev =
+static const struct file_operations zft_cdev =
 {
 	.owner		= THIS_MODULE,
 	.read		= zft_read,
diff --git a/drivers/char/genrtc.c b/drivers/char/genrtc.c
index bebd7e3..817dc40 100644
--- a/drivers/char/genrtc.c
+++ b/drivers/char/genrtc.c
@@ -482,7 +482,7 @@ static inline int gen_rtc_proc_init(void) { return 0; }
  *	The various file operations we support.
  */
 
-static struct file_operations gen_rtc_fops = {
+static const struct file_operations gen_rtc_fops = {
 	.owner		= THIS_MODULE,
 #ifdef CONFIG_GEN_RTC_X
 	.read		= gen_rtc_read,
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index e5643f3..8afba33 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -553,7 +553,7 @@ hpet_ioctl_common(struct hpet_dev *devp, int cmd, unsigned long arg, int kernel)
 	return err;
 }
 
-static struct file_operations hpet_fops = {
+static const struct file_operations hpet_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.read = hpet_read,
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 88b0266..154a81d 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -149,7 +149,7 @@ out:
 }
 
 
-static struct file_operations rng_chrdev_ops = {
+static const struct file_operations rng_chrdev_ops = {
 	.owner		= THIS_MODULE,
 	.open		= rng_dev_open,
 	.read		= rng_dev_read,
diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c
index f3c3aaf..353d9f3 100644
--- a/drivers/char/i8k.c
+++ b/drivers/char/i8k.c
@@ -80,7 +80,7 @@ static int i8k_open_fs(struct inode *inode, struct file *file);
 static int i8k_ioctl(struct inode *, struct file *, unsigned int,
 		     unsigned long);
 
-static struct file_operations i8k_fops = {
+static const struct file_operations i8k_fops = {
 	.open		= i8k_open_fs,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c
index a4200a2..518ece7 100644
--- a/drivers/char/ip2/ip2main.c
+++ b/drivers/char/ip2/ip2main.c
@@ -233,7 +233,7 @@ static void  *DevTableMem[IP2_MAX_BOARDS];
 /* This is the driver descriptor for the ip2ipl device, which is used to
  * download the loadware to the boards.
  */
-static struct file_operations ip2_ipl = {
+static const struct file_operations ip2_ipl = {
 	.owner		= THIS_MODULE,
 	.read		= ip2_ipl_read,
 	.write		= ip2_ipl_write,
diff --git a/drivers/char/ip27-rtc.c b/drivers/char/ip27-rtc.c
index 3acdac3..a48da02 100644
--- a/drivers/char/ip27-rtc.c
+++ b/drivers/char/ip27-rtc.c
@@ -196,7 +196,7 @@ static int rtc_release(struct inode *inode, struct file *file)
  *	The various file operations we support.
  */
 
-static struct file_operations rtc_fops = {
+static const struct file_operations rtc_fops = {
 	.owner		= THIS_MODULE,
 	.ioctl		= rtc_ioctl,
 	.open		= rtc_open,
diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c
index 2fc894f..68d7c61 100644
--- a/drivers/char/ipmi/ipmi_devintf.c
+++ b/drivers/char/ipmi/ipmi_devintf.c
@@ -765,7 +765,7 @@ static long compat_ipmi_ioctl(struct file *filep, unsigned int cmd,
 }
 #endif
 
-static struct file_operations ipmi_fops = {
+static const struct file_operations ipmi_fops = {
 	.owner		= THIS_MODULE,
 	.ioctl		= ipmi_ioctl,
 #ifdef CONFIG_COMPAT
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 74a889c..accaaf1 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -807,7 +807,7 @@ static int ipmi_close(struct inode *ino, struct file *filep)
 	return 0;
 }
 
-static struct file_operations ipmi_wdog_fops = {
+static const struct file_operations ipmi_wdog_fops = {
 	.owner   = THIS_MODULE,
 	.read    = ipmi_read,
 	.poll    = ipmi_poll,
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index fbce2f0..84dfc42 100644
--- a/drivers/char/istallion.c
+++ b/drivers/char/istallion.c
@@ -748,7 +748,7 @@ static int	stli_initpcibrd(int brdtype, struct pci_dev *devp);
  *	will give access to the shared memory on the Stallion intelligent
  *	board. This is also a very useful debugging tool.
  */
-static struct file_operations	stli_fsiomem = {
+static const struct file_operations	stli_fsiomem = {
 	.owner		= THIS_MODULE,
 	.read		= stli_memread,
 	.write		= stli_memwrite,
diff --git a/drivers/char/ite_gpio.c b/drivers/char/ite_gpio.c
index 747ba45..cde562d 100644
--- a/drivers/char/ite_gpio.c
+++ b/drivers/char/ite_gpio.c
@@ -357,7 +357,7 @@ DEB(printk("interrupt 0x%x %d\n",ITE_GPAISR, i));
 	}
 }
 
-static struct file_operations ite_gpio_fops = {
+static const struct file_operations ite_gpio_fops = {
 	.owner		= THIS_MODULE,
 	.ioctl		= ite_gpio_ioctl,
 	.open		= ite_gpio_open,
diff --git a/drivers/char/lcd.c b/drivers/char/lcd.c
index 7d49b24..da601fd 100644
--- a/drivers/char/lcd.c
+++ b/drivers/char/lcd.c
@@ -598,7 +598,7 @@ static ssize_t lcd_read(struct file *file, char *buf,
  *	The various file operations we support.
  */
 
-static struct file_operations lcd_fops = {
+static const struct file_operations lcd_fops = {
 	.read = lcd_read,
 	.ioctl = lcd_ioctl,
 	.open = lcd_open,
diff --git a/drivers/char/lp.c b/drivers/char/lp.c
index 582cdbd..f875fda 100644
--- a/drivers/char/lp.c
+++ b/drivers/char/lp.c
@@ -666,7 +666,7 @@ static int lp_ioctl(struct inode *inode, struct file *file,
 	return retval;
 }
 
-static struct file_operations lp_fops = {
+static const struct file_operations lp_fops = {
 	.owner		= THIS_MODULE,
 	.write		= lp_write,
 	.ioctl		= lp_ioctl,
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 70f3954..e97c32c 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -776,7 +776,7 @@ static int open_port(struct inode * inode, struct file * filp)
 #define open_kmem	open_mem
 #define open_oldmem	open_mem
 
-static struct file_operations mem_fops = {
+static const struct file_operations mem_fops = {
 	.llseek		= memory_lseek,
 	.read		= read_mem,
 	.write		= write_mem,
@@ -784,7 +784,7 @@ static struct file_operations mem_fops = {
 	.open		= open_mem,
 };
 
-static struct file_operations kmem_fops = {
+static const struct file_operations kmem_fops = {
 	.llseek		= memory_lseek,
 	.read		= read_kmem,
 	.write		= write_kmem,
@@ -792,7 +792,7 @@ static struct file_operations kmem_fops = {
 	.open		= open_kmem,
 };
 
-static struct file_operations null_fops = {
+static const struct file_operations null_fops = {
 	.llseek		= null_lseek,
 	.read		= read_null,
 	.write		= write_null,
@@ -800,7 +800,7 @@ static struct file_operations null_fops = {
 };
 
 #if defined(CONFIG_ISA) || !defined(__mc68000__)
-static struct file_operations port_fops = {
+static const struct file_operations port_fops = {
 	.llseek		= memory_lseek,
 	.read		= read_port,
 	.write		= write_port,
@@ -808,7 +808,7 @@ static struct file_operations port_fops = {
 };
 #endif
 
-static struct file_operations zero_fops = {
+static const struct file_operations zero_fops = {
 	.llseek		= zero_lseek,
 	.read		= read_zero,
 	.write		= write_zero,
@@ -819,14 +819,14 @@ static struct backing_dev_info zero_bdi = {
 	.capabilities	= BDI_CAP_MAP_COPY,
 };
 
-static struct file_operations full_fops = {
+static const struct file_operations full_fops = {
 	.llseek		= full_lseek,
 	.read		= read_full,
 	.write		= write_full,
 };
 
 #ifdef CONFIG_CRASH_DUMP
-static struct file_operations oldmem_fops = {
+static const struct file_operations oldmem_fops = {
 	.read	= read_oldmem,
 	.open	= open_oldmem,
 };
@@ -853,7 +853,7 @@ static ssize_t kmsg_write(struct file * file, const char __user * buf,
 	return ret;
 }
 
-static struct file_operations kmsg_fops = {
+static const struct file_operations kmsg_fops = {
 	.write =	kmsg_write,
 };
 
@@ -903,7 +903,7 @@ static int memory_open(struct inode * inode, struct file * filp)
 	return 0;
 }
 
-static struct file_operations memory_fops = {
+static const struct file_operations memory_fops = {
 	.open		= memory_open,	/* just a selector for the real open */
 };
 
diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index d5fa19d..62ebe09 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -113,7 +113,7 @@ static int misc_seq_open(struct inode *inode, struct file *file)
 	return seq_open(file, &misc_seq_ops);
 }
 
-static struct file_operations misc_proc_fops = {
+static const struct file_operations misc_proc_fops = {
 	.owner	 = THIS_MODULE,
 	.open    = misc_seq_open,
 	.read    = seq_read,
@@ -176,7 +176,7 @@ fail:
  */
 static struct class *misc_class;
 
-static struct file_operations misc_fops = {
+static const struct file_operations misc_fops = {
 	.owner		= THIS_MODULE,
 	.open		= misc_open,
 };
diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c
index 70b774f..1f0f2b6 100644
--- a/drivers/char/mmtimer.c
+++ b/drivers/char/mmtimer.c
@@ -63,7 +63,7 @@ static int mmtimer_mmap(struct file *file, struct vm_area_struct *vma);
  */
 static unsigned long mmtimer_femtoperiod = 0;
 
-static struct file_operations mmtimer_fops = {
+static const struct file_operations mmtimer_fops = {
 	.owner =	THIS_MODULE,
 	.mmap =		mmtimer_mmap,
 	.ioctl =	mmtimer_ioctl,
diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c
index d3ba2f8..39a2e66 100644
--- a/drivers/char/mwave/mwavedd.c
+++ b/drivers/char/mwave/mwavedd.c
@@ -454,7 +454,7 @@ static int register_serial_portandirq(unsigned int port, int irq)
 }
 
 
-static struct file_operations mwave_fops = {
+static const struct file_operations mwave_fops = {
 	.owner		= THIS_MODULE,
 	.read		= mwave_read,
 	.write		= mwave_write,
diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c
index 8c5f102..a39f19c 100644
--- a/drivers/char/nvram.c
+++ b/drivers/char/nvram.c
@@ -437,7 +437,7 @@ nvram_read_proc(char *buffer, char **start, off_t offset,
 
 #endif /* CONFIG_PROC_FS */
 
-static struct file_operations nvram_fops = {
+static const struct file_operations nvram_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= nvram_llseek,
 	.read		= nvram_read,
diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c
index f240a10..7c57ebf 100644
--- a/drivers/char/nwbutton.c
+++ b/drivers/char/nwbutton.c
@@ -183,7 +183,7 @@ static int button_read (struct file *filp, char __user *buffer,
  * attempts to perform these operations on the device.
  */
 
-static struct file_operations button_fops = {
+static const struct file_operations button_fops = {
 	.owner		= THIS_MODULE,
 	.read		= button_read,
 };
diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c
index 8865387..206cf6f 100644
--- a/drivers/char/nwflash.c
+++ b/drivers/char/nwflash.c
@@ -642,7 +642,7 @@ static void kick_open(void)
 	udelay(25);
 }
 
-static struct file_operations flash_fops =
+static const struct file_operations flash_fops =
 {
 	.owner		= THIS_MODULE,
 	.llseek		= flash_llseek,
diff --git a/drivers/char/pc8736x_gpio.c b/drivers/char/pc8736x_gpio.c
index c860de6..4005ee0 100644
--- a/drivers/char/pc8736x_gpio.c
+++ b/drivers/char/pc8736x_gpio.c
@@ -236,7 +236,7 @@ static int pc8736x_gpio_open(struct inode *inode, struct file *file)
 	return nonseekable_open(inode, file);
 }
 
-static struct file_operations pc8736x_gpio_fops = {
+static const struct file_operations pc8736x_gpio_fops = {
 	.owner	= THIS_MODULE,
 	.open	= pc8736x_gpio_open,
 	.write	= nsc_gpio_write,
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index 31c8a21..50d20aa 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -1938,7 +1938,7 @@ static void cm4000_detach(struct pcmcia_device *link)
 	return;
 }
 
-static struct file_operations cm4000_fops = {
+static const struct file_operations cm4000_fops = {
 	.owner	= THIS_MODULE,
 	.read	= cmm_read,
 	.write	= cmm_write,
diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c
index 47a8465..55cf4be 100644
--- a/drivers/char/pcmcia/cm4040_cs.c
+++ b/drivers/char/pcmcia/cm4040_cs.c
@@ -688,7 +688,7 @@ static void reader_detach(struct pcmcia_device *link)
 	return;
 }
 
-static struct file_operations reader_fops = {
+static const struct file_operations reader_fops = {
 	.owner		= THIS_MODULE,
 	.read		= cm4040_read,
 	.write		= cm4040_write,
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index 24231d9..520d2cf 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -739,7 +739,7 @@ static unsigned int pp_poll (struct file * file, poll_table * wait)
 
 static struct class *ppdev_class;
 
-static struct file_operations pp_fops = {
+static const struct file_operations pp_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.read		= pp_read,
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index 9bf97c5..579868a 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -30,7 +30,7 @@ struct raw_device_data {
 static struct class *raw_class;
 static struct raw_device_data raw_devices[MAX_RAW_MINORS];
 static DEFINE_MUTEX(raw_mutex);
-static struct file_operations raw_ctl_fops;	     /* forward declaration */
+static const struct file_operations raw_ctl_fops; /* forward declaration */
 
 /*
  * Open/close code for raw IO.
@@ -261,7 +261,7 @@ static ssize_t raw_file_aio_write(struct kiocb *iocb, const char __user *buf,
 }
 
 
-static struct file_operations raw_fops = {
+static const struct file_operations raw_fops = {
 	.read	=	generic_file_read,
 	.aio_read = 	generic_file_aio_read,
 	.write	=	raw_file_write,
@@ -274,7 +274,7 @@ static struct file_operations raw_fops = {
 	.owner	=	THIS_MODULE,
 };
 
-static struct file_operations raw_ctl_fops = {
+static const struct file_operations raw_ctl_fops = {
 	.ioctl	=	raw_ctl_ioctl,
 	.open	=	raw_open,
 	.owner	=	THIS_MODULE,
diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c
index 3afc6a4..3fa80aa 100644
--- a/drivers/char/rio/rio_linux.c
+++ b/drivers/char/rio/rio_linux.c
@@ -243,7 +243,7 @@ static struct real_driver rio_real_driver = {
  *
  */
 
-static struct file_operations rio_fw_fops = {
+static const struct file_operations rio_fw_fops = {
 	.owner = THIS_MODULE,
 	.ioctl = rio_fw_ioctl,
 };
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index aefac4a..cc7bd1a 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -877,7 +877,7 @@ int rtc_control(rtc_task_t *task, unsigned int cmd, unsigned long arg)
  *	The various file operations we support.
  */
 
-static struct file_operations rtc_fops = {
+static const struct file_operations rtc_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.read		= rtc_read,
@@ -896,7 +896,7 @@ static struct miscdevice rtc_dev = {
 	.fops		= &rtc_fops,
 };
 
-static struct file_operations rtc_proc_fops = {
+static const struct file_operations rtc_proc_fops = {
 	.owner = THIS_MODULE,
 	.open = rtc_proc_open,
 	.read  = seq_read,
diff --git a/drivers/char/scx200_gpio.c b/drivers/char/scx200_gpio.c
index 45083e5..425c587 100644
--- a/drivers/char/scx200_gpio.c
+++ b/drivers/char/scx200_gpio.c
@@ -63,7 +63,7 @@ static int scx200_gpio_release(struct inode *inode, struct file *file)
 }
 
 
-static struct file_operations scx200_gpio_fops = {
+static const struct file_operations scx200_gpio_fops = {
 	.owner   = THIS_MODULE,
 	.write   = nsc_gpio_write,
 	.read    = nsc_gpio_read,
diff --git a/drivers/char/snsc.c b/drivers/char/snsc.c
index 203240b..afc6eda 100644
--- a/drivers/char/snsc.c
+++ b/drivers/char/snsc.c
@@ -347,7 +347,7 @@ scdrv_poll(struct file *file, struct poll_table_struct *wait)
 	return mask;
 }
 
-static struct file_operations scdrv_fops = {
+static const struct file_operations scdrv_fops = {
 	.owner =	THIS_MODULE,
 	.read =		scdrv_read,
 	.write =	scdrv_write,
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index 45508a0..d4e434d 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -1106,7 +1106,7 @@ static int sonypi_misc_ioctl(struct inode *ip, struct file *fp,
 	return ret;
 }
 
-static struct file_operations sonypi_misc_fops = {
+static const struct file_operations sonypi_misc_fops = {
 	.owner		= THIS_MODULE,
 	.read		= sonypi_misc_read,
 	.poll		= sonypi_misc_poll,
diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c
index ed7b8ea..3beb220 100644
--- a/drivers/char/stallion.c
+++ b/drivers/char/stallion.c
@@ -707,7 +707,7 @@ static unsigned int	sc26198_baudtable[] = {
  *	Define the driver info for a user level control device. Used mainly
  *	to get at port stats - only not using the port device itself.
  */
-static struct file_operations	stl_fsiomem = {
+static const struct file_operations	stl_fsiomem = {
 	.owner		= THIS_MODULE,
 	.ioctl		= stl_memioctl,
 };
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index 45c193a..e1cd2bc 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -410,7 +410,7 @@ static struct real_driver sx_real_driver = {
  *
  */
 
-static struct file_operations sx_fw_fops = {
+static const struct file_operations sx_fw_fops = {
 	.owner		= THIS_MODULE,
 	.ioctl		= sx_fw_ioctl,
 };
diff --git a/drivers/char/tb0219.c b/drivers/char/tb0219.c
index a80c832..bb1bad4 100644
--- a/drivers/char/tb0219.c
+++ b/drivers/char/tb0219.c
@@ -255,7 +255,7 @@ static int tanbac_tb0219_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static struct file_operations tb0219_fops = {
+static const struct file_operations tb0219_fops = {
 	.owner		= THIS_MODULE,
 	.read		= tanbac_tb0219_read,
 	.write		= tanbac_tb0219_write,
diff --git a/drivers/char/tipar.c b/drivers/char/tipar.c
index e0633a1..d30dc09 100644
--- a/drivers/char/tipar.c
+++ b/drivers/char/tipar.c
@@ -381,7 +381,7 @@ tipar_ioctl(struct inode *inode, struct file *file,
 
 /* ----- kernel module registering ------------------------------------ */
 
-static struct file_operations tipar_fops = {
+static const struct file_operations tipar_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.read = tipar_read,
diff --git a/drivers/char/tlclk.c b/drivers/char/tlclk.c
index 952b829..d2c5ba4 100644
--- a/drivers/char/tlclk.c
+++ b/drivers/char/tlclk.c
@@ -247,7 +247,7 @@ static ssize_t tlclk_write(struct file *filp, const char __user *buf, size_t cou
 	return 0;
 }
 
-static struct file_operations tlclk_fops = {
+static const struct file_operations tlclk_fops = {
 	.read = tlclk_read,
 	.write = tlclk_write,
 	.open = tlclk_open,
diff --git a/drivers/char/toshiba.c b/drivers/char/toshiba.c
index e2fb234..dd36fd0 100644
--- a/drivers/char/toshiba.c
+++ b/drivers/char/toshiba.c
@@ -92,7 +92,7 @@ static int tosh_ioctl(struct inode *, struct file *, unsigned int,
 	unsigned long);
 
 
-static struct file_operations tosh_fops = {
+static const struct file_operations tosh_fops = {
 	.owner		= THIS_MODULE,
 	.ioctl		= tosh_ioctl,
 };
diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c
index 58a258c..ad8ffe4 100644
--- a/drivers/char/tpm/tpm_atmel.c
+++ b/drivers/char/tpm/tpm_atmel.c
@@ -116,7 +116,7 @@ static u8 tpm_atml_status(struct tpm_chip *chip)
 	return ioread8(chip->vendor.iobase + 1);
 }
 
-static struct file_operations atmel_ops = {
+static const struct file_operations atmel_ops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.open = tpm_open,
diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c
index adfff21..1353b5a 100644
--- a/drivers/char/tpm/tpm_infineon.c
+++ b/drivers/char/tpm/tpm_infineon.c
@@ -338,7 +338,7 @@ static struct attribute *inf_attrs[] = {
 
 static struct attribute_group inf_attr_grp = {.attrs = inf_attrs };
 
-static struct file_operations inf_ops = {
+static const struct file_operations inf_ops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.open = tpm_open,
diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c
index 4c8bc06..26287aa 100644
--- a/drivers/char/tpm/tpm_nsc.c
+++ b/drivers/char/tpm/tpm_nsc.c
@@ -226,7 +226,7 @@ static u8 tpm_nsc_status(struct tpm_chip *chip)
 	return inb(chip->vendor.base + NSC_STATUS);
 }
 
-static struct file_operations nsc_ops = {
+static const struct file_operations nsc_ops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.open = tpm_open,
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index abb0f2a..3232b19 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -330,7 +330,7 @@ out_err:
 	return rc;
 }
 
-static struct file_operations tis_ops = {
+static const struct file_operations tis_ops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.open = tpm_open,
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 615e934..6fb7795 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -912,7 +912,7 @@ static int hung_up_tty_ioctl(struct inode * inode, struct file * file,
 	return cmd == TIOCSPGRP ? -ENOTTY : -EIO;
 }
 
-static struct file_operations tty_fops = {
+static const struct file_operations tty_fops = {
 	.llseek		= no_llseek,
 	.read		= tty_read,
 	.write		= tty_write,
@@ -924,7 +924,7 @@ static struct file_operations tty_fops = {
 };
 
 #ifdef CONFIG_UNIX98_PTYS
-static struct file_operations ptmx_fops = {
+static const struct file_operations ptmx_fops = {
 	.llseek		= no_llseek,
 	.read		= tty_read,
 	.write		= tty_write,
@@ -936,7 +936,7 @@ static struct file_operations ptmx_fops = {
 };
 #endif
 
-static struct file_operations console_fops = {
+static const struct file_operations console_fops = {
 	.llseek		= no_llseek,
 	.read		= tty_read,
 	.write		= redirected_tty_write,
@@ -947,7 +947,7 @@ static struct file_operations console_fops = {
 	.fasync		= tty_fasync,
 };
 
-static struct file_operations hung_up_tty_fops = {
+static const struct file_operations hung_up_tty_fops = {
 	.llseek		= no_llseek,
 	.read		= hung_up_tty_read,
 	.write		= hung_up_tty_write,
diff --git a/drivers/char/vc_screen.c b/drivers/char/vc_screen.c
index 45e9bd8..a9247b5 100644
--- a/drivers/char/vc_screen.c
+++ b/drivers/char/vc_screen.c
@@ -465,7 +465,7 @@ vcs_open(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static struct file_operations vcs_fops = {
+static const struct file_operations vcs_fops = {
 	.llseek		= vcs_lseek,
 	.read		= vcs_read,
 	.write		= vcs_write,
diff --git a/drivers/char/viotape.c b/drivers/char/viotape.c
index 7d42c8e..b72b204 100644
--- a/drivers/char/viotape.c
+++ b/drivers/char/viotape.c
@@ -292,7 +292,7 @@ static int proc_viotape_open(struct inode *inode, struct file *file)
 	return single_open(file, proc_viotape_show, NULL);
 }
 
-static struct file_operations proc_viotape_operations = {
+static const struct file_operations proc_viotape_operations = {
 	.open		= proc_viotape_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
diff --git a/drivers/char/vr41xx_giu.c b/drivers/char/vr41xx_giu.c
index 073da48..1b9b1f1 100644
--- a/drivers/char/vr41xx_giu.c
+++ b/drivers/char/vr41xx_giu.c
@@ -605,7 +605,7 @@ static int gpio_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static struct file_operations gpio_fops = {
+static const struct file_operations gpio_fops = {
 	.owner		= THIS_MODULE,
 	.read		= gpio_read,
 	.write		= gpio_write,
diff --git a/drivers/char/watchdog/acquirewdt.c b/drivers/char/watchdog/acquirewdt.c
index 7289f4a..c77fe3c 100644
--- a/drivers/char/watchdog/acquirewdt.c
+++ b/drivers/char/watchdog/acquirewdt.c
@@ -231,7 +231,7 @@ static int acq_notify_sys(struct notifier_block *this, unsigned long code,
  *	Kernel Interfaces
  */
 
-static struct file_operations acq_fops = {
+static const struct file_operations acq_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= acq_write,
diff --git a/drivers/char/watchdog/advantechwdt.c b/drivers/char/watchdog/advantechwdt.c
index 194a3fd..8069be4 100644
--- a/drivers/char/watchdog/advantechwdt.c
+++ b/drivers/char/watchdog/advantechwdt.c
@@ -227,7 +227,7 @@ advwdt_notify_sys(struct notifier_block *this, unsigned long code,
  *	Kernel Interfaces
  */
 
-static struct file_operations advwdt_fops = {
+static const struct file_operations advwdt_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= advwdt_write,
diff --git a/drivers/char/watchdog/alim1535_wdt.c b/drivers/char/watchdog/alim1535_wdt.c
index 8338ca3..c5c94e4 100644
--- a/drivers/char/watchdog/alim1535_wdt.c
+++ b/drivers/char/watchdog/alim1535_wdt.c
@@ -362,7 +362,7 @@ static int __init ali_find_watchdog(void)
  *	Kernel Interfaces
  */
 
-static struct file_operations ali_fops = {
+static const struct file_operations ali_fops = {
 	.owner =	THIS_MODULE,
 	.llseek =	no_llseek,
 	.write =	ali_write,
diff --git a/drivers/char/watchdog/alim7101_wdt.c b/drivers/char/watchdog/alim7101_wdt.c
index c05ac18..ffd7684 100644
--- a/drivers/char/watchdog/alim7101_wdt.c
+++ b/drivers/char/watchdog/alim7101_wdt.c
@@ -281,7 +281,7 @@ static int fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd, u
 	}
 }
 
-static struct file_operations wdt_fops = {
+static const struct file_operations wdt_fops = {
 	.owner=		THIS_MODULE,
 	.llseek=	no_llseek,
 	.write=		fop_write,
diff --git a/drivers/char/watchdog/at91_wdt.c b/drivers/char/watchdog/at91_wdt.c
index f61dedc..cc26671 100644
--- a/drivers/char/watchdog/at91_wdt.c
+++ b/drivers/char/watchdog/at91_wdt.c
@@ -183,7 +183,7 @@ static ssize_t at91_wdt_write(struct file *file, const char *data, size_t len, l
 
 /* ......................................................................... */
 
-static struct file_operations at91wdt_fops = {
+static const struct file_operations at91wdt_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.ioctl		= at91_wdt_ioctl,
diff --git a/drivers/char/watchdog/booke_wdt.c b/drivers/char/watchdog/booke_wdt.c
index 537f5c6..e3cefc5 100644
--- a/drivers/char/watchdog/booke_wdt.c
+++ b/drivers/char/watchdog/booke_wdt.c
@@ -145,7 +145,7 @@ static int booke_wdt_open (struct inode *inode, struct file *file)
 	return 0;
 }
 
-static struct file_operations booke_wdt_fops = {
+static const struct file_operations booke_wdt_fops = {
   .owner = THIS_MODULE,
   .llseek = no_llseek,
   .write = booke_wdt_write,
diff --git a/drivers/char/watchdog/cpu5wdt.c b/drivers/char/watchdog/cpu5wdt.c
index 3e8410b..04c7e49 100644
--- a/drivers/char/watchdog/cpu5wdt.c
+++ b/drivers/char/watchdog/cpu5wdt.c
@@ -198,7 +198,7 @@ static ssize_t cpu5wdt_write(struct file *file, const char __user *buf, size_t c
 	return count;
 }
 
-static struct file_operations cpu5wdt_fops = {
+static const struct file_operations cpu5wdt_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.ioctl		= cpu5wdt_ioctl,
diff --git a/drivers/char/watchdog/ep93xx_wdt.c b/drivers/char/watchdog/ep93xx_wdt.c
index 9021dbb..77c8a95 100644
--- a/drivers/char/watchdog/ep93xx_wdt.c
+++ b/drivers/char/watchdog/ep93xx_wdt.c
@@ -187,7 +187,7 @@ static int ep93xx_wdt_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static struct file_operations ep93xx_wdt_fops = {
+static const struct file_operations ep93xx_wdt_fops = {
 	.owner		= THIS_MODULE,
 	.write		= ep93xx_wdt_write,
 	.ioctl		= ep93xx_wdt_ioctl,
diff --git a/drivers/char/watchdog/eurotechwdt.c b/drivers/char/watchdog/eurotechwdt.c
index ea670de..62dbccb 100644
--- a/drivers/char/watchdog/eurotechwdt.c
+++ b/drivers/char/watchdog/eurotechwdt.c
@@ -356,7 +356,7 @@ static int eurwdt_notify_sys(struct notifier_block *this, unsigned long code,
  */
 
 
-static struct file_operations eurwdt_fops = {
+static const struct file_operations eurwdt_fops = {
 	.owner	= THIS_MODULE,
 	.llseek	= no_llseek,
 	.write	= eurwdt_write,
diff --git a/drivers/char/watchdog/i6300esb.c b/drivers/char/watchdog/i6300esb.c
index 93785f1..870539e 100644
--- a/drivers/char/watchdog/i6300esb.c
+++ b/drivers/char/watchdog/i6300esb.c
@@ -337,7 +337,7 @@ static int esb_notify_sys (struct notifier_block *this, unsigned long code, void
  *      Kernel Interfaces
  */
 
-static struct file_operations esb_fops = {
+static const struct file_operations esb_fops = {
         .owner =        THIS_MODULE,
         .llseek =       no_llseek,
         .write =        esb_write,
diff --git a/drivers/char/watchdog/i8xx_tco.c b/drivers/char/watchdog/i8xx_tco.c
index bfbdbbf..8385dd3 100644
--- a/drivers/char/watchdog/i8xx_tco.c
+++ b/drivers/char/watchdog/i8xx_tco.c
@@ -378,7 +378,7 @@ static int i8xx_tco_notify_sys (struct notifier_block *this, unsigned long code,
  *	Kernel Interfaces
  */
 
-static struct file_operations i8xx_tco_fops = {
+static const struct file_operations i8xx_tco_fops = {
 	.owner =	THIS_MODULE,
 	.llseek =	no_llseek,
 	.write =	i8xx_tco_write,
diff --git a/drivers/char/watchdog/ib700wdt.c b/drivers/char/watchdog/ib700wdt.c
index a2e53c7..fd95f73 100644
--- a/drivers/char/watchdog/ib700wdt.c
+++ b/drivers/char/watchdog/ib700wdt.c
@@ -255,7 +255,7 @@ ibwdt_notify_sys(struct notifier_block *this, unsigned long code,
  *	Kernel Interfaces
  */
 
-static struct file_operations ibwdt_fops = {
+static const struct file_operations ibwdt_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= ibwdt_write,
diff --git a/drivers/char/watchdog/ibmasr.c b/drivers/char/watchdog/ibmasr.c
index b0741cb..26ceee7 100644
--- a/drivers/char/watchdog/ibmasr.c
+++ b/drivers/char/watchdog/ibmasr.c
@@ -322,7 +322,7 @@ static int asr_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static struct file_operations asr_fops = {
+static const struct file_operations asr_fops = {
 	.owner =	THIS_MODULE,
 	.llseek	=	no_llseek,
 	.write =	asr_write,
diff --git a/drivers/char/watchdog/indydog.c b/drivers/char/watchdog/indydog.c
index d387979..dacc1c2 100644
--- a/drivers/char/watchdog/indydog.c
+++ b/drivers/char/watchdog/indydog.c
@@ -154,7 +154,7 @@ static int indydog_notify_sys(struct notifier_block *this, unsigned long code, v
 	return NOTIFY_DONE;
 }
 
-static struct file_operations indydog_fops = {
+static const struct file_operations indydog_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= indydog_write,
diff --git a/drivers/char/watchdog/ixp2000_wdt.c b/drivers/char/watchdog/ixp2000_wdt.c
index aa29a7d..6929088 100644
--- a/drivers/char/watchdog/ixp2000_wdt.c
+++ b/drivers/char/watchdog/ixp2000_wdt.c
@@ -168,7 +168,7 @@ ixp2000_wdt_release(struct inode *inode, struct file *file)
 }
 
 
-static struct file_operations ixp2000_wdt_fops =
+static const struct file_operations ixp2000_wdt_fops =
 {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
diff --git a/drivers/char/watchdog/ixp4xx_wdt.c b/drivers/char/watchdog/ixp4xx_wdt.c
index e6a3fe8..9db5cf2 100644
--- a/drivers/char/watchdog/ixp4xx_wdt.c
+++ b/drivers/char/watchdog/ixp4xx_wdt.c
@@ -162,7 +162,7 @@ ixp4xx_wdt_release(struct inode *inode, struct file *file)
 }
 
 
-static struct file_operations ixp4xx_wdt_fops =
+static const struct file_operations ixp4xx_wdt_fops =
 {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
diff --git a/drivers/char/watchdog/machzwd.c b/drivers/char/watchdog/machzwd.c
index b67b487..23734e0 100644
--- a/drivers/char/watchdog/machzwd.c
+++ b/drivers/char/watchdog/machzwd.c
@@ -388,7 +388,7 @@ static int zf_notify_sys(struct notifier_block *this, unsigned long code,
 
 
-static struct file_operations zf_fops = {
+static const struct file_operations zf_fops = {
 	.owner          = THIS_MODULE,
 	.llseek         = no_llseek,
 	.write          = zf_write,
diff --git a/drivers/char/watchdog/mixcomwd.c b/drivers/char/watchdog/mixcomwd.c
index 433c27f..ae94332 100644
--- a/drivers/char/watchdog/mixcomwd.c
+++ b/drivers/char/watchdog/mixcomwd.c
@@ -190,7 +190,7 @@ static int mixcomwd_ioctl(struct inode *inode, struct file *file,
 	return 0;
 }
 
-static struct file_operations mixcomwd_fops=
+static const struct file_operations mixcomwd_fops=
 {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
diff --git a/drivers/char/watchdog/mpc83xx_wdt.c b/drivers/char/watchdog/mpc83xx_wdt.c
index dac1381..a480903 100644
--- a/drivers/char/watchdog/mpc83xx_wdt.c
+++ b/drivers/char/watchdog/mpc83xx_wdt.c
@@ -129,7 +129,7 @@ static int mpc83xx_wdt_ioctl(struct inode *inode, struct file *file,
 	}
 }
 
-static struct file_operations mpc83xx_wdt_fops = {
+static const struct file_operations mpc83xx_wdt_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= mpc83xx_wdt_write,
diff --git a/drivers/char/watchdog/mpc8xx_wdt.c b/drivers/char/watchdog/mpc8xx_wdt.c
index 11f0ccd..35dd9e6 100644
--- a/drivers/char/watchdog/mpc8xx_wdt.c
+++ b/drivers/char/watchdog/mpc8xx_wdt.c
@@ -132,7 +132,7 @@ static int mpc8xx_wdt_ioctl(struct inode *inode, struct file *file,
 	return 0;
 }
 
-static struct file_operations mpc8xx_wdt_fops = {
+static const struct file_operations mpc8xx_wdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = mpc8xx_wdt_write,
diff --git a/drivers/char/watchdog/mpcore_wdt.c b/drivers/char/watchdog/mpcore_wdt.c
index c2d492c..54b3c56 100644
--- a/drivers/char/watchdog/mpcore_wdt.c
+++ b/drivers/char/watchdog/mpcore_wdt.c
@@ -297,7 +297,7 @@ static void mpcore_wdt_shutdown(struct platform_device *dev)
 /*
  *	Kernel Interfaces
  */
-static struct file_operations mpcore_wdt_fops = {
+static const struct file_operations mpcore_wdt_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= mpcore_wdt_write,
diff --git a/drivers/char/watchdog/mv64x60_wdt.c b/drivers/char/watchdog/mv64x60_wdt.c
index 20a6cbb..5c8fab3 100644
--- a/drivers/char/watchdog/mv64x60_wdt.c
+++ b/drivers/char/watchdog/mv64x60_wdt.c
@@ -166,7 +166,7 @@ static int mv64x60_wdt_ioctl(struct inode *inode, struct file *file,
 	return 0;
 }
 
-static struct file_operations mv64x60_wdt_fops = {
+static const struct file_operations mv64x60_wdt_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = mv64x60_wdt_write,
diff --git a/drivers/char/watchdog/pcwd.c b/drivers/char/watchdog/pcwd.c
index 6d44ca6..cd7d1b6 100644
--- a/drivers/char/watchdog/pcwd.c
+++ b/drivers/char/watchdog/pcwd.c
@@ -740,7 +740,7 @@ static int pcwd_notify_sys(struct notifier_block *this, unsigned long code, void
  *	Kernel Interfaces
  */
 
-static struct file_operations pcwd_fops = {
+static const struct file_operations pcwd_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= pcwd_write,
@@ -755,7 +755,7 @@ static struct miscdevice pcwd_miscdev = {
 	.fops =		&pcwd_fops,
 };
 
-static struct file_operations pcwd_temp_fops = {
+static const struct file_operations pcwd_temp_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.read		= pcwd_temp_read,
diff --git a/drivers/char/watchdog/pcwd_pci.c b/drivers/char/watchdog/pcwd_pci.c
index 1f40ece..c7cfd6d 100644
--- a/drivers/char/watchdog/pcwd_pci.c
+++ b/drivers/char/watchdog/pcwd_pci.c
@@ -625,7 +625,7 @@ static int pcipcwd_notify_sys(struct notifier_block *this, unsigned long code, v
  *	Kernel Interfaces
  */
 
-static struct file_operations pcipcwd_fops = {
+static const struct file_operations pcipcwd_fops = {
 	.owner =	THIS_MODULE,
 	.llseek =	no_llseek,
 	.write =	pcipcwd_write,
@@ -640,7 +640,7 @@ static struct miscdevice pcipcwd_miscdev = {
 	.fops =		&pcipcwd_fops,
 };
 
-static struct file_operations pcipcwd_temp_fops = {
+static const struct file_operations pcipcwd_temp_fops = {
 	.owner =	THIS_MODULE,
 	.llseek =	no_llseek,
 	.read =		pcipcwd_temp_read,
diff --git a/drivers/char/watchdog/pcwd_usb.c b/drivers/char/watchdog/pcwd_usb.c
index 92bf8c1..b7ae73d 100644
--- a/drivers/char/watchdog/pcwd_usb.c
+++ b/drivers/char/watchdog/pcwd_usb.c
@@ -523,7 +523,7 @@ static int usb_pcwd_notify_sys(struct notifier_block *this, unsigned long code,
  *	Kernel Interfaces
  */
 
-static struct file_operations usb_pcwd_fops = {
+static const struct file_operations usb_pcwd_fops = {
 	.owner =	THIS_MODULE,
 	.llseek =	no_llseek,
 	.write =	usb_pcwd_write,
@@ -538,7 +538,7 @@ static struct miscdevice usb_pcwd_miscdev = {
 	.fops =		&usb_pcwd_fops,
 };
 
-static struct file_operations usb_pcwd_temperature_fops = {
+static const struct file_operations usb_pcwd_temperature_fops = {
 	.owner =	THIS_MODULE,
 	.llseek =	no_llseek,
 	.read =		usb_pcwd_temperature_read,
diff --git a/drivers/char/watchdog/s3c2410_wdt.c b/drivers/char/watchdog/s3c2410_wdt.c
index f267dad..be978e8 100644
--- a/drivers/char/watchdog/s3c2410_wdt.c
+++ b/drivers/char/watchdog/s3c2410_wdt.c
@@ -319,7 +319,7 @@ static int s3c2410wdt_ioctl(struct inode *inode, struct file *file,
 
 /* kernel interface */
 
-static struct file_operations s3c2410wdt_fops = {
+static const struct file_operations s3c2410wdt_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= s3c2410wdt_write,
diff --git a/drivers/char/watchdog/sa1100_wdt.c b/drivers/char/watchdog/sa1100_wdt.c
index b22e95c..1fc16d9 100644
--- a/drivers/char/watchdog/sa1100_wdt.c
+++ b/drivers/char/watchdog/sa1100_wdt.c
@@ -135,7 +135,7 @@ static int sa1100dog_ioctl(struct inode *inode, struct file *file,
 	return ret;
 }
 
-static struct file_operations sa1100dog_fops =
+static const struct file_operations sa1100dog_fops =
 {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
diff --git a/drivers/char/watchdog/sbc60xxwdt.c b/drivers/char/watchdog/sbc60xxwdt.c
index ed0bd55..4663c2f 100644
--- a/drivers/char/watchdog/sbc60xxwdt.c
+++ b/drivers/char/watchdog/sbc60xxwdt.c
@@ -282,7 +282,7 @@ static int fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 	}
 }
 
-static struct file_operations wdt_fops = {
+static const struct file_operations wdt_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= fop_write,
diff --git a/drivers/char/watchdog/sbc8360.c b/drivers/char/watchdog/sbc8360.c
index 6562aa9..1035be5 100644
--- a/drivers/char/watchdog/sbc8360.c
+++ b/drivers/char/watchdog/sbc8360.c
@@ -305,7 +305,7 @@ static int sbc8360_notify_sys(struct notifier_block *this, unsigned long code,
  *	Kernel Interfaces
  */
 
-static struct file_operations sbc8360_fops = {
+static const struct file_operations sbc8360_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.write = sbc8360_write,
diff --git a/drivers/char/watchdog/sbc_epx_c3.c b/drivers/char/watchdog/sbc_epx_c3.c
index 09867fa..bfc475d 100644
--- a/drivers/char/watchdog/sbc_epx_c3.c
+++ b/drivers/char/watchdog/sbc_epx_c3.c
@@ -154,7 +154,7 @@ static int epx_c3_notify_sys(struct notifier_block *this, unsigned long code,
 	return NOTIFY_DONE;
 }
 
-static struct file_operations epx_c3_fops = {
+static const struct file_operations epx_c3_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= epx_c3_write,
diff --git a/drivers/char/watchdog/sc1200wdt.c b/drivers/char/watchdog/sc1200wdt.c
index 78ef633..7c3cf29 100644
--- a/drivers/char/watchdog/sc1200wdt.c
+++ b/drivers/char/watchdog/sc1200wdt.c
@@ -292,7 +292,7 @@ static struct notifier_block sc1200wdt_notifier =
 	.notifier_call =	sc1200wdt_notify_sys,
 };
 
-static struct file_operations sc1200wdt_fops =
+static const struct file_operations sc1200wdt_fops =
 {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
diff --git a/drivers/char/watchdog/sc520_wdt.c b/drivers/char/watchdog/sc520_wdt.c
index 4ee9974..2c7c9db 100644
--- a/drivers/char/watchdog/sc520_wdt.c
+++ b/drivers/char/watchdog/sc520_wdt.c
@@ -336,7 +336,7 @@ static int fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 	}
 }
 
-static struct file_operations wdt_fops = {
+static const struct file_operations wdt_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= fop_write,
diff --git a/drivers/char/watchdog/scx200_wdt.c b/drivers/char/watchdog/scx200_wdt.c
index c0b4754..c561299 100644
--- a/drivers/char/watchdog/scx200_wdt.c
+++ b/drivers/char/watchdog/scx200_wdt.c
@@ -194,7 +194,7 @@ static int scx200_wdt_ioctl(struct inode *inode, struct file *file,
 	}
 }
 
-static struct file_operations scx200_wdt_fops = {
+static const struct file_operations scx200_wdt_fops = {
 	.owner	 = THIS_MODULE,
 	.llseek	 = no_llseek,
 	.write   = scx200_wdt_write,
diff --git a/drivers/char/watchdog/shwdt.c b/drivers/char/watchdog/shwdt.c
index 803701b..1355038 100644
--- a/drivers/char/watchdog/shwdt.c
+++ b/drivers/char/watchdog/shwdt.c
@@ -344,7 +344,7 @@ static int sh_wdt_notify_sys(struct notifier_block *this,
 	return NOTIFY_DONE;
 }
 
-static struct file_operations sh_wdt_fops = {
+static const struct file_operations sh_wdt_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= sh_wdt_write,
diff --git a/drivers/char/watchdog/softdog.c b/drivers/char/watchdog/softdog.c
index 79ce5c6..ef8da51 100644
--- a/drivers/char/watchdog/softdog.c
+++ b/drivers/char/watchdog/softdog.c
@@ -243,7 +243,7 @@ static int softdog_notify_sys(struct notifier_block *this, unsigned long code,
  *	Kernel Interfaces
  */
 
-static struct file_operations softdog_fops = {
+static const struct file_operations softdog_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= softdog_write,
diff --git a/drivers/char/watchdog/w83627hf_wdt.c b/drivers/char/watchdog/w83627hf_wdt.c
index d15ca9a..13f16d4 100644
--- a/drivers/char/watchdog/w83627hf_wdt.c
+++ b/drivers/char/watchdog/w83627hf_wdt.c
@@ -274,7 +274,7 @@ wdt_notify_sys(struct notifier_block *this, unsigned long code,
  *	Kernel Interfaces
  */
 
-static struct file_operations wdt_fops = {
+static const struct file_operations wdt_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= wdt_write,
diff --git a/drivers/char/watchdog/w83877f_wdt.c b/drivers/char/watchdog/w83877f_wdt.c
index 52a8bd0..ccf6c09 100644
--- a/drivers/char/watchdog/w83877f_wdt.c
+++ b/drivers/char/watchdog/w83877f_wdt.c
@@ -299,7 +299,7 @@ static int fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 	}
 }
 
-static struct file_operations wdt_fops = {
+static const struct file_operations wdt_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= fop_write,
diff --git a/drivers/char/watchdog/w83977f_wdt.c b/drivers/char/watchdog/w83977f_wdt.c
index c31849e..98f4e17 100644
--- a/drivers/char/watchdog/w83977f_wdt.c
+++ b/drivers/char/watchdog/w83977f_wdt.c
@@ -449,7 +449,7 @@ static int wdt_notify_sys(struct notifier_block *this, unsigned long code,
 	return NOTIFY_DONE;
 }
 
-static struct file_operations wdt_fops=
+static const struct file_operations wdt_fops=
 {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
diff --git a/drivers/char/watchdog/wafer5823wdt.c b/drivers/char/watchdog/wafer5823wdt.c
index 7cf6c9b..2bb6a9d 100644
--- a/drivers/char/watchdog/wafer5823wdt.c
+++ b/drivers/char/watchdog/wafer5823wdt.c
@@ -222,7 +222,7 @@ static int wafwdt_notify_sys(struct notifier_block *this, unsigned long code, vo
  *	Kernel Interfaces
  */
 
-static struct file_operations wafwdt_fops = {
+static const struct file_operations wafwdt_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= wafwdt_write,
diff --git a/drivers/char/watchdog/wdrtas.c b/drivers/char/watchdog/wdrtas.c
index 3a462c3..5c38cdf 100644
--- a/drivers/char/watchdog/wdrtas.c
+++ b/drivers/char/watchdog/wdrtas.c
@@ -520,7 +520,7 @@ wdrtas_reboot(struct notifier_block *this, unsigned long code, void *ptr)
 
 /*** initialization stuff */
 
-static struct file_operations wdrtas_fops = {
+static const struct file_operations wdrtas_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= wdrtas_write,
@@ -535,7 +535,7 @@ static struct miscdevice wdrtas_miscdev = {
 	.fops =		&wdrtas_fops,
 };
 
-static struct file_operations wdrtas_temp_fops = {
+static const struct file_operations wdrtas_temp_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.read		= wdrtas_temp_read,
diff --git a/drivers/char/watchdog/wdt.c b/drivers/char/watchdog/wdt.c
index a1d972c..70be81e 100644
--- a/drivers/char/watchdog/wdt.c
+++ b/drivers/char/watchdog/wdt.c
@@ -494,7 +494,7 @@ static int wdt_notify_sys(struct notifier_block *this, unsigned long code,
  */
 
 
-static struct file_operations wdt_fops = {
+static const struct file_operations wdt_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= wdt_write,
@@ -510,7 +510,7 @@ static struct miscdevice wdt_miscdev = {
 };
 
 #ifdef CONFIG_WDT_501
-static struct file_operations wdt_temp_fops = {
+static const struct file_operations wdt_temp_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.read		= wdt_temp_read,
diff --git a/drivers/char/watchdog/wdt285.c b/drivers/char/watchdog/wdt285.c
index 52825a1..6555fb8 100644
--- a/drivers/char/watchdog/wdt285.c
+++ b/drivers/char/watchdog/wdt285.c
@@ -178,7 +178,7 @@ watchdog_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 	return ret;
 }
 
-static struct file_operations watchdog_fops = {
+static const struct file_operations watchdog_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= watchdog_write,
diff --git a/drivers/char/watchdog/wdt977.c b/drivers/char/watchdog/wdt977.c
index 3cde2b9..a0935bc 100644
--- a/drivers/char/watchdog/wdt977.c
+++ b/drivers/char/watchdog/wdt977.c
@@ -418,7 +418,7 @@ static int wdt977_notify_sys(struct notifier_block *this, unsigned long code,
 	return NOTIFY_DONE;
 }
 
-static struct file_operations wdt977_fops=
+static const struct file_operations wdt977_fops=
 {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
diff --git a/drivers/char/watchdog/wdt_pci.c b/drivers/char/watchdog/wdt_pci.c
index 7529ecd..5918ca2 100644
--- a/drivers/char/watchdog/wdt_pci.c
+++ b/drivers/char/watchdog/wdt_pci.c
@@ -543,7 +543,7 @@ static int wdtpci_notify_sys(struct notifier_block *this, unsigned long code,
  */
 
 
-static struct file_operations wdtpci_fops = {
+static const struct file_operations wdtpci_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.write		= wdtpci_write,
@@ -559,7 +559,7 @@ static struct miscdevice wdtpci_miscdev = {
 };
 
 #ifdef CONFIG_WDT_501_PCI
-static struct file_operations wdtpci_temp_fops = {
+static const struct file_operations wdtpci_temp_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.read		= wdtpci_temp_read,
-- 
cgit v0.10.2


From 6b8f3ef454d60e0c7ec42400400dc0fe7b5a4f7d Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 3 Jul 2006 00:24:21 -0700
Subject: [PATCH] sparc i8042 build fix

drivers/input/serio/i8042-sparcio.h:91: error: '__mod_of_device_table' aliased to undefined symbol 'i8042_match'

Cc: Dmitry Torokhov <dtor_core@ameritech.net>
DESC
sparc: resource warning fix
EDESC
From: Andrew Morton <akpm@osdl.org>

sound/sparc/amd7930.c: In function 'amd7930_attach_common':
sound/sparc/amd7930.c:1040: warning: format '%08lx' expects type 'long unsigned int', but argument 5 has type 'resource_size_t'

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/input/serio/i8042-sparcio.h b/drivers/input/serio/i8042-sparcio.h
index 7d9fafe..54adba2 100644
--- a/drivers/input/serio/i8042-sparcio.h
+++ b/drivers/input/serio/i8042-sparcio.h
@@ -88,7 +88,7 @@ static struct of_device_id sparc_i8042_match[] = {
 	},
 	{},
 };
-MODULE_DEVICE_TABLE(of, i8042_match);
+MODULE_DEVICE_TABLE(of, sparc_i8042_match);
 
 static struct of_platform_driver sparc_i8042_driver = {
 	.name		= "i8042",
-- 
cgit v0.10.2


From 5863aa651bd79a6bb15972894306923db088e71c Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 3 Jul 2006 00:24:22 -0700
Subject: [PATCH] sparc: resource warning fix

sound/sparc/amd7930.c: In function 'amd7930_attach_common':
sound/sparc/amd7930.c:1040: warning: format '%08lx' expects type 'long unsigned int', but argument 5 has type 'resource_size_t'

sound/sparc/cs4231.c:2043: warning: format '%016lx' expects type 'long unsigned int', but argument 5 has type 'resource_size_t'

sound/sparc/dbri.c: In function 'dbri_attach':
sound/sparc/dbri.c:2650: warning: format '%016lx' expects type 'long unsigned int', but argument 5 has type 'resource_size_t'

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/sparc/amd7930.c b/sound/sparc/amd7930.c
index db3e22e..2bd8e40 100644
--- a/sound/sparc/amd7930.c
+++ b/sound/sparc/amd7930.c
@@ -1033,10 +1033,10 @@ static int __init amd7930_attach_common(struct resource *rp, int irq)
 
 	strcpy(card->driver, "AMD7930");
 	strcpy(card->shortname, "Sun AMD7930");
-	sprintf(card->longname, "%s at 0x%02lx:0x%08lx, irq %d",
+	sprintf(card->longname, "%s at 0x%02lx:0x%08Lx, irq %d",
 		card->shortname,
 		rp->flags & 0xffL,
-		rp->start,
+		(unsigned long long)rp->start,
 		irq);
 
 	if ((err = snd_amd7930_create(card, rp,
diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c
index 5018fcf..9a06c3bd 100644
--- a/sound/sparc/cs4231.c
+++ b/sound/sparc/cs4231.c
@@ -2036,7 +2036,7 @@ static int __init cs4231_sbus_attach(struct sbus_dev *sdev)
 	if (err)
 		return err;
 
-	sprintf(card->longname, "%s at 0x%02lx:0x%016lx, irq %d",
+	sprintf(card->longname, "%s at 0x%02lx:0x%016Lx, irq %d",
 		card->shortname,
 		rp->flags & 0xffL,
 		(unsigned long long)rp->start,
diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 59a02a0..f3ae6e2 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -2645,7 +2645,7 @@ static int __init dbri_attach(int prom_node, struct sbus_dev *sdev)
 	strcpy(card->driver, "DBRI");
 	strcpy(card->shortname, "Sun DBRI");
 	rp = &sdev->resource[0];
-	sprintf(card->longname, "%s at 0x%02lx:0x%016lx, irq %d",
+	sprintf(card->longname, "%s at 0x%02lx:0x%016Lx, irq %d",
 		card->shortname,
 		rp->flags & 0xffL, (unsigned long long)rp->start, irq.pri);
 
-- 
cgit v0.10.2


From 3e541a4ae534a7e59ad464af9abea382b3035724 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:23 -0700
Subject: [PATCH] lockdep: floppy.c irq release fix

The lock validator triggered a number of bugs in the floppy driver, all
related to the floppy driver allocating and freeing irq and dma resources from
interrupt context.  The initial solution was to use schedule_work() to push
this into process context, but this caused further problems: for example the
current floppy driver in -mm2 is totally broken and all floppy commands time
out with an error.  (as reported by Barry K.  Nathan)

This patch tries another solution: simply get rid of all that dynamic IRQ and
DMA allocation/freeing.  I doubt it made much sense back in the heydays of
floppies (if two devices raced for DMA or IRQ resources then we didnt handle
those cases too gracefully anyway), and today it makes near zero sense.

So the new code does the simplest and most straightforward thing: allocate IRQ
and DMA resources at module init time, and free them at module removal time.
Dont try to release while the driver is operational.  This, besides making the
floppy driver functional again has an added bonus, floppy IRQ stats are
finally persistent and visible in /proc/interrupts:

  6: 63 XT-PIC-level floppy

Besides normal floppy IO i have also tested IO error handling, motor-off
timeouts, etc.  - and everything seems to be working fine.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 0242cbb..5109fa3 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -249,18 +249,6 @@ static int irqdma_allocated;
 #include <linux/cdrom.h>	/* for the compatibility eject ioctl */
 #include <linux/completion.h>
 
-/*
- * Interrupt freeing also means /proc VFS work - dont do it
- * from interrupt context. We push this work into keventd:
- */
-static void fd_free_irq_fn(void *data)
-{
-	fd_free_irq();
-}
-
-static DECLARE_WORK(fd_free_irq_work, fd_free_irq_fn, NULL);
-
-
 static struct request *current_req;
 static struct request_queue *floppy_queue;
 static void do_fd_request(request_queue_t * q);
@@ -826,15 +814,6 @@ static int set_dor(int fdc, char mask, char data)
 			UDRS->select_date = jiffies;
 		}
 	}
-	/*
-	 *      We should propagate failures to grab the resources back
-	 *      nicely from here. Actually we ought to rewrite the fd
-	 *      driver some day too.
-	 */
-	if (newdor & FLOPPY_MOTOR_MASK)
-		floppy_grab_irq_and_dma();
-	if (olddor & FLOPPY_MOTOR_MASK)
-		floppy_release_irq_and_dma();
 	return olddor;
 }
 
@@ -892,8 +871,6 @@ static int _lock_fdc(int drive, int interruptible, int line)
 		       line);
 		return -1;
 	}
-	if (floppy_grab_irq_and_dma() == -1)
-		return -EBUSY;
 
 	if (test_and_set_bit(0, &fdc_busy)) {
 		DECLARE_WAITQUEUE(wait, current);
@@ -915,6 +892,8 @@ static int _lock_fdc(int drive, int interruptible, int line)
 
 		set_current_state(TASK_RUNNING);
 		remove_wait_queue(&fdc_wait, &wait);
+
+		flush_scheduled_work();
 	}
 	command_status = FD_COMMAND_NONE;
 
@@ -948,7 +927,6 @@ static inline void unlock_fdc(void)
 	if (elv_next_request(floppy_queue))
 		do_fd_request(floppy_queue);
 	spin_unlock_irqrestore(&floppy_lock, flags);
-	floppy_release_irq_and_dma();
 	wake_up(&fdc_wait);
 }
 
@@ -3694,8 +3672,8 @@ static int floppy_release(struct inode *inode, struct file *filp)
 	}
 	if (!UDRS->fd_ref)
 		opened_bdev[drive] = NULL;
-	floppy_release_irq_and_dma();
 	mutex_unlock(&open_lock);
+
 	return 0;
 }
 
@@ -3726,9 +3704,6 @@ static int floppy_open(struct inode *inode, struct file *filp)
 	if (UDRS->fd_ref == -1 || (UDRS->fd_ref && (filp->f_flags & O_EXCL)))
 		goto out2;
 
-	if (floppy_grab_irq_and_dma())
-		goto out2;
-
 	if (filp->f_flags & O_EXCL)
 		UDRS->fd_ref = -1;
 	else
@@ -3805,7 +3780,6 @@ out:
 		UDRS->fd_ref--;
 	if (!UDRS->fd_ref)
 		opened_bdev[drive] = NULL;
-	floppy_release_irq_and_dma();
 out2:
 	mutex_unlock(&open_lock);
 	return res;
@@ -3822,14 +3796,9 @@ static int check_floppy_change(struct gendisk *disk)
 		return 1;
 
 	if (time_after(jiffies, UDRS->last_checked + UDP->checkfreq)) {
-		if (floppy_grab_irq_and_dma()) {
-			return 1;
-		}
-
 		lock_fdc(drive, 0);
 		poll_drive(0, 0);
 		process_fd_request();
-		floppy_release_irq_and_dma();
 	}
 
 	if (UTESTF(FD_DISK_CHANGED) ||
@@ -4346,7 +4315,6 @@ static int __init floppy_init(void)
 	fdc = 0;
 	del_timer(&fd_timeout);
 	current_drive = 0;
-	floppy_release_irq_and_dma();
 	initialising = 0;
 	if (have_no_fdc) {
 		DPRINT("no floppy controllers found\n");
@@ -4504,7 +4472,7 @@ static void floppy_release_irq_and_dma(void)
 	if (irqdma_allocated) {
 		fd_disable_dma();
 		fd_free_dma();
-		schedule_work(&fd_free_irq_work);
+		fd_free_irq();
 		irqdma_allocated = 0;
 	}
 	set_dor(0, ~0, 8);
@@ -4600,8 +4568,6 @@ void cleanup_module(void)
 	/* eject disk, if any */
 	fd_eject(0);
 
-	flush_scheduled_work();		/* fd_free_irq() might be pending */
-
 	wait_for_completion(&device_release);
 }
 
-- 
cgit v0.10.2


From 93e028148fce0be9787de7fb097fa4c8582b78c1 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 3 Jul 2006 00:24:24 -0700
Subject: [PATCH] lockdep: console_init after local_irq_enable()

s390's console_init must enable interrupts, but early_boot_irqs_on() gets
called later.  To avoid problems move console_init() after local_irq_enable().

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/init/main.c b/init/main.c
index c8960b9..d604dfe 100644
--- a/init/main.c
+++ b/init/main.c
@@ -498,6 +498,10 @@ asmlinkage void __init start_kernel(void)
 	softirq_init();
 	timekeeping_init();
 	time_init();
+	profile_init();
+	if (!irqs_disabled())
+		printk("start_kernel(): bug: interrupts were enabled early\n");
+	local_irq_enable();
 
 	/*
 	 * HACK ALERT! This is early. We're enabling the console before
@@ -507,10 +511,6 @@ asmlinkage void __init start_kernel(void)
 	console_init();
 	if (panic_later)
 		panic(panic_later, panic_param);
-	profile_init();
-	if (!irqs_disabled())
-		printk("start_kernel(): bug: interrupts were enabled early\n");
-	local_irq_enable();
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (initrd_start && !initrd_below_start_ok &&
 			initrd_start < min_low_pfn << PAGE_SHIFT) {
-- 
cgit v0.10.2


From 4d435f9d8ff01ae726a2a84edb9c2457787a337e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:24 -0700
Subject: [PATCH] lockdep: add is_module_address()

Add is_module_address() method - to be used by lockdep.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/module.h b/include/linux/module.h
index 9e9dc7c..d06c74f 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -358,6 +358,7 @@ static inline int module_is_live(struct module *mod)
 /* Is this address in a module? (second is with no locks, for oops) */
 struct module *module_text_address(unsigned long addr);
 struct module *__module_text_address(unsigned long addr);
+int is_module_address(unsigned long addr);
 
 /* Returns module and fills in value, defined and namebuf, or NULL if
    symnum out of range. */
@@ -496,6 +497,11 @@ static inline struct module *__module_text_address(unsigned long addr)
 	return NULL;
 }
 
+static inline int is_module_address(unsigned long addr)
+{
+	return 0;
+}
+
 /* Get/put a kernel symbol (calls should be symmetric) */
 #define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); })
 #define symbol_put(x) do { } while(0)
diff --git a/kernel/module.c b/kernel/module.c
index 281172f..0351625 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2159,6 +2159,29 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
 	return e;
 }
 
+/*
+ * Is this a valid module address?
+ */
+int is_module_address(unsigned long addr)
+{
+	unsigned long flags;
+	struct module *mod;
+
+	spin_lock_irqsave(&modlist_lock, flags);
+
+	list_for_each_entry(mod, &modules, list) {
+		if (within(addr, mod->module_core, mod->core_size)) {
+			spin_unlock_irqrestore(&modlist_lock, flags);
+			return 1;
+		}
+	}
+
+	spin_unlock_irqrestore(&modlist_lock, flags);
+
+	return 0;
+}
+
+
 /* Is this a valid kernel address?  We don't grab the lock: we are oopsing. */
 struct module *__module_text_address(unsigned long addr)
 {
-- 
cgit v0.10.2


From 8d8fdf5c76816e5263073008f03f097ffc713db3 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 3 Jul 2006 00:24:25 -0700
Subject: [PATCH] lockdep: add print_ip_sym()

Provide a common print_ip_sym() function that prints the passed instruction
pointer as well as the symbol belonging to it.  Avoids adding a bunch of
#ifdef CONFIG_64BIT in order to get the printk format right on 32/64 bit
platforms.

Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index ad71ac0..849043c 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -64,4 +64,18 @@ static inline void print_symbol(const char *fmt, unsigned long addr)
 		       __builtin_extract_return_addr((void *)addr));
 }
 
+#ifndef CONFIG_64BIT
+#define print_ip_sym(ip)		\
+do {					\
+	printk("[<%08lx>]", ip);	\
+	print_symbol(" %s\n", ip);	\
+} while(0)
+#else
+#define print_ip_sym(ip)		\
+do {					\
+	printk("[<%016lx>]", ip);	\
+	print_symbol(" %s\n", ip);	\
+} while(0)
+#endif
+
 #endif /*_LINUX_KALLSYMS_H*/
-- 
cgit v0.10.2


From a875a69f8b00a38b4f40d9632a4fc71a159f0e0d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:26 -0700
Subject: [PATCH] lockdep: add per_cpu_offset()

Add the per_cpu_offset() generic method. (used by the lock validator)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index c745211..e160e04 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -7,6 +7,8 @@
 
 extern unsigned long __per_cpu_offset[NR_CPUS];
 
+#define per_cpu_offset(x) (__per_cpu_offset[x])
+
 /* Separate out the type, so (int[3], foo) works. */
 #define DEFINE_PER_CPU(type, name) \
     __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
diff --git a/include/asm-ia64/percpu.h b/include/asm-ia64/percpu.h
index 24d898b..fbe5cf3 100644
--- a/include/asm-ia64/percpu.h
+++ b/include/asm-ia64/percpu.h
@@ -36,6 +36,7 @@
 #ifdef CONFIG_SMP
 
 extern unsigned long __per_cpu_offset[NR_CPUS];
+#define per_cpu_offset(x) (__per_cpu_offset(x))
 
 /* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */
 DECLARE_PER_CPU(unsigned long, local_per_cpu_offset);
diff --git a/include/asm-powerpc/percpu.h b/include/asm-powerpc/percpu.h
index faa1fc7..2f2e302 100644
--- a/include/asm-powerpc/percpu.h
+++ b/include/asm-powerpc/percpu.h
@@ -14,6 +14,7 @@
 
 #define __per_cpu_offset(cpu) (paca[cpu].data_offset)
 #define __my_cpu_offset() get_paca()->data_offset
+#define per_cpu_offset(x) (__per_cpu_offset(x))
 
 /* Separate out the type, so (int[3], foo) works. */
 #define DEFINE_PER_CPU(type, name) \
diff --git a/include/asm-s390/percpu.h b/include/asm-s390/percpu.h
index d9a8cca..28b3517 100644
--- a/include/asm-s390/percpu.h
+++ b/include/asm-s390/percpu.h
@@ -42,6 +42,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
 #define __get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset)
 #define __raw_get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset)
 #define per_cpu(var,cpu) __reloc_hide(var,__per_cpu_offset[cpu])
+#define per_cpu_offset(x) (__per_cpu_offset[x])
 
 /* A macro to avoid #include hell... */
 #define percpu_modcopy(pcpudst, src, size)			\
diff --git a/include/asm-sparc64/percpu.h b/include/asm-sparc64/percpu.h
index a6ece06..ced8cbd 100644
--- a/include/asm-sparc64/percpu.h
+++ b/include/asm-sparc64/percpu.h
@@ -11,6 +11,7 @@ extern unsigned long __per_cpu_base;
 extern unsigned long __per_cpu_shift;
 #define __per_cpu_offset(__cpu) \
 	(__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift))
+#define per_cpu_offset(x) (__per_cpu_offset(x))
 
 /* Separate out the type, so (int[3], foo) works. */
 #define DEFINE_PER_CPU(type, name) \
diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h
index 549eb92..08dd9f9 100644
--- a/include/asm-x86_64/percpu.h
+++ b/include/asm-x86_64/percpu.h
@@ -14,6 +14,8 @@
 #define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
 #define __my_cpu_offset() read_pda(data_offset)
 
+#define per_cpu_offset(x) (__per_cpu_offset(x))
+
 /* Separate out the type, so (int[3], foo) works. */
 #define DEFINE_PER_CPU(type, name) \
     __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-- 
cgit v0.10.2


From c01d403b2e3e3f231b18ebd07ad64ecbe6a258a5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:27 -0700
Subject: [PATCH] lockdep: add disable/enable_irq_lockdep() API

lockdep wants to use the disable_irq()/enable_irq() prototypes before they are
provied by the platform's asm/irq.h.  So move them out of the
CONFIG_GENERIC_HARDIRQS define - all architectures have a common prototype for
this anyway.

Add special lockdep variants of irq line disabling/enabling.

These should be used for locking constructs that know that a particular irq
context which is disabled, and which is the only irq-context user of a lock,
that it's safe to take the lock in the irq-disabled section without disabling
hardirqs.

[akpm@osdl.org: build fix]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index da3e0db..2c5452c 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -86,6 +86,41 @@ extern void disable_irq_nosync(unsigned int irq);
 extern void disable_irq(unsigned int irq);
 extern void enable_irq(unsigned int irq);
 
+/*
+ * Special lockdep variants of irq disabling/enabling.
+ * These should be used for locking constructs that
+ * know that a particular irq context which is disabled,
+ * and which is the only irq-context user of a lock,
+ * that it's safe to take the lock in the irq-disabled
+ * section without disabling hardirqs.
+ *
+ * On !CONFIG_LOCKDEP they are equivalent to the normal
+ * irq disable/enable methods.
+ */
+static inline void disable_irq_nosync_lockdep(unsigned int irq)
+{
+	disable_irq_nosync(irq);
+#ifdef CONFIG_LOCKDEP
+	local_irq_disable();
+#endif
+}
+
+static inline void disable_irq_lockdep(unsigned int irq)
+{
+	disable_irq(irq);
+#ifdef CONFIG_LOCKDEP
+	local_irq_disable();
+#endif
+}
+
+static inline void enable_irq_lockdep(unsigned int irq)
+{
+#ifdef CONFIG_LOCKDEP
+	local_irq_enable();
+#endif
+	enable_irq(irq);
+}
+
 /* IRQ wakeup (PM) control: */
 extern int set_irq_wake(unsigned int irq, unsigned int on);
 
@@ -99,7 +134,19 @@ static inline int disable_irq_wake(unsigned int irq)
 	return set_irq_wake(irq, 0);
 }
 
-#endif
+#else /* !CONFIG_GENERIC_HARDIRQS */
+/*
+ * NOTE: non-genirq architectures, if they want to support the lock
+ * validator need to define the methods below in their asm/irq.h
+ * files, under an #ifdef CONFIG_LOCKDEP section.
+ */
+# ifndef CONFIG_LOCKDEP
+#  define disable_irq_nosync_lockdep(irq)	disable_irq_nosync(irq)
+#  define disable_irq_lockdep(irq)		disable_irq(irq)
+#  define enable_irq_lockdep(irq)		enable_irq(irq)
+# endif
+
+#endif /* CONFIG_GENERIC_HARDIRQS */
 
 #ifndef __ARCH_SET_SOFTIRQ_PENDING
 #define set_softirq_pending(x) (local_softirq_pending() = (x))
-- 
cgit v0.10.2


From d7e9629de051bb4b1d104588cd97673ad770809e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:27 -0700
Subject: [PATCH] lockdep: add local_irq_enable_in_hardirq() API

Introduce local_irq_enable_in_hardirq() API.  It is currently aliased to
local_irq_enable(), hence has no functional effects.

This API will be used by lockdep, but even without lockdep this will better
document places in the kernel where a hardirq context enables hardirqs.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 2c5452c..73463fb 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -80,6 +80,23 @@ extern int request_irq(unsigned int,
 		       unsigned long, const char *, void *);
 extern void free_irq(unsigned int, void *);
 
+/*
+ * On lockdep we dont want to enable hardirqs in hardirq
+ * context. Use local_irq_enable_in_hardirq() to annotate
+ * kernel code that has to do this nevertheless (pretty much
+ * the only valid case is for old/broken hardware that is
+ * insanely slow).
+ *
+ * NOTE: in theory this might break fragile code that relies
+ * on hardirq delivery - in practice we dont seem to have such
+ * places left. So the only effect should be slightly increased
+ * irqs-off latencies.
+ */
+#ifdef CONFIG_LOCKDEP
+# define local_irq_enable_in_hardirq()	do { } while (0)
+#else
+# define local_irq_enable_in_hardirq()	local_irq_enable()
+#endif
 
 #ifdef CONFIG_GENERIC_HARDIRQS
 extern void disable_irq_nosync(unsigned int irq);
-- 
cgit v0.10.2


From 8b3db9c542e18b71d4820da4dd9401ee030feacb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:28 -0700
Subject: [PATCH] lockdep: add DECLARE_COMPLETION_ONSTACK() API

lockdep needs to have the waitqueue lock initialized for on-stack waitqueues
implicitly initialized by DECLARE_COMPLETION().  Introduce the API.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/completion.h b/include/linux/completion.h
index 90663ad..251c41e 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -21,6 +21,18 @@ struct completion {
 #define DECLARE_COMPLETION(work) \
 	struct completion work = COMPLETION_INITIALIZER(work)
 
+/*
+ * Lockdep needs to run a non-constant initializer for on-stack
+ * completions - so we use the _ONSTACK() variant for those that
+ * are on the kernel stack:
+ */
+#ifdef CONFIG_LOCKDEP
+# define DECLARE_COMPLETION_ONSTACK(work) \
+	struct completion work = ({ init_completion(&work); work; })
+#else
+# define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work)
+#endif
+
 static inline void init_completion(struct completion *x)
 {
 	x->done = 0;
-- 
cgit v0.10.2


From c4e05116a2c4d8187127dbf77ab790aa57a47388 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:29 -0700
Subject: [PATCH] lockdep: clean up rwsems

Clean up rwsems.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-i386/rwsem.h b/include/asm-i386/rwsem.h
index be4ab85..558804e 100644
--- a/include/asm-i386/rwsem.h
+++ b/include/asm-i386/rwsem.h
@@ -61,23 +61,11 @@ struct rw_semaphore {
 #define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
 	spinlock_t		wait_lock;
 	struct list_head	wait_list;
-#if RWSEM_DEBUG
-	int			debug;
-#endif
 };
 
-/*
- * initialisation
- */
-#if RWSEM_DEBUG
-#define __RWSEM_DEBUG_INIT      , 0
-#else
-#define __RWSEM_DEBUG_INIT	/* */
-#endif
-
 #define __RWSEM_INITIALIZER(name) \
 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \
-	__RWSEM_DEBUG_INIT }
+	}
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -87,9 +75,6 @@ static inline void init_rwsem(struct rw_semaphore *sem)
 	sem->count = RWSEM_UNLOCKED_VALUE;
 	spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
-#if RWSEM_DEBUG
-	sem->debug = 0;
-#endif
 }
 
 /*
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index f30f805..d68afcc 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -32,22 +32,10 @@ struct rw_semaphore {
 	__s32			activity;
 	spinlock_t		wait_lock;
 	struct list_head	wait_list;
-#if RWSEM_DEBUG
-	int			debug;
-#endif
 };
 
-/*
- * initialisation
- */
-#if RWSEM_DEBUG
-#define __RWSEM_DEBUG_INIT      , 0
-#else
-#define __RWSEM_DEBUG_INIT	/* */
-#endif
-
 #define __RWSEM_INITIALIZER(name) \
-{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __RWSEM_DEBUG_INIT }
+{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) }
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index f99fe90..9358153 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -9,8 +9,6 @@
 
 #include <linux/linkage.h>
 
-#define RWSEM_DEBUG 0
-
 #ifdef __KERNEL__
 
 #include <linux/types.h>
@@ -26,23 +24,13 @@ struct rw_semaphore;
 #include <asm/rwsem.h> /* use an arch-specific implementation */
 #endif
 
-#ifndef rwsemtrace
-#if RWSEM_DEBUG
-extern void FASTCALL(rwsemtrace(struct rw_semaphore *sem, const char *str));
-#else
-#define rwsemtrace(SEM,FMT)
-#endif
-#endif
-
 /*
  * lock for reading
  */
 static inline void down_read(struct rw_semaphore *sem)
 {
 	might_sleep();
-	rwsemtrace(sem,"Entering down_read");
 	__down_read(sem);
-	rwsemtrace(sem,"Leaving down_read");
 }
 
 /*
@@ -51,9 +39,7 @@ static inline void down_read(struct rw_semaphore *sem)
 static inline int down_read_trylock(struct rw_semaphore *sem)
 {
 	int ret;
-	rwsemtrace(sem,"Entering down_read_trylock");
 	ret = __down_read_trylock(sem);
-	rwsemtrace(sem,"Leaving down_read_trylock");
 	return ret;
 }
 
@@ -63,9 +49,7 @@ static inline int down_read_trylock(struct rw_semaphore *sem)
 static inline void down_write(struct rw_semaphore *sem)
 {
 	might_sleep();
-	rwsemtrace(sem,"Entering down_write");
 	__down_write(sem);
-	rwsemtrace(sem,"Leaving down_write");
 }
 
 /*
@@ -74,9 +58,7 @@ static inline void down_write(struct rw_semaphore *sem)
 static inline int down_write_trylock(struct rw_semaphore *sem)
 {
 	int ret;
-	rwsemtrace(sem,"Entering down_write_trylock");
 	ret = __down_write_trylock(sem);
-	rwsemtrace(sem,"Leaving down_write_trylock");
 	return ret;
 }
 
@@ -85,9 +67,7 @@ static inline int down_write_trylock(struct rw_semaphore *sem)
  */
 static inline void up_read(struct rw_semaphore *sem)
 {
-	rwsemtrace(sem,"Entering up_read");
 	__up_read(sem);
-	rwsemtrace(sem,"Leaving up_read");
 }
 
 /*
@@ -95,9 +75,7 @@ static inline void up_read(struct rw_semaphore *sem)
  */
 static inline void up_write(struct rw_semaphore *sem)
 {
-	rwsemtrace(sem,"Entering up_write");
 	__up_write(sem);
-	rwsemtrace(sem,"Leaving up_write");
 }
 
 /*
@@ -105,9 +83,7 @@ static inline void up_write(struct rw_semaphore *sem)
  */
 static inline void downgrade_write(struct rw_semaphore *sem)
 {
-	rwsemtrace(sem,"Entering downgrade_write");
 	__downgrade_write(sem);
-	rwsemtrace(sem,"Leaving downgrade_write");
 }
 
 #endif /* __KERNEL__ */
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
new file mode 100644
index 0000000..790a99b
--- /dev/null
+++ b/kernel/rwsem.c
@@ -0,0 +1,105 @@
+/* kernel/rwsem.c: R/W semaphores, public implementation
+ *
+ * Written by David Howells (dhowells@redhat.com).
+ * Derived from asm-i386/semaphore.h
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/rwsem.h>
+
+#include <asm/system.h>
+#include <asm/atomic.h>
+
+/*
+ * lock for reading
+ */
+void down_read(struct rw_semaphore *sem)
+{
+	might_sleep();
+	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
+
+	__down_read(sem);
+}
+
+EXPORT_SYMBOL(down_read);
+
+/*
+ * trylock for reading -- returns 1 if successful, 0 if contention
+ */
+int down_read_trylock(struct rw_semaphore *sem)
+{
+	int ret = __down_read_trylock(sem);
+
+	if (ret == 1)
+		rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
+	return ret;
+}
+
+EXPORT_SYMBOL(down_read_trylock);
+
+/*
+ * lock for writing
+ */
+void down_write(struct rw_semaphore *sem)
+{
+	might_sleep();
+	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
+
+	__down_write(sem);
+}
+
+EXPORT_SYMBOL(down_write);
+
+/*
+ * trylock for writing -- returns 1 if successful, 0 if contention
+ */
+int down_write_trylock(struct rw_semaphore *sem)
+{
+	int ret = __down_write_trylock(sem);
+
+	if (ret == 1)
+		rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
+	return ret;
+}
+
+EXPORT_SYMBOL(down_write_trylock);
+
+/*
+ * release a read lock
+ */
+void up_read(struct rw_semaphore *sem)
+{
+	rwsem_release(&sem->dep_map, 1, _RET_IP_);
+
+	__up_read(sem);
+}
+
+EXPORT_SYMBOL(up_read);
+
+/*
+ * release a write lock
+ */
+void up_write(struct rw_semaphore *sem)
+{
+	rwsem_release(&sem->dep_map, 1, _RET_IP_);
+
+	__up_write(sem);
+}
+
+EXPORT_SYMBOL(up_write);
+
+/*
+ * downgrade write lock to read lock
+ */
+void downgrade_write(struct rw_semaphore *sem)
+{
+	/*
+	 * lockdep: a downgraded write will live on as a write
+	 * dependency.
+	 */
+	__downgrade_write(sem);
+}
+
+EXPORT_SYMBOL(downgrade_write);
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index 40ffde9..03b6097 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -17,16 +17,6 @@ struct rwsem_waiter {
 #define RWSEM_WAITING_FOR_WRITE	0x00000002
 };
 
-#if RWSEM_DEBUG
-void rwsemtrace(struct rw_semaphore *sem, const char *str)
-{
-	if (sem->debug)
-		printk("[%d] %s({%d,%d})\n",
-		       current->pid, str, sem->activity,
-		       list_empty(&sem->wait_list) ? 0 : 1);
-}
-#endif
-
 /*
  * initialise the semaphore
  */
@@ -35,9 +25,6 @@ void fastcall init_rwsem(struct rw_semaphore *sem)
 	sem->activity = 0;
 	spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
-#if RWSEM_DEBUG
-	sem->debug = 0;
-#endif
 }
 
 /*
@@ -56,8 +43,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
 	struct task_struct *tsk;
 	int woken;
 
-	rwsemtrace(sem, "Entering __rwsem_do_wake");
-
 	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
 
 	if (!wakewrite) {
@@ -104,7 +89,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
 	sem->activity += woken;
 
  out:
-	rwsemtrace(sem, "Leaving __rwsem_do_wake");
 	return sem;
 }
 
@@ -138,8 +122,6 @@ void fastcall __sched __down_read(struct rw_semaphore *sem)
 	struct rwsem_waiter waiter;
 	struct task_struct *tsk;
 
-	rwsemtrace(sem, "Entering __down_read");
-
 	spin_lock_irq(&sem->wait_lock);
 
 	if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
@@ -171,9 +153,8 @@ void fastcall __sched __down_read(struct rw_semaphore *sem)
 	}
 
 	tsk->state = TASK_RUNNING;
-
  out:
-	rwsemtrace(sem, "Leaving __down_read");
+	;
 }
 
 /*
@@ -184,7 +165,6 @@ int fastcall __down_read_trylock(struct rw_semaphore *sem)
 	unsigned long flags;
 	int ret = 0;
 
-	rwsemtrace(sem, "Entering __down_read_trylock");
 
 	spin_lock_irqsave(&sem->wait_lock, flags);
 
@@ -196,7 +176,6 @@ int fastcall __down_read_trylock(struct rw_semaphore *sem)
 
 	spin_unlock_irqrestore(&sem->wait_lock, flags);
 
-	rwsemtrace(sem, "Leaving __down_read_trylock");
 	return ret;
 }
 
@@ -209,8 +188,6 @@ void fastcall __sched __down_write(struct rw_semaphore *sem)
 	struct rwsem_waiter waiter;
 	struct task_struct *tsk;
 
-	rwsemtrace(sem, "Entering __down_write");
-
 	spin_lock_irq(&sem->wait_lock);
 
 	if (sem->activity == 0 && list_empty(&sem->wait_list)) {
@@ -242,9 +219,8 @@ void fastcall __sched __down_write(struct rw_semaphore *sem)
 	}
 
 	tsk->state = TASK_RUNNING;
-
  out:
-	rwsemtrace(sem, "Leaving __down_write");
+	;
 }
 
 /*
@@ -255,8 +231,6 @@ int fastcall __down_write_trylock(struct rw_semaphore *sem)
 	unsigned long flags;
 	int ret = 0;
 
-	rwsemtrace(sem, "Entering __down_write_trylock");
-
 	spin_lock_irqsave(&sem->wait_lock, flags);
 
 	if (sem->activity == 0 && list_empty(&sem->wait_list)) {
@@ -267,7 +241,6 @@ int fastcall __down_write_trylock(struct rw_semaphore *sem)
 
 	spin_unlock_irqrestore(&sem->wait_lock, flags);
 
-	rwsemtrace(sem, "Leaving __down_write_trylock");
 	return ret;
 }
 
@@ -278,16 +251,12 @@ void fastcall __up_read(struct rw_semaphore *sem)
 {
 	unsigned long flags;
 
-	rwsemtrace(sem, "Entering __up_read");
-
 	spin_lock_irqsave(&sem->wait_lock, flags);
 
 	if (--sem->activity == 0 && !list_empty(&sem->wait_list))
 		sem = __rwsem_wake_one_writer(sem);
 
 	spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-	rwsemtrace(sem, "Leaving __up_read");
 }
 
 /*
@@ -297,8 +266,6 @@ void fastcall __up_write(struct rw_semaphore *sem)
 {
 	unsigned long flags;
 
-	rwsemtrace(sem, "Entering __up_write");
-
 	spin_lock_irqsave(&sem->wait_lock, flags);
 
 	sem->activity = 0;
@@ -306,8 +273,6 @@ void fastcall __up_write(struct rw_semaphore *sem)
 		sem = __rwsem_do_wake(sem, 1);
 
 	spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-	rwsemtrace(sem, "Leaving __up_write");
 }
 
 /*
@@ -318,8 +283,6 @@ void fastcall __downgrade_write(struct rw_semaphore *sem)
 {
 	unsigned long flags;
 
-	rwsemtrace(sem, "Entering __downgrade_write");
-
 	spin_lock_irqsave(&sem->wait_lock, flags);
 
 	sem->activity = 1;
@@ -327,8 +290,6 @@ void fastcall __downgrade_write(struct rw_semaphore *sem)
 		sem = __rwsem_do_wake(sem, 0);
 
 	spin_unlock_irqrestore(&sem->wait_lock, flags);
-
-	rwsemtrace(sem, "Leaving __downgrade_write");
 }
 
 EXPORT_SYMBOL(init_rwsem);
@@ -339,6 +300,3 @@ EXPORT_SYMBOL(__down_write_trylock);
 EXPORT_SYMBOL(__up_read);
 EXPORT_SYMBOL(__up_write);
 EXPORT_SYMBOL(__downgrade_write);
-#if RWSEM_DEBUG
-EXPORT_SYMBOL(rwsemtrace);
-#endif
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 62fa4eb..bae5972 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -16,17 +16,6 @@ struct rwsem_waiter {
 #define RWSEM_WAITING_FOR_WRITE	0x00000002
 };
 
-#if RWSEM_DEBUG
-#undef rwsemtrace
-void rwsemtrace(struct rw_semaphore *sem, const char *str)
-{
-	printk("sem=%p\n", sem);
-	printk("(sem)=%08lx\n", sem->count);
-	if (sem->debug)
-		printk("[%d] %s({%08lx})\n", current->pid, str, sem->count);
-}
-#endif
-
 /*
  * handle the lock release when processes blocked on it that can now run
  * - if we come here from up_xxxx(), then:
@@ -45,8 +34,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
 	struct list_head *next;
 	signed long oldcount, woken, loop;
 
-	rwsemtrace(sem, "Entering __rwsem_do_wake");
-
 	if (downgrading)
 		goto dont_wake_writers;
 
@@ -127,7 +114,6 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
 	next->prev = &sem->wait_list;
 
  out:
-	rwsemtrace(sem, "Leaving __rwsem_do_wake");
 	return sem;
 
 	/* undo the change to count, but check for a transition 1->0 */
@@ -186,13 +172,9 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
 {
 	struct rwsem_waiter waiter;
 
-	rwsemtrace(sem, "Entering rwsem_down_read_failed");
-
 	waiter.flags = RWSEM_WAITING_FOR_READ;
 	rwsem_down_failed_common(sem, &waiter,
 				RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS);
-
-	rwsemtrace(sem, "Leaving rwsem_down_read_failed");
 	return sem;
 }
 
@@ -204,12 +186,9 @@ rwsem_down_write_failed(struct rw_semaphore *sem)
 {
 	struct rwsem_waiter waiter;
 
-	rwsemtrace(sem, "Entering rwsem_down_write_failed");
-
 	waiter.flags = RWSEM_WAITING_FOR_WRITE;
 	rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS);
 
-	rwsemtrace(sem, "Leaving rwsem_down_write_failed");
 	return sem;
 }
 
@@ -221,8 +200,6 @@ struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
 {
 	unsigned long flags;
 
-	rwsemtrace(sem, "Entering rwsem_wake");
-
 	spin_lock_irqsave(&sem->wait_lock, flags);
 
 	/* do nothing if list empty */
@@ -231,8 +208,6 @@ struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem)
 
 	spin_unlock_irqrestore(&sem->wait_lock, flags);
 
-	rwsemtrace(sem, "Leaving rwsem_wake");
-
 	return sem;
 }
 
@@ -245,8 +220,6 @@ struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem)
 {
 	unsigned long flags;
 
-	rwsemtrace(sem, "Entering rwsem_downgrade_wake");
-
 	spin_lock_irqsave(&sem->wait_lock, flags);
 
 	/* do nothing if list empty */
@@ -255,7 +228,6 @@ struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem)
 
 	spin_unlock_irqrestore(&sem->wait_lock, flags);
 
-	rwsemtrace(sem, "Leaving rwsem_downgrade_wake");
 	return sem;
 }
 
@@ -263,6 +235,3 @@ EXPORT_SYMBOL(rwsem_down_read_failed);
 EXPORT_SYMBOL(rwsem_down_write_failed);
 EXPORT_SYMBOL(rwsem_wake);
 EXPORT_SYMBOL(rwsem_downgrade_wake);
-#if RWSEM_DEBUG
-EXPORT_SYMBOL(rwsemtrace);
-#endif
-- 
cgit v0.10.2


From 61f4c3d6db3ecbdd4e1a2a7a1710c1410d085dd1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:29 -0700
Subject: [PATCH] lockdep: remove RWSEM_DEBUG remnants

RWSEM_DEBUG used to be a printk based 'tracing' facility, probably used for
very early prototypes of the rwsem code.  Remove it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-alpha/rwsem.h b/include/asm-alpha/rwsem.h
index fafdd4f..1570c0b 100644
--- a/include/asm-alpha/rwsem.h
+++ b/include/asm-alpha/rwsem.h
@@ -36,20 +36,11 @@ struct rw_semaphore {
 #define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
 	spinlock_t		wait_lock;
 	struct list_head	wait_list;
-#if RWSEM_DEBUG
-	int			debug;
-#endif
 };
 
-#if RWSEM_DEBUG
-#define __RWSEM_DEBUG_INIT      , 0
-#else
-#define __RWSEM_DEBUG_INIT	/* */
-#endif
-
 #define __RWSEM_INITIALIZER(name) \
 	{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
-	LIST_HEAD_INIT((name).wait_list) __RWSEM_DEBUG_INIT }
+	LIST_HEAD_INIT((name).wait_list) }
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -59,9 +50,6 @@ static inline void init_rwsem(struct rw_semaphore *sem)
 	sem->count = RWSEM_UNLOCKED_VALUE;
 	spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
-#if RWSEM_DEBUG
-	sem->debug = 0;
-#endif
 }
 
 static inline void __down_read(struct rw_semaphore *sem)
diff --git a/include/asm-ia64/rwsem.h b/include/asm-ia64/rwsem.h
index 1327c91..2d1640c 100644
--- a/include/asm-ia64/rwsem.h
+++ b/include/asm-ia64/rwsem.h
@@ -33,9 +33,6 @@ struct rw_semaphore {
 	signed long		count;
 	spinlock_t		wait_lock;
 	struct list_head	wait_list;
-#if RWSEM_DEBUG
-	int			debug;
-#endif
 };
 
 #define RWSEM_UNLOCKED_VALUE		__IA64_UL_CONST(0x0000000000000000)
@@ -45,19 +42,9 @@ struct rw_semaphore {
 #define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
 #define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
 
-/*
- * initialization
- */
-#if RWSEM_DEBUG
-#define __RWSEM_DEBUG_INIT      , 0
-#else
-#define __RWSEM_DEBUG_INIT	/* */
-#endif
-
 #define __RWSEM_INITIALIZER(name) \
 	{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
-	  LIST_HEAD_INIT((name).wait_list) \
-	  __RWSEM_DEBUG_INIT }
+	  LIST_HEAD_INIT((name).wait_list) }
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -73,9 +60,6 @@ init_rwsem (struct rw_semaphore *sem)
 	sem->count = RWSEM_UNLOCKED_VALUE;
 	spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
-#if RWSEM_DEBUG
-	sem->debug = 0;
-#endif
 }
 
 /*
diff --git a/include/asm-powerpc/rwsem.h b/include/asm-powerpc/rwsem.h
index 2c2fe96..e929145 100644
--- a/include/asm-powerpc/rwsem.h
+++ b/include/asm-powerpc/rwsem.h
@@ -28,24 +28,11 @@ struct rw_semaphore {
 #define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
 	spinlock_t		wait_lock;
 	struct list_head	wait_list;
-#if RWSEM_DEBUG
-	int			debug;
-#endif
 };
 
-/*
- * initialisation
- */
-#if RWSEM_DEBUG
-#define __RWSEM_DEBUG_INIT      , 0
-#else
-#define __RWSEM_DEBUG_INIT	/* */
-#endif
-
 #define __RWSEM_INITIALIZER(name) \
 	{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
-	  LIST_HEAD_INIT((name).wait_list) \
-	  __RWSEM_DEBUG_INIT }
+	  LIST_HEAD_INIT((name).wait_list) }
 
 #define DECLARE_RWSEM(name)		\
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -60,9 +47,6 @@ static inline void init_rwsem(struct rw_semaphore *sem)
 	sem->count = RWSEM_UNLOCKED_VALUE;
 	spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
-#if RWSEM_DEBUG
-	sem->debug = 0;
-#endif
 }
 
 /*
diff --git a/include/asm-sh/rwsem.h b/include/asm-sh/rwsem.h
index 0262d3d..9d2aea5 100644
--- a/include/asm-sh/rwsem.h
+++ b/include/asm-sh/rwsem.h
@@ -25,24 +25,11 @@ struct rw_semaphore {
 #define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
 	spinlock_t		wait_lock;
 	struct list_head	wait_list;
-#if RWSEM_DEBUG
-	int			debug;
-#endif
 };
 
-/*
- * initialisation
- */
-#if RWSEM_DEBUG
-#define __RWSEM_DEBUG_INIT      , 0
-#else
-#define __RWSEM_DEBUG_INIT	/* */
-#endif
-
 #define __RWSEM_INITIALIZER(name) \
 	{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
-	  LIST_HEAD_INIT((name).wait_list) \
-	  __RWSEM_DEBUG_INIT }
+	  LIST_HEAD_INIT((name).wait_list) }
 
 #define DECLARE_RWSEM(name)		\
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -57,9 +44,6 @@ static inline void init_rwsem(struct rw_semaphore *sem)
 	sem->count = RWSEM_UNLOCKED_VALUE;
 	spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
-#if RWSEM_DEBUG
-	sem->debug = 0;
-#endif
 }
 
 /*
diff --git a/include/asm-xtensa/rwsem.h b/include/asm-xtensa/rwsem.h
index abcd86d..0aad3a5 100644
--- a/include/asm-xtensa/rwsem.h
+++ b/include/asm-xtensa/rwsem.h
@@ -31,24 +31,11 @@ struct rw_semaphore {
 #define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
 	spinlock_t		wait_lock;
 	struct list_head	wait_list;
-#if RWSEM_DEBUG
-	int			debug;
-#endif
 };
 
-/*
- * initialisation
- */
-#if RWSEM_DEBUG
-#define __RWSEM_DEBUG_INIT      , 0
-#else
-#define __RWSEM_DEBUG_INIT	/* */
-#endif
-
 #define __RWSEM_INITIALIZER(name) \
 	{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
-	  LIST_HEAD_INIT((name).wait_list) \
-	  __RWSEM_DEBUG_INIT }
+	  LIST_HEAD_INIT((name).wait_list) }
 
 #define DECLARE_RWSEM(name)		\
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -63,9 +50,6 @@ static inline void init_rwsem(struct rw_semaphore *sem)
 	sem->count = RWSEM_UNLOCKED_VALUE;
 	spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
-#if RWSEM_DEBUG
-	sem->debug = 0;
-#endif
 }
 
 /*
-- 
cgit v0.10.2


From 9e7f4d451e99b7592a96ad0efaf8bcc1e7b2f854 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:30 -0700
Subject: [PATCH] lockdep: rename DEBUG_WARN_ON()

Rename DEBUG_WARN_ON() to the less generic DEBUG_LOCKS_WARN_ON() name, so that
it's clear that this is a lock-debugging internal mechanism.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index e38e4ba..fe0e9f2 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -373,7 +373,7 @@ void debug_mutex_set_owner(struct mutex *lock,
 			   struct thread_info *new_owner __IP_DECL__)
 {
 	lock->owner = new_owner;
-	DEBUG_WARN_ON(!list_empty(&lock->held_list));
+	DEBUG_LOCKS_WARN_ON(!list_empty(&lock->held_list));
 	if (debug_mutex_on) {
 		list_add_tail(&lock->held_list, &debug_mutex_held_locks);
 		lock->acquire_ip = ip;
@@ -389,22 +389,22 @@ void debug_mutex_init_waiter(struct mutex_waiter *waiter)
 
 void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
 {
-	SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock));
-	DEBUG_WARN_ON(list_empty(&lock->wait_list));
-	DEBUG_WARN_ON(waiter->magic != waiter);
-	DEBUG_WARN_ON(list_empty(&waiter->list));
+	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
+	DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
+	DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
+	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
 }
 
 void debug_mutex_free_waiter(struct mutex_waiter *waiter)
 {
-	DEBUG_WARN_ON(!list_empty(&waiter->list));
+	DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list));
 	memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter));
 }
 
 void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 			    struct thread_info *ti __IP_DECL__)
 {
-	SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock));
+	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
 	check_deadlock(lock, 0, ti, ip);
 	/* Mark the current thread as blocked on the lock: */
 	ti->task->blocked_on = waiter;
@@ -414,9 +414,9 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 			 struct thread_info *ti)
 {
-	DEBUG_WARN_ON(list_empty(&waiter->list));
-	DEBUG_WARN_ON(waiter->task != ti->task);
-	DEBUG_WARN_ON(ti->task->blocked_on != waiter);
+	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
+	DEBUG_LOCKS_WARN_ON(waiter->task != ti->task);
+	DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);
 	ti->task->blocked_on = NULL;
 
 	list_del_init(&waiter->list);
@@ -425,11 +425,11 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 
 void debug_mutex_unlock(struct mutex *lock)
 {
-	DEBUG_WARN_ON(lock->magic != lock);
-	DEBUG_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
-	DEBUG_WARN_ON(lock->owner != current_thread_info());
+	DEBUG_LOCKS_WARN_ON(lock->magic != lock);
+	DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
+	DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
 	if (debug_mutex_on) {
-		DEBUG_WARN_ON(list_empty(&lock->held_list));
+		DEBUG_LOCKS_WARN_ON(list_empty(&lock->held_list));
 		list_del_init(&lock->held_list);
 	}
 }
@@ -456,7 +456,7 @@ void debug_mutex_init(struct mutex *lock, const char *name)
  */
 void fastcall mutex_destroy(struct mutex *lock)
 {
-	DEBUG_WARN_ON(mutex_is_locked(lock));
+	DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock));
 	lock->magic = NULL;
 }
 
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index a5196c3..8417b52 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -65,10 +65,10 @@ extern void debug_mutex_init(struct mutex *lock, const char *name);
 	do {						\
 		struct mutex *l = container_of(lock, struct mutex, wait_lock); \
 							\
-		DEBUG_WARN_ON(in_interrupt());		\
+		DEBUG_LOCKS_WARN_ON(in_interrupt());	\
 		debug_spin_lock_save(&debug_mutex_lock, flags); \
 		spin_lock(lock);			\
-		DEBUG_WARN_ON(l->magic != l);		\
+		DEBUG_LOCKS_WARN_ON(l->magic != l);	\
 	} while (0)
 
 #define spin_unlock_mutex(lock, flags)			\
@@ -95,7 +95,7 @@ do {							\
 	}						\
 } while (0)
 
-#define DEBUG_WARN_ON(c)				\
+#define DEBUG_LOCKS_WARN_ON(c)				\
 do {							\
 	if (unlikely(c && debug_mutex_on)) {		\
 		DEBUG_OFF();				\
@@ -110,10 +110,10 @@ do {							\
 } while (0)
 
 #ifdef CONFIG_SMP
-# define SMP_DEBUG_WARN_ON(c)			DEBUG_WARN_ON(c)
-# define SMP_DEBUG_BUG_ON(c)			DEBUG_BUG_ON(c)
+# define SMP_DEBUG_LOCKS_WARN_ON(c)			DEBUG_LOCKS_WARN_ON(c)
+# define SMP_DEBUG_BUG_ON(c)				DEBUG_BUG_ON(c)
 #else
-# define SMP_DEBUG_WARN_ON(c)			do { } while (0)
-# define SMP_DEBUG_BUG_ON(c)			do { } while (0)
+# define SMP_DEBUG_LOCKS_WARN_ON(c)			do { } while (0)
+# define SMP_DEBUG_BUG_ON(c)				do { } while (0)
 #endif
 
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 7043db2..101ceeb 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -183,8 +183,8 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
 
 	debug_mutex_free_waiter(&waiter);
 
-	DEBUG_WARN_ON(list_empty(&lock->held_list));
-	DEBUG_WARN_ON(lock->owner != task->thread_info);
+	DEBUG_LOCKS_WARN_ON(list_empty(&lock->held_list));
+	DEBUG_LOCKS_WARN_ON(lock->owner != task->thread_info);
 
 	return 0;
 }
@@ -206,7 +206,7 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
 	struct mutex *lock = container_of(lock_count, struct mutex, count);
 	unsigned long flags;
 
-	DEBUG_WARN_ON(lock->owner != current_thread_info());
+	DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
 
 	spin_lock_mutex(&lock->wait_lock, flags);
 
diff --git a/kernel/mutex.h b/kernel/mutex.h
index 0691899..7e1ed48 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -16,7 +16,7 @@
 #define mutex_remove_waiter(lock, waiter, ti) \
 		__list_del((waiter)->list.prev, (waiter)->list.next)
 
-#define DEBUG_WARN_ON(c)				do { } while (0)
+#define DEBUG_LOCKS_WARN_ON(c)				do { } while (0)
 #define debug_mutex_set_owner(lock, new_owner)		do { } while (0)
 #define debug_mutex_clear_owner(lock)			do { } while (0)
 #define debug_mutex_init_waiter(waiter)			do { } while (0)
-- 
cgit v0.10.2


From 36596243daf7e1d795c647de04af95e835b8c5b4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:31 -0700
Subject: [PATCH] lockdep: remove DEBUG_BUG_ON()

cleanup: remove unused DEBUG_BUG_ON() defines.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index 8417b52..bdab13a 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -103,17 +103,9 @@ do {							\
 	}						\
 } while (0)
 
-# define DEBUG_BUG_ON(c)				\
-do {							\
-	if (unlikely(c))				\
-		DEBUG_BUG();				\
-} while (0)
-
 #ifdef CONFIG_SMP
 # define SMP_DEBUG_LOCKS_WARN_ON(c)			DEBUG_LOCKS_WARN_ON(c)
-# define SMP_DEBUG_BUG_ON(c)				DEBUG_BUG_ON(c)
 #else
 # define SMP_DEBUG_LOCKS_WARN_ON(c)			do { } while (0)
-# define SMP_DEBUG_BUG_ON(c)				do { } while (0)
 #endif
 
-- 
cgit v0.10.2


From fb7e42413a098cc45b3adf858da290033af62bae Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:31 -0700
Subject: [PATCH] lockdep: remove mutex deadlock checking code

With the lock validator we detect mutex deadlocks (and more), the mutex
deadlock checking code is both redundant and slower.  So remove it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index fe0e9f2..a92de14 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -51,321 +51,6 @@ LIST_HEAD(debug_mutex_held_locks);
  */
 int debug_mutex_on = 1;
 
-static void printk_task(struct task_struct *p)
-{
-	if (p)
-		printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
-	else
-		printk("<none>");
-}
-
-static void printk_ti(struct thread_info *ti)
-{
-	if (ti)
-		printk_task(ti->task);
-	else
-		printk("<none>");
-}
-
-static void printk_task_short(struct task_struct *p)
-{
-	if (p)
-		printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio);
-	else
-		printk("<none>");
-}
-
-static void printk_lock(struct mutex *lock, int print_owner)
-{
-	printk(" [%p] {%s}\n", lock, lock->name);
-
-	if (print_owner && lock->owner) {
-		printk(".. held by:  ");
-		printk_ti(lock->owner);
-		printk("\n");
-	}
-	if (lock->owner) {
-		printk("... acquired at:               ");
-		print_symbol("%s\n", lock->acquire_ip);
-	}
-}
-
-/*
- * printk locks held by a task:
- */
-static void show_task_locks(struct task_struct *p)
-{
-	switch (p->state) {
-	case TASK_RUNNING:		printk("R"); break;
-	case TASK_INTERRUPTIBLE:	printk("S"); break;
-	case TASK_UNINTERRUPTIBLE:	printk("D"); break;
-	case TASK_STOPPED:		printk("T"); break;
-	case EXIT_ZOMBIE:		printk("Z"); break;
-	case EXIT_DEAD:			printk("X"); break;
-	default:			printk("?"); break;
-	}
-	printk_task(p);
-	if (p->blocked_on) {
-		struct mutex *lock = p->blocked_on->lock;
-
-		printk(" blocked on mutex:");
-		printk_lock(lock, 1);
-	} else
-		printk(" (not blocked on mutex)\n");
-}
-
-/*
- * printk all locks held in the system (if filter == NULL),
- * or all locks belonging to a single task (if filter != NULL):
- */
-void show_held_locks(struct task_struct *filter)
-{
-	struct list_head *curr, *cursor = NULL;
-	struct mutex *lock;
-	struct thread_info *t;
-	unsigned long flags;
-	int count = 0;
-
-	if (filter) {
-		printk("------------------------------\n");
-		printk("| showing all locks held by: |  (");
-		printk_task_short(filter);
-		printk("):\n");
-		printk("------------------------------\n");
-	} else {
-		printk("---------------------------\n");
-		printk("| showing all locks held: |\n");
-		printk("---------------------------\n");
-	}
-
-	/*
-	 * Play safe and acquire the global trace lock. We
-	 * cannot printk with that lock held so we iterate
-	 * very carefully:
-	 */
-next:
-	debug_spin_lock_save(&debug_mutex_lock, flags);
-	list_for_each(curr, &debug_mutex_held_locks) {
-		if (cursor && curr != cursor)
-			continue;
-		lock = list_entry(curr, struct mutex, held_list);
-		t = lock->owner;
-		if (filter && (t != filter->thread_info))
-			continue;
-		count++;
-		cursor = curr->next;
-		debug_spin_unlock_restore(&debug_mutex_lock, flags);
-
-		printk("\n#%03d:            ", count);
-		printk_lock(lock, filter ? 0 : 1);
-		goto next;
-	}
-	debug_spin_unlock_restore(&debug_mutex_lock, flags);
-	printk("\n");
-}
-
-void mutex_debug_show_all_locks(void)
-{
-	struct task_struct *g, *p;
-	int count = 10;
-	int unlock = 1;
-
-	printk("\nShowing all blocking locks in the system:\n");
-
-	/*
-	 * Here we try to get the tasklist_lock as hard as possible,
-	 * if not successful after 2 seconds we ignore it (but keep
-	 * trying). This is to enable a debug printout even if a
-	 * tasklist_lock-holding task deadlocks or crashes.
-	 */
-retry:
-	if (!read_trylock(&tasklist_lock)) {
-		if (count == 10)
-			printk("hm, tasklist_lock locked, retrying... ");
-		if (count) {
-			count--;
-			printk(" #%d", 10-count);
-			mdelay(200);
-			goto retry;
-		}
-		printk(" ignoring it.\n");
-		unlock = 0;
-	}
-	if (count != 10)
-		printk(" locked it.\n");
-
-	do_each_thread(g, p) {
-		show_task_locks(p);
-		if (!unlock)
-			if (read_trylock(&tasklist_lock))
-				unlock = 1;
-	} while_each_thread(g, p);
-
-	printk("\n");
-	show_held_locks(NULL);
-	printk("=============================================\n\n");
-
-	if (unlock)
-		read_unlock(&tasklist_lock);
-}
-
-static void report_deadlock(struct task_struct *task, struct mutex *lock,
-			    struct mutex *lockblk, unsigned long ip)
-{
-	printk("\n%s/%d is trying to acquire this lock:\n",
-		current->comm, current->pid);
-	printk_lock(lock, 1);
-	printk("... trying at:                 ");
-	print_symbol("%s\n", ip);
-	show_held_locks(current);
-
-	if (lockblk) {
-		printk("but %s/%d is deadlocking current task %s/%d!\n\n",
-			task->comm, task->pid, current->comm, current->pid);
-		printk("\n%s/%d is blocked on this lock:\n",
-			task->comm, task->pid);
-		printk_lock(lockblk, 1);
-
-		show_held_locks(task);
-
-		printk("\n%s/%d's [blocked] stackdump:\n\n",
-			task->comm, task->pid);
-		show_stack(task, NULL);
-	}
-
-	printk("\n%s/%d's [current] stackdump:\n\n",
-		current->comm, current->pid);
-	dump_stack();
-	mutex_debug_show_all_locks();
-	printk("[ turning off deadlock detection. Please report this. ]\n\n");
-	local_irq_disable();
-}
-
-/*
- * Recursively check for mutex deadlocks:
- */
-static int check_deadlock(struct mutex *lock, int depth,
-			  struct thread_info *ti, unsigned long ip)
-{
-	struct mutex *lockblk;
-	struct task_struct *task;
-
-	if (!debug_mutex_on)
-		return 0;
-
-	ti = lock->owner;
-	if (!ti)
-		return 0;
-
-	task = ti->task;
-	lockblk = NULL;
-	if (task->blocked_on)
-		lockblk = task->blocked_on->lock;
-
-	/* Self-deadlock: */
-	if (current == task) {
-		DEBUG_OFF();
-		if (depth)
-			return 1;
-		printk("\n==========================================\n");
-		printk(  "[ BUG: lock recursion deadlock detected! |\n");
-		printk(  "------------------------------------------\n");
-		report_deadlock(task, lock, NULL, ip);
-		return 0;
-	}
-
-	/* Ugh, something corrupted the lock data structure? */
-	if (depth > 20) {
-		DEBUG_OFF();
-		printk("\n===========================================\n");
-		printk(  "[ BUG: infinite lock dependency detected!? |\n");
-		printk(  "-------------------------------------------\n");
-		report_deadlock(task, lock, lockblk, ip);
-		return 0;
-	}
-
-	/* Recursively check for dependencies: */
-	if (lockblk && check_deadlock(lockblk, depth+1, ti, ip)) {
-		printk("\n============================================\n");
-		printk(  "[ BUG: circular locking deadlock detected! ]\n");
-		printk(  "--------------------------------------------\n");
-		report_deadlock(task, lock, lockblk, ip);
-		return 0;
-	}
-	return 0;
-}
-
-/*
- * Called when a task exits, this function checks whether the
- * task is holding any locks, and reports the first one if so:
- */
-void mutex_debug_check_no_locks_held(struct task_struct *task)
-{
-	struct list_head *curr, *next;
-	struct thread_info *t;
-	unsigned long flags;
-	struct mutex *lock;
-
-	if (!debug_mutex_on)
-		return;
-
-	debug_spin_lock_save(&debug_mutex_lock, flags);
-	list_for_each_safe(curr, next, &debug_mutex_held_locks) {
-		lock = list_entry(curr, struct mutex, held_list);
-		t = lock->owner;
-		if (t != task->thread_info)
-			continue;
-		list_del_init(curr);
-		DEBUG_OFF();
-		debug_spin_unlock_restore(&debug_mutex_lock, flags);
-
-		printk("BUG: %s/%d, lock held at task exit time!\n",
-			task->comm, task->pid);
-		printk_lock(lock, 1);
-		if (lock->owner != task->thread_info)
-			printk("exiting task is not even the owner??\n");
-		return;
-	}
-	debug_spin_unlock_restore(&debug_mutex_lock, flags);
-}
-
-/*
- * Called when kernel memory is freed (or unmapped), or if a mutex
- * is destroyed or reinitialized - this code checks whether there is
- * any held lock in the memory range of <from> to <to>:
- */
-void mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
-{
-	struct list_head *curr, *next;
-	const void *to = from + len;
-	unsigned long flags;
-	struct mutex *lock;
-	void *lock_addr;
-
-	if (!debug_mutex_on)
-		return;
-
-	debug_spin_lock_save(&debug_mutex_lock, flags);
-	list_for_each_safe(curr, next, &debug_mutex_held_locks) {
-		lock = list_entry(curr, struct mutex, held_list);
-		lock_addr = lock;
-		if (lock_addr < from || lock_addr >= to)
-			continue;
-		list_del_init(curr);
-		DEBUG_OFF();
-		debug_spin_unlock_restore(&debug_mutex_lock, flags);
-
-		printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
-			current->comm, current->pid, lock, from, to);
-		dump_stack();
-		printk_lock(lock, 1);
-		if (lock->owner != current_thread_info())
-			printk("freeing task is not even the owner??\n");
-		return;
-	}
-	debug_spin_unlock_restore(&debug_mutex_lock, flags);
-}
-
 /*
  * Must be called with lock->wait_lock held.
  */
@@ -405,7 +90,6 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 			    struct thread_info *ti __IP_DECL__)
 {
 	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
-	check_deadlock(lock, 0, ti, ip);
 	/* Mark the current thread as blocked on the lock: */
 	ti->task->blocked_on = waiter;
 	waiter->lock = lock;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e4fcbd1..7b3863d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -115,14 +115,6 @@ config DEBUG_PREEMPT
 	  if kernel code uses it in a preemption-unsafe way. Also, the kernel
 	  will detect preemption count underflows.
 
-config DEBUG_MUTEXES
-	bool "Mutex debugging, deadlock detection"
-	default n
-	depends on DEBUG_KERNEL
-	help
-	 This allows mutex semantics violations and mutex related deadlocks
-	 (lockups) to be detected and reported automatically.
-
 config DEBUG_RT_MUTEXES
 	bool "RT Mutex debugging, deadlock detection"
 	depends on DEBUG_KERNEL && RT_MUTEXES
-- 
cgit v0.10.2


From 9a11b49a805665e13a56aa067afaf81d43ec1514 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:33 -0700
Subject: [PATCH] lockdep: better lock debugging

Generic lock debugging:

 - generalized lock debugging framework. For example, a bug in one lock
   subsystem turns off debugging in all lock subsystems.

 - got rid of the caller address passing (__IP__/__IP_DECL__/etc.) from
   the mutex/rtmutex debugging code: it caused way too much prototype
   hackery, and lockdep will give the same information anyway.

 - ability to do silent tests

 - check lock freeing in vfree too.

 - more finegrained debugging options, to allow distributions to
   turn off more expensive debugging features.

There's no separate 'held mutexes' list anymore - but there's a 'held locks'
stack within lockdep, which unifies deadlock detection across all lock
classes.  (this is independent of the lockdep validation stuff - lockdep first
checks whether we are holding a lock already)

Here are the current debugging options:

CONFIG_DEBUG_MUTEXES=y
CONFIG_DEBUG_LOCK_ALLOC=y

which do:

 config DEBUG_MUTEXES
          bool "Mutex debugging, basic checks"

 config DEBUG_LOCK_ALLOC
         bool "Detect incorrect freeing of live mutexes"

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index a064ee9..e31f079 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -151,7 +151,7 @@ static struct sysrq_key_op sysrq_mountro_op = {
 static void sysrq_handle_showlocks(int key, struct pt_regs *pt_regs,
 				struct tty_struct *tty)
 {
-	mutex_debug_show_all_locks();
+	debug_show_all_locks();
 }
 static struct sysrq_key_op sysrq_showlocks_op = {
 	.handler	= sysrq_handle_showlocks,
diff --git a/include/asm-generic/mutex-null.h b/include/asm-generic/mutex-null.h
index 5cf8b7c..254a126 100644
--- a/include/asm-generic/mutex-null.h
+++ b/include/asm-generic/mutex-null.h
@@ -10,15 +10,10 @@
 #ifndef _ASM_GENERIC_MUTEX_NULL_H
 #define _ASM_GENERIC_MUTEX_NULL_H
 
-/* extra parameter only needed for mutex debugging: */
-#ifndef __IP__
-# define __IP__
-#endif
-
-#define __mutex_fastpath_lock(count, fail_fn)	      fail_fn(count __RET_IP__)
-#define __mutex_fastpath_lock_retval(count, fail_fn)  fail_fn(count __RET_IP__)
-#define __mutex_fastpath_unlock(count, fail_fn)       fail_fn(count __RET_IP__)
-#define __mutex_fastpath_trylock(count, fail_fn)      fail_fn(count)
-#define __mutex_slowpath_needs_to_unlock()	      1
+#define __mutex_fastpath_lock(count, fail_fn)		fail_fn(count)
+#define __mutex_fastpath_lock_retval(count, fail_fn)	fail_fn(count)
+#define __mutex_fastpath_unlock(count, fail_fn)		fail_fn(count)
+#define __mutex_fastpath_trylock(count, fail_fn)	fail_fn(count)
+#define __mutex_slowpath_needs_to_unlock()		1
 
 #endif
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
new file mode 100644
index 0000000..6a70478
--- /dev/null
+++ b/include/linux/debug_locks.h
@@ -0,0 +1,69 @@
+#ifndef __LINUX_DEBUG_LOCKING_H
+#define __LINUX_DEBUG_LOCKING_H
+
+extern int debug_locks;
+extern int debug_locks_silent;
+
+/*
+ * Generic 'turn off all lock debugging' function:
+ */
+extern int debug_locks_off(void);
+
+/*
+ * In the debug case we carry the caller's instruction pointer into
+ * other functions, but we dont want the function argument overhead
+ * in the nondebug case - hence these macros:
+ */
+#define _RET_IP_		(unsigned long)__builtin_return_address(0)
+#define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })
+
+#define DEBUG_LOCKS_WARN_ON(c)						\
+({									\
+	int __ret = 0;							\
+									\
+	if (unlikely(c)) {						\
+		if (debug_locks_off())					\
+			WARN_ON(1);					\
+		__ret = 1;						\
+	}								\
+	__ret;								\
+})
+
+#ifdef CONFIG_SMP
+# define SMP_DEBUG_LOCKS_WARN_ON(c)			DEBUG_LOCKS_WARN_ON(c)
+#else
+# define SMP_DEBUG_LOCKS_WARN_ON(c)			do { } while (0)
+#endif
+
+#ifdef CONFIG_DEBUG_LOCKING_API_SELFTESTS
+  extern void locking_selftest(void);
+#else
+# define locking_selftest()	do { } while (0)
+#endif
+
+#ifdef CONFIG_LOCKDEP
+extern void debug_show_all_locks(void);
+extern void debug_show_held_locks(struct task_struct *task);
+extern void debug_check_no_locks_freed(const void *from, unsigned long len);
+extern void debug_check_no_locks_held(struct task_struct *task);
+#else
+static inline void debug_show_all_locks(void)
+{
+}
+
+static inline void debug_show_held_locks(struct task_struct *task)
+{
+}
+
+static inline void
+debug_check_no_locks_freed(const void *from, unsigned long len)
+{
+}
+
+static inline void
+debug_check_no_locks_held(struct task_struct *task)
+{
+}
+#endif
+
+#endif
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 3a25695..678c1a9 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -124,7 +124,6 @@ extern struct group_info init_groups;
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
 	.fs_excl	= ATOMIC_INIT(0),				\
 	.pi_lock	= SPIN_LOCK_UNLOCKED,				\
-	INIT_RT_MUTEXES(tsk)						\
 }
 
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7517952..990957e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -14,6 +14,7 @@
 #include <linux/prio_tree.h>
 #include <linux/fs.h>
 #include <linux/mutex.h>
+#include <linux/debug_locks.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -1034,13 +1035,6 @@ static inline void vm_stat_account(struct mm_struct *mm,
 }
 #endif /* CONFIG_PROC_FS */
 
-static inline void
-debug_check_no_locks_freed(const void *from, unsigned long len)
-{
-	mutex_debug_check_no_locks_freed(from, len);
-	rt_mutex_debug_check_no_locks_freed(from, len);
-}
-
 #ifndef CONFIG_DEBUG_PAGEALLOC
 static inline void
 kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h
index 8b5769f..70a2609 100644
--- a/include/linux/mutex-debug.h
+++ b/include/linux/mutex-debug.h
@@ -7,17 +7,11 @@
  * Mutexes - debugging helpers:
  */
 
-#define __DEBUG_MUTEX_INITIALIZER(lockname) \
-	, .held_list = LIST_HEAD_INIT(lockname.held_list), \
-	  .name = #lockname , .magic = &lockname
+#define __DEBUG_MUTEX_INITIALIZER(lockname)				\
+	, .magic = &lockname
 
-#define mutex_init(sem)		__mutex_init(sem, __FUNCTION__)
+#define mutex_init(sem)		__mutex_init(sem, __FILE__":"#sem)
 
 extern void FASTCALL(mutex_destroy(struct mutex *lock));
 
-extern void mutex_debug_show_all_locks(void);
-extern void mutex_debug_show_held_locks(struct task_struct *filter);
-extern void mutex_debug_check_no_locks_held(struct task_struct *task);
-extern void mutex_debug_check_no_locks_freed(const void *from, unsigned long len);
-
 #endif
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index f1ac507..caafecd 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -50,8 +50,6 @@ struct mutex {
 	struct list_head	wait_list;
 #ifdef CONFIG_DEBUG_MUTEXES
 	struct thread_info	*owner;
-	struct list_head	held_list;
-	unsigned long		acquire_ip;
 	const char 		*name;
 	void			*magic;
 #endif
@@ -76,10 +74,6 @@ struct mutex_waiter {
 # define __DEBUG_MUTEX_INITIALIZER(lockname)
 # define mutex_init(mutex)			__mutex_init(mutex, NULL)
 # define mutex_destroy(mutex)				do { } while (0)
-# define mutex_debug_show_all_locks()			do { } while (0)
-# define mutex_debug_show_held_locks(p)			do { } while (0)
-# define mutex_debug_check_no_locks_held(task)		do { } while (0)
-# define mutex_debug_check_no_locks_freed(from, len)	do { } while (0)
 #endif
 
 #define __MUTEX_INITIALIZER(lockname) \
diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index fa4a3b8..5d41dee 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -29,8 +29,6 @@ struct rt_mutex {
 	struct task_struct	*owner;
 #ifdef CONFIG_DEBUG_RT_MUTEXES
 	int			save_state;
-	struct list_head	held_list_entry;
-	unsigned long		acquire_ip;
 	const char 		*name, *file;
 	int			line;
 	void			*magic;
@@ -98,14 +96,6 @@ extern int rt_mutex_trylock(struct rt_mutex *lock);
 
 extern void rt_mutex_unlock(struct rt_mutex *lock);
 
-#ifdef CONFIG_DEBUG_RT_MUTEXES
-# define INIT_RT_MUTEX_DEBUG(tsk)					\
-	.held_list_head	= LIST_HEAD_INIT(tsk.held_list_head),		\
-	.held_list_lock	= SPIN_LOCK_UNLOCKED
-#else
-# define INIT_RT_MUTEX_DEBUG(tsk)
-#endif
-
 #ifdef CONFIG_RT_MUTEXES
 # define INIT_RT_MUTEXES(tsk)						\
 	.pi_waiters	= PLIST_HEAD_INIT(tsk.pi_waiters, tsk.pi_lock),	\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index aaf7233..bdabeee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -865,10 +865,6 @@ struct task_struct {
 	struct plist_head pi_waiters;
 	/* Deadlock detection and priority inheritance handling */
 	struct rt_mutex_waiter *pi_blocked_on;
-# ifdef CONFIG_DEBUG_RT_MUTEXES
-	spinlock_t held_list_lock;
-	struct list_head held_list_head;
-# endif
 #endif
 
 #ifdef CONFIG_DEBUG_MUTEXES
diff --git a/init/main.c b/init/main.c
index d604dfe..fc73e1c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -47,6 +47,7 @@
 #include <linux/key.h>
 #include <linux/unwind.h>
 #include <linux/buffer_head.h>
+#include <linux/debug_locks.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -511,6 +512,13 @@ asmlinkage void __init start_kernel(void)
 	console_init();
 	if (panic_later)
 		panic(panic_later, panic_param);
+	/*
+	 * Need to run this when irqs are enabled, because it wants
+	 * to self-test [hard/soft]-irqs on/off lock inversion bugs
+	 * too:
+	 */
+	locking_selftest();
+
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (initrd_start && !initrd_below_start_ok &&
 			initrd_start < min_low_pfn << PAGE_SHIFT) {
diff --git a/kernel/exit.c b/kernel/exit.c
index 7f7ef22..c595db1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -933,10 +933,9 @@ fastcall NORET_TYPE void do_exit(long code)
 	if (unlikely(current->pi_state_cache))
 		kfree(current->pi_state_cache);
 	/*
-	 * If DEBUG_MUTEXES is on, make sure we are holding no locks:
+	 * Make sure we are holding no locks:
 	 */
-	mutex_debug_check_no_locks_held(tsk);
-	rt_mutex_debug_check_no_locks_held(tsk);
+	debug_check_no_locks_held(tsk);
 
 	if (tsk->io_context)
 		exit_io_context();
diff --git a/kernel/fork.c b/kernel/fork.c
index 9064bf9..1cd46a4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -919,10 +919,6 @@ static inline void rt_mutex_init_task(struct task_struct *p)
 	spin_lock_init(&p->pi_lock);
 	plist_head_init(&p->pi_waiters, &p->pi_lock);
 	p->pi_blocked_on = NULL;
-# ifdef CONFIG_DEBUG_RT_MUTEXES
-	spin_lock_init(&p->held_list_lock);
-	INIT_LIST_HEAD(&p->held_list_head);
-# endif
 #endif
 }
 
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index a92de14..5569766 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -20,52 +20,19 @@
 #include <linux/spinlock.h>
 #include <linux/kallsyms.h>
 #include <linux/interrupt.h>
+#include <linux/debug_locks.h>
 
 #include "mutex-debug.h"
 
 /*
- * We need a global lock when we walk through the multi-process
- * lock tree. Only used in the deadlock-debugging case.
- */
-DEFINE_SPINLOCK(debug_mutex_lock);
-
-/*
- * All locks held by all tasks, in a single global list:
- */
-LIST_HEAD(debug_mutex_held_locks);
-
-/*
- * In the debug case we carry the caller's instruction pointer into
- * other functions, but we dont want the function argument overhead
- * in the nondebug case - hence these macros:
- */
-#define __IP_DECL__		, unsigned long ip
-#define __IP__			, ip
-#define __RET_IP__		, (unsigned long)__builtin_return_address(0)
-
-/*
- * "mutex debugging enabled" flag. We turn it off when we detect
- * the first problem because we dont want to recurse back
- * into the tracing code when doing error printk or
- * executing a BUG():
- */
-int debug_mutex_on = 1;
-
-/*
  * Must be called with lock->wait_lock held.
  */
-void debug_mutex_set_owner(struct mutex *lock,
-			   struct thread_info *new_owner __IP_DECL__)
+void debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner)
 {
 	lock->owner = new_owner;
-	DEBUG_LOCKS_WARN_ON(!list_empty(&lock->held_list));
-	if (debug_mutex_on) {
-		list_add_tail(&lock->held_list, &debug_mutex_held_locks);
-		lock->acquire_ip = ip;
-	}
 }
 
-void debug_mutex_init_waiter(struct mutex_waiter *waiter)
+void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
 {
 	memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
 	waiter->magic = waiter;
@@ -87,9 +54,10 @@ void debug_mutex_free_waiter(struct mutex_waiter *waiter)
 }
 
 void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
-			    struct thread_info *ti __IP_DECL__)
+			    struct thread_info *ti)
 {
 	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
+
 	/* Mark the current thread as blocked on the lock: */
 	ti->task->blocked_on = waiter;
 	waiter->lock = lock;
@@ -109,13 +77,10 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 
 void debug_mutex_unlock(struct mutex *lock)
 {
+	DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
 	DEBUG_LOCKS_WARN_ON(lock->magic != lock);
 	DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
 	DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
-	if (debug_mutex_on) {
-		DEBUG_LOCKS_WARN_ON(list_empty(&lock->held_list));
-		list_del_init(&lock->held_list);
-	}
 }
 
 void debug_mutex_init(struct mutex *lock, const char *name)
@@ -123,10 +88,8 @@ void debug_mutex_init(struct mutex *lock, const char *name)
 	/*
 	 * Make sure we are not reinitializing a held lock:
 	 */
-	mutex_debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
 	lock->owner = NULL;
-	INIT_LIST_HEAD(&lock->held_list);
-	lock->name = name;
 	lock->magic = lock;
 }
 
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index bdab13a..babfbdf 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -10,102 +10,44 @@
  * More details are in kernel/mutex-debug.c.
  */
 
-extern spinlock_t debug_mutex_lock;
-extern struct list_head debug_mutex_held_locks;
-extern int debug_mutex_on;
-
-/*
- * In the debug case we carry the caller's instruction pointer into
- * other functions, but we dont want the function argument overhead
- * in the nondebug case - hence these macros:
- */
-#define __IP_DECL__		, unsigned long ip
-#define __IP__			, ip
-#define __RET_IP__		, (unsigned long)__builtin_return_address(0)
-
 /*
  * This must be called with lock->wait_lock held.
  */
-extern void debug_mutex_set_owner(struct mutex *lock,
-				  struct thread_info *new_owner __IP_DECL__);
+extern void
+debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner);
 
 static inline void debug_mutex_clear_owner(struct mutex *lock)
 {
 	lock->owner = NULL;
 }
 
-extern void debug_mutex_init_waiter(struct mutex_waiter *waiter);
+extern void debug_mutex_lock_common(struct mutex *lock,
+				    struct mutex_waiter *waiter);
 extern void debug_mutex_wake_waiter(struct mutex *lock,
 				    struct mutex_waiter *waiter);
 extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
 extern void debug_mutex_add_waiter(struct mutex *lock,
 				   struct mutex_waiter *waiter,
-				   struct thread_info *ti __IP_DECL__);
+				   struct thread_info *ti);
 extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 				struct thread_info *ti);
 extern void debug_mutex_unlock(struct mutex *lock);
-extern void debug_mutex_init(struct mutex *lock, const char *name);
-
-#define debug_spin_lock_save(lock, flags)		\
-	do {						\
-		local_irq_save(flags);			\
-		if (debug_mutex_on)			\
-			spin_lock(lock);		\
-	} while (0)
-
-#define debug_spin_unlock_restore(lock, flags)		\
-	do {						\
-		if (debug_mutex_on)			\
-			spin_unlock(lock);		\
-		local_irq_restore(flags);		\
-		preempt_check_resched();		\
-	} while (0)
+extern void debug_mutex_init(struct mutex *lock, const char *name,
+			     struct lock_class_key *key);
 
 #define spin_lock_mutex(lock, flags)			\
 	do {						\
 		struct mutex *l = container_of(lock, struct mutex, wait_lock); \
 							\
 		DEBUG_LOCKS_WARN_ON(in_interrupt());	\
-		debug_spin_lock_save(&debug_mutex_lock, flags); \
-		spin_lock(lock);			\
+		local_irq_save(flags);			\
+		__raw_spin_lock(&(lock)->raw_lock);	\
 		DEBUG_LOCKS_WARN_ON(l->magic != l);	\
 	} while (0)
 
 #define spin_unlock_mutex(lock, flags)			\
 	do {						\
-		spin_unlock(lock);			\
-		debug_spin_unlock_restore(&debug_mutex_lock, flags);	\
+		__raw_spin_unlock(&(lock)->raw_lock);	\
+		local_irq_restore(flags);		\
+		preempt_check_resched();		\
 	} while (0)
-
-#define DEBUG_OFF()					\
-do {							\
-	if (debug_mutex_on) {				\
-		debug_mutex_on = 0;			\
-		console_verbose();			\
-		if (spin_is_locked(&debug_mutex_lock))	\
-			spin_unlock(&debug_mutex_lock);	\
-	}						\
-} while (0)
-
-#define DEBUG_BUG()					\
-do {							\
-	if (debug_mutex_on) {				\
-		DEBUG_OFF();				\
-		BUG();					\
-	}						\
-} while (0)
-
-#define DEBUG_LOCKS_WARN_ON(c)				\
-do {							\
-	if (unlikely(c && debug_mutex_on)) {		\
-		DEBUG_OFF();				\
-		WARN_ON(1);				\
-	}						\
-} while (0)
-
-#ifdef CONFIG_SMP
-# define SMP_DEBUG_LOCKS_WARN_ON(c)			DEBUG_LOCKS_WARN_ON(c)
-#else
-# define SMP_DEBUG_LOCKS_WARN_ON(c)			do { } while (0)
-#endif
-
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 101ceeb..3aad0b7 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
+#include <linux/debug_locks.h>
 
 /*
  * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -38,7 +39,7 @@
  *
  * It is not allowed to initialize an already locked mutex.
  */
-void fastcall __mutex_init(struct mutex *lock, const char *name)
+__always_inline void fastcall __mutex_init(struct mutex *lock, const char *name)
 {
 	atomic_set(&lock->count, 1);
 	spin_lock_init(&lock->wait_lock);
@@ -56,7 +57,7 @@ EXPORT_SYMBOL(__mutex_init);
  * branch is predicted by the CPU as default-untaken.
  */
 static void fastcall noinline __sched
-__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__);
+__mutex_lock_slowpath(atomic_t *lock_count);
 
 /***
  * mutex_lock - acquire the mutex
@@ -79,7 +80,7 @@ __mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__);
  *
  * This function is similar to (but not equivalent to) down().
  */
-void fastcall __sched mutex_lock(struct mutex *lock)
+void inline fastcall __sched mutex_lock(struct mutex *lock)
 {
 	might_sleep();
 	/*
@@ -92,7 +93,7 @@ void fastcall __sched mutex_lock(struct mutex *lock)
 EXPORT_SYMBOL(mutex_lock);
 
 static void fastcall noinline __sched
-__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__);
+__mutex_unlock_slowpath(atomic_t *lock_count);
 
 /***
  * mutex_unlock - release the mutex
@@ -120,18 +121,17 @@ EXPORT_SYMBOL(mutex_unlock);
  * Lock a mutex (possibly interruptible), slowpath:
  */
 static inline int __sched
-__mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
+__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
 {
 	struct task_struct *task = current;
 	struct mutex_waiter waiter;
 	unsigned int old_val;
 	unsigned long flags;
 
-	debug_mutex_init_waiter(&waiter);
-
 	spin_lock_mutex(&lock->wait_lock, flags);
 
-	debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip);
+	debug_mutex_lock_common(lock, &waiter);
+	debug_mutex_add_waiter(lock, &waiter, task->thread_info);
 
 	/* add waiting tasks to the end of the waitqueue (FIFO): */
 	list_add_tail(&waiter.list, &lock->wait_list);
@@ -173,7 +173,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
 
 	/* got the lock - rejoice! */
 	mutex_remove_waiter(lock, &waiter, task->thread_info);
-	debug_mutex_set_owner(lock, task->thread_info __IP__);
+	debug_mutex_set_owner(lock, task->thread_info);
 
 	/* set it to 0 if there are no waiters left: */
 	if (likely(list_empty(&lock->wait_list)))
@@ -183,32 +183,28 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
 
 	debug_mutex_free_waiter(&waiter);
 
-	DEBUG_LOCKS_WARN_ON(list_empty(&lock->held_list));
-	DEBUG_LOCKS_WARN_ON(lock->owner != task->thread_info);
-
 	return 0;
 }
 
 static void fastcall noinline __sched
-__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__)
+__mutex_lock_slowpath(atomic_t *lock_count)
 {
 	struct mutex *lock = container_of(lock_count, struct mutex, count);
 
-	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE __IP__);
+	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
 }
 
 /*
  * Release the lock, slowpath:
  */
-static fastcall noinline void
-__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
+static fastcall inline void
+__mutex_unlock_common_slowpath(atomic_t *lock_count)
 {
 	struct mutex *lock = container_of(lock_count, struct mutex, count);
 	unsigned long flags;
 
-	DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
-
 	spin_lock_mutex(&lock->wait_lock, flags);
+	debug_mutex_unlock(lock);
 
 	/*
 	 * some architectures leave the lock unlocked in the fastpath failure
@@ -218,8 +214,6 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
 	if (__mutex_slowpath_needs_to_unlock())
 		atomic_set(&lock->count, 1);
 
-	debug_mutex_unlock(lock);
-
 	if (!list_empty(&lock->wait_list)) {
 		/* get the first entry from the wait-list: */
 		struct mutex_waiter *waiter =
@@ -237,11 +231,20 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
 }
 
 /*
+ * Release the lock, slowpath:
+ */
+static fastcall noinline void
+__mutex_unlock_slowpath(atomic_t *lock_count)
+{
+	__mutex_unlock_common_slowpath(lock_count);
+}
+
+/*
  * Here come the less common (and hence less performance-critical) APIs:
  * mutex_lock_interruptible() and mutex_trylock().
  */
 static int fastcall noinline __sched
-__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__);
+__mutex_lock_interruptible_slowpath(atomic_t *lock_count);
 
 /***
  * mutex_lock_interruptible - acquire the mutex, interruptable
@@ -264,11 +267,11 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock)
 EXPORT_SYMBOL(mutex_lock_interruptible);
 
 static int fastcall noinline __sched
-__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__)
+__mutex_lock_interruptible_slowpath(atomic_t *lock_count)
 {
 	struct mutex *lock = container_of(lock_count, struct mutex, count);
 
-	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE __IP__);
+	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
 }
 
 /*
@@ -285,7 +288,8 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
 
 	prev = atomic_xchg(&lock->count, -1);
 	if (likely(prev == 1))
-		debug_mutex_set_owner(lock, current_thread_info() __RET_IP__);
+		debug_mutex_set_owner(lock, current_thread_info());
+
 	/* Set it back to 0 if there are no waiters: */
 	if (likely(list_empty(&lock->wait_list)))
 		atomic_set(&lock->count, 0);
diff --git a/kernel/mutex.h b/kernel/mutex.h
index 7e1ed48..aeb2d91 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -16,22 +16,15 @@
 #define mutex_remove_waiter(lock, waiter, ti) \
 		__list_del((waiter)->list.prev, (waiter)->list.next)
 
-#define DEBUG_LOCKS_WARN_ON(c)				do { } while (0)
 #define debug_mutex_set_owner(lock, new_owner)		do { } while (0)
 #define debug_mutex_clear_owner(lock)			do { } while (0)
-#define debug_mutex_init_waiter(waiter)			do { } while (0)
 #define debug_mutex_wake_waiter(lock, waiter)		do { } while (0)
 #define debug_mutex_free_waiter(waiter)			do { } while (0)
-#define debug_mutex_add_waiter(lock, waiter, ti, ip)	do { } while (0)
+#define debug_mutex_add_waiter(lock, waiter, ti)	do { } while (0)
 #define debug_mutex_unlock(lock)			do { } while (0)
 #define debug_mutex_init(lock, name)			do { } while (0)
 
-/*
- * Return-address parameters/declarations. They are very useful for
- * debugging, but add overhead in the !DEBUG case - so we go the
- * trouble of using this not too elegant but zero-cost solution:
- */
-#define __IP_DECL__
-#define __IP__
-#define __RET_IP__
-
+static inline void
+debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
+{
+}
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 4aa8a2c..353a853 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -26,6 +26,7 @@
 #include <linux/interrupt.h>
 #include <linux/plist.h>
 #include <linux/fs.h>
+#include <linux/debug_locks.h>
 
 #include "rtmutex_common.h"
 
@@ -45,8 +46,6 @@ do {								\
 		console_verbose();				\
 		if (spin_is_locked(&current->pi_lock))		\
 			spin_unlock(&current->pi_lock);		\
-		if (spin_is_locked(&current->held_list_lock))	\
-			spin_unlock(&current->held_list_lock);	\
 	}							\
 } while (0)
 
@@ -105,14 +104,6 @@ static void printk_task(task_t *p)
 		printk("<none>");
 }
 
-static void printk_task_short(task_t *p)
-{
-	if (p)
-		printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio);
-	else
-		printk("<none>");
-}
-
 static void printk_lock(struct rt_mutex *lock, int print_owner)
 {
 	if (lock->name)
@@ -128,222 +119,6 @@ static void printk_lock(struct rt_mutex *lock, int print_owner)
 		printk_task(rt_mutex_owner(lock));
 		printk("\n");
 	}
-	if (rt_mutex_owner(lock)) {
-		printk("... acquired at:               ");
-		print_symbol("%s\n", lock->acquire_ip);
-	}
-}
-
-static void printk_waiter(struct rt_mutex_waiter *w)
-{
-	printk("-------------------------\n");
-	printk("| waiter struct %p:\n", w);
-	printk("| w->list_entry: [DP:%p/%p|SP:%p/%p|PRI:%d]\n",
-	       w->list_entry.plist.prio_list.prev, w->list_entry.plist.prio_list.next,
-	       w->list_entry.plist.node_list.prev, w->list_entry.plist.node_list.next,
-	       w->list_entry.prio);
-	printk("| w->pi_list_entry: [DP:%p/%p|SP:%p/%p|PRI:%d]\n",
-	       w->pi_list_entry.plist.prio_list.prev, w->pi_list_entry.plist.prio_list.next,
-	       w->pi_list_entry.plist.node_list.prev, w->pi_list_entry.plist.node_list.next,
-	       w->pi_list_entry.prio);
-	printk("\n| lock:\n");
-	printk_lock(w->lock, 1);
-	printk("| w->ti->task:\n");
-	printk_task(w->task);
-	printk("| blocked at:  ");
-	print_symbol("%s\n", w->ip);
-	printk("-------------------------\n");
-}
-
-static void show_task_locks(task_t *p)
-{
-	switch (p->state) {
-	case TASK_RUNNING:		printk("R"); break;
-	case TASK_INTERRUPTIBLE:	printk("S"); break;
-	case TASK_UNINTERRUPTIBLE:	printk("D"); break;
-	case TASK_STOPPED:		printk("T"); break;
-	case EXIT_ZOMBIE:		printk("Z"); break;
-	case EXIT_DEAD:			printk("X"); break;
-	default:			printk("?"); break;
-	}
-	printk_task(p);
-	if (p->pi_blocked_on) {
-		struct rt_mutex *lock = p->pi_blocked_on->lock;
-
-		printk(" blocked on:");
-		printk_lock(lock, 1);
-	} else
-		printk(" (not blocked)\n");
-}
-
-void rt_mutex_show_held_locks(task_t *task, int verbose)
-{
-	struct list_head *curr, *cursor = NULL;
-	struct rt_mutex *lock;
-	task_t *t;
-	unsigned long flags;
-	int count = 0;
-
-	if (!rt_trace_on)
-		return;
-
-	if (verbose) {
-		printk("------------------------------\n");
-		printk("| showing all locks held by: |  (");
-		printk_task_short(task);
-		printk("):\n");
-		printk("------------------------------\n");
-	}
-
-next:
-	spin_lock_irqsave(&task->held_list_lock, flags);
-	list_for_each(curr, &task->held_list_head) {
-		if (cursor && curr != cursor)
-			continue;
-		lock = list_entry(curr, struct rt_mutex, held_list_entry);
-		t = rt_mutex_owner(lock);
-		WARN_ON(t != task);
-		count++;
-		cursor = curr->next;
-		spin_unlock_irqrestore(&task->held_list_lock, flags);
-
-		printk("\n#%03d:            ", count);
-		printk_lock(lock, 0);
-		goto next;
-	}
-	spin_unlock_irqrestore(&task->held_list_lock, flags);
-
-	printk("\n");
-}
-
-void rt_mutex_show_all_locks(void)
-{
-	task_t *g, *p;
-	int count = 10;
-	int unlock = 1;
-
-	printk("\n");
-	printk("----------------------\n");
-	printk("| showing all tasks: |\n");
-	printk("----------------------\n");
-
-	/*
-	 * Here we try to get the tasklist_lock as hard as possible,
-	 * if not successful after 2 seconds we ignore it (but keep
-	 * trying). This is to enable a debug printout even if a
-	 * tasklist_lock-holding task deadlocks or crashes.
-	 */
-retry:
-	if (!read_trylock(&tasklist_lock)) {
-		if (count == 10)
-			printk("hm, tasklist_lock locked, retrying... ");
-		if (count) {
-			count--;
-			printk(" #%d", 10-count);
-			mdelay(200);
-			goto retry;
-		}
-		printk(" ignoring it.\n");
-		unlock = 0;
-	}
-	if (count != 10)
-		printk(" locked it.\n");
-
-	do_each_thread(g, p) {
-		show_task_locks(p);
-		if (!unlock)
-			if (read_trylock(&tasklist_lock))
-				unlock = 1;
-	} while_each_thread(g, p);
-
-	printk("\n");
-
-	printk("-----------------------------------------\n");
-	printk("| showing all locks held in the system: |\n");
-	printk("-----------------------------------------\n");
-
-	do_each_thread(g, p) {
-		rt_mutex_show_held_locks(p, 0);
-		if (!unlock)
-			if (read_trylock(&tasklist_lock))
-				unlock = 1;
-	} while_each_thread(g, p);
-
-
-	printk("=============================================\n\n");
-
-	if (unlock)
-		read_unlock(&tasklist_lock);
-}
-
-void rt_mutex_debug_check_no_locks_held(task_t *task)
-{
-	struct rt_mutex_waiter *w;
-	struct list_head *curr;
-	struct rt_mutex *lock;
-
-	if (!rt_trace_on)
-		return;
-	if (!rt_prio(task->normal_prio) && rt_prio(task->prio)) {
-		printk("BUG: PI priority boost leaked!\n");
-		printk_task(task);
-		printk("\n");
-	}
-	if (list_empty(&task->held_list_head))
-		return;
-
-	spin_lock(&task->pi_lock);
-	plist_for_each_entry(w, &task->pi_waiters, pi_list_entry) {
-		TRACE_OFF();
-
-		printk("hm, PI interest held at exit time? Task:\n");
-		printk_task(task);
-		printk_waiter(w);
-		return;
-	}
-	spin_unlock(&task->pi_lock);
-
-	list_for_each(curr, &task->held_list_head) {
-		lock = list_entry(curr, struct rt_mutex, held_list_entry);
-
-		printk("BUG: %s/%d, lock held at task exit time!\n",
-		       task->comm, task->pid);
-		printk_lock(lock, 1);
-		if (rt_mutex_owner(lock) != task)
-			printk("exiting task is not even the owner??\n");
-	}
-}
-
-int rt_mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
-{
-	const void *to = from + len;
-	struct list_head *curr;
-	struct rt_mutex *lock;
-	unsigned long flags;
-	void *lock_addr;
-
-	if (!rt_trace_on)
-		return 0;
-
-	spin_lock_irqsave(&current->held_list_lock, flags);
-	list_for_each(curr, &current->held_list_head) {
-		lock = list_entry(curr, struct rt_mutex, held_list_entry);
-		lock_addr = lock;
-		if (lock_addr < from || lock_addr >= to)
-			continue;
-		TRACE_OFF();
-
-		printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
-			current->comm, current->pid, lock, from, to);
-		dump_stack();
-		printk_lock(lock, 1);
-		if (rt_mutex_owner(lock) != current)
-			printk("freeing task is not even the owner??\n");
-		return 1;
-	}
-	spin_unlock_irqrestore(&current->held_list_lock, flags);
-
-	return 0;
 }
 
 void rt_mutex_debug_task_free(struct task_struct *task)
@@ -395,85 +170,41 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
 	       current->comm, current->pid);
 	printk_lock(waiter->lock, 1);
 
-	printk("... trying at:                 ");
-	print_symbol("%s\n", waiter->ip);
-
 	printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid);
 	printk_lock(waiter->deadlock_lock, 1);
 
-	rt_mutex_show_held_locks(current, 1);
-	rt_mutex_show_held_locks(task, 1);
+	debug_show_held_locks(current);
+	debug_show_held_locks(task);
 
 	printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid);
 	show_stack(task, NULL);
 	printk("\n%s/%d's [current] stackdump:\n\n",
 	       current->comm, current->pid);
 	dump_stack();
-	rt_mutex_show_all_locks();
+	debug_show_all_locks();
+
 	printk("[ turning off deadlock detection."
 	       "Please report this trace. ]\n\n");
 	local_irq_disable();
 }
 
-void debug_rt_mutex_lock(struct rt_mutex *lock __IP_DECL__)
+void debug_rt_mutex_lock(struct rt_mutex *lock)
 {
-	unsigned long flags;
-
-	if (rt_trace_on) {
-		TRACE_WARN_ON_LOCKED(!list_empty(&lock->held_list_entry));
-
-		spin_lock_irqsave(&current->held_list_lock, flags);
-		list_add_tail(&lock->held_list_entry, &current->held_list_head);
-		spin_unlock_irqrestore(&current->held_list_lock, flags);
-
-		lock->acquire_ip = ip;
-	}
 }
 
 void debug_rt_mutex_unlock(struct rt_mutex *lock)
 {
-	unsigned long flags;
-
-	if (rt_trace_on) {
-		TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current);
-		TRACE_WARN_ON_LOCKED(list_empty(&lock->held_list_entry));
-
-		spin_lock_irqsave(&current->held_list_lock, flags);
-		list_del_init(&lock->held_list_entry);
-		spin_unlock_irqrestore(&current->held_list_lock, flags);
-	}
+	TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current);
 }
 
-void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
-			       struct task_struct *powner __IP_DECL__)
+void
+debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner)
 {
-	unsigned long flags;
-
-	if (rt_trace_on) {
-		TRACE_WARN_ON_LOCKED(!list_empty(&lock->held_list_entry));
-
-		spin_lock_irqsave(&powner->held_list_lock, flags);
-		list_add_tail(&lock->held_list_entry, &powner->held_list_head);
-		spin_unlock_irqrestore(&powner->held_list_lock, flags);
-
-		lock->acquire_ip = ip;
-	}
 }
 
 void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
 {
-	unsigned long flags;
-
-	if (rt_trace_on) {
-		struct task_struct *owner = rt_mutex_owner(lock);
-
-		TRACE_WARN_ON_LOCKED(!owner);
-		TRACE_WARN_ON_LOCKED(list_empty(&lock->held_list_entry));
-
-		spin_lock_irqsave(&owner->held_list_lock, flags);
-		list_del_init(&lock->held_list_entry);
-		spin_unlock_irqrestore(&owner->held_list_lock, flags);
-	}
+	TRACE_WARN_ON_LOCKED(!rt_mutex_owner(lock));
 }
 
 void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
@@ -493,14 +224,11 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
 
 void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
 {
-	void *addr = lock;
-
-	if (rt_trace_on) {
-		rt_mutex_debug_check_no_locks_freed(addr,
-						    sizeof(struct rt_mutex));
-		INIT_LIST_HEAD(&lock->held_list_entry);
-		lock->name = name;
-	}
+	/*
+	 * Make sure we are not reinitializing a held lock:
+	 */
+	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+	lock->name = name;
 }
 
 void rt_mutex_deadlock_account_lock(struct rt_mutex *lock, task_t *task)
diff --git a/kernel/rtmutex-debug.h b/kernel/rtmutex-debug.h
index 7612fbc..14193d5 100644
--- a/kernel/rtmutex-debug.h
+++ b/kernel/rtmutex-debug.h
@@ -9,20 +9,16 @@
  * This file contains macros used solely by rtmutex.c. Debug version.
  */
 
-#define __IP_DECL__		, unsigned long ip
-#define __IP__			, ip
-#define __RET_IP__		, (unsigned long)__builtin_return_address(0)
-
 extern void
 rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task);
 extern void rt_mutex_deadlock_account_unlock(struct task_struct *task);
 extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
 extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
 extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
-extern void debug_rt_mutex_lock(struct rt_mutex *lock __IP_DECL__);
+extern void debug_rt_mutex_lock(struct rt_mutex *lock);
 extern void debug_rt_mutex_unlock(struct rt_mutex *lock);
 extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
-				      struct task_struct *powner __IP_DECL__);
+				      struct task_struct *powner);
 extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock);
 extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter,
 				    struct rt_mutex *lock);
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 45d6101..91b699a 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -161,8 +161,7 @@ static int rt_mutex_adjust_prio_chain(task_t *task,
 				      int deadlock_detect,
 				      struct rt_mutex *orig_lock,
 				      struct rt_mutex_waiter *orig_waiter,
-				      struct task_struct *top_task
-				      __IP_DECL__)
+				      struct task_struct *top_task)
 {
 	struct rt_mutex *lock;
 	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
@@ -357,7 +356,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock)
  *
  * Must be called with lock->wait_lock held.
  */
-static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__)
+static int try_to_take_rt_mutex(struct rt_mutex *lock)
 {
 	/*
 	 * We have to be careful here if the atomic speedups are
@@ -384,7 +383,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__)
 		return 0;
 
 	/* We got the lock. */
-	debug_rt_mutex_lock(lock __IP__);
+	debug_rt_mutex_lock(lock);
 
 	rt_mutex_set_owner(lock, current, 0);
 
@@ -402,8 +401,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__)
  */
 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 				   struct rt_mutex_waiter *waiter,
-				   int detect_deadlock
-				   __IP_DECL__)
+				   int detect_deadlock)
 {
 	struct rt_mutex_waiter *top_waiter = waiter;
 	task_t *owner = rt_mutex_owner(lock);
@@ -454,7 +452,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 	spin_unlock(&lock->wait_lock);
 
 	res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
-					 current __IP__);
+					 current);
 
 	spin_lock(&lock->wait_lock);
 
@@ -526,7 +524,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
  * Must be called with lock->wait_lock held
  */
 static void remove_waiter(struct rt_mutex *lock,
-			  struct rt_mutex_waiter *waiter  __IP_DECL__)
+			  struct rt_mutex_waiter *waiter)
 {
 	int first = (waiter == rt_mutex_top_waiter(lock));
 	int boost = 0;
@@ -568,7 +566,7 @@ static void remove_waiter(struct rt_mutex *lock,
 
 	spin_unlock(&lock->wait_lock);
 
-	rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current __IP__);
+	rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
 
 	spin_lock(&lock->wait_lock);
 }
@@ -595,7 +593,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
 	get_task_struct(task);
 	spin_unlock_irqrestore(&task->pi_lock, flags);
 
-	rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task __RET_IP__);
+	rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
 }
 
 /*
@@ -604,7 +602,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
 static int __sched
 rt_mutex_slowlock(struct rt_mutex *lock, int state,
 		  struct hrtimer_sleeper *timeout,
-		  int detect_deadlock __IP_DECL__)
+		  int detect_deadlock)
 {
 	struct rt_mutex_waiter waiter;
 	int ret = 0;
@@ -615,7 +613,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 	spin_lock(&lock->wait_lock);
 
 	/* Try to acquire the lock again: */
-	if (try_to_take_rt_mutex(lock __IP__)) {
+	if (try_to_take_rt_mutex(lock)) {
 		spin_unlock(&lock->wait_lock);
 		return 0;
 	}
@@ -629,7 +627,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 
 	for (;;) {
 		/* Try to acquire the lock: */
-		if (try_to_take_rt_mutex(lock __IP__))
+		if (try_to_take_rt_mutex(lock))
 			break;
 
 		/*
@@ -653,7 +651,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 		 */
 		if (!waiter.task) {
 			ret = task_blocks_on_rt_mutex(lock, &waiter,
-						      detect_deadlock __IP__);
+						      detect_deadlock);
 			/*
 			 * If we got woken up by the owner then start loop
 			 * all over without going into schedule to try
@@ -680,7 +678,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 	set_current_state(TASK_RUNNING);
 
 	if (unlikely(waiter.task))
-		remove_waiter(lock, &waiter __IP__);
+		remove_waiter(lock, &waiter);
 
 	/*
 	 * try_to_take_rt_mutex() sets the waiter bit
@@ -711,7 +709,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
  * Slow path try-lock function:
  */
 static inline int
-rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__)
+rt_mutex_slowtrylock(struct rt_mutex *lock)
 {
 	int ret = 0;
 
@@ -719,7 +717,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__)
 
 	if (likely(rt_mutex_owner(lock) != current)) {
 
-		ret = try_to_take_rt_mutex(lock __IP__);
+		ret = try_to_take_rt_mutex(lock);
 		/*
 		 * try_to_take_rt_mutex() sets the lock waiters
 		 * bit unconditionally. Clean this up.
@@ -769,13 +767,13 @@ rt_mutex_fastlock(struct rt_mutex *lock, int state,
 		  int detect_deadlock,
 		  int (*slowfn)(struct rt_mutex *lock, int state,
 				struct hrtimer_sleeper *timeout,
-				int detect_deadlock __IP_DECL__))
+				int detect_deadlock))
 {
 	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
 		rt_mutex_deadlock_account_lock(lock, current);
 		return 0;
 	} else
-		return slowfn(lock, state, NULL, detect_deadlock __RET_IP__);
+		return slowfn(lock, state, NULL, detect_deadlock);
 }
 
 static inline int
@@ -783,24 +781,24 @@ rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
 			struct hrtimer_sleeper *timeout, int detect_deadlock,
 			int (*slowfn)(struct rt_mutex *lock, int state,
 				      struct hrtimer_sleeper *timeout,
-				      int detect_deadlock __IP_DECL__))
+				      int detect_deadlock))
 {
 	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
 		rt_mutex_deadlock_account_lock(lock, current);
 		return 0;
 	} else
-		return slowfn(lock, state, timeout, detect_deadlock __RET_IP__);
+		return slowfn(lock, state, timeout, detect_deadlock);
 }
 
 static inline int
 rt_mutex_fasttrylock(struct rt_mutex *lock,
-		     int (*slowfn)(struct rt_mutex *lock __IP_DECL__))
+		     int (*slowfn)(struct rt_mutex *lock))
 {
 	if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
 		rt_mutex_deadlock_account_lock(lock, current);
 		return 1;
 	}
-	return slowfn(lock __RET_IP__);
+	return slowfn(lock);
 }
 
 static inline void
@@ -948,7 +946,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
 				struct task_struct *proxy_owner)
 {
 	__rt_mutex_init(lock, NULL);
-	debug_rt_mutex_proxy_lock(lock, proxy_owner __RET_IP__);
+	debug_rt_mutex_proxy_lock(lock, proxy_owner);
 	rt_mutex_set_owner(lock, proxy_owner, 0);
 	rt_mutex_deadlock_account_lock(lock, proxy_owner);
 }
diff --git a/kernel/rtmutex.h b/kernel/rtmutex.h
index 1e0fca1..a1a1dd0 100644
--- a/kernel/rtmutex.h
+++ b/kernel/rtmutex.h
@@ -10,9 +10,6 @@
  * Non-debug version.
  */
 
-#define __IP_DECL__
-#define __IP__
-#define __RET_IP__
 #define rt_mutex_deadlock_check(l)			(0)
 #define rt_mutex_deadlock_account_lock(m, t)		do { } while (0)
 #define rt_mutex_deadlock_account_unlock(l)		do { } while (0)
diff --git a/kernel/sched.c b/kernel/sched.c
index d5e3707..48c1faa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -30,6 +30,7 @@
 #include <linux/capability.h>
 #include <linux/completion.h>
 #include <linux/kernel_stat.h>
+#include <linux/debug_locks.h>
 #include <linux/security.h>
 #include <linux/notifier.h>
 #include <linux/profile.h>
@@ -3142,12 +3143,13 @@ void fastcall add_preempt_count(int val)
 	/*
 	 * Underflow?
 	 */
-	BUG_ON((preempt_count() < 0));
+	if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
+		return;
 	preempt_count() += val;
 	/*
 	 * Spinlock count overflowing soon?
 	 */
-	BUG_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);
+	DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);
 }
 EXPORT_SYMBOL(add_preempt_count);
 
@@ -3156,11 +3158,15 @@ void fastcall sub_preempt_count(int val)
 	/*
 	 * Underflow?
 	 */
-	BUG_ON(val > preempt_count());
+	if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
+		return;
 	/*
 	 * Is the spinlock portion underflowing?
 	 */
-	BUG_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK));
+	if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
+			!(preempt_count() & PREEMPT_MASK)))
+		return;
+
 	preempt_count() -= val;
 }
 EXPORT_SYMBOL(sub_preempt_count);
@@ -4690,7 +4696,7 @@ void show_state(void)
 	} while_each_thread(g, p);
 
 	read_unlock(&tasklist_lock);
-	mutex_debug_show_all_locks();
+	debug_show_all_locks();
 }
 
 /**
diff --git a/lib/Makefile b/lib/Makefile
index 10c13c9..4f5d019 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,7 +11,7 @@ lib-$(CONFIG_SMP) += cpumask.o
 
 lib-y	+= kobject.o kref.o kobject_uevent.o klist.o
 
-obj-y += sort.o parser.o halfmd4.o iomap_copy.o
+obj-y += sort.o parser.o halfmd4.o iomap_copy.o debug_locks.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
new file mode 100644
index 0000000..0ef01d1
--- /dev/null
+++ b/lib/debug_locks.c
@@ -0,0 +1,45 @@
+/*
+ * lib/debug_locks.c
+ *
+ * Generic place for common debugging facilities for various locks:
+ * spinlocks, rwlocks, mutexes and rwsems.
+ *
+ * Started by Ingo Molnar:
+ *
+ *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/debug_locks.h>
+
+/*
+ * We want to turn all lock-debugging facilities on/off at once,
+ * via a global flag. The reason is that once a single bug has been
+ * detected and reported, there might be cascade of followup bugs
+ * that would just muddy the log. So we report the first one and
+ * shut up after that.
+ */
+int debug_locks = 1;
+
+/*
+ * The locking-testsuite uses <debug_locks_silent> to get a
+ * 'silent failure': nothing is printed to the console when
+ * a locking bug is detected.
+ */
+int debug_locks_silent;
+
+/*
+ * Generic 'turn off all lock debugging' function:
+ */
+int debug_locks_off(void)
+{
+	if (xchg(&debug_locks, 0)) {
+		if (!debug_locks_silent) {
+			console_verbose();
+			return 1;
+		}
+	}
+	return 0;
+}
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 93c15ee..3de2ccf 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -8,38 +8,35 @@
 
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
+#include <linux/debug_locks.h>
 #include <linux/delay.h>
+#include <linux/module.h>
 
 static void spin_bug(spinlock_t *lock, const char *msg)
 {
-	static long print_once = 1;
 	struct task_struct *owner = NULL;
 
-	if (xchg(&print_once, 0)) {
-		if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
-			owner = lock->owner;
-		printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
-			msg, raw_smp_processor_id(),
-			current->comm, current->pid);
-		printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, "
-				".owner_cpu: %d\n",
-			lock, lock->magic,
-			owner ? owner->comm : "<none>",
-			owner ? owner->pid : -1,
-			lock->owner_cpu);
-		dump_stack();
-#ifdef CONFIG_SMP
-		/*
-		 * We cannot continue on SMP:
-		 */
-//		panic("bad locking");
-#endif
-	}
+	if (!debug_locks_off())
+		return;
+
+	if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
+		owner = lock->owner;
+	printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
+		msg, raw_smp_processor_id(),
+		current->comm, current->pid);
+	printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, "
+			".owner_cpu: %d\n",
+		lock, lock->magic,
+		owner ? owner->comm : "<none>",
+		owner ? owner->pid : -1,
+		lock->owner_cpu);
+	dump_stack();
 }
 
 #define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
 
-static inline void debug_spin_lock_before(spinlock_t *lock)
+static inline void
+debug_spin_lock_before(spinlock_t *lock)
 {
 	SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
 	SPIN_BUG_ON(lock->owner == current, lock, "recursion");
@@ -118,20 +115,13 @@ void _raw_spin_unlock(spinlock_t *lock)
 
 static void rwlock_bug(rwlock_t *lock, const char *msg)
 {
-	static long print_once = 1;
-
-	if (xchg(&print_once, 0)) {
-		printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n",
-			msg, raw_smp_processor_id(), current->comm,
-			current->pid, lock);
-		dump_stack();
-#ifdef CONFIG_SMP
-		/*
-		 * We cannot continue on SMP:
-		 */
-		panic("bad locking");
-#endif
-	}
+	if (!debug_locks_off())
+		return;
+
+	printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n",
+		msg, raw_smp_processor_id(), current->comm,
+		current->pid, lock);
+	dump_stack();
 }
 
 #define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 35f8553..7b45079 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -330,6 +330,8 @@ void __vunmap(void *addr, int deallocate_pages)
 		return;
 	}
 
+	debug_check_no_locks_freed(addr, area->size);
+
 	if (deallocate_pages) {
 		int i;
 
-- 
cgit v0.10.2


From 9cebb5526833059f327d237a032422c762378b2a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:33 -0700
Subject: [PATCH] lockdep: mutex section binutils workaround

Work around weird section nesting build bug causing smp-alternatives failures
under certain circumstances.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/mutex.c b/kernel/mutex.c
index 3aad0b7..43a50c1 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -313,7 +313,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
  * This function must not be used in interrupt context. The
  * mutex must be released by the same task that acquired it.
  */
-int fastcall mutex_trylock(struct mutex *lock)
+int fastcall __sched mutex_trylock(struct mutex *lock)
 {
 	return __mutex_fastpath_trylock(&lock->count,
 					__mutex_trylock_slowpath);
-- 
cgit v0.10.2


From e4d919188554a77c798a267e098059bc9aa39726 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:34 -0700
Subject: [PATCH] lockdep: locking init debugging improvement

Locking init improvement:

 - introduce and use __SPIN_LOCK_UNLOCKED for array initializations,
   to pass in the name string of locks, used by debugging

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 164bdda..4c3a5ca 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -416,7 +416,7 @@ static struct entropy_store input_pool = {
 	.poolinfo = &poolinfo_table[0],
 	.name = "input",
 	.limit = 1,
-	.lock = SPIN_LOCK_UNLOCKED,
+	.lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock),
 	.pool = input_pool_data
 };
 
@@ -425,7 +425,7 @@ static struct entropy_store blocking_pool = {
 	.name = "blocking",
 	.limit = 1,
 	.pull = &input_pool,
-	.lock = SPIN_LOCK_UNLOCKED,
+	.lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock),
 	.pool = blocking_pool_data
 };
 
@@ -433,7 +433,7 @@ static struct entropy_store nonblocking_pool = {
 	.poolinfo = &poolinfo_table[1],
 	.name = "nonblocking",
 	.pull = &input_pool,
-	.lock = SPIN_LOCK_UNLOCKED,
+	.lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock),
 	.pool = nonblocking_pool_data
 };
 
diff --git a/fs/dcache.c b/fs/dcache.c
index c6e3535..bec4de1 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -38,7 +38,7 @@ int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
-static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
+static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
 EXPORT_SYMBOL(dcache_lock);
 
diff --git a/include/linux/idr.h b/include/linux/idr.h
index f559a71..8268034 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -66,7 +66,7 @@ struct idr {
 	.id_free	= NULL,					\
 	.layers 	= 0,					\
 	.id_free_cnt	= 0,					\
-	.lock		= SPIN_LOCK_UNLOCKED,			\
+	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),	\
 }
 #define DEFINE_IDR(name)	struct idr name = IDR_INIT(name)
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 678c1a9..1b7bb37 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -21,7 +21,7 @@
 	.count		= ATOMIC_INIT(1), 		\
 	.fdt		= &init_files.fdtab, 		\
 	.fdtab		= INIT_FDTABLE,			\
-	.file_lock	= SPIN_LOCK_UNLOCKED, 		\
+	.file_lock	= __SPIN_LOCK_UNLOCKED(init_task.file_lock), \
 	.next_fd	= 0, 				\
 	.close_on_exec_init = { { 0, } }, 		\
 	.open_fds_init	= { { 0, } }, 			\
@@ -36,7 +36,7 @@
 	.user_id	= 0,				\
 	.next		= NULL,				\
 	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER(name.wait), \
-	.ctx_lock	= SPIN_LOCK_UNLOCKED,		\
+	.ctx_lock	= __SPIN_LOCK_UNLOCKED(name.ctx_lock), \
 	.reqs_active	= 0U,				\
 	.max_reqs	= ~0U,				\
 }
@@ -48,7 +48,7 @@
 	.mm_users	= ATOMIC_INIT(2), 			\
 	.mm_count	= ATOMIC_INIT(1), 			\
 	.mmap_sem	= __RWSEM_INITIALIZER(name.mmap_sem),	\
-	.page_table_lock =  SPIN_LOCK_UNLOCKED, 		\
+	.page_table_lock =  __SPIN_LOCK_UNLOCKED(name.page_table_lock),	\
 	.mmlist		= LIST_HEAD_INIT(name.mmlist),		\
 	.cpu_vm_mask	= CPU_MASK_ALL,				\
 }
@@ -69,7 +69,7 @@
 #define INIT_SIGHAND(sighand) {						\
 	.count		= ATOMIC_INIT(1), 				\
 	.action		= { { { .sa_handler = NULL, } }, },		\
-	.siglock	= SPIN_LOCK_UNLOCKED, 				\
+	.siglock	= __SPIN_LOCK_UNLOCKED(sighand.siglock),	\
 }
 
 extern struct group_info init_groups;
@@ -119,7 +119,7 @@ extern struct group_info init_groups;
 		.list = LIST_HEAD_INIT(tsk.pending.list),		\
 		.signal = {{0}}},					\
 	.blocked	= {{0}},					\
-	.alloc_lock	= SPIN_LOCK_UNLOCKED,				\
+	.alloc_lock	= __SPIN_LOCK_UNLOCKED(tsk.alloc_lock),		\
 	.journal_info	= NULL,						\
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
 	.fs_excl	= ATOMIC_INIT(0),				\
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 51dbab9..7ff386a 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -65,7 +65,7 @@ struct raw_notifier_head {
 	} while (0)
 
 #define ATOMIC_NOTIFIER_INIT(name) {				\
-		.lock = SPIN_LOCK_UNLOCKED,			\
+		.lock = __SPIN_LOCK_UNLOCKED(name.lock),	\
 		.head = NULL }
 #define BLOCKING_NOTIFIER_INIT(name) {				\
 		.rwsem = __RWSEM_INITIALIZER((name).rwsem),	\
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 7bc5c7c..4600093 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -38,9 +38,17 @@ typedef struct {
  * These macros triggered gcc-3.x compile-time problems.  We think these are
  * OK now.  Be cautious.
  */
-#define SEQLOCK_UNLOCKED { 0, SPIN_LOCK_UNLOCKED }
-#define seqlock_init(x)	do { *(x) = (seqlock_t) SEQLOCK_UNLOCKED; } while (0)
+#define __SEQLOCK_UNLOCKED(lockname) \
+		 { 0, __SPIN_LOCK_UNLOCKED(lockname) }
 
+#define SEQLOCK_UNLOCKED \
+		 __SEQLOCK_UNLOCKED(old_style_seqlock_init)
+
+#define seqlock_init(x) \
+		do { *(x) = (seqlock_t) __SEQLOCK_UNLOCKED(x); } while (0)
+
+#define DEFINE_SEQLOCK(x) \
+		seqlock_t x = __SEQLOCK_UNLOCKED(x)
 
 /* Lock out other writers and update the count.
  * Acts like a normal spin_lock/unlock.
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 9cb51e0..f5d4ed7 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -44,24 +44,27 @@ typedef struct {
 #define SPINLOCK_OWNER_INIT	((void *)-1L)
 
 #ifdef CONFIG_DEBUG_SPINLOCK
-# define SPIN_LOCK_UNLOCKED						\
+# define __SPIN_LOCK_UNLOCKED(lockname)					\
 	(spinlock_t)	{	.raw_lock = __RAW_SPIN_LOCK_UNLOCKED,	\
 				.magic = SPINLOCK_MAGIC,		\
 				.owner = SPINLOCK_OWNER_INIT,		\
 				.owner_cpu = -1 }
-#define RW_LOCK_UNLOCKED						\
+#define __RW_LOCK_UNLOCKED(lockname)					\
 	(rwlock_t)	{	.raw_lock = __RAW_RW_LOCK_UNLOCKED,	\
 				.magic = RWLOCK_MAGIC,			\
 				.owner = SPINLOCK_OWNER_INIT,		\
 				.owner_cpu = -1 }
 #else
-# define SPIN_LOCK_UNLOCKED \
+# define __SPIN_LOCK_UNLOCKED(lockname) \
 	(spinlock_t)	{	.raw_lock = __RAW_SPIN_LOCK_UNLOCKED }
-#define RW_LOCK_UNLOCKED \
+#define __RW_LOCK_UNLOCKED(lockname) \
 	(rwlock_t)	{	.raw_lock = __RAW_RW_LOCK_UNLOCKED }
 #endif
 
-#define DEFINE_SPINLOCK(x)	spinlock_t x = SPIN_LOCK_UNLOCKED
-#define DEFINE_RWLOCK(x)	rwlock_t x = RW_LOCK_UNLOCKED
+#define SPIN_LOCK_UNLOCKED	__SPIN_LOCK_UNLOCKED(old_style_spin_init)
+#define RW_LOCK_UNLOCKED	__RW_LOCK_UNLOCKED(old_style_rw_init)
+
+#define DEFINE_SPINLOCK(x)	spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
+#define DEFINE_RWLOCK(x)	rwlock_t x = __RW_LOCK_UNLOCKED(x)
 
 #endif /* __LINUX_SPINLOCK_TYPES_H */
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 544e855..bc4f389 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -68,7 +68,7 @@ struct task_struct;
 	wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)
 
 #define __WAIT_QUEUE_HEAD_INITIALIZER(name) {				\
-	.lock		= SPIN_LOCK_UNLOCKED,				\
+	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),		\
 	.task_list	= { &(name).task_list, &(name).task_list } }
 
 #define DECLARE_WAIT_QUEUE_HEAD(name) \
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f464f5a..759805c 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -53,13 +53,13 @@
 static struct rcu_ctrlblk rcu_ctrlblk = {
 	.cur = -300,
 	.completed = -300,
-	.lock = SPIN_LOCK_UNLOCKED,
+	.lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
 	.cpumask = CPU_MASK_NONE,
 };
 static struct rcu_ctrlblk rcu_bh_ctrlblk = {
 	.cur = -300,
 	.completed = -300,
-	.lock = SPIN_LOCK_UNLOCKED,
+	.lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
 	.cpumask = CPU_MASK_NONE,
 };
 
diff --git a/kernel/timer.c b/kernel/timer.c
index 5a89602..4dd9a10 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1208,7 +1208,7 @@ unsigned long wall_jiffies = INITIAL_JIFFIES;
  * playing with xtime and avenrun.
  */
 #ifndef ARCH_HAVE_XTIME_LOCK
-seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
+__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
 
 EXPORT_SYMBOL(xtime_lock);
 #endif
diff --git a/mm/swap_state.c b/mm/swap_state.c
index fccbd9b..5f7cf2a 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -38,7 +38,7 @@ static struct backing_dev_info swap_backing_dev_info = {
 
 struct address_space swapper_space = {
 	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
-	.tree_lock	= RW_LOCK_UNLOCKED,
+	.tree_lock	= __RW_LOCK_UNLOCKED(swapper_space.tree_lock),
 	.a_ops		= &swap_aops,
 	.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
 	.backing_dev_info = &swap_backing_dev_info,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8355b72..8237172 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -90,7 +90,7 @@ static struct socket *tcp_socket;
 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
 
 struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
-	.lhash_lock	= RW_LOCK_UNLOCKED,
+	.lhash_lock	= __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
 	.lhash_users	= ATOMIC_INIT(0),
 	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
 };
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e085169..0ccb7cb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -40,7 +40,7 @@ int sysctl_tcp_abort_on_overflow;
 struct inet_timewait_death_row tcp_death_row = {
 	.sysctl_max_tw_buckets = NR_FILE * 2,
 	.period		= TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
-	.death_lock	= SPIN_LOCK_UNLOCKED,
+	.death_lock	= __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock),
 	.hashinfo	= &tcp_hashinfo,
 	.tw_timer	= TIMER_INITIALIZER(inet_twdr_hangman, 0,
 					    (unsigned long)&tcp_death_row),
-- 
cgit v0.10.2


From 3ac94932a2c859e8c57a8af091fa39334e1c3f23 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:36 -0700
Subject: [PATCH] lockdep: beautify x86_64 stacktraces

Beautify x86_64 stacktraces to be more readable.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index ca56e19..bb6745d 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -296,7 +296,7 @@ void __show_regs(struct pt_regs * regs)
 		system_utsname.version);
 	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
 	printk_address(regs->rip); 
-	printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
+	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
 		regs->eflags);
 	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
 	       regs->rax, regs->rbx, regs->rcx);
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 5a5311d..02dc155 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -110,28 +110,31 @@ static int kstack_depth_to_print = 12;
 static int call_trace = 1;
 
 #ifdef CONFIG_KALLSYMS
-#include <linux/kallsyms.h> 
-int printk_address(unsigned long address)
-{ 
+# include <linux/kallsyms.h>
+void printk_address(unsigned long address)
+{
 	unsigned long offset = 0, symsize;
 	const char *symname;
 	char *modname;
-	char *delim = ":"; 
+	char *delim = ":";
 	char namebuf[128];
 
-	symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); 
-	if (!symname) 
-		return printk("[<%016lx>]", address);
-	if (!modname) 
+	symname = kallsyms_lookup(address, &symsize, &offset,
+					&modname, namebuf);
+	if (!symname) {
+		printk(" [<%016lx>]\n", address);
+		return;
+	}
+	if (!modname)
 		modname = delim = ""; 		
-        return printk("<%016lx>{%s%s%s%s%+ld}",
-		      address, delim, modname, delim, symname, offset); 
-} 
+	printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n",
+		address, delim, modname, delim, symname, offset, symsize);
+}
 #else
-int printk_address(unsigned long address)
-{ 
-	return printk("[<%016lx>]", address);
-} 
+void printk_address(unsigned long address)
+{
+	printk(" [<%016lx>]\n", address);
+}
 #endif
 
 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
@@ -193,20 +196,14 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 
 static int show_trace_unwind(struct unwind_frame_info *info, void *context)
 {
-	int i = 11, n = 0;
+	int n = 0;
 
 	while (unwind(info) == 0 && UNW_PC(info)) {
-		++n;
-		if (i > 50) {
-			printk("\n       ");
-			i = 7;
-		} else
-			i += printk(" ");
-		i += printk_address(UNW_PC(info));
+		n++;
+		printk_address(UNW_PC(info));
 		if (arch_unw_user_mode(info))
 			break;
 	}
-	printk("\n");
 	return n;
 }
 
@@ -224,7 +221,7 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
 	int i = 11;
 	unsigned used = 0;
 
-	printk("\nCall Trace:");
+	printk("\nCall Trace:\n");
 
 	if (!tsk)
 		tsk = current;
@@ -254,12 +251,6 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
 	do while (cond) { \
 		unsigned long addr = *stack++; \
 		if (kernel_text_address(addr)) { \
-			if (i > 50) { \
-				printk("\n       "); \
-				i = 0; \
-			} \
-			else \
-				i += printk(" "); \
 			/* \
 			 * If the address is either in the text segment of the \
 			 * kernel, or in the region which contains vmalloc'ed \
@@ -268,20 +259,20 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
 			 * down the cause of the crash will be able to figure \
 			 * out the call path that was taken. \
 			 */ \
-			i += printk_address(addr); \
+			printk_address(addr); \
 		} \
 	} while (0)
 
-	for(; ; ) {
+	for( ; ; ) {
 		const char *id;
 		unsigned long *estack_end;
 		estack_end = in_exception_stack(cpu, (unsigned long)stack,
 						&used, &id);
 
 		if (estack_end) {
-			i += printk(" <%s>", id);
+			printk(" <%s>", id);
 			HANDLE_STACK (stack < estack_end);
-			i += printk(" <EOE>");
+			printk(" <EOE>");
 			stack = (unsigned long *) estack_end[-2];
 			continue;
 		}
@@ -291,11 +282,11 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
 				(IRQSTACKSIZE - 64) / sizeof(*irqstack);
 
 			if (stack >= irqstack && stack < irqstack_end) {
-				i += printk(" <IRQ>");
+				printk(" <IRQ>");
 				HANDLE_STACK (stack < irqstack_end);
 				stack = (unsigned long *) (irqstack_end[-1]);
 				irqstack_end = NULL;
-				i += printk(" <EOI>");
+				printk(" <EOI>");
 				continue;
 			}
 		}
@@ -304,6 +295,7 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
 
 	HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
 #undef HANDLE_STACK
+
 	printk("\n");
 }
 
@@ -337,8 +329,8 @@ static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned
 			break;
 		}
 		if (i && ((i % 4) == 0))
-			printk("\n       ");
-		printk("%016lx ", *stack++);
+			printk("\n");
+		printk(" %016lx", *stack++);
 		touch_nmi_watchdog();
 	}
 	show_trace(tsk, regs, rsp);
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 5afcf6e..ac8ea66 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -570,7 +570,6 @@ no_context:
 		printk(KERN_ALERT "Unable to handle kernel paging request");
 	printk(" at %016lx RIP: \n" KERN_ALERT,address);
 	printk_address(regs->rip);
-	printk("\n");
 	dump_pagetable(address);
 	tsk->thread.cr2 = address;
 	tsk->thread.trap_no = 14;
diff --git a/include/asm-x86_64/kdebug.h b/include/asm-x86_64/kdebug.h
index cd52c7f..2b0c088 100644
--- a/include/asm-x86_64/kdebug.h
+++ b/include/asm-x86_64/kdebug.h
@@ -49,7 +49,7 @@ static inline int notify_die(enum die_val val, const char *str,
 	return atomic_notifier_call_chain(&die_chain, val, &args);
 } 
 
-extern int printk_address(unsigned long address);
+extern void printk_address(unsigned long address);
 extern void die(const char *,struct pt_regs *,long);
 extern void __die(const char *,struct pt_regs *,long);
 extern void show_registers(struct pt_regs *regs);
-- 
cgit v0.10.2


From c9ca1ba5bde45839cdc4f8ab93730cb68e6ee8a6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:36 -0700
Subject: [PATCH] lockdep: x86_64 document stack frame internals

Document stack frame nesting internals some more.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 02dc155..79d05c4 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -152,10 +152,22 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 	};
 	unsigned k;
 
+	/*
+	 * Iterate over all exception stacks, and figure out whether
+	 * 'stack' is in one of them:
+	 */
 	for (k = 0; k < N_EXCEPTION_STACKS; k++) {
 		unsigned long end;
 
+		/*
+		 * set 'end' to the end of the exception stack.
+		 */
 		switch (k + 1) {
+		/*
+		 * TODO: this block is not needed i think, because
+		 * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK]
+		 * properly too.
+		 */
 #if DEBUG_STKSZ > EXCEPTION_STKSZ
 		case DEBUG_STACK:
 			end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ;
@@ -165,19 +177,43 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 			end = per_cpu(init_tss, cpu).ist[k];
 			break;
 		}
+		/*
+		 * Is 'stack' above this exception frame's end?
+		 * If yes then skip to the next frame.
+		 */
 		if (stack >= end)
 			continue;
+		/*
+		 * Is 'stack' above this exception frame's start address?
+		 * If yes then we found the right frame.
+		 */
 		if (stack >= end - EXCEPTION_STKSZ) {
+			/*
+			 * Make sure we only iterate through an exception
+			 * stack once. If it comes up for the second time
+			 * then there's something wrong going on - just
+			 * break out and return NULL:
+			 */
 			if (*usedp & (1U << k))
 				break;
 			*usedp |= 1U << k;
 			*idp = ids[k];
 			return (unsigned long *)end;
 		}
+		/*
+		 * If this is a debug stack, and if it has a larger size than
+		 * the usual exception stacks, then 'stack' might still
+		 * be within the lower portion of the debug stack:
+		 */
 #if DEBUG_STKSZ > EXCEPTION_STKSZ
 		if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
 			unsigned j = N_EXCEPTION_STACKS - 1;
 
+			/*
+			 * Black magic. A large debug stack is composed of
+			 * multiple exception stack entries, which we
+			 * iterate through now. Dont look:
+			 */
 			do {
 				++j;
 				end -= EXCEPTION_STKSZ;
@@ -247,6 +283,11 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
 		}
 	}
 
+	/*
+	 * Print function call entries within a stack. 'cond' is the
+	 * "end of stackframe" condition, that the 'stack++'
+	 * iteration will eventually trigger.
+	 */
 #define HANDLE_STACK(cond) \
 	do while (cond) { \
 		unsigned long addr = *stack++; \
@@ -263,7 +304,12 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
 		} \
 	} while (0)
 
-	for( ; ; ) {
+	/*
+	 * Print function call entries in all stacks, starting at the
+	 * current stack address. If the stacks consist of nested
+	 * exceptions
+	 */
+	for ( ; ; ) {
 		const char *id;
 		unsigned long *estack_end;
 		estack_end = in_exception_stack(cpu, (unsigned long)stack,
@@ -273,6 +319,11 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
 			printk(" <%s>", id);
 			HANDLE_STACK (stack < estack_end);
 			printk(" <EOE>");
+			/*
+			 * We link to the next stack via the
+			 * second-to-last pointer (index -2 to end) in the
+			 * exception stack:
+			 */
 			stack = (unsigned long *) estack_end[-2];
 			continue;
 		}
@@ -284,6 +335,11 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
 			if (stack >= irqstack && stack < irqstack_end) {
 				printk(" <IRQ>");
 				HANDLE_STACK (stack < irqstack_end);
+				/*
+				 * We link to the next stack (which would be
+				 * the process stack normally) the last
+				 * pointer (index -1 to end) in the IRQ stack:
+				 */
 				stack = (unsigned long *) (irqstack_end[-1]);
 				irqstack_end = NULL;
 				printk(" <EOI>");
@@ -293,6 +349,9 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
 		break;
 	}
 
+	/*
+	 * This prints the process stack:
+	 */
 	HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
 #undef HANDLE_STACK
 
-- 
cgit v0.10.2


From f0a5c315eb266edc608a29971bb4ff1a3025c58f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:37 -0700
Subject: [PATCH] lockdep: i386 remove multi entry backtraces

Remove CONFIG_STACK_BACKTRACE_COLS.

This feature didnt work out: instead of making kernel debugging more
efficient, it produces much harder to read stacktraces!  Check out this trace
for example:

  http://static.flickr.com/47/158326090_35d0129147_b_d.jpg

That backtrace could have been printed much nicer as a one-entry-per-line
thing, taking the same amount of screen real-estate.

Plus we remove 30 lines of kernel code as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
index c92191b..b20ddd0 100644
--- a/arch/i386/Kconfig.debug
+++ b/arch/i386/Kconfig.debug
@@ -31,15 +31,6 @@ config DEBUG_STACK_USAGE
 
 	  This option will slow down process creation somewhat.
 
-config STACK_BACKTRACE_COLS
-	int "Stack backtraces per line" if DEBUG_KERNEL
-	range 1 3
-	default 2
-	help
-	  Selects how many stack backtrace entries per line to display.
-
-	  This can save screen space when displaying traces.
-
 comment "Page alloc debug is incompatible with Software Suspend on i386"
 	depends on DEBUG_KERNEL && SOFTWARE_SUSPEND
 
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index e8c6086..2bf8b55 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -115,28 +115,13 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
 }
 
 /*
- * Print CONFIG_STACK_BACKTRACE_COLS address/symbol entries per line.
+ * Print one address/symbol entries per line.
  */
-static inline int print_addr_and_symbol(unsigned long addr, char *log_lvl,
-					int printed)
+static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
 {
-	if (!printed)
-		printk(log_lvl);
-
-#if CONFIG_STACK_BACKTRACE_COLS == 1
 	printk(" [<%08lx>] ", addr);
-#else
-	printk(" <%08lx> ", addr);
-#endif
-	print_symbol("%s", addr);
 
-	printed = (printed + 1) % CONFIG_STACK_BACKTRACE_COLS;
-	if (printed)
-		printk(" ");
-	else
-		printk("\n");
-
-	return printed;
+	print_symbol("%s\n", addr);
 }
 
 static inline unsigned long print_context_stack(struct thread_info *tinfo,
@@ -144,12 +129,11 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 				char *log_lvl)
 {
 	unsigned long addr;
-	int printed = 0; /* nr of entries already printed on current line */
 
 #ifdef	CONFIG_FRAME_POINTER
 	while (valid_stack_ptr(tinfo, (void *)ebp)) {
 		addr = *(unsigned long *)(ebp + 4);
-		printed = print_addr_and_symbol(addr, log_lvl, printed);
+		print_addr_and_symbol(addr, log_lvl);
 		/*
 		 * break out of recursive entries (such as
 		 * end_of_stack_stop_unwind_function):
@@ -162,28 +146,23 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 	while (valid_stack_ptr(tinfo, stack)) {
 		addr = *stack++;
 		if (__kernel_text_address(addr))
-			printed = print_addr_and_symbol(addr, log_lvl, printed);
+			print_addr_and_symbol(addr, log_lvl);
 	}
 #endif
-	if (printed)
-		printk("\n");
-
 	return ebp;
 }
 
-static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
+static asmlinkage int
+show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
 {
 	int n = 0;
-	int printed = 0; /* nr of entries already printed on current line */
 
 	while (unwind(info) == 0 && UNW_PC(info)) {
-		++n;
-		printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed);
+		n++;
+		print_addr_and_symbol(UNW_PC(info), log_lvl);
 		if (arch_unw_user_mode(info))
 			break;
 	}
-	if (printed)
-		printk("\n");
 	return n;
 }
 
-- 
cgit v0.10.2


From 8637c09901049f061b94f684915d4f18ecf91d79 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:38 -0700
Subject: [PATCH] lockdep: stacktrace subsystem, core

Framework to generate and save stacktraces quickly, without printing anything
to the console.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
new file mode 100644
index 0000000..9cc81e5
--- /dev/null
+++ b/include/linux/stacktrace.h
@@ -0,0 +1,20 @@
+#ifndef __LINUX_STACKTRACE_H
+#define __LINUX_STACKTRACE_H
+
+#ifdef CONFIG_STACKTRACE
+struct stack_trace {
+	unsigned int nr_entries, max_entries;
+	unsigned long *entries;
+};
+
+extern void save_stack_trace(struct stack_trace *trace,
+			     struct task_struct *task, int all_contexts,
+			     unsigned int skip);
+
+extern void print_stack_trace(struct stack_trace *trace, int spaces);
+#else
+# define save_stack_trace(trace, task, all, skip)	do { } while (0)
+# define print_stack_trace(trace)			do { } while (0)
+#endif
+
+#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 82fb182..e7fd20e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o
 
+obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
 obj-$(CONFIG_FUTEX) += futex.o
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
new file mode 100644
index 0000000..b71816e
--- /dev/null
+++ b/kernel/stacktrace.c
@@ -0,0 +1,24 @@
+/*
+ * kernel/stacktrace.c
+ *
+ * Stack trace management functions
+ *
+ *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#include <linux/sched.h>
+#include <linux/kallsyms.h>
+#include <linux/stacktrace.h>
+
+void print_stack_trace(struct stack_trace *trace, int spaces)
+{
+	int i, j;
+
+	for (i = 0; i < trace->nr_entries; i++) {
+		unsigned long ip = trace->entries[i];
+
+		for (j = 0; j < spaces + 1; j++)
+			printk(" ");
+		print_ip_sym(ip);
+	}
+}
+
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 7b3863d..04e374c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -107,7 +107,7 @@ config DEBUG_SLAB_LEAK
 
 config DEBUG_PREEMPT
 	bool "Debug preemptible kernel"
-	depends on DEBUG_KERNEL && PREEMPT
+	depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT
 	default y
 	help
 	  If you say Y here then the kernel will use a debug variant of the
@@ -149,6 +149,10 @@ config DEBUG_SPINLOCK_SLEEP
 	  If you say Y here, various routines which may sleep will become very
 	  noisy if they are called with a spinlock held.
 
+config STACKTRACE
+	bool
+	depends on STACKTRACE_SUPPORT
+
 config DEBUG_KOBJECT
 	bool "kobject debugging"
 	depends on DEBUG_KERNEL
-- 
cgit v0.10.2


From cbbd1fa72d22896332301ca7e6b7164284be105c Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 3 Jul 2006 00:24:38 -0700
Subject: [PATCH] lockdep: s390 CONFIG_FRAME_POINTER support

CONFIG_FRAME_POINTER support for s390.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index b3791fb..74ef57d 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -34,6 +34,11 @@ cflags-$(CONFIG_MARCH_G5)   += $(call cc-option,-march=g5)
 cflags-$(CONFIG_MARCH_Z900) += $(call cc-option,-march=z900)
 cflags-$(CONFIG_MARCH_Z990) += $(call cc-option,-march=z990)
 
+#
+# Prevent tail-call optimizations, to get clearer backtraces:
+#
+cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
+
 # old style option for packed stacks
 ifeq ($(call cc-option-yn,-mkernel-backchain),y)
 cflags-$(CONFIG_PACK_STACK)  += -mkernel-backchain -D__PACK_STACK
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 04e374c..91e338a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -208,7 +208,7 @@ config DEBUG_VM
 
 config FRAME_POINTER
 	bool "Compile the kernel with frame pointers"
-	depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML)
+	depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML || S390)
 	default y if DEBUG_INFO && UML
 	help
 	  If you say Y here the resulting kernel image will be slightly larger
-- 
cgit v0.10.2


From 4a7c7197817e7180f56110334d961f4aa6ac69cb Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:39 -0700
Subject: [PATCH] lockdep: stacktrace subsystem, i386 support

Framework to generate and save stacktraces quickly, without printing anything
to the console.  i386 support.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 27d8ddd..076a72c 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -18,6 +18,10 @@ config GENERIC_TIME
 	bool
 	default y
 
+config STACKTRACE_SUPPORT
+	bool
+	default y
+
 config SEMAPHORE_SLEEPERS
 	bool
 	default y
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index cbc1184..1b452a1 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -9,6 +9,7 @@ obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o \
 		pci-dma.o i386_ksyms.o i387.o bootflag.o \
 		quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
 
+obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
 obj-y				+= acpi/
 obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot.o
diff --git a/arch/i386/kernel/stacktrace.c b/arch/i386/kernel/stacktrace.c
new file mode 100644
index 0000000..e62a037
--- /dev/null
+++ b/arch/i386/kernel/stacktrace.c
@@ -0,0 +1,98 @@
+/*
+ * arch/i386/kernel/stacktrace.c
+ *
+ * Stack trace management functions
+ *
+ *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+
+static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+{
+	return	p > (void *)tinfo &&
+		p < (void *)tinfo + THREAD_SIZE - 3;
+}
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer:
+ */
+static inline unsigned long
+save_context_stack(struct stack_trace *trace, unsigned int skip,
+		   struct thread_info *tinfo, unsigned long *stack,
+		   unsigned long ebp)
+{
+	unsigned long addr;
+
+#ifdef CONFIG_FRAME_POINTER
+	while (valid_stack_ptr(tinfo, (void *)ebp)) {
+		addr = *(unsigned long *)(ebp + 4);
+		if (!skip)
+			trace->entries[trace->nr_entries++] = addr;
+		else
+			skip--;
+		if (trace->nr_entries >= trace->max_entries)
+			break;
+		/*
+		 * break out of recursive entries (such as
+		 * end_of_stack_stop_unwind_function):
+	 	 */
+		if (ebp == *(unsigned long *)ebp)
+			break;
+
+		ebp = *(unsigned long *)ebp;
+	}
+#else
+	while (valid_stack_ptr(tinfo, stack)) {
+		addr = *stack++;
+		if (__kernel_text_address(addr)) {
+			if (!skip)
+				trace->entries[trace->nr_entries++] = addr;
+			else
+				skip--;
+			if (trace->nr_entries >= trace->max_entries)
+				break;
+		}
+	}
+#endif
+
+	return ebp;
+}
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer.
+ * If all_contexts is set, all contexts (hardirq, softirq and process)
+ * are saved. If not set then only the current context is saved.
+ */
+void save_stack_trace(struct stack_trace *trace,
+		      struct task_struct *task, int all_contexts,
+		      unsigned int skip)
+{
+	unsigned long ebp;
+	unsigned long *stack = &ebp;
+
+	WARN_ON(trace->nr_entries || !trace->max_entries);
+
+	if (!task || task == current) {
+		/* Grab ebp right from our regs: */
+		asm ("movl %%ebp, %0" : "=r" (ebp));
+	} else {
+		/* ebp is the last reg pushed by switch_to(): */
+		ebp = *(unsigned long *) task->thread.esp;
+	}
+
+	while (1) {
+		struct thread_info *context = (struct thread_info *)
+				((unsigned long)stack & (~(THREAD_SIZE - 1)));
+
+		ebp = save_context_stack(trace, skip, context, stack, ebp);
+		stack = (unsigned long *)context->previous_esp;
+		if (!all_contexts || !stack ||
+				trace->nr_entries >= trace->max_entries)
+			break;
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+		if (trace->nr_entries >= trace->max_entries)
+			break;
+	}
+}
+
-- 
cgit v0.10.2


From 21b32bbff950771f196da91011249fa05fa83b32 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:40 -0700
Subject: [PATCH] lockdep: stacktrace subsystem, x86_64 support

Framework to generate and save stacktraces quickly, without printing anything
to the console.  x86_64 support.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index e856804..445f436 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -24,6 +24,10 @@ config X86
 	bool
 	default y
 
+config STACKTRACE_SUPPORT
+	bool
+	default y
+
 config SEMAPHORE_SLEEPERS
 	bool
 	default y
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 819e84e..b5aaeaf 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y	:= process.o signal.o entry.o traps.o irq.o \
 		setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
 		pci-dma.o pci-nommu.o alternative.o
 
+obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_X86_MCE)         += mce.o
 obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
 obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd.o
diff --git a/arch/x86_64/kernel/stacktrace.c b/arch/x86_64/kernel/stacktrace.c
new file mode 100644
index 0000000..32cf55e
--- /dev/null
+++ b/arch/x86_64/kernel/stacktrace.c
@@ -0,0 +1,221 @@
+/*
+ * arch/x86_64/kernel/stacktrace.c
+ *
+ * Stack trace management functions
+ *
+ *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+
+#include <asm/smp.h>
+
+static inline int
+in_range(unsigned long start, unsigned long addr, unsigned long end)
+{
+	return addr >= start && addr <= end;
+}
+
+static unsigned long
+get_stack_end(struct task_struct *task, unsigned long stack)
+{
+	unsigned long stack_start, stack_end, flags;
+	int i, cpu;
+
+	/*
+	 * The most common case is that we are in the task stack:
+	 */
+	stack_start = (unsigned long)task->thread_info;
+	stack_end = stack_start + THREAD_SIZE;
+
+	if (in_range(stack_start, stack, stack_end))
+		return stack_end;
+
+	/*
+	 * We are in an interrupt if irqstackptr is set:
+	 */
+	raw_local_irq_save(flags);
+	cpu = safe_smp_processor_id();
+	stack_end = (unsigned long)cpu_pda(cpu)->irqstackptr;
+
+	if (stack_end) {
+		stack_start = stack_end & ~(IRQSTACKSIZE-1);
+		if (in_range(stack_start, stack, stack_end))
+			goto out_restore;
+		/*
+		 * We get here if we are in an IRQ context but we
+		 * are also in an exception stack.
+		 */
+	}
+
+	/*
+	 * Iterate over all exception stacks, and figure out whether
+	 * 'stack' is in one of them:
+	 */
+	for (i = 0; i < N_EXCEPTION_STACKS; i++) {
+		/*
+		 * set 'end' to the end of the exception stack.
+		 */
+		stack_end = per_cpu(init_tss, cpu).ist[i];
+		stack_start = stack_end - EXCEPTION_STKSZ;
+
+		/*
+		 * Is 'stack' above this exception frame's end?
+		 * If yes then skip to the next frame.
+		 */
+		if (stack >= stack_end)
+			continue;
+		/*
+		 * Is 'stack' above this exception frame's start address?
+		 * If yes then we found the right frame.
+		 */
+		if (stack >= stack_start)
+			goto out_restore;
+
+		/*
+		 * If this is a debug stack, and if it has a larger size than
+		 * the usual exception stacks, then 'stack' might still
+		 * be within the lower portion of the debug stack:
+		 */
+#if DEBUG_STKSZ > EXCEPTION_STKSZ
+		if (i == DEBUG_STACK - 1 && stack >= stack_end - DEBUG_STKSZ) {
+			/*
+			 * Black magic. A large debug stack is composed of
+			 * multiple exception stack entries, which we
+			 * iterate through now. Dont look:
+			 */
+			do {
+				stack_end -= EXCEPTION_STKSZ;
+				stack_start -= EXCEPTION_STKSZ;
+			} while (stack < stack_start);
+
+			goto out_restore;
+		}
+#endif
+	}
+	/*
+	 * Ok, 'stack' is not pointing to any of the system stacks.
+	 */
+	stack_end = 0;
+
+out_restore:
+	raw_local_irq_restore(flags);
+
+	return stack_end;
+}
+
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer:
+ */
+static inline unsigned long
+save_context_stack(struct stack_trace *trace, unsigned int skip,
+		   unsigned long stack, unsigned long stack_end)
+{
+	unsigned long addr;
+
+#ifdef CONFIG_FRAME_POINTER
+	unsigned long prev_stack = 0;
+
+	while (in_range(prev_stack, stack, stack_end)) {
+		pr_debug("stack:          %p\n", (void *)stack);
+		addr = (unsigned long)(((unsigned long *)stack)[1]);
+		pr_debug("addr:           %p\n", (void *)addr);
+		if (!skip)
+			trace->entries[trace->nr_entries++] = addr-1;
+		else
+			skip--;
+		if (trace->nr_entries >= trace->max_entries)
+			break;
+		if (!addr)
+			return 0;
+		/*
+		 * Stack frames must go forwards (otherwise a loop could
+		 * happen if the stackframe is corrupted), so we move
+		 * prev_stack forwards:
+		 */
+		prev_stack = stack;
+		stack = (unsigned long)(((unsigned long *)stack)[0]);
+	}
+	pr_debug("invalid:        %p\n", (void *)stack);
+#else
+	while (stack < stack_end) {
+		addr = ((unsigned long *)stack)[0];
+		stack += sizeof(long);
+		if (__kernel_text_address(addr)) {
+			if (!skip)
+				trace->entries[trace->nr_entries++] = addr-1;
+			else
+				skip--;
+			if (trace->nr_entries >= trace->max_entries)
+				break;
+		}
+	}
+#endif
+	return stack;
+}
+
+#define MAX_STACKS 10
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer.
+ * If all_contexts is set, all contexts (hardirq, softirq and process)
+ * are saved. If not set then only the current context is saved.
+ */
+void save_stack_trace(struct stack_trace *trace,
+		      struct task_struct *task, int all_contexts,
+		      unsigned int skip)
+{
+	unsigned long stack = (unsigned long)&stack;
+	int i, nr_stacks = 0, stacks_done[MAX_STACKS];
+
+	WARN_ON(trace->nr_entries || !trace->max_entries);
+
+	if (!task)
+		task = current;
+
+	pr_debug("task: %p, ti: %p\n", task, task->thread_info);
+
+	if (!task || task == current) {
+		/* Grab rbp right from our regs: */
+		asm ("mov %%rbp, %0" : "=r" (stack));
+		pr_debug("rbp:            %p\n", (void *)stack);
+	} else {
+		/* rbp is the last reg pushed by switch_to(): */
+		stack = task->thread.rsp;
+		pr_debug("other task rsp: %p\n", (void *)stack);
+		stack = (unsigned long)(((unsigned long *)stack)[0]);
+		pr_debug("other task rbp: %p\n", (void *)stack);
+	}
+
+	while (1) {
+		unsigned long stack_end = get_stack_end(task, stack);
+
+		pr_debug("stack:          %p\n", (void *)stack);
+		pr_debug("stack end:      %p\n", (void *)stack_end);
+
+		/*
+		 * Invalid stack addres?
+		 */
+		if (!stack_end)
+			return;
+		/*
+		 * Were we in this stack already? (recursion)
+		 */
+		for (i = 0; i < nr_stacks; i++)
+			if (stacks_done[i] == stack_end)
+				return;
+		stacks_done[nr_stacks] = stack_end;
+
+		stack = save_context_stack(trace, skip, stack, stack_end);
+		if (!all_contexts || !stack ||
+				trace->nr_entries >= trace->max_entries)
+			return;
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+		if (trace->nr_entries >= trace->max_entries)
+			return;
+		if (++nr_stacks >= MAX_STACKS)
+			return;
+	}
+}
+
-- 
cgit v0.10.2


From 5bdc9b447c0076f494a56fdcd93ee8c5e78a2afd Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 3 Jul 2006 00:24:41 -0700
Subject: [PATCH] lockdep: stacktrace subsystem, s390 support

stacktrace interface for s390 as needed by lock validator.

[clg@fr.ibm.com: build fix]
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Cedric Le Goater <clg@fr.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 821a141..5b28da4 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -7,6 +7,10 @@ config MMU
 	bool
 	default y
 
+config STACKTRACE_SUPPORT
+	bool
+	default y
+
 config RWSEM_GENERIC_SPINLOCK
 	bool
 
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 9269b57..eabf00a 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_COMPAT)		+= compat_linux.o compat_signal.o \
 obj-$(CONFIG_BINFMT_ELF32)	+= binfmt_elf32.o
 
 obj-$(CONFIG_VIRT_TIMER)	+= vtime.o
+obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 
 # Kexec part
 S390_KEXEC_OBJS := machine_kexec.o crash.o
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
new file mode 100644
index 0000000..de83f38
--- /dev/null
+++ b/arch/s390/kernel/stacktrace.c
@@ -0,0 +1,90 @@
+/*
+ * arch/s390/kernel/stacktrace.c
+ *
+ * Stack trace management functions
+ *
+ *  Copyright (C) IBM Corp. 2006
+ *  Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+#include <linux/kallsyms.h>
+
+static inline unsigned long save_context_stack(struct stack_trace *trace,
+					       unsigned int *skip,
+					       unsigned long sp,
+					       unsigned long low,
+					       unsigned long high)
+{
+	struct stack_frame *sf;
+	struct pt_regs *regs;
+	unsigned long addr;
+
+	while(1) {
+		sp &= PSW_ADDR_INSN;
+		if (sp < low || sp > high)
+			return sp;
+		sf = (struct stack_frame *)sp;
+		while(1) {
+			addr = sf->gprs[8] & PSW_ADDR_INSN;
+			if (!(*skip))
+				trace->entries[trace->nr_entries++] = addr;
+			else
+				(*skip)--;
+			if (trace->nr_entries >= trace->max_entries)
+				return sp;
+			low = sp;
+			sp = sf->back_chain & PSW_ADDR_INSN;
+			if (!sp)
+				break;
+			if (sp <= low || sp > high - sizeof(*sf))
+				return sp;
+			sf = (struct stack_frame *)sp;
+		}
+		/* Zero backchain detected, check for interrupt frame. */
+		sp = (unsigned long)(sf + 1);
+		if (sp <= low || sp > high - sizeof(*regs))
+			return sp;
+		regs = (struct pt_regs *)sp;
+		addr = regs->psw.addr & PSW_ADDR_INSN;
+		if (!(*skip))
+			trace->entries[trace->nr_entries++] = addr;
+		else
+			(*skip)--;
+		if (trace->nr_entries >= trace->max_entries)
+			return sp;
+		low = sp;
+		sp = regs->gprs[15];
+	}
+}
+
+void save_stack_trace(struct stack_trace *trace,
+		      struct task_struct *task, int all_contexts,
+		      unsigned int skip)
+{
+	register unsigned long sp asm ("15");
+	unsigned long orig_sp;
+
+	sp &= PSW_ADDR_INSN;
+	orig_sp = sp;
+
+	sp = save_context_stack(trace, &skip, sp,
+				S390_lowcore.panic_stack - PAGE_SIZE,
+				S390_lowcore.panic_stack);
+	if ((sp != orig_sp) && !all_contexts)
+		return;
+	sp = save_context_stack(trace, &skip, sp,
+				S390_lowcore.async_stack - ASYNC_SIZE,
+				S390_lowcore.async_stack);
+	if ((sp != orig_sp) && !all_contexts)
+		return;
+	if (task)
+		save_context_stack(trace, &skip, sp,
+				   (unsigned long) task_stack_page(task),
+				   (unsigned long) task_stack_page(task) + THREAD_SIZE);
+	else
+		save_context_stack(trace, &skip, sp, S390_lowcore.thread_info,
+				   S390_lowcore.thread_info + THREAD_SIZE);
+	return;
+}
-- 
cgit v0.10.2


From de30a2b355ea85350ca2f58f3b9bf4e5bc007986 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:42 -0700
Subject: [PATCH] lockdep: irqtrace subsystem, core

Accurate hard-IRQ-flags and softirq-flags state tracing.

This allows us to attach extra functionality to IRQ flags on/off
events (such as trace-on/off).

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 525baab..027728b 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -429,7 +429,7 @@ void do_softirq(void)
 		local_bh_disable();
 		do_softirq_onstack();
 		account_system_vtime(current);
-		__local_bh_enable();
+		_local_bh_enable();
 	}
 
 	local_irq_restore(flags);
diff --git a/include/asm-powerpc/irqflags.h b/include/asm-powerpc/irqflags.h
new file mode 100644
index 0000000..7970cba
--- /dev/null
+++ b/include/asm-powerpc/irqflags.h
@@ -0,0 +1,31 @@
+/*
+ * include/asm-powerpc/irqflags.h
+ *
+ * IRQ flags handling
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * raw_local_irq_*() macros from the lowlevel headers.
+ */
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+/*
+ * Get definitions for raw_local_save_flags(x), etc.
+ */
+#include <asm-powerpc/hw_irq.h>
+
+/*
+ * Do the CPU's IRQ-state tracing from assembly code. We call a
+ * C function, so save all the C-clobbered registers:
+ */
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+#error No support on PowerPC yet for CONFIG_TRACE_IRQFLAGS
+
+#else
+# define TRACE_IRQS_ON
+# define TRACE_IRQS_OFF
+#endif
+
+#endif
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 114ae58..b1d4332 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -86,9 +86,6 @@ extern void synchronize_irq(unsigned int irq);
 # define synchronize_irq(irq)	barrier()
 #endif
 
-#define nmi_enter()		irq_enter()
-#define nmi_exit()		sub_preempt_count(HARDIRQ_OFFSET)
-
 struct task_struct;
 
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
@@ -97,12 +94,35 @@ static inline void account_system_vtime(struct task_struct *tsk)
 }
 #endif
 
+/*
+ * It is safe to do non-atomic ops on ->hardirq_context,
+ * because NMI handlers may not preempt and the ops are
+ * always balanced, so the interrupted value of ->hardirq_context
+ * will always be restored.
+ */
 #define irq_enter()					\
 	do {						\
 		account_system_vtime(current);		\
 		add_preempt_count(HARDIRQ_OFFSET);	\
+		trace_hardirq_enter();			\
+	} while (0)
+
+/*
+ * Exit irq context without processing softirqs:
+ */
+#define __irq_exit()					\
+	do {						\
+		trace_hardirq_exit();			\
+		account_system_vtime(current);		\
+		sub_preempt_count(HARDIRQ_OFFSET);	\
 	} while (0)
 
+/*
+ * Exit irq context and process softirqs if needed:
+ */
 extern void irq_exit(void);
 
+#define nmi_enter()		irq_enter()
+#define nmi_exit()		__irq_exit()
+
 #endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 1b7bb37..444a3ae 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -3,6 +3,7 @@
 
 #include <linux/file.h>
 #include <linux/rcupdate.h>
+#include <linux/irqflags.h>
 
 #define INIT_FDTABLE \
 {							\
@@ -124,6 +125,7 @@ extern struct group_info init_groups;
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
 	.fs_excl	= ATOMIC_INIT(0),				\
 	.pi_lock	= SPIN_LOCK_UNLOCKED,				\
+	INIT_TRACE_IRQFLAGS						\
 }
 
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 73463fb..d5afee9 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -10,6 +10,7 @@
 #include <linux/irqreturn.h>
 #include <linux/hardirq.h>
 #include <linux/sched.h>
+#include <linux/irqflags.h>
 #include <asm/atomic.h>
 #include <asm/ptrace.h>
 #include <asm/system.h>
@@ -199,13 +200,11 @@ static inline void __deprecated save_and_cli(unsigned long *x)
 #define save_and_cli(x)	save_and_cli(&x)
 #endif /* CONFIG_SMP */
 
-/* SoftIRQ primitives.  */
-#define local_bh_disable() \
-		do { add_preempt_count(SOFTIRQ_OFFSET); barrier(); } while (0)
-#define __local_bh_enable() \
-		do { barrier(); sub_preempt_count(SOFTIRQ_OFFSET); } while (0)
-
+extern void local_bh_disable(void);
+extern void __local_bh_enable(void);
+extern void _local_bh_enable(void);
 extern void local_bh_enable(void);
+extern void local_bh_enable_ip(unsigned long ip);
 
 /* PLEASE, avoid to allocate new softirqs, if you need not _really_ high
    frequency threaded job scheduling. For almost all the purposes
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
new file mode 100644
index 0000000..412e025
--- /dev/null
+++ b/include/linux/irqflags.h
@@ -0,0 +1,96 @@
+/*
+ * include/linux/irqflags.h
+ *
+ * IRQ flags tracing: follow the state of the hardirq and softirq flags and
+ * provide callbacks for transitions between ON and OFF states.
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * raw_local_irq_*() macros from the lowlevel headers.
+ */
+#ifndef _LINUX_TRACE_IRQFLAGS_H
+#define _LINUX_TRACE_IRQFLAGS_H
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+  extern void trace_hardirqs_on(void);
+  extern void trace_hardirqs_off(void);
+  extern void trace_softirqs_on(unsigned long ip);
+  extern void trace_softirqs_off(unsigned long ip);
+# define trace_hardirq_context(p)	((p)->hardirq_context)
+# define trace_softirq_context(p)	((p)->softirq_context)
+# define trace_hardirqs_enabled(p)	((p)->hardirqs_enabled)
+# define trace_softirqs_enabled(p)	((p)->softirqs_enabled)
+# define trace_hardirq_enter()	do { current->hardirq_context++; } while (0)
+# define trace_hardirq_exit()	do { current->hardirq_context--; } while (0)
+# define trace_softirq_enter()	do { current->softirq_context++; } while (0)
+# define trace_softirq_exit()	do { current->softirq_context--; } while (0)
+# define INIT_TRACE_IRQFLAGS	.softirqs_enabled = 1,
+#else
+# define trace_hardirqs_on()		do { } while (0)
+# define trace_hardirqs_off()		do { } while (0)
+# define trace_softirqs_on(ip)		do { } while (0)
+# define trace_softirqs_off(ip)		do { } while (0)
+# define trace_hardirq_context(p)	0
+# define trace_softirq_context(p)	0
+# define trace_hardirqs_enabled(p)	0
+# define trace_softirqs_enabled(p)	0
+# define trace_hardirq_enter()		do { } while (0)
+# define trace_hardirq_exit()		do { } while (0)
+# define trace_softirq_enter()		do { } while (0)
+# define trace_softirq_exit()		do { } while (0)
+# define INIT_TRACE_IRQFLAGS
+#endif
+
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+
+#include <asm/irqflags.h>
+
+#define local_irq_enable() \
+	do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
+#define local_irq_disable() \
+	do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
+#define local_irq_save(flags) \
+	do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0)
+
+#define local_irq_restore(flags)				\
+	do {							\
+		if (raw_irqs_disabled_flags(flags)) {		\
+			raw_local_irq_restore(flags);		\
+			trace_hardirqs_off();			\
+		} else {					\
+			trace_hardirqs_on();			\
+			raw_local_irq_restore(flags);		\
+		}						\
+	} while (0)
+#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */
+/*
+ * The local_irq_*() APIs are equal to the raw_local_irq*()
+ * if !TRACE_IRQFLAGS.
+ */
+# define raw_local_irq_disable()	local_irq_disable()
+# define raw_local_irq_enable()		local_irq_enable()
+# define raw_local_irq_save(flags)	local_irq_save(flags)
+# define raw_local_irq_restore(flags)	local_irq_restore(flags)
+#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
+
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+#define safe_halt()						\
+	do {							\
+		trace_hardirqs_on();				\
+		raw_safe_halt();				\
+	} while (0)
+
+#define local_save_flags(flags)		raw_local_save_flags(flags)
+
+#define irqs_disabled()						\
+({								\
+	unsigned long flags;					\
+								\
+	raw_local_save_flags(flags);				\
+	raw_irqs_disabled_flags(flags);				\
+})
+
+#define irqs_disabled_flags(flags)	raw_irqs_disabled_flags(flags)
+#endif		/* CONFIG_X86 */
+
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index bdabeee..ad7a890 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -871,6 +871,21 @@ struct task_struct {
 	/* mutex deadlock detection */
 	struct mutex_waiter *blocked_on;
 #endif
+#ifdef CONFIG_TRACE_IRQFLAGS
+	unsigned int irq_events;
+	int hardirqs_enabled;
+	unsigned long hardirq_enable_ip;
+	unsigned int hardirq_enable_event;
+	unsigned long hardirq_disable_ip;
+	unsigned int hardirq_disable_event;
+	int softirqs_enabled;
+	unsigned long softirq_disable_ip;
+	unsigned int softirq_disable_event;
+	unsigned long softirq_enable_ip;
+	unsigned int softirq_enable_event;
+	int hardirq_context;
+	int softirq_context;
+#endif
 
 /* journalling filesystem info */
 	void *journal_info;
diff --git a/kernel/fork.c b/kernel/fork.c
index 1cd46a4..b7db7fb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -968,6 +968,10 @@ static task_t *copy_process(unsigned long clone_flags,
 	if (!p)
 		goto fork_out;
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
+	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
+#endif
 	retval = -EAGAIN;
 	if (atomic_read(&p->user->processes) >=
 			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
@@ -1042,6 +1046,21 @@ static task_t *copy_process(unsigned long clone_flags,
  	}
 	mpol_fix_fork_child_flag(p);
 #endif
+#ifdef CONFIG_TRACE_IRQFLAGS
+	p->irq_events = 0;
+	p->hardirqs_enabled = 0;
+	p->hardirq_enable_ip = 0;
+	p->hardirq_enable_event = 0;
+	p->hardirq_disable_ip = _THIS_IP_;
+	p->hardirq_disable_event = 0;
+	p->softirqs_enabled = 1;
+	p->softirq_enable_ip = _THIS_IP_;
+	p->softirq_enable_event = 0;
+	p->softirq_disable_ip = 0;
+	p->softirq_disable_event = 0;
+	p->hardirq_context = 0;
+	p->softirq_context = 0;
+#endif
 
 	rt_mutex_init_task(p);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 48c1faa..9118299 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4462,7 +4462,9 @@ int __sched cond_resched_softirq(void)
 	BUG_ON(!in_softirq());
 
 	if (need_resched() && __resched_legal()) {
-		__local_bh_enable();
+		raw_local_irq_disable();
+		_local_bh_enable();
+		raw_local_irq_enable();
 		__cond_resched();
 		local_bh_disable();
 		return 1;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 8f03e3b..584609b 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -62,6 +62,119 @@ static inline void wakeup_softirqd(void)
 }
 
 /*
+ * This one is for softirq.c-internal use,
+ * where hardirqs are disabled legitimately:
+ */
+static void __local_bh_disable(unsigned long ip)
+{
+	unsigned long flags;
+
+	WARN_ON_ONCE(in_irq());
+
+	raw_local_irq_save(flags);
+	add_preempt_count(SOFTIRQ_OFFSET);
+	/*
+	 * Were softirqs turned off above:
+	 */
+	if (softirq_count() == SOFTIRQ_OFFSET)
+		trace_softirqs_off(ip);
+	raw_local_irq_restore(flags);
+}
+
+void local_bh_disable(void)
+{
+	__local_bh_disable((unsigned long)__builtin_return_address(0));
+}
+
+EXPORT_SYMBOL(local_bh_disable);
+
+void __local_bh_enable(void)
+{
+	WARN_ON_ONCE(in_irq());
+
+	/*
+	 * softirqs should never be enabled by __local_bh_enable(),
+	 * it always nests inside local_bh_enable() sections:
+	 */
+	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
+
+	sub_preempt_count(SOFTIRQ_OFFSET);
+}
+EXPORT_SYMBOL_GPL(__local_bh_enable);
+
+/*
+ * Special-case - softirqs can safely be enabled in
+ * cond_resched_softirq(), or by __do_softirq(),
+ * without processing still-pending softirqs:
+ */
+void _local_bh_enable(void)
+{
+	WARN_ON_ONCE(in_irq());
+	WARN_ON_ONCE(!irqs_disabled());
+
+	if (softirq_count() == SOFTIRQ_OFFSET)
+		trace_softirqs_on((unsigned long)__builtin_return_address(0));
+	sub_preempt_count(SOFTIRQ_OFFSET);
+}
+
+EXPORT_SYMBOL(_local_bh_enable);
+
+void local_bh_enable(void)
+{
+	unsigned long flags;
+
+	WARN_ON_ONCE(in_irq());
+	WARN_ON_ONCE(irqs_disabled());
+
+	local_irq_save(flags);
+	/*
+	 * Are softirqs going to be turned on now:
+	 */
+	if (softirq_count() == SOFTIRQ_OFFSET)
+		trace_softirqs_on((unsigned long)__builtin_return_address(0));
+	/*
+	 * Keep preemption disabled until we are done with
+	 * softirq processing:
+ 	 */
+ 	sub_preempt_count(SOFTIRQ_OFFSET - 1);
+
+	if (unlikely(!in_interrupt() && local_softirq_pending()))
+		do_softirq();
+
+	dec_preempt_count();
+	local_irq_restore(flags);
+	preempt_check_resched();
+}
+EXPORT_SYMBOL(local_bh_enable);
+
+void local_bh_enable_ip(unsigned long ip)
+{
+	unsigned long flags;
+
+	WARN_ON_ONCE(in_irq());
+
+	local_irq_save(flags);
+	/*
+	 * Are softirqs going to be turned on now:
+	 */
+	if (softirq_count() == SOFTIRQ_OFFSET)
+		trace_softirqs_on(ip);
+	/*
+	 * Keep preemption disabled until we are done with
+	 * softirq processing:
+ 	 */
+ 	sub_preempt_count(SOFTIRQ_OFFSET - 1);
+
+	if (unlikely(!in_interrupt() && local_softirq_pending()))
+		do_softirq();
+
+	dec_preempt_count();
+	local_irq_restore(flags);
+	preempt_check_resched();
+}
+EXPORT_SYMBOL(local_bh_enable_ip);
+
+/*
  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
  * and we fall back to softirqd after that.
  *
@@ -80,8 +193,9 @@ asmlinkage void __do_softirq(void)
 	int cpu;
 
 	pending = local_softirq_pending();
+	__local_bh_disable((unsigned long)__builtin_return_address(0));
+	trace_softirq_enter();
 
-	local_bh_disable();
 	cpu = smp_processor_id();
 restart:
 	/* Reset the pending bitmask before enabling irqs */
@@ -109,7 +223,8 @@ restart:
 	if (pending)
 		wakeup_softirqd();
 
-	__local_bh_enable();
+	trace_softirq_exit();
+	_local_bh_enable();
 }
 
 #ifndef __ARCH_HAS_DO_SOFTIRQ
@@ -136,23 +251,6 @@ EXPORT_SYMBOL(do_softirq);
 
 #endif
 
-void local_bh_enable(void)
-{
-	WARN_ON(irqs_disabled());
-	/*
-	 * Keep preemption disabled until we are done with
-	 * softirq processing:
- 	 */
- 	sub_preempt_count(SOFTIRQ_OFFSET - 1);
-
-	if (unlikely(!in_interrupt() && local_softirq_pending()))
-		do_softirq();
-
-	dec_preempt_count();
-	preempt_check_resched();
-}
-EXPORT_SYMBOL(local_bh_enable);
-
 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
 # define invoke_softirq()	__do_softirq()
 #else
@@ -165,6 +263,7 @@ EXPORT_SYMBOL(local_bh_enable);
 void irq_exit(void)
 {
 	account_system_vtime(current);
+	trace_hardirq_exit();
 	sub_preempt_count(IRQ_EXIT_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())
 		invoke_softirq();
-- 
cgit v0.10.2


From 55df314fbdb44c20fa7a5112d16546ee970c1d76 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:43 -0700
Subject: [PATCH] lockdep: irqtrace subsystem, docs

Add Documentation/irqflags-tracing.txt.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/irqflags-tracing.txt b/Documentation/irqflags-tracing.txt
new file mode 100644
index 0000000..6a44487
--- /dev/null
+++ b/Documentation/irqflags-tracing.txt
@@ -0,0 +1,57 @@
+IRQ-flags state tracing
+
+started by Ingo Molnar <mingo@redhat.com>
+
+the "irq-flags tracing" feature "traces" hardirq and softirq state, in
+that it gives interested subsystems an opportunity to be notified of
+every hardirqs-off/hardirqs-on, softirqs-off/softirqs-on event that
+happens in the kernel.
+
+CONFIG_TRACE_IRQFLAGS_SUPPORT is needed for CONFIG_PROVE_SPIN_LOCKING
+and CONFIG_PROVE_RW_LOCKING to be offered by the generic lock debugging
+code. Otherwise only CONFIG_PROVE_MUTEX_LOCKING and
+CONFIG_PROVE_RWSEM_LOCKING will be offered on an architecture - these
+are locking APIs that are not used in IRQ context. (the one exception
+for rwsems is worked around)
+
+architecture support for this is certainly not in the "trivial"
+category, because lots of lowlevel assembly code deal with irq-flags
+state changes. But an architecture can be irq-flags-tracing enabled in a
+rather straightforward and risk-free manner.
+
+Architectures that want to support this need to do a couple of
+code-organizational changes first:
+
+- move their irq-flags manipulation code from their asm/system.h header
+  to asm/irqflags.h
+
+- rename local_irq_disable()/etc to raw_local_irq_disable()/etc. so that
+  the linux/irqflags.h code can inject callbacks and can construct the
+  real local_irq_disable()/etc APIs.
+
+- add and enable TRACE_IRQFLAGS_SUPPORT in their arch level Kconfig file
+
+and then a couple of functional changes are needed as well to implement
+irq-flags-tracing support:
+
+- in lowlevel entry code add (build-conditional) calls to the
+  trace_hardirqs_off()/trace_hardirqs_on() functions. The lock validator
+  closely guards whether the 'real' irq-flags matches the 'virtual'
+  irq-flags state, and complains loudly (and turns itself off) if the
+  two do not match. Usually most of the time for arch support for
+  irq-flags-tracing is spent in this state: look at the lockdep
+  complaint, try to figure out the assembly code we did not cover yet,
+  fix and repeat. Once the system has booted up and works without a
+  lockdep complaint in the irq-flags-tracing functions arch support is
+  complete.
+- if the architecture has non-maskable interrupts then those need to be
+  excluded from the irq-tracing [and lock validation] mechanism via
+  lockdep_off()/lockdep_on().
+
+in general there is no risk from having an incomplete irq-flags-tracing
+implementation in an architecture: lockdep will detect that and will
+turn itself off. I.e. the lock validator will still be reliable. There
+should be no crashes due to irq-tracing bugs. (except if the assembly
+changes break other code by modifying conditions or registers that
+shouldnt be)
+
-- 
cgit v0.10.2


From 55f327fa9e876758491a82af7491104f1cc3fc4d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:43 -0700
Subject: [PATCH] lockdep: irqtrace subsystem, i386 support

Add irqflags-tracing support to i386.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
index b20ddd0..b31c080 100644
--- a/arch/i386/Kconfig.debug
+++ b/arch/i386/Kconfig.debug
@@ -1,5 +1,9 @@
 menu "Kernel hacking"
 
+config TRACE_IRQFLAGS_SUPPORT
+	bool
+	default y
+
 source "lib/Kconfig.debug"
 
 config EARLY_PRINTK
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 787190c..d9a260f 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -42,6 +42,7 @@
 
 #include <linux/linkage.h>
 #include <asm/thread_info.h>
+#include <asm/irqflags.h>
 #include <asm/errno.h>
 #include <asm/segment.h>
 #include <asm/smp.h>
@@ -76,12 +77,21 @@ NT_MASK		= 0x00004000
 VM_MASK		= 0x00020000
 
 #ifdef CONFIG_PREEMPT
-#define preempt_stop		cli
+#define preempt_stop		cli; TRACE_IRQS_OFF
 #else
 #define preempt_stop
 #define resume_kernel		restore_nocheck
 #endif
 
+.macro TRACE_IRQS_IRET
+#ifdef CONFIG_TRACE_IRQFLAGS
+	testl $IF_MASK,EFLAGS(%esp)     # interrupts off?
+	jz 1f
+	TRACE_IRQS_ON
+1:
+#endif
+.endm
+
 #ifdef CONFIG_VM86
 #define resume_userspace_sig	check_userspace
 #else
@@ -257,6 +267,10 @@ ENTRY(sysenter_entry)
 	CFI_REGISTER esp, ebp
 	movl TSS_sysenter_esp0(%esp),%esp
 sysenter_past_esp:
+	/*
+	 * No need to follow this irqs on/off section: the syscall
+	 * disabled irqs and here we enable it straight after entry:
+	 */
 	sti
 	pushl $(__USER_DS)
 	CFI_ADJUST_CFA_OFFSET 4
@@ -303,6 +317,7 @@ sysenter_past_esp:
 	call *sys_call_table(,%eax,4)
 	movl %eax,EAX(%esp)
 	cli
+	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx
 	jne syscall_exit_work
@@ -310,6 +325,7 @@ sysenter_past_esp:
 	movl EIP(%esp), %edx
 	movl OLDESP(%esp), %ecx
 	xorl %ebp,%ebp
+	TRACE_IRQS_ON
 	sti
 	sysexit
 	CFI_ENDPROC
@@ -339,6 +355,7 @@ syscall_exit:
 	cli				# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
+	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx	# current->work
 	jne syscall_exit_work
@@ -355,12 +372,15 @@ restore_all:
 	CFI_REMEMBER_STATE
 	je ldt_ss			# returning to user-space with LDT SS
 restore_nocheck:
+	TRACE_IRQS_IRET
+restore_nocheck_notrace:
 	RESTORE_REGS
 	addl $4, %esp
 	CFI_ADJUST_CFA_OFFSET -4
 1:	iret
 .section .fixup,"ax"
 iret_exc:
+	TRACE_IRQS_ON
 	sti
 	pushl $0			# no error code
 	pushl $do_iret_error
@@ -386,11 +406,13 @@ ldt_ss:
 	subl $8, %esp		# reserve space for switch16 pointer
 	CFI_ADJUST_CFA_OFFSET 8
 	cli
+	TRACE_IRQS_OFF
 	movl %esp, %eax
 	/* Set up the 16bit stack frame with switch32 pointer on top,
 	 * and a switch16 pointer on top of the current frame. */
 	call setup_x86_bogus_stack
 	CFI_ADJUST_CFA_OFFSET -8	# frame has moved
+	TRACE_IRQS_IRET
 	RESTORE_REGS
 	lss 20+4(%esp), %esp	# switch to 16bit stack
 1:	iret
@@ -411,6 +433,7 @@ work_resched:
 	cli				# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
+	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
 					# than syscall tracing?
@@ -462,6 +485,7 @@ syscall_trace_entry:
 syscall_exit_work:
 	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
 	jz work_pending
+	TRACE_IRQS_ON
 	sti				# could let do_syscall_trace() call
 					# schedule() instead
 	movl %esp, %eax
@@ -535,9 +559,14 @@ ENTRY(irq_entries_start)
 vector=vector+1
 .endr
 
+/*
+ * the CPU automatically disables interrupts when executing an IRQ vector,
+ * so IRQ-flags tracing has to follow that:
+ */
 	ALIGN
 common_interrupt:
 	SAVE_ALL
+	TRACE_IRQS_OFF
 	movl %esp,%eax
 	call do_IRQ
 	jmp ret_from_intr
@@ -549,9 +578,10 @@ ENTRY(name)				\
 	pushl $~(nr);			\
 	CFI_ADJUST_CFA_OFFSET 4;	\
 	SAVE_ALL;			\
+	TRACE_IRQS_OFF			\
 	movl %esp,%eax;			\
 	call smp_/**/name;		\
-	jmp ret_from_intr;	\
+	jmp ret_from_intr;		\
 	CFI_ENDPROC
 
 /* The include is where all of the SMP etc. interrupts come from */
@@ -726,7 +756,7 @@ nmi_stack_correct:
 	xorl %edx,%edx		# zero error code
 	movl %esp,%eax		# pt_regs pointer
 	call do_nmi
-	jmp restore_all
+	jmp restore_nocheck_notrace
 	CFI_ENDPROC
 
 nmi_stack_fixup:
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 16b4917..6cb529f 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -166,7 +166,7 @@ void irq_ctx_init(int cpu)
 	irqctx->tinfo.task              = NULL;
 	irqctx->tinfo.exec_domain       = NULL;
 	irqctx->tinfo.cpu               = cpu;
-	irqctx->tinfo.preempt_count     = SOFTIRQ_OFFSET;
+	irqctx->tinfo.preempt_count     = 0;
 	irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
 
 	softirq_ctx[cpu] = irqctx;
@@ -211,6 +211,10 @@ asmlinkage void do_softirq(void)
 			: "0"(isp)
 			: "memory", "cc", "edx", "ecx", "eax"
 		);
+		/*
+		 * Shouldnt happen, we returned above if in_interrupt():
+	 	 */
+		WARN_ON_ONCE(softirq_count());
 	}
 
 	local_irq_restore(flags);
diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h
new file mode 100644
index 0000000..ca777e8
--- /dev/null
+++ b/include/asm-i386/irqflags.h
@@ -0,0 +1,56 @@
+/*
+ * include/asm-i386/irqflags.h
+ *
+ * IRQ flags handling
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * raw_local_irq_*() macros from the lowlevel headers.
+ */
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+#define raw_local_save_flags(x)	do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0)
+#define raw_local_irq_restore(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0)
+#define raw_local_irq_disable()	__asm__ __volatile__("cli": : :"memory")
+#define raw_local_irq_enable()	__asm__ __volatile__("sti": : :"memory")
+/* used in the idle loop; sti takes one instruction cycle to complete */
+#define raw_safe_halt()		__asm__ __volatile__("sti; hlt": : :"memory")
+/* used when interrupts are already enabled or to shutdown the processor */
+#define halt()			__asm__ __volatile__("hlt": : :"memory")
+
+#define raw_irqs_disabled_flags(flags)	(!((flags) & (1<<9)))
+
+/* For spinlocks etc */
+#define raw_local_irq_save(x)	__asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
+
+/*
+ * Do the CPU's IRQ-state tracing from assembly code. We call a
+ * C function, so save all the C-clobbered registers:
+ */
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+# define TRACE_IRQS_ON				\
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call trace_hardirqs_on;			\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+
+# define TRACE_IRQS_OFF				\
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call trace_hardirqs_off;		\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+
+#else
+# define TRACE_IRQS_ON
+# define TRACE_IRQS_OFF
+#endif
+
+#endif
diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h
index 04ba302..7e29b51 100644
--- a/include/asm-i386/spinlock.h
+++ b/include/asm-i386/spinlock.h
@@ -31,6 +31,11 @@
 	"jmp 1b\n" \
 	"3:\n\t"
 
+/*
+ * NOTE: there's an irqs-on section here, which normally would have to be
+ * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use
+ * __raw_spin_lock_string_flags().
+ */
 #define __raw_spin_lock_string_flags \
 	"\n1:\t" \
 	"lock ; decb %0\n\t" \
diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h
index cab0180..db398d8 100644
--- a/include/asm-i386/system.h
+++ b/include/asm-i386/system.h
@@ -456,25 +456,7 @@ static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long l
 
 #define set_wmb(var, value) do { var = value; wmb(); } while (0)
 
-/* interrupt control.. */
-#define local_save_flags(x)	do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0)
-#define local_irq_restore(x) 	do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0)
-#define local_irq_disable() 	__asm__ __volatile__("cli": : :"memory")
-#define local_irq_enable()	__asm__ __volatile__("sti": : :"memory")
-/* used in the idle loop; sti takes one instruction cycle to complete */
-#define safe_halt()		__asm__ __volatile__("sti; hlt": : :"memory")
-/* used when interrupts are already enabled or to shutdown the processor */
-#define halt()			__asm__ __volatile__("hlt": : :"memory")
-
-#define irqs_disabled()			\
-({					\
-	unsigned long flags;		\
-	local_save_flags(flags);	\
-	!(flags & (1<<9));		\
-})
-
-/* For spinlocks etc */
-#define local_irq_save(x)	__asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
+#include <linux/irqflags.h>
 
 /*
  * disable hlt during certain critical i/o operations
-- 
cgit v0.10.2


From c8558fcdecb1f920df8050be4f2d5f499060030e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:44 -0700
Subject: [PATCH] lockdep: irqtrace cleanup of include/asm-i386/irqflags.h

Clean up the x86 irqflags.h file:

 - macro => inline function transformation
 - simplifications
 - style fixes

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h
index ca777e8..e1bdb97 100644
--- a/include/asm-i386/irqflags.h
+++ b/include/asm-i386/irqflags.h
@@ -5,24 +5,95 @@
  *
  * This file gets included from lowlevel asm headers too, to provide
  * wrapped versions of the local_irq_*() APIs, based on the
- * raw_local_irq_*() macros from the lowlevel headers.
+ * raw_local_irq_*() functions from the lowlevel headers.
  */
 #ifndef _ASM_IRQFLAGS_H
 #define _ASM_IRQFLAGS_H
 
-#define raw_local_save_flags(x)	do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0)
-#define raw_local_irq_restore(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0)
-#define raw_local_irq_disable()	__asm__ __volatile__("cli": : :"memory")
-#define raw_local_irq_enable()	__asm__ __volatile__("sti": : :"memory")
-/* used in the idle loop; sti takes one instruction cycle to complete */
-#define raw_safe_halt()		__asm__ __volatile__("sti; hlt": : :"memory")
-/* used when interrupts are already enabled or to shutdown the processor */
-#define halt()			__asm__ __volatile__("hlt": : :"memory")
+#ifndef __ASSEMBLY__
 
-#define raw_irqs_disabled_flags(flags)	(!((flags) & (1<<9)))
+static inline unsigned long __raw_local_save_flags(void)
+{
+	unsigned long flags;
 
-/* For spinlocks etc */
-#define raw_local_irq_save(x)	__asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
+	__asm__ __volatile__(
+		"pushfl ; popl %0"
+		: "=g" (flags)
+		: /* no input */
+	);
+
+	return flags;
+}
+
+#define raw_local_save_flags(flags) \
+		do { (flags) = __raw_local_save_flags(); } while (0)
+
+static inline void raw_local_irq_restore(unsigned long flags)
+{
+	__asm__ __volatile__(
+		"pushl %0 ; popfl"
+		: /* no output */
+		:"g" (flags)
+		:"memory", "cc"
+	);
+}
+
+static inline void raw_local_irq_disable(void)
+{
+	__asm__ __volatile__("cli" : : : "memory");
+}
+
+static inline void raw_local_irq_enable(void)
+{
+	__asm__ __volatile__("sti" : : : "memory");
+}
+
+/*
+ * Used in the idle loop; sti takes one instruction cycle
+ * to complete:
+ */
+static inline void raw_safe_halt(void)
+{
+	__asm__ __volatile__("sti; hlt" : : : "memory");
+}
+
+/*
+ * Used when interrupts are already enabled or to
+ * shutdown the processor:
+ */
+static inline void halt(void)
+{
+	__asm__ __volatile__("hlt": : :"memory");
+}
+
+static inline int raw_irqs_disabled_flags(unsigned long flags)
+{
+	return !(flags & (1 << 9));
+}
+
+static inline int raw_irqs_disabled(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	return raw_irqs_disabled_flags(flags);
+}
+
+/*
+ * For spinlocks, etc:
+ */
+static inline unsigned long __raw_local_irq_save(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	raw_local_irq_disable();
+
+	return flags;
+}
+
+#define raw_local_irq_save(flags) \
+		do { (flags) = __raw_local_irq_save(); } while (0)
+
+#endif /* __ASSEMBLY__ */
 
 /*
  * Do the CPU's IRQ-state tracing from assembly code. We call a
-- 
cgit v0.10.2


From 2601e64d262ee5ed4d4a5737345803800d9c4db3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:45 -0700
Subject: [PATCH] lockdep: irqtrace subsystem, x86_64 support

Add irqflags-tracing support to x86_64.

[akpm@osdl.org: build fix]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
index 1d92ab5..775d211 100644
--- a/arch/x86_64/Kconfig.debug
+++ b/arch/x86_64/Kconfig.debug
@@ -1,5 +1,9 @@
 menu "Kernel hacking"
 
+config TRACE_IRQFLAGS_SUPPORT
+	bool
+	default y
+
 source "lib/Kconfig.debug"
 
 config DEBUG_RODATA
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index c536fa9..9b5bb41 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -13,6 +13,7 @@
 #include <asm/thread_info.h>	
 #include <asm/segment.h>
 #include <asm/vsyscall32.h>
+#include <asm/irqflags.h>
 #include <linux/linkage.h>
 
 #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
@@ -75,6 +76,10 @@ ENTRY(ia32_sysenter_target)
 	swapgs
 	movq	%gs:pda_kernelstack, %rsp
 	addq	$(PDA_STACKOFFSET),%rsp	
+	/*
+	 * No need to follow this irqs on/off section: the syscall
+	 * disabled irqs, here we enable it straight after entry:
+	 */
 	sti	
  	movl	%ebp,%ebp		/* zero extension */
 	pushq	$__USER32_DS
@@ -118,6 +123,7 @@ sysenter_do_call:
 	movq	%rax,RAX-ARGOFFSET(%rsp)
 	GET_THREAD_INFO(%r10)
 	cli
+	TRACE_IRQS_OFF
 	testl	$_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
 	jnz	int_ret_from_sys_call
 	andl    $~TS_COMPAT,threadinfo_status(%r10)
@@ -132,6 +138,7 @@ sysenter_do_call:
 	CFI_REGISTER rsp,rcx
 	movl	$VSYSCALL32_SYSEXIT,%edx	/* User %eip */
 	CFI_REGISTER rip,rdx
+	TRACE_IRQS_ON
 	swapgs
 	sti		/* sti only takes effect after the next instruction */
 	/* sysexit */
@@ -186,6 +193,10 @@ ENTRY(ia32_cstar_target)
 	movl	%esp,%r8d
 	CFI_REGISTER	rsp,r8
 	movq	%gs:pda_kernelstack,%rsp
+	/*
+	 * No need to follow this irqs on/off section: the syscall
+	 * disabled irqs and here we enable it straight after entry:
+	 */
 	sti
 	SAVE_ARGS 8,1,1
 	movl 	%eax,%eax	/* zero extension */
@@ -220,6 +231,7 @@ cstar_do_call:
 	movq %rax,RAX-ARGOFFSET(%rsp)
 	GET_THREAD_INFO(%r10)
 	cli
+	TRACE_IRQS_OFF
 	testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
 	jnz  int_ret_from_sys_call
 	andl $~TS_COMPAT,threadinfo_status(%r10)
@@ -228,6 +240,7 @@ cstar_do_call:
 	CFI_REGISTER rip,rcx
 	movl EFLAGS-ARGOFFSET(%rsp),%r11d	
 	/*CFI_REGISTER rflags,r11*/
+	TRACE_IRQS_ON
 	movl RSP-ARGOFFSET(%rsp),%esp
 	CFI_RESTORE rsp
 	swapgs
@@ -286,7 +299,11 @@ ENTRY(ia32_syscall)
 	/*CFI_REL_OFFSET	rflags,EFLAGS-RIP*/
 	/*CFI_REL_OFFSET	cs,CS-RIP*/
 	CFI_REL_OFFSET	rip,RIP-RIP
-	swapgs	
+	swapgs
+	/*
+	 * No need to follow this irqs on/off section: the syscall
+	 * disabled irqs and here we enable it straight after entry:
+	 */
 	sti
 	movl %eax,%eax
 	pushq %rax
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index ed92c29..d464dde 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -41,13 +41,24 @@
 #include <asm/thread_info.h>
 #include <asm/hw_irq.h>
 #include <asm/page.h>
+#include <asm/irqflags.h>
 
 	.code64
 
 #ifndef CONFIG_PREEMPT
 #define retint_kernel retint_restore_args
 #endif	
-	
+
+
+.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
+#ifdef CONFIG_TRACE_IRQFLAGS
+	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
+	jnc  1f
+	TRACE_IRQS_ON
+1:
+#endif
+.endm
+
 /*
  * C code is not supposed to know about undefined top of stack. Every time 
  * a C function with an pt_regs argument is called from the SYSCALL based 
@@ -194,6 +205,10 @@ ENTRY(system_call)
 	swapgs
 	movq	%rsp,%gs:pda_oldrsp 
 	movq	%gs:pda_kernelstack,%rsp
+	/*
+	 * No need to follow this irqs off/on section - it's straight
+	 * and short:
+	 */
 	sti					
 	SAVE_ARGS 8,1
 	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp) 
@@ -219,10 +234,15 @@ ret_from_sys_call:
 sysret_check:		
 	GET_THREAD_INFO(%rcx)
 	cli
+	TRACE_IRQS_OFF
 	movl threadinfo_flags(%rcx),%edx
 	andl %edi,%edx
 	CFI_REMEMBER_STATE
 	jnz  sysret_careful 
+	/*
+	 * sysretq will re-enable interrupts:
+	 */
+	TRACE_IRQS_ON
 	movq RIP-ARGOFFSET(%rsp),%rcx
 	CFI_REGISTER	rip,rcx
 	RESTORE_ARGS 0,-ARG_SKIP,1
@@ -237,6 +257,7 @@ sysret_careful:
 	CFI_RESTORE_STATE
 	bt $TIF_NEED_RESCHED,%edx
 	jnc sysret_signal
+	TRACE_IRQS_ON
 	sti
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET 8
@@ -247,6 +268,7 @@ sysret_careful:
 
 	/* Handle a signal */ 
 sysret_signal:
+	TRACE_IRQS_ON
 	sti
 	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
 	jz    1f
@@ -261,6 +283,7 @@ sysret_signal:
 	/* Use IRET because user could have changed frame. This
 	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
 	cli
+	TRACE_IRQS_OFF
 	jmp int_with_check
 	
 badsys:
@@ -309,6 +332,7 @@ ENTRY(int_ret_from_sys_call)
 	CFI_REL_OFFSET	r10,R10-ARGOFFSET
 	CFI_REL_OFFSET	r11,R11-ARGOFFSET
 	cli
+	TRACE_IRQS_OFF
 	testl $3,CS-ARGOFFSET(%rsp)
 	je retint_restore_args
 	movl $_TIF_ALLWORK_MASK,%edi
@@ -327,6 +351,7 @@ int_with_check:
 int_careful:
 	bt $TIF_NEED_RESCHED,%edx
 	jnc  int_very_careful
+	TRACE_IRQS_ON
 	sti
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET 8
@@ -334,10 +359,12 @@ int_careful:
 	popq %rdi
 	CFI_ADJUST_CFA_OFFSET -8
 	cli
+	TRACE_IRQS_OFF
 	jmp int_with_check
 
 	/* handle signals and tracing -- both require a full stack frame */
 int_very_careful:
+	TRACE_IRQS_ON
 	sti
 	SAVE_REST
 	/* Check for syscall exit trace */	
@@ -351,6 +378,7 @@ int_very_careful:
 	CFI_ADJUST_CFA_OFFSET -8
 	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
 	cli
+	TRACE_IRQS_OFF
 	jmp int_restore_rest
 	
 int_signal:
@@ -363,6 +391,7 @@ int_signal:
 int_restore_rest:
 	RESTORE_REST
 	cli
+	TRACE_IRQS_OFF
 	jmp int_with_check
 	CFI_ENDPROC
 END(int_ret_from_sys_call)
@@ -484,6 +513,10 @@ END(stub_rt_sigreturn)
 	swapgs	
 1:	incl	%gs:pda_irqcount	# RED-PEN should check preempt count
 	cmoveq %gs:pda_irqstackptr,%rsp
+	/*
+	 * We entered an interrupt context - irqs are off:
+	 */
+	TRACE_IRQS_OFF
 	call \func
 	.endm
 
@@ -493,6 +526,7 @@ ENTRY(common_interrupt)
 	/* 0(%rsp): oldrsp-ARGOFFSET */
 ret_from_intr:
 	cli	
+	TRACE_IRQS_OFF
 	decl %gs:pda_irqcount
 	leaveq
 	CFI_DEF_CFA_REGISTER	rsp
@@ -515,9 +549,21 @@ retint_check:
 	CFI_REMEMBER_STATE
 	jnz  retint_careful
 retint_swapgs:	 	
+	/*
+	 * The iretq could re-enable interrupts:
+	 */
+	cli
+	TRACE_IRQS_IRETQ
 	swapgs 
+	jmp restore_args
+
 retint_restore_args:				
 	cli
+	/*
+	 * The iretq could re-enable interrupts:
+	 */
+	TRACE_IRQS_IRETQ
+restore_args:
 	RESTORE_ARGS 0,8,0						
 iret_label:	
 	iretq
@@ -530,6 +576,7 @@ iret_label:
 	/* running with kernel gs */
 bad_iret:
 	movq $11,%rdi	/* SIGSEGV */
+	TRACE_IRQS_ON
 	sti
 	jmp do_exit			
 	.previous	
@@ -539,6 +586,7 @@ retint_careful:
 	CFI_RESTORE_STATE
 	bt    $TIF_NEED_RESCHED,%edx
 	jnc   retint_signal
+	TRACE_IRQS_ON
 	sti
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET	8
@@ -547,11 +595,13 @@ retint_careful:
 	CFI_ADJUST_CFA_OFFSET	-8
 	GET_THREAD_INFO(%rcx)
 	cli
+	TRACE_IRQS_OFF
 	jmp retint_check
 	
 retint_signal:
 	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
 	jz    retint_swapgs
+	TRACE_IRQS_ON
 	sti
 	SAVE_REST
 	movq $-1,ORIG_RAX(%rsp) 			
@@ -560,6 +610,7 @@ retint_signal:
 	call do_notify_resume
 	RESTORE_REST
 	cli
+	TRACE_IRQS_OFF
 	movl $_TIF_NEED_RESCHED,%edi
 	GET_THREAD_INFO(%rcx)
 	jmp retint_check
@@ -666,7 +717,7 @@ END(spurious_interrupt)
 
 	/* error code is on the stack already */
 	/* handle NMI like exceptions that can happen everywhere */
-	.macro paranoidentry sym, ist=0
+	.macro paranoidentry sym, ist=0, irqtrace=1
 	SAVE_ALL
 	cld
 	movl $1,%ebx
@@ -691,8 +742,73 @@ END(spurious_interrupt)
 	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
 	.endif
 	cli
+	.if \irqtrace
+	TRACE_IRQS_OFF
+	.endif
 	.endm
-	
+
+	/*
+ 	 * "Paranoid" exit path from exception stack.
+  	 * Paranoid because this is used by NMIs and cannot take
+	 * any kernel state for granted.
+	 * We don't do kernel preemption checks here, because only
+	 * NMI should be common and it does not enable IRQs and
+	 * cannot get reschedule ticks.
+	 *
+	 * "trace" is 0 for the NMI handler only, because irq-tracing
+	 * is fundamentally NMI-unsafe. (we cannot change the soft and
+	 * hard flags at once, atomically)
+	 */
+	.macro paranoidexit trace=1
+	/* ebx:	no swapgs flag */
+paranoid_exit\trace:
+	testl %ebx,%ebx				/* swapgs needed? */
+	jnz paranoid_restore\trace
+	testl $3,CS(%rsp)
+	jnz   paranoid_userspace\trace
+paranoid_swapgs\trace:
+	TRACE_IRQS_IRETQ 0
+	swapgs
+paranoid_restore\trace:
+	RESTORE_ALL 8
+	iretq
+paranoid_userspace\trace:
+	GET_THREAD_INFO(%rcx)
+	movl threadinfo_flags(%rcx),%ebx
+	andl $_TIF_WORK_MASK,%ebx
+	jz paranoid_swapgs\trace
+	movq %rsp,%rdi			/* &pt_regs */
+	call sync_regs
+	movq %rax,%rsp			/* switch stack for scheduling */
+	testl $_TIF_NEED_RESCHED,%ebx
+	jnz paranoid_schedule\trace
+	movl %ebx,%edx			/* arg3: thread flags */
+	.if \trace
+	TRACE_IRQS_ON
+	.endif
+	sti
+	xorl %esi,%esi 			/* arg2: oldset */
+	movq %rsp,%rdi 			/* arg1: &pt_regs */
+	call do_notify_resume
+	cli
+	.if \trace
+	TRACE_IRQS_OFF
+	.endif
+	jmp paranoid_userspace\trace
+paranoid_schedule\trace:
+	.if \trace
+	TRACE_IRQS_ON
+	.endif
+	sti
+	call schedule
+	cli
+	.if \trace
+	TRACE_IRQS_OFF
+	.endif
+	jmp paranoid_userspace\trace
+	CFI_ENDPROC
+	.endm
+
 /*
  * Exception entry point. This expects an error code/orig_rax on the stack
  * and the exception handler in %rax.	
@@ -748,6 +864,7 @@ error_exit:
 	movl %ebx,%eax		
 	RESTORE_REST
 	cli
+	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)	
 	testl %eax,%eax
 	jne  retint_kernel
@@ -755,6 +872,10 @@ error_exit:
 	movl  $_TIF_WORK_MASK,%edi
 	andl  %edi,%edx
 	jnz  retint_careful
+	/*
+	 * The iret might restore flags:
+	 */
+	TRACE_IRQS_IRETQ
 	swapgs 
 	RESTORE_ARGS 0,8,0						
 	jmp iret_label
@@ -916,8 +1037,7 @@ KPROBE_ENTRY(debug)
 	pushq $0
 	CFI_ADJUST_CFA_OFFSET 8		
 	paranoidentry do_debug, DEBUG_STACK
-	jmp paranoid_exit
-	CFI_ENDPROC
+	paranoidexit
 END(debug)
 	.previous .text
 
@@ -926,49 +1046,13 @@ KPROBE_ENTRY(nmi)
 	INTR_FRAME
 	pushq $-1
 	CFI_ADJUST_CFA_OFFSET 8
-	paranoidentry do_nmi
-	/*
- 	 * "Paranoid" exit path from exception stack.
-  	 * Paranoid because this is used by NMIs and cannot take
-	 * any kernel state for granted.
-	 * We don't do kernel preemption checks here, because only
-	 * NMI should be common and it does not enable IRQs and
-	 * cannot get reschedule ticks.
-	 */
-	/* ebx:	no swapgs flag */
-paranoid_exit:
-	testl %ebx,%ebx				/* swapgs needed? */
-	jnz paranoid_restore
-	testl $3,CS(%rsp)
-	jnz   paranoid_userspace
-paranoid_swapgs:	
-	swapgs
-paranoid_restore:	
-	RESTORE_ALL 8
-	iretq
-paranoid_userspace:	
-	GET_THREAD_INFO(%rcx)
-	movl threadinfo_flags(%rcx),%ebx
-	andl $_TIF_WORK_MASK,%ebx
-	jz paranoid_swapgs
-	movq %rsp,%rdi			/* &pt_regs */
-	call sync_regs
-	movq %rax,%rsp			/* switch stack for scheduling */
-	testl $_TIF_NEED_RESCHED,%ebx
-	jnz paranoid_schedule
-	movl %ebx,%edx			/* arg3: thread flags */
-	sti
-	xorl %esi,%esi 			/* arg2: oldset */
-	movq %rsp,%rdi 			/* arg1: &pt_regs */
-	call do_notify_resume
-	cli
-	jmp paranoid_userspace
-paranoid_schedule:
-	sti
-	call schedule
-	cli
-	jmp paranoid_userspace
-	CFI_ENDPROC
+	paranoidentry do_nmi, 0, 0
+#ifdef CONFIG_TRACE_IRQFLAGS
+	paranoidexit 0
+#else
+	jmp paranoid_exit1
+ 	CFI_ENDPROC
+#endif
 END(nmi)
 	.previous .text
 
@@ -977,7 +1061,7 @@ KPROBE_ENTRY(int3)
  	pushq $0
  	CFI_ADJUST_CFA_OFFSET 8
  	paranoidentry do_int3, DEBUG_STACK
- 	jmp paranoid_exit
+ 	jmp paranoid_exit1
  	CFI_ENDPROC
 END(int3)
 	.previous .text
@@ -1006,7 +1090,7 @@ END(reserved)
 ENTRY(double_fault)
 	XCPT_FRAME
 	paranoidentry do_double_fault
-	jmp paranoid_exit
+	jmp paranoid_exit1
 	CFI_ENDPROC
 END(double_fault)
 
@@ -1022,7 +1106,7 @@ END(segment_not_present)
 ENTRY(stack_segment)
 	XCPT_FRAME
 	paranoidentry do_stack_segment
-	jmp paranoid_exit
+	jmp paranoid_exit1
 	CFI_ENDPROC
 END(stack_segment)
 
@@ -1050,7 +1134,7 @@ ENTRY(machine_check)
 	pushq $0
 	CFI_ADJUST_CFA_OFFSET 8	
 	paranoidentry do_machine_check
-	jmp paranoid_exit
+	jmp paranoid_exit1
 	CFI_ENDPROC
 END(machine_check)
 #endif
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index a1f1df5..5221a53 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -177,8 +177,10 @@ asmlinkage void do_softirq(void)
  	local_irq_save(flags);
  	pending = local_softirq_pending();
  	/* Switch to interrupt stack */
- 	if (pending)
+ 	if (pending) {
 		call_softirq();
+		WARN_ON_ONCE(softirq_count());
+	}
  	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(do_softirq);
diff --git a/include/asm-x86_64/irqflags.h b/include/asm-x86_64/irqflags.h
new file mode 100644
index 0000000..22f3c06
--- /dev/null
+++ b/include/asm-x86_64/irqflags.h
@@ -0,0 +1,61 @@
+/*
+ * include/asm-x86_64/irqflags.h
+ *
+ * IRQ flags handling
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * raw_local_irq_*() macros from the lowlevel headers.
+ */
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+#ifndef __ASSEMBLY__
+
+/* interrupt control.. */
+#define raw_local_save_flags(x)	do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
+#define raw_local_irq_restore(x) 	__asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
+
+#ifdef CONFIG_X86_VSMP
+/* Interrupt control for VSMP  architecture */
+#define raw_local_irq_disable()	do { unsigned long flags; raw_local_save_flags(flags); raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0)
+#define raw_local_irq_enable()	do { unsigned long flags; raw_local_save_flags(flags); raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0)
+
+#define raw_irqs_disabled_flags(flags)	\
+({						\
+	(flags & (1<<18)) || !(flags & (1<<9));	\
+})
+
+/* For spinlocks etc */
+#define raw_local_irq_save(x)	do { raw_local_save_flags(x); raw_local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0)
+#else  /* CONFIG_X86_VSMP */
+#define raw_local_irq_disable() 	__asm__ __volatile__("cli": : :"memory")
+#define raw_local_irq_enable()	__asm__ __volatile__("sti": : :"memory")
+
+#define raw_irqs_disabled_flags(flags)	\
+({						\
+	!(flags & (1<<9));			\
+})
+
+/* For spinlocks etc */
+#define raw_local_irq_save(x) 	do { warn_if_not_ulong(x); __asm__ __volatile__("# raw_local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
+#endif
+
+#define raw_irqs_disabled()			\
+({						\
+	unsigned long flags;			\
+	raw_local_save_flags(flags);		\
+	raw_irqs_disabled_flags(flags);		\
+})
+
+/* used in the idle loop; sti takes one instruction cycle to complete */
+#define raw_safe_halt()	__asm__ __volatile__("sti; hlt": : :"memory")
+/* used when interrupts are already enabled or to shutdown the processor */
+#define halt()			__asm__ __volatile__("hlt": : :"memory")
+
+#else /* __ASSEMBLY__: */
+# define TRACE_IRQS_ON
+# define TRACE_IRQS_OFF
+#endif
+
+#endif
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
index 68e559f..f67f287 100644
--- a/include/asm-x86_64/system.h
+++ b/include/asm-x86_64/system.h
@@ -244,43 +244,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 
 #define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0)
 
-/* interrupt control.. */
-#define local_save_flags(x)	do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
-#define local_irq_restore(x) 	__asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
-
-#ifdef CONFIG_X86_VSMP
-/* Interrupt control for VSMP  architecture */
-#define local_irq_disable()	do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0)
-#define local_irq_enable()	do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0)
-
-#define irqs_disabled()					\
-({							\
-	unsigned long flags;				\
-	local_save_flags(flags);			\
-	(flags & (1<<18)) || !(flags & (1<<9));		\
-})
-
-/* For spinlocks etc */
-#define local_irq_save(x)	do { local_save_flags(x); local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0)
-#else  /* CONFIG_X86_VSMP */
-#define local_irq_disable() 	__asm__ __volatile__("cli": : :"memory")
-#define local_irq_enable()	__asm__ __volatile__("sti": : :"memory")
-
-#define irqs_disabled()			\
-({					\
-	unsigned long flags;		\
-	local_save_flags(flags);	\
-	!(flags & (1<<9));		\
-})
-
-/* For spinlocks etc */
-#define local_irq_save(x) 	do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
-#endif
-
-/* used in the idle loop; sti takes one instruction cycle to complete */
-#define safe_halt()		__asm__ __volatile__("sti; hlt": : :"memory")
-/* used when interrupts are already enabled or to shutdown the processor */
-#define halt()			__asm__ __volatile__("hlt": : :"memory")
+#include <linux/irqflags.h>
 
 void cpu_idle_wait(void);
 
-- 
cgit v0.10.2


From 6375e2b74c620794e1a27a26e4338aec2e41346a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:45 -0700
Subject: [PATCH] lockdep: irqtrace cleanup of include/asm-x86_64/irqflags.h

Clean up the x86-64 irqflags.h file:

 - macro => inline function transformation
 - simplifications
 - style fixes

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Andi Kleen <ak@muc.de>
Cc: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S
index e49af00..332ea5d 100644
--- a/arch/x86_64/lib/thunk.S
+++ b/arch/x86_64/lib/thunk.S
@@ -47,6 +47,11 @@
 	thunk_retrax __down_failed_interruptible,__down_interruptible
 	thunk_retrax __down_failed_trylock,__down_trylock
 	thunk __up_wakeup,__up
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	thunk trace_hardirqs_on_thunk,trace_hardirqs_on
+	thunk trace_hardirqs_off_thunk,trace_hardirqs_off
+#endif
 	
 	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
 	CFI_STARTPROC
diff --git a/include/asm-x86_64/irqflags.h b/include/asm-x86_64/irqflags.h
index 22f3c06..cce6937 100644
--- a/include/asm-x86_64/irqflags.h
+++ b/include/asm-x86_64/irqflags.h
@@ -5,57 +5,137 @@
  *
  * This file gets included from lowlevel asm headers too, to provide
  * wrapped versions of the local_irq_*() APIs, based on the
- * raw_local_irq_*() macros from the lowlevel headers.
+ * raw_local_irq_*() functions from the lowlevel headers.
  */
 #ifndef _ASM_IRQFLAGS_H
 #define _ASM_IRQFLAGS_H
 
 #ifndef __ASSEMBLY__
+/*
+ * Interrupt control:
+ */
+
+static inline unsigned long __raw_local_save_flags(void)
+{
+	unsigned long flags;
+
+	__asm__ __volatile__(
+		"# __raw_save_flags\n\t"
+		"pushfq ; popq %q0"
+		: "=g" (flags)
+		: /* no input */
+		: "memory"
+	);
 
-/* interrupt control.. */
-#define raw_local_save_flags(x)	do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
-#define raw_local_irq_restore(x) 	__asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
+	return flags;
+}
+
+#define raw_local_save_flags(flags) \
+		do { (flags) = __raw_local_save_flags(); } while (0)
+
+static inline void raw_local_irq_restore(unsigned long flags)
+{
+	__asm__ __volatile__(
+		"pushq %0 ; popfq"
+		: /* no output */
+		:"g" (flags)
+		:"memory", "cc"
+	);
+}
 
 #ifdef CONFIG_X86_VSMP
-/* Interrupt control for VSMP  architecture */
-#define raw_local_irq_disable()	do { unsigned long flags; raw_local_save_flags(flags); raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0)
-#define raw_local_irq_enable()	do { unsigned long flags; raw_local_save_flags(flags); raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0)
-
-#define raw_irqs_disabled_flags(flags)	\
-({						\
-	(flags & (1<<18)) || !(flags & (1<<9));	\
-})
-
-/* For spinlocks etc */
-#define raw_local_irq_save(x)	do { raw_local_save_flags(x); raw_local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0)
-#else  /* CONFIG_X86_VSMP */
-#define raw_local_irq_disable() 	__asm__ __volatile__("cli": : :"memory")
-#define raw_local_irq_enable()	__asm__ __volatile__("sti": : :"memory")
-
-#define raw_irqs_disabled_flags(flags)	\
-({						\
-	!(flags & (1<<9));			\
-})
-
-/* For spinlocks etc */
-#define raw_local_irq_save(x) 	do { warn_if_not_ulong(x); __asm__ __volatile__("# raw_local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
+
+/*
+ * Interrupt control for the VSMP architecture:
+ */
+
+static inline void raw_local_irq_disable(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18));
+}
+
+static inline void raw_local_irq_enable(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18));
+}
+
+static inline int raw_irqs_disabled_flags(unsigned long flags)
+{
+	return !(flags & (1<<9)) || (flags & (1 << 18));
+}
+
+#else /* CONFIG_X86_VSMP */
+
+static inline void raw_local_irq_disable(void)
+{
+	__asm__ __volatile__("cli" : : : "memory");
+}
+
+static inline void raw_local_irq_enable(void)
+{
+	__asm__ __volatile__("sti" : : : "memory");
+}
+
+static inline int raw_irqs_disabled_flags(unsigned long flags)
+{
+	return !(flags & (1 << 9));
+}
+
 #endif
 
-#define raw_irqs_disabled()			\
-({						\
-	unsigned long flags;			\
-	raw_local_save_flags(flags);		\
-	raw_irqs_disabled_flags(flags);		\
-})
+/*
+ * For spinlocks, etc.:
+ */
+
+static inline unsigned long __raw_local_irq_save(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	raw_local_irq_disable();
+
+	return flags;
+}
 
-/* used in the idle loop; sti takes one instruction cycle to complete */
-#define raw_safe_halt()	__asm__ __volatile__("sti; hlt": : :"memory")
-/* used when interrupts are already enabled or to shutdown the processor */
-#define halt()			__asm__ __volatile__("hlt": : :"memory")
+#define raw_local_irq_save(flags) \
+		do { (flags) = __raw_local_irq_save(); } while (0)
+
+static inline int raw_irqs_disabled(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	return raw_irqs_disabled_flags(flags);
+}
+
+/*
+ * Used in the idle loop; sti takes one instruction cycle
+ * to complete:
+ */
+static inline void raw_safe_halt(void)
+{
+	__asm__ __volatile__("sti; hlt" : : : "memory");
+}
+
+/*
+ * Used when interrupts are already enabled or to
+ * shutdown the processor:
+ */
+static inline void halt(void)
+{
+	__asm__ __volatile__("hlt": : :"memory");
+}
 
 #else /* __ASSEMBLY__: */
-# define TRACE_IRQS_ON
-# define TRACE_IRQS_OFF
+# ifdef CONFIG_TRACE_IRQFLAGS
+#  define TRACE_IRQS_ON		call trace_hardirqs_on_thunk
+#  define TRACE_IRQS_OFF	call trace_hardirqs_off_thunk
+# else
+#  define TRACE_IRQS_ON
+#  define TRACE_IRQS_OFF
+# endif
 #endif
 
 #endif
-- 
cgit v0.10.2


From 1f194a4c393103ac925001d7e04b05fbb122580d Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 3 Jul 2006 00:24:46 -0700
Subject: [PATCH] lockdep: irqtrace subsystem, s390 support

irqtrace support for s390.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index f53b6d5..2283933 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -1,5 +1,9 @@
 menu "Kernel hacking"
 
+config TRACE_IRQFLAGS_SUPPORT
+	bool
+	default y
+
 source "lib/Kconfig.debug"
 
 endmenu
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index d8948c3..5b5799a 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -58,6 +58,21 @@ STACK_SIZE  = 1 << STACK_SHIFT
 
 #define BASED(name) name-system_call(%r13)
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	.macro	TRACE_IRQS_ON
+	l	%r1,BASED(.Ltrace_irq_on)
+	basr	%r14,%r1
+	.endm
+
+	.macro	TRACE_IRQS_OFF
+	l	%r1,BASED(.Ltrace_irq_off)
+	basr	%r14,%r1
+	.endm
+#else
+#define TRACE_IRQS_ON
+#define TRACE_IRQS_OFF
+#endif
+
 /*
  * Register usage in interrupt handlers:
  *    R9  - pointer to current task structure
@@ -361,6 +376,7 @@ ret_from_fork:
 	st	%r15,SP_R15(%r15)	# store stack pointer for new kthread
 0:	l       %r1,BASED(.Lschedtail)
 	basr    %r14,%r1
+	TRACE_IRQS_ON
         stosm   __SF_EMPTY(%r15),0x03     # reenable interrupts
 	b	BASED(sysc_return)
 
@@ -516,6 +532,7 @@ pgm_no_vtime3:
 	mvc	__THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS
 	mvc	__THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
 	oi	__TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
+	TRACE_IRQS_ON
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	b	BASED(sysc_do_svc)
 
@@ -539,9 +556,11 @@ io_int_handler:
 io_no_vtime:
 #endif
 	l	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
+	TRACE_IRQS_OFF
         l       %r1,BASED(.Ldo_IRQ)        # load address of do_IRQ
         la      %r2,SP_PTREGS(%r15) # address of register-save area
         basr    %r14,%r1          # branch to standard irq handler
+	TRACE_IRQS_ON
 
 io_return:
         tm      SP_PSW+1(%r15),0x01    # returning to user ?
@@ -651,10 +670,12 @@ ext_int_handler:
 ext_no_vtime:
 #endif
 	l	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
+	TRACE_IRQS_OFF
 	la	%r2,SP_PTREGS(%r15)    # address of register-save area
 	lh	%r3,__LC_EXT_INT_CODE  # get interruption code
 	l	%r1,BASED(.Ldo_extint)
 	basr	%r14,%r1
+	TRACE_IRQS_ON
 	b	BASED(io_return)
 
 __critical_end:
@@ -731,8 +752,10 @@ mcck_no_vtime:
 	stosm	__SF_EMPTY(%r15),0x04	# turn dat on
 	tm	__TI_flags+3(%r9),_TIF_MCCK_PENDING
 	bno	BASED(mcck_return)
+	TRACE_IRQS_OFF
 	l	%r1,BASED(.Ls390_handle_mcck)
 	basr	%r14,%r1		# call machine check handler
+	TRACE_IRQS_ON
 mcck_return:
 	mvc	__LC_RETURN_MCCK_PSW(8),SP_PSW(%r15) # move return PSW
 	ni	__LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
@@ -1012,7 +1035,11 @@ cleanup_io_leave_insn:
 .Lvfork:       .long  sys_vfork
 .Lschedtail:   .long  schedule_tail
 .Lsysc_table:  .long  sys_call_table
-
+#ifdef CONFIG_TRACE_IRQFLAGS
+.Ltrace_irq_on:.long  trace_hardirqs_on
+.Ltrace_irq_off:
+	       .long  trace_hardirqs_off
+#endif
 .Lcritical_start:
                .long  __critical_start + 0x80000000
 .Lcritical_end:
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 1ca499f..56f5f61 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -58,6 +58,19 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \
 
 #define BASED(name) name-system_call(%r13)
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	.macro	TRACE_IRQS_ON
+	 brasl	%r14,trace_hardirqs_on
+	.endm
+
+	.macro	TRACE_IRQS_OFF
+	 brasl	%r14,trace_hardirqs_off
+	.endm
+#else
+#define TRACE_IRQS_ON
+#define TRACE_IRQS_OFF
+#endif
+
 	.macro  STORE_TIMER lc_offset
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 	stpt	\lc_offset
@@ -354,6 +367,7 @@ ret_from_fork:
 	jo	0f
 	stg	%r15,SP_R15(%r15)	# store stack pointer for new kthread
 0:	brasl   %r14,schedule_tail
+	TRACE_IRQS_ON
         stosm   24(%r15),0x03     # reenable interrupts
 	j	sysc_return
 
@@ -535,6 +549,7 @@ pgm_no_vtime3:
 	mvc	__THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS
 	mvc	__THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
 	oi	__TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
+	TRACE_IRQS_ON
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	j	sysc_do_svc
 
@@ -557,8 +572,10 @@ io_int_handler:
 io_no_vtime:
 #endif
 	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
+	TRACE_IRQS_OFF
         la      %r2,SP_PTREGS(%r15)    # address of register-save area
 	brasl   %r14,do_IRQ            # call standard irq handler
+	TRACE_IRQS_ON
 
 io_return:
         tm      SP_PSW+1(%r15),0x01    # returning to user ?
@@ -665,9 +682,11 @@ ext_int_handler:
 ext_no_vtime:
 #endif
 	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
+	TRACE_IRQS_OFF
 	la	%r2,SP_PTREGS(%r15)    # address of register-save area
 	llgh	%r3,__LC_EXT_INT_CODE  # get interruption code
 	brasl   %r14,do_extint
+	TRACE_IRQS_ON
 	j	io_return
 
 __critical_end:
@@ -743,7 +762,9 @@ mcck_no_vtime:
 	stosm	__SF_EMPTY(%r15),0x04	# turn dat on
 	tm	__TI_flags+7(%r9),_TIF_MCCK_PENDING
 	jno	mcck_return
+	TRACE_IRQS_OFF
 	brasl	%r14,s390_handle_mcck
+	TRACE_IRQS_ON
 mcck_return:
 	mvc	__LC_RETURN_MCCK_PSW(16),SP_PSW(%r15) # move return PSW
 	ni	__LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 480b6a5..e347190 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -97,7 +97,7 @@ asmlinkage void do_softirq(void)
 
 	account_system_vtime(current);
 
-	__local_bh_enable();
+	_local_bh_enable();
 
 	local_irq_restore(flags);
 }
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 1f93991..78c8e55 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -142,6 +142,7 @@ static void default_idle(void)
 		return;
 	}
 
+	trace_hardirqs_on();
 	/* Wait for external, I/O or machine check interrupt. */
 	__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_WAIT |
 			PSW_MASK_IO | PSW_MASK_EXT);
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index 4138564..985d161 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -383,6 +383,7 @@ void
 sclp_sync_wait(void)
 {
 	unsigned long psw_mask;
+	unsigned long flags;
 	unsigned long cr0, cr0_sync;
 	u64 timeout;
 
@@ -395,9 +396,11 @@ sclp_sync_wait(void)
 			  sclp_tod_from_jiffies(sclp_request_timer.expires -
 						jiffies);
 	}
+	local_irq_save(flags);
 	/* Prevent bottom half from executing once we force interrupts open */
 	local_bh_disable();
 	/* Enable service-signal interruption, disable timer interrupts */
+	trace_hardirqs_on();
 	__ctl_store(cr0, 0, 0);
 	cr0_sync = cr0;
 	cr0_sync |= 0x00000200;
@@ -415,11 +418,10 @@ sclp_sync_wait(void)
 		barrier();
 		cpu_relax();
 	}
-	/* Restore interrupt settings */
-	asm volatile ("SSM 0(%0)"
-		      : : "a" (&psw_mask) : "memory");
+	local_irq_disable();
 	__ctl_load(cr0, 0, 0);
-	__local_bh_enable();
+	_local_bh_enable();
+	local_irq_restore(flags);
 }
 
 EXPORT_SYMBOL(sclp_sync_wait);
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index a342326..6fec90e 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -147,7 +147,7 @@ cio_tpi(void)
 		sch->driver->irq(&sch->dev);
 	spin_unlock(&sch->lock);
 	irq_exit ();
-	__local_bh_enable();
+	_local_bh_enable();
 	return 1;
 }
 
diff --git a/include/asm-s390/irqflags.h b/include/asm-s390/irqflags.h
new file mode 100644
index 0000000..65f4db6
--- /dev/null
+++ b/include/asm-s390/irqflags.h
@@ -0,0 +1,50 @@
+/*
+ *  include/asm-s390/irqflags.h
+ *
+ *    Copyright (C) IBM Corp. 2006
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#ifndef __ASM_IRQFLAGS_H
+#define __ASM_IRQFLAGS_H
+
+#ifdef __KERNEL__
+
+/* interrupt control.. */
+#define raw_local_irq_enable() ({ \
+	unsigned long  __dummy; \
+	__asm__ __volatile__ ( \
+		"stosm 0(%1),0x03" \
+		: "=m" (__dummy) : "a" (&__dummy) : "memory" ); \
+	})
+
+#define raw_local_irq_disable() ({ \
+	unsigned long __flags; \
+	__asm__ __volatile__ ( \
+		"stnsm 0(%1),0xfc" : "=m" (__flags) : "a" (&__flags) ); \
+	__flags; \
+	})
+
+#define raw_local_save_flags(x) \
+	__asm__ __volatile__("stosm 0(%1),0" : "=m" (x) : "a" (&x), "m" (x) )
+
+#define raw_local_irq_restore(x) \
+	__asm__ __volatile__("ssm   0(%0)" : : "a" (&x), "m" (x) : "memory")
+
+#define raw_irqs_disabled()		\
+({					\
+	unsigned long flags;		\
+	local_save_flags(flags);	\
+	!((flags >> __FLAG_SHIFT) & 3);	\
+})
+
+static inline int raw_irqs_disabled_flags(unsigned long flags)
+{
+	return !((flags >> __FLAG_SHIFT) & 3);
+}
+
+/* For spinlocks etc */
+#define raw_local_irq_save(x)	((x) = raw_local_irq_disable())
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_IRQFLAGS_H */
diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h
index 71a0732..9ab186f 100644
--- a/include/asm-s390/system.h
+++ b/include/asm-s390/system.h
@@ -301,34 +301,6 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
 #define set_mb(var, value)      do { var = value; mb(); } while (0)
 #define set_wmb(var, value)     do { var = value; wmb(); } while (0)
 
-/* interrupt control.. */
-#define local_irq_enable() ({ \
-        unsigned long  __dummy; \
-        __asm__ __volatile__ ( \
-                "stosm 0(%1),0x03" \
-		: "=m" (__dummy) : "a" (&__dummy) : "memory" ); \
-        })
-
-#define local_irq_disable() ({ \
-        unsigned long __flags; \
-        __asm__ __volatile__ ( \
-                "stnsm 0(%1),0xfc" : "=m" (__flags) : "a" (&__flags) ); \
-        __flags; \
-        })
-
-#define local_save_flags(x) \
-        __asm__ __volatile__("stosm 0(%1),0" : "=m" (x) : "a" (&x), "m" (x) )
-
-#define local_irq_restore(x) \
-        __asm__ __volatile__("ssm   0(%0)" : : "a" (&x), "m" (x) : "memory")
-
-#define irqs_disabled()			\
-({					\
-	unsigned long flags;		\
-	local_save_flags(flags);	\
-        !((flags >> __FLAG_SHIFT) & 3);	\
-})
-
 #ifdef __s390x__
 
 #define __ctl_load(array, low, high) ({ \
@@ -442,8 +414,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
         })
 #endif /* __s390x__ */
 
-/* For spinlocks etc */
-#define local_irq_save(x)	((x) = local_irq_disable())
+#include <linux/irqflags.h>
 
 /*
  * Use to set psw mask except for the first byte which
@@ -482,4 +453,3 @@ extern void (*_machine_power_off)(void);
 #endif /* __KERNEL__ */
 
 #endif
-
-- 
cgit v0.10.2


From cae2ed9aa573415c6e5de9a09b7ff0d74af793bc Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:48 -0700
Subject: [PATCH] lockdep: locking API self tests

Introduce DEBUG_LOCKING_API_SELFTESTS, which uses the generic lock debugging
code's silent-failure feature to run a matrix of testcases.  There are 210
testcases currently:

  +-----------------------
  | Locking API testsuite:
  +------------------------------+------+------+------+------+------+------+
                                 | spin |wlock |rlock |mutex | wsem | rsem |
  -------------------------------+------+------+------+------+------+------+
                     A-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
                 A-B-B-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
             A-B-B-C-C-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
             A-B-C-A-B-C deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
         A-B-B-C-C-D-D-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
         A-B-C-D-B-D-D-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
         A-B-C-D-B-C-D-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
                    double unlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
                 bad unlock order:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
  --------------------------------------+------+------+------+------+------+
              recursive read-lock:             |  ok  |             |  ok  |
  --------------------------------------+------+------+------+------+------+
                non-nested unlock:  ok  |  ok  |  ok  |  ok  |
  --------------------------------------+------+------+------+
     hard-irqs-on + irq-safe-A/12:  ok  |  ok  |  ok  |
     soft-irqs-on + irq-safe-A/12:  ok  |  ok  |  ok  |
     hard-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
     soft-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
       sirq-safe-A => hirqs-on/12:  ok  |  ok  |  ok  |
       sirq-safe-A => hirqs-on/21:  ok  |  ok  |  ok  |
         hard-safe-A + irqs-on/12:  ok  |  ok  |  ok  |
         soft-safe-A + irqs-on/12:  ok  |  ok  |  ok  |
         hard-safe-A + irqs-on/21:  ok  |  ok  |  ok  |
         soft-safe-A + irqs-on/21:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/132:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/132:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/213:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/213:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/231:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/231:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/312:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/312:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #1/321:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #1/321:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/123:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/123:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/132:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/132:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/213:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/213:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/231:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/231:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/312:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/312:  ok  |  ok  |  ok  |
    hard-safe-A + unsafe-B #2/321:  ok  |  ok  |  ok  |
    soft-safe-A + unsafe-B #2/321:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/123:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/123:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/132:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/132:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/213:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/213:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/231:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/231:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/312:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/312:  ok  |  ok  |  ok  |
      hard-irq lock-inversion/321:  ok  |  ok  |  ok  |
      soft-irq lock-inversion/321:  ok  |  ok  |  ok  |
      hard-irq read-recursion/123:  ok  |
      soft-irq read-recursion/123:  ok  |
      hard-irq read-recursion/132:  ok  |
      soft-irq read-recursion/132:  ok  |
      hard-irq read-recursion/213:  ok  |
      soft-irq read-recursion/213:  ok  |
      hard-irq read-recursion/231:  ok  |
      soft-irq read-recursion/231:  ok  |
      hard-irq read-recursion/312:  ok  |
      soft-irq read-recursion/312:  ok  |
      hard-irq read-recursion/321:  ok  |
      soft-irq read-recursion/321:  ok  |
  --------------------------------+-----+----------------
  Good, all 210 testcases passed! |
  --------------------------------+

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 86e9282..149f62b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -435,6 +435,15 @@ running once the system is up.
 
 	debug		[KNL] Enable kernel debugging (events log level).
 
+	debug_locks_verbose=
+			[KNL] verbose self-tests
+			Format=<0|1>
+			Print debugging info while doing the locking API
+			self-tests.
+			We default to 0 (no extra messages), setting it to
+			1 will print _a lot_ more information - normally
+			only useful to kernel developers.
+
 	decnet=		[HW,NET]
 			Format: <area>[,<node>]
 			See also Documentation/networking/decnet.txt.
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 91e338a..16021b0 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -149,6 +149,17 @@ config DEBUG_SPINLOCK_SLEEP
 	  If you say Y here, various routines which may sleep will become very
 	  noisy if they are called with a spinlock held.
 
+config DEBUG_LOCKING_API_SELFTESTS
+	bool "Locking API boot-time self-tests"
+	depends on DEBUG_KERNEL
+	help
+	  Say Y here if you want the kernel to run a short self-test during
+	  bootup. The self-test checks whether common types of locking bugs
+	  are detected by debugging mechanisms or not. (if you disable
+	  lock debugging then those bugs wont be detected of course.)
+	  The following locking APIs are covered: spinlocks, rwlocks,
+	  mutexes and rwsems.
+
 config STACKTRACE
 	bool
 	depends on STACKTRACE_SUPPORT
diff --git a/lib/Makefile b/lib/Makefile
index 4f5d019..be9719a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -18,6 +18,7 @@ CFLAGS_kobject.o += -DDEBUG
 CFLAGS_kobject_uevent.o += -DDEBUG
 endif
 
+obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
diff --git a/lib/locking-selftest-hardirq.h b/lib/locking-selftest-hardirq.h
new file mode 100644
index 0000000..10d4a15
--- /dev/null
+++ b/lib/locking-selftest-hardirq.h
@@ -0,0 +1,9 @@
+#undef IRQ_DISABLE
+#undef IRQ_ENABLE
+#undef IRQ_ENTER
+#undef IRQ_EXIT
+
+#define IRQ_ENABLE		HARDIRQ_ENABLE
+#define IRQ_DISABLE		HARDIRQ_DISABLE
+#define IRQ_ENTER		HARDIRQ_ENTER
+#define IRQ_EXIT		HARDIRQ_EXIT
diff --git a/lib/locking-selftest-mutex.h b/lib/locking-selftest-mutex.h
new file mode 100644
index 0000000..68601b6
--- /dev/null
+++ b/lib/locking-selftest-mutex.h
@@ -0,0 +1,11 @@
+#undef LOCK
+#define LOCK		ML
+
+#undef UNLOCK
+#define UNLOCK		MU
+
+#undef RLOCK
+#undef WLOCK
+
+#undef INIT
+#define INIT		MI
diff --git a/lib/locking-selftest-rlock-hardirq.h b/lib/locking-selftest-rlock-hardirq.h
new file mode 100644
index 0000000..9f517eb
--- /dev/null
+++ b/lib/locking-selftest-rlock-hardirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-rlock.h"
+#include "locking-selftest-hardirq.h"
diff --git a/lib/locking-selftest-rlock-softirq.h b/lib/locking-selftest-rlock-softirq.h
new file mode 100644
index 0000000..981455d
--- /dev/null
+++ b/lib/locking-selftest-rlock-softirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-rlock.h"
+#include "locking-selftest-softirq.h"
diff --git a/lib/locking-selftest-rlock.h b/lib/locking-selftest-rlock.h
new file mode 100644
index 0000000..6789044
--- /dev/null
+++ b/lib/locking-selftest-rlock.h
@@ -0,0 +1,14 @@
+#undef LOCK
+#define LOCK		RL
+
+#undef UNLOCK
+#define UNLOCK		RU
+
+#undef RLOCK
+#define RLOCK		RL
+
+#undef WLOCK
+#define WLOCK		WL
+
+#undef INIT
+#define INIT		RWI
diff --git a/lib/locking-selftest-rsem.h b/lib/locking-selftest-rsem.h
new file mode 100644
index 0000000..62da886
--- /dev/null
+++ b/lib/locking-selftest-rsem.h
@@ -0,0 +1,14 @@
+#undef LOCK
+#define LOCK		RSL
+
+#undef UNLOCK
+#define UNLOCK		RSU
+
+#undef RLOCK
+#define RLOCK		RSL
+
+#undef WLOCK
+#define WLOCK		WSL
+
+#undef INIT
+#define INIT		RWSI
diff --git a/lib/locking-selftest-softirq.h b/lib/locking-selftest-softirq.h
new file mode 100644
index 0000000..a83de2a
--- /dev/null
+++ b/lib/locking-selftest-softirq.h
@@ -0,0 +1,9 @@
+#undef IRQ_DISABLE
+#undef IRQ_ENABLE
+#undef IRQ_ENTER
+#undef IRQ_EXIT
+
+#define IRQ_DISABLE		SOFTIRQ_DISABLE
+#define IRQ_ENABLE		SOFTIRQ_ENABLE
+#define IRQ_ENTER		SOFTIRQ_ENTER
+#define IRQ_EXIT		SOFTIRQ_EXIT
diff --git a/lib/locking-selftest-spin-hardirq.h b/lib/locking-selftest-spin-hardirq.h
new file mode 100644
index 0000000..693198d
--- /dev/null
+++ b/lib/locking-selftest-spin-hardirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-spin.h"
+#include "locking-selftest-hardirq.h"
diff --git a/lib/locking-selftest-spin-softirq.h b/lib/locking-selftest-spin-softirq.h
new file mode 100644
index 0000000..c472e2a
--- /dev/null
+++ b/lib/locking-selftest-spin-softirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-spin.h"
+#include "locking-selftest-softirq.h"
diff --git a/lib/locking-selftest-spin.h b/lib/locking-selftest-spin.h
new file mode 100644
index 0000000..ccd1b4b
--- /dev/null
+++ b/lib/locking-selftest-spin.h
@@ -0,0 +1,11 @@
+#undef LOCK
+#define LOCK		L
+
+#undef UNLOCK
+#define UNLOCK		U
+
+#undef RLOCK
+#undef WLOCK
+
+#undef INIT
+#define INIT		SI
diff --git a/lib/locking-selftest-wlock-hardirq.h b/lib/locking-selftest-wlock-hardirq.h
new file mode 100644
index 0000000..2dd2e51
--- /dev/null
+++ b/lib/locking-selftest-wlock-hardirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-wlock.h"
+#include "locking-selftest-hardirq.h"
diff --git a/lib/locking-selftest-wlock-softirq.h b/lib/locking-selftest-wlock-softirq.h
new file mode 100644
index 0000000..cb80d1c
--- /dev/null
+++ b/lib/locking-selftest-wlock-softirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-wlock.h"
+#include "locking-selftest-softirq.h"
diff --git a/lib/locking-selftest-wlock.h b/lib/locking-selftest-wlock.h
new file mode 100644
index 0000000..0815322
--- /dev/null
+++ b/lib/locking-selftest-wlock.h
@@ -0,0 +1,14 @@
+#undef LOCK
+#define LOCK		WL
+
+#undef UNLOCK
+#define UNLOCK		WU
+
+#undef RLOCK
+#define RLOCK		RL
+
+#undef WLOCK
+#define WLOCK		WL
+
+#undef INIT
+#define INIT		RWI
diff --git a/lib/locking-selftest-wsem.h b/lib/locking-selftest-wsem.h
new file mode 100644
index 0000000..b88c5f2
--- /dev/null
+++ b/lib/locking-selftest-wsem.h
@@ -0,0 +1,14 @@
+#undef LOCK
+#define LOCK		WSL
+
+#undef UNLOCK
+#define UNLOCK		WSU
+
+#undef RLOCK
+#define RLOCK		RSL
+
+#undef WLOCK
+#define WLOCK		WSL
+
+#undef INIT
+#define INIT		RWSI
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
new file mode 100644
index 0000000..5cd05f20
--- /dev/null
+++ b/lib/locking-selftest.c
@@ -0,0 +1,1218 @@
+/*
+ * lib/locking-selftest.c
+ *
+ * Testsuite for various locking APIs: spinlocks, rwlocks,
+ * mutexes and rw-semaphores.
+ *
+ * It is checking both false positives and false negatives.
+ *
+ * Started by Ingo Molnar:
+ *
+ *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/kallsyms.h>
+#include <linux/interrupt.h>
+#include <linux/debug_locks.h>
+#include <linux/irqflags.h>
+
+/*
+ * Change this to 1 if you want to see the failure printouts:
+ */
+static unsigned int debug_locks_verbose;
+
+static int __init setup_debug_locks_verbose(char *str)
+{
+	get_option(&str, &debug_locks_verbose);
+
+	return 1;
+}
+
+__setup("debug_locks_verbose=", setup_debug_locks_verbose);
+
+#define FAILURE		0
+#define SUCCESS		1
+
+#define LOCKTYPE_SPIN	0x1
+#define LOCKTYPE_RWLOCK	0x2
+#define LOCKTYPE_MUTEX	0x4
+#define LOCKTYPE_RWSEM	0x8
+
+/*
+ * Normal standalone locks, for the circular and irq-context
+ * dependency tests:
+ */
+static DEFINE_SPINLOCK(lock_A);
+static DEFINE_SPINLOCK(lock_B);
+static DEFINE_SPINLOCK(lock_C);
+static DEFINE_SPINLOCK(lock_D);
+
+static DEFINE_RWLOCK(rwlock_A);
+static DEFINE_RWLOCK(rwlock_B);
+static DEFINE_RWLOCK(rwlock_C);
+static DEFINE_RWLOCK(rwlock_D);
+
+static DEFINE_MUTEX(mutex_A);
+static DEFINE_MUTEX(mutex_B);
+static DEFINE_MUTEX(mutex_C);
+static DEFINE_MUTEX(mutex_D);
+
+static DECLARE_RWSEM(rwsem_A);
+static DECLARE_RWSEM(rwsem_B);
+static DECLARE_RWSEM(rwsem_C);
+static DECLARE_RWSEM(rwsem_D);
+
+/*
+ * Locks that we initialize dynamically as well so that
+ * e.g. X1 and X2 becomes two instances of the same class,
+ * but X* and Y* are different classes. We do this so that
+ * we do not trigger a real lockup:
+ */
+static DEFINE_SPINLOCK(lock_X1);
+static DEFINE_SPINLOCK(lock_X2);
+static DEFINE_SPINLOCK(lock_Y1);
+static DEFINE_SPINLOCK(lock_Y2);
+static DEFINE_SPINLOCK(lock_Z1);
+static DEFINE_SPINLOCK(lock_Z2);
+
+static DEFINE_RWLOCK(rwlock_X1);
+static DEFINE_RWLOCK(rwlock_X2);
+static DEFINE_RWLOCK(rwlock_Y1);
+static DEFINE_RWLOCK(rwlock_Y2);
+static DEFINE_RWLOCK(rwlock_Z1);
+static DEFINE_RWLOCK(rwlock_Z2);
+
+static DEFINE_MUTEX(mutex_X1);
+static DEFINE_MUTEX(mutex_X2);
+static DEFINE_MUTEX(mutex_Y1);
+static DEFINE_MUTEX(mutex_Y2);
+static DEFINE_MUTEX(mutex_Z1);
+static DEFINE_MUTEX(mutex_Z2);
+
+static DECLARE_RWSEM(rwsem_X1);
+static DECLARE_RWSEM(rwsem_X2);
+static DECLARE_RWSEM(rwsem_Y1);
+static DECLARE_RWSEM(rwsem_Y2);
+static DECLARE_RWSEM(rwsem_Z1);
+static DECLARE_RWSEM(rwsem_Z2);
+
+/*
+ * non-inlined runtime initializers, to let separate locks share
+ * the same lock-class:
+ */
+#define INIT_CLASS_FUNC(class) 				\
+static noinline void					\
+init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \
+		 struct rw_semaphore *rwsem)		\
+{							\
+	spin_lock_init(lock);				\
+	rwlock_init(rwlock);				\
+	mutex_init(mutex);				\
+	init_rwsem(rwsem);				\
+}
+
+INIT_CLASS_FUNC(X)
+INIT_CLASS_FUNC(Y)
+INIT_CLASS_FUNC(Z)
+
+static void init_shared_classes(void)
+{
+	init_class_X(&lock_X1, &rwlock_X1, &mutex_X1, &rwsem_X1);
+	init_class_X(&lock_X2, &rwlock_X2, &mutex_X2, &rwsem_X2);
+
+	init_class_Y(&lock_Y1, &rwlock_Y1, &mutex_Y1, &rwsem_Y1);
+	init_class_Y(&lock_Y2, &rwlock_Y2, &mutex_Y2, &rwsem_Y2);
+
+	init_class_Z(&lock_Z1, &rwlock_Z1, &mutex_Z1, &rwsem_Z1);
+	init_class_Z(&lock_Z2, &rwlock_Z2, &mutex_Z2, &rwsem_Z2);
+}
+
+/*
+ * For spinlocks and rwlocks we also do hardirq-safe / softirq-safe tests.
+ * The following functions use a lock from a simulated hardirq/softirq
+ * context, causing the locks to be marked as hardirq-safe/softirq-safe:
+ */
+
+#define HARDIRQ_DISABLE		local_irq_disable
+#define HARDIRQ_ENABLE		local_irq_enable
+
+#define HARDIRQ_ENTER()				\
+	local_irq_disable();			\
+	irq_enter();				\
+	WARN_ON(!in_irq());
+
+#define HARDIRQ_EXIT()				\
+	__irq_exit();				\
+	local_irq_enable();
+
+#define SOFTIRQ_DISABLE		local_bh_disable
+#define SOFTIRQ_ENABLE		local_bh_enable
+
+#define SOFTIRQ_ENTER()				\
+		local_bh_disable();		\
+		local_irq_disable();		\
+		trace_softirq_enter();		\
+		WARN_ON(!in_softirq());
+
+#define SOFTIRQ_EXIT()				\
+		trace_softirq_exit();		\
+		local_irq_enable();		\
+		local_bh_enable();
+
+/*
+ * Shortcuts for lock/unlock API variants, to keep
+ * the testcases compact:
+ */
+#define L(x)			spin_lock(&lock_##x)
+#define U(x)			spin_unlock(&lock_##x)
+#define LU(x)			L(x); U(x)
+#define SI(x)			spin_lock_init(&lock_##x)
+
+#define WL(x)			write_lock(&rwlock_##x)
+#define WU(x)			write_unlock(&rwlock_##x)
+#define WLU(x)			WL(x); WU(x)
+
+#define RL(x)			read_lock(&rwlock_##x)
+#define RU(x)			read_unlock(&rwlock_##x)
+#define RLU(x)			RL(x); RU(x)
+#define RWI(x)			rwlock_init(&rwlock_##x)
+
+#define ML(x)			mutex_lock(&mutex_##x)
+#define MU(x)			mutex_unlock(&mutex_##x)
+#define MI(x)			mutex_init(&mutex_##x)
+
+#define WSL(x)			down_write(&rwsem_##x)
+#define WSU(x)			up_write(&rwsem_##x)
+
+#define RSL(x)			down_read(&rwsem_##x)
+#define RSU(x)			up_read(&rwsem_##x)
+#define RWSI(x)			init_rwsem(&rwsem_##x)
+
+#define LOCK_UNLOCK_2(x,y)	LOCK(x); LOCK(y); UNLOCK(y); UNLOCK(x)
+
+/*
+ * Generate different permutations of the same testcase, using
+ * the same basic lock-dependency/state events:
+ */
+
+#define GENERATE_TESTCASE(name)			\
+						\
+static void name(void) { E(); }
+
+#define GENERATE_PERMUTATIONS_2_EVENTS(name)	\
+						\
+static void name##_12(void) { E1(); E2(); }	\
+static void name##_21(void) { E2(); E1(); }
+
+#define GENERATE_PERMUTATIONS_3_EVENTS(name)		\
+							\
+static void name##_123(void) { E1(); E2(); E3(); }	\
+static void name##_132(void) { E1(); E3(); E2(); }	\
+static void name##_213(void) { E2(); E1(); E3(); }	\
+static void name##_231(void) { E2(); E3(); E1(); }	\
+static void name##_312(void) { E3(); E1(); E2(); }	\
+static void name##_321(void) { E3(); E2(); E1(); }
+
+/*
+ * AA deadlock:
+ */
+
+#define E()					\
+						\
+	LOCK(X1);				\
+	LOCK(X2); /* this one should fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(AA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(AA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(AA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(AA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(AA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(AA_rsem)
+
+#undef E
+
+/*
+ * Special-case for read-locking, they are
+ * allowed to recurse on the same lock instance:
+ */
+static void rlock_AA1(void)
+{
+	RL(X1);
+	RL(X1); // this one should NOT fail
+}
+
+static void rlock_AA1B(void)
+{
+	RL(X1);
+	RL(X2); // this one should fail
+}
+
+static void rsem_AA1(void)
+{
+	RSL(X1);
+	RSL(X1); // this one should fail
+}
+
+static void rsem_AA1B(void)
+{
+	RSL(X1);
+	RSL(X2); // this one should fail
+}
+/*
+ * The mixing of read and write locks is not allowed:
+ */
+static void rlock_AA2(void)
+{
+	RL(X1);
+	WL(X2); // this one should fail
+}
+
+static void rsem_AA2(void)
+{
+	RSL(X1);
+	WSL(X2); // this one should fail
+}
+
+static void rlock_AA3(void)
+{
+	WL(X1);
+	RL(X2); // this one should fail
+}
+
+static void rsem_AA3(void)
+{
+	WSL(X1);
+	RSL(X2); // this one should fail
+}
+
+/*
+ * ABBA deadlock:
+ */
+
+#define E()					\
+						\
+	LOCK_UNLOCK_2(A, B);			\
+	LOCK_UNLOCK_2(B, A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABBA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABBA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABBA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABBA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABBA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABBA_rsem)
+
+#undef E
+
+/*
+ * AB BC CA deadlock:
+ */
+
+#define E()					\
+						\
+	LOCK_UNLOCK_2(A, B);			\
+	LOCK_UNLOCK_2(B, C);			\
+	LOCK_UNLOCK_2(C, A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABBCCA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABBCCA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABBCCA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABBCCA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABBCCA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABBCCA_rsem)
+
+#undef E
+
+/*
+ * AB CA BC deadlock:
+ */
+
+#define E()					\
+						\
+	LOCK_UNLOCK_2(A, B);			\
+	LOCK_UNLOCK_2(C, A);			\
+	LOCK_UNLOCK_2(B, C); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABCABC_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABCABC_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABCABC_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABCABC_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABCABC_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABCABC_rsem)
+
+#undef E
+
+/*
+ * AB BC CD DA deadlock:
+ */
+
+#define E()					\
+						\
+	LOCK_UNLOCK_2(A, B);			\
+	LOCK_UNLOCK_2(B, C);			\
+	LOCK_UNLOCK_2(C, D);			\
+	LOCK_UNLOCK_2(D, A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABBCCDDA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABBCCDDA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABBCCDDA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABBCCDDA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABBCCDDA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABBCCDDA_rsem)
+
+#undef E
+
+/*
+ * AB CD BD DA deadlock:
+ */
+#define E()					\
+						\
+	LOCK_UNLOCK_2(A, B);			\
+	LOCK_UNLOCK_2(C, D);			\
+	LOCK_UNLOCK_2(B, D);			\
+	LOCK_UNLOCK_2(D, A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABCDBDDA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABCDBDDA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABCDBDDA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABCDBDDA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABCDBDDA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABCDBDDA_rsem)
+
+#undef E
+
+/*
+ * AB CD BC DA deadlock:
+ */
+#define E()					\
+						\
+	LOCK_UNLOCK_2(A, B);			\
+	LOCK_UNLOCK_2(C, D);			\
+	LOCK_UNLOCK_2(B, C);			\
+	LOCK_UNLOCK_2(D, A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABCDBCDA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABCDBCDA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABCDBCDA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABCDBCDA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABCDBCDA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABCDBCDA_rsem)
+
+#undef E
+
+/*
+ * Double unlock:
+ */
+#define E()					\
+						\
+	LOCK(A);				\
+	UNLOCK(A);				\
+	UNLOCK(A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(double_unlock_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(double_unlock_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(double_unlock_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(double_unlock_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(double_unlock_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(double_unlock_rsem)
+
+#undef E
+
+/*
+ * Bad unlock ordering:
+ */
+#define E()					\
+						\
+	LOCK(A);				\
+	LOCK(B);				\
+	UNLOCK(A); /* fail */			\
+	UNLOCK(B);
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(bad_unlock_order_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(bad_unlock_order_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(bad_unlock_order_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(bad_unlock_order_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(bad_unlock_order_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(bad_unlock_order_rsem)
+
+#undef E
+
+/*
+ * initializing a held lock:
+ */
+#define E()					\
+						\
+	LOCK(A);				\
+	INIT(A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(init_held_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(init_held_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(init_held_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(init_held_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(init_held_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(init_held_rsem)
+
+#undef E
+
+/*
+ * locking an irq-safe lock with irqs enabled:
+ */
+#define E1()				\
+					\
+	IRQ_ENTER();			\
+	LOCK(A);			\
+	UNLOCK(A);			\
+	IRQ_EXIT();
+
+#define E2()				\
+					\
+	LOCK(A);			\
+	UNLOCK(A);
+
+/*
+ * Generate 24 testcases:
+ */
+#include "locking-selftest-spin-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin)
+
+#include "locking-selftest-rlock-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
+
+#include "locking-selftest-wlock-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_wlock)
+
+#include "locking-selftest-spin-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_spin)
+
+#include "locking-selftest-rlock-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock)
+
+#include "locking-selftest-wlock-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
+
+#undef E1
+#undef E2
+
+/*
+ * Enabling hardirqs with a softirq-safe lock held:
+ */
+#define E1()				\
+					\
+	SOFTIRQ_ENTER();		\
+	LOCK(A);			\
+	UNLOCK(A);			\
+	SOFTIRQ_EXIT();
+
+#define E2()				\
+					\
+	HARDIRQ_DISABLE();		\
+	LOCK(A);			\
+	HARDIRQ_ENABLE();		\
+	UNLOCK(A);
+
+/*
+ * Generate 12 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_spin)
+
+#include "locking-selftest-wlock.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_wlock)
+
+#include "locking-selftest-rlock.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock)
+
+#undef E1
+#undef E2
+
+/*
+ * Enabling irqs with an irq-safe lock held:
+ */
+#define E1()				\
+					\
+	IRQ_ENTER();			\
+	LOCK(A);			\
+	UNLOCK(A);			\
+	IRQ_EXIT();
+
+#define E2()				\
+					\
+	IRQ_DISABLE();			\
+	LOCK(A);			\
+	IRQ_ENABLE();			\
+	UNLOCK(A);
+
+/*
+ * Generate 24 testcases:
+ */
+#include "locking-selftest-spin-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin)
+
+#include "locking-selftest-rlock-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
+
+#include "locking-selftest-wlock-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_wlock)
+
+#include "locking-selftest-spin-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_spin)
+
+#include "locking-selftest-rlock-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock)
+
+#include "locking-selftest-wlock-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
+
+#undef E1
+#undef E2
+
+/*
+ * Acquiring a irq-unsafe lock while holding an irq-safe-lock:
+ */
+#define E1()				\
+					\
+	LOCK(A);			\
+	LOCK(B);			\
+	UNLOCK(B);			\
+	UNLOCK(A);			\
+
+#define E2()				\
+					\
+	LOCK(B);			\
+	UNLOCK(B);
+
+#define E3()				\
+					\
+	IRQ_ENTER();			\
+	LOCK(A);			\
+	UNLOCK(A);			\
+	IRQ_EXIT();
+
+/*
+ * Generate 36 testcases:
+ */
+#include "locking-selftest-spin-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin)
+
+#include "locking-selftest-rlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
+
+#include "locking-selftest-wlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_wlock)
+
+#include "locking-selftest-spin-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_spin)
+
+#include "locking-selftest-rlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock)
+
+#include "locking-selftest-wlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
+
+#undef E1
+#undef E2
+#undef E3
+
+/*
+ * If a lock turns into softirq-safe, but earlier it took
+ * a softirq-unsafe lock:
+ */
+
+#define E1()				\
+	IRQ_DISABLE();			\
+	LOCK(A);			\
+	LOCK(B);			\
+	UNLOCK(B);			\
+	UNLOCK(A);			\
+	IRQ_ENABLE();
+
+#define E2()				\
+	LOCK(B);			\
+	UNLOCK(B);
+
+#define E3()				\
+	IRQ_ENTER();			\
+	LOCK(A);			\
+	UNLOCK(A);			\
+	IRQ_EXIT();
+
+/*
+ * Generate 36 testcases:
+ */
+#include "locking-selftest-spin-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin)
+
+#include "locking-selftest-rlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
+
+#include "locking-selftest-wlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_wlock)
+
+#include "locking-selftest-spin-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_spin)
+
+#include "locking-selftest-rlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock)
+
+#include "locking-selftest-wlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
+
+#undef E1
+#undef E2
+#undef E3
+
+/*
+ * read-lock / write-lock irq inversion.
+ *
+ * Deadlock scenario:
+ *
+ * CPU#1 is at #1, i.e. it has write-locked A, but has not
+ * taken B yet.
+ *
+ * CPU#2 is at #2, i.e. it has locked B.
+ *
+ * Hardirq hits CPU#2 at point #2 and is trying to read-lock A.
+ *
+ * The deadlock occurs because CPU#1 will spin on B, and CPU#2
+ * will spin on A.
+ */
+
+#define E1()				\
+					\
+	IRQ_DISABLE();			\
+	WL(A);				\
+	LOCK(B);			\
+	UNLOCK(B);			\
+	WU(A);				\
+	IRQ_ENABLE();
+
+#define E2()				\
+					\
+	LOCK(B);			\
+	UNLOCK(B);
+
+#define E3()				\
+					\
+	IRQ_ENTER();			\
+	RL(A);				\
+	RU(A);				\
+	IRQ_EXIT();
+
+/*
+ * Generate 36 testcases:
+ */
+#include "locking-selftest-spin-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_spin)
+
+#include "locking-selftest-rlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_rlock)
+
+#include "locking-selftest-wlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_wlock)
+
+#include "locking-selftest-spin-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_spin)
+
+#include "locking-selftest-rlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_rlock)
+
+#include "locking-selftest-wlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock)
+
+#undef E1
+#undef E2
+#undef E3
+
+/*
+ * read-lock / write-lock recursion that is actually safe.
+ */
+
+#define E1()				\
+					\
+	IRQ_DISABLE();			\
+	WL(A);				\
+	WU(A);				\
+	IRQ_ENABLE();
+
+#define E2()				\
+					\
+	RL(A);				\
+	RU(A);				\
+
+#define E3()				\
+					\
+	IRQ_ENTER();			\
+	RL(A);				\
+	L(B);				\
+	U(B);				\
+	RU(A);				\
+	IRQ_EXIT();
+
+/*
+ * Generate 12 testcases:
+ */
+#include "locking-selftest-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard)
+
+#include "locking-selftest-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
+
+#undef E1
+#undef E2
+#undef E3
+
+/*
+ * read-lock / write-lock recursion that is unsafe.
+ */
+
+#define E1()				\
+					\
+	IRQ_DISABLE();			\
+	L(B);				\
+	WL(A);				\
+	WU(A);				\
+	U(B);				\
+	IRQ_ENABLE();
+
+#define E2()				\
+					\
+	RL(A);				\
+	RU(A);				\
+
+#define E3()				\
+					\
+	IRQ_ENTER();			\
+	L(B);				\
+	U(B);				\
+	IRQ_EXIT();
+
+/*
+ * Generate 12 testcases:
+ */
+#include "locking-selftest-hardirq.h"
+// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard)
+
+#include "locking-selftest-softirq.h"
+// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft)
+
+#define lockdep_reset()
+#define lockdep_reset_lock(x)
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define I_SPINLOCK(x)	lockdep_reset_lock(&lock_##x.dep_map)
+# define I_RWLOCK(x)	lockdep_reset_lock(&rwlock_##x.dep_map)
+# define I_MUTEX(x)	lockdep_reset_lock(&mutex_##x.dep_map)
+# define I_RWSEM(x)	lockdep_reset_lock(&rwsem_##x.dep_map)
+#else
+# define I_SPINLOCK(x)
+# define I_RWLOCK(x)
+# define I_MUTEX(x)
+# define I_RWSEM(x)
+#endif
+
+#define I1(x)					\
+	do {					\
+		I_SPINLOCK(x);			\
+		I_RWLOCK(x);			\
+		I_MUTEX(x);			\
+		I_RWSEM(x);			\
+	} while (0)
+
+#define I2(x)					\
+	do {					\
+		spin_lock_init(&lock_##x);	\
+		rwlock_init(&rwlock_##x);	\
+		mutex_init(&mutex_##x);		\
+		init_rwsem(&rwsem_##x);		\
+	} while (0)
+
+static void reset_locks(void)
+{
+	local_irq_disable();
+	I1(A); I1(B); I1(C); I1(D);
+	I1(X1); I1(X2); I1(Y1); I1(Y2); I1(Z1); I1(Z2);
+	lockdep_reset();
+	I2(A); I2(B); I2(C); I2(D);
+	init_shared_classes();
+	local_irq_enable();
+}
+
+#undef I
+
+static int testcase_total;
+static int testcase_successes;
+static int expected_testcase_failures;
+static int unexpected_testcase_failures;
+
+static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
+{
+	unsigned long saved_preempt_count = preempt_count();
+	int expected_failure = 0;
+
+	WARN_ON(irqs_disabled());
+
+	testcase_fn();
+	/*
+	 * Filter out expected failures:
+	 */
+#ifndef CONFIG_PROVE_LOCKING
+	if ((lockclass_mask & LOCKTYPE_SPIN) && debug_locks != expected)
+		expected_failure = 1;
+	if ((lockclass_mask & LOCKTYPE_RWLOCK) && debug_locks != expected)
+		expected_failure = 1;
+	if ((lockclass_mask & LOCKTYPE_MUTEX) && debug_locks != expected)
+		expected_failure = 1;
+	if ((lockclass_mask & LOCKTYPE_RWSEM) && debug_locks != expected)
+		expected_failure = 1;
+#endif
+	if (debug_locks != expected) {
+		if (expected_failure) {
+			expected_testcase_failures++;
+			printk("failed|");
+		} else {
+			unexpected_testcase_failures++;
+			printk("FAILED|");
+		}
+	} else {
+		testcase_successes++;
+		printk("  ok  |");
+	}
+	testcase_total++;
+
+	if (debug_locks_verbose)
+		printk(" lockclass mask: %x, debug_locks: %d, expected: %d\n",
+			lockclass_mask, debug_locks, expected);
+	/*
+	 * Some tests (e.g. double-unlock) might corrupt the preemption
+	 * count, so restore it:
+	 */
+	preempt_count() = saved_preempt_count;
+#ifdef CONFIG_TRACE_IRQFLAGS
+	if (softirq_count())
+		current->softirqs_enabled = 0;
+	else
+		current->softirqs_enabled = 1;
+#endif
+
+	reset_locks();
+}
+
+static inline void print_testname(const char *testname)
+{
+	printk("%33s:", testname);
+}
+
+#define DO_TESTCASE_1(desc, name, nr)				\
+	print_testname(desc"/"#nr);				\
+	dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK);		\
+	printk("\n");
+
+#define DO_TESTCASE_1B(desc, name, nr)				\
+	print_testname(desc"/"#nr);				\
+	dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK);		\
+	printk("\n");
+
+#define DO_TESTCASE_3(desc, name, nr)				\
+	print_testname(desc"/"#nr);				\
+	dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN);	\
+	dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);	\
+	dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK);	\
+	printk("\n");
+
+#define DO_TESTCASE_3RW(desc, name, nr)				\
+	print_testname(desc"/"#nr);				\
+	dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\
+	dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);	\
+	dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK);	\
+	printk("\n");
+
+#define DO_TESTCASE_6(desc, name)				\
+	print_testname(desc);					\
+	dotest(name##_spin, FAILURE, LOCKTYPE_SPIN);		\
+	dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK);		\
+	dotest(name##_rlock, FAILURE, LOCKTYPE_RWLOCK);		\
+	dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX);		\
+	dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM);		\
+	dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM);		\
+	printk("\n");
+
+#define DO_TESTCASE_6_SUCCESS(desc, name)			\
+	print_testname(desc);					\
+	dotest(name##_spin, SUCCESS, LOCKTYPE_SPIN);		\
+	dotest(name##_wlock, SUCCESS, LOCKTYPE_RWLOCK);		\
+	dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK);		\
+	dotest(name##_mutex, SUCCESS, LOCKTYPE_MUTEX);		\
+	dotest(name##_wsem, SUCCESS, LOCKTYPE_RWSEM);		\
+	dotest(name##_rsem, SUCCESS, LOCKTYPE_RWSEM);		\
+	printk("\n");
+
+/*
+ * 'read' variant: rlocks must not trigger.
+ */
+#define DO_TESTCASE_6R(desc, name)				\
+	print_testname(desc);					\
+	dotest(name##_spin, FAILURE, LOCKTYPE_SPIN);		\
+	dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK);		\
+	dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK);		\
+	dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX);		\
+	dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM);		\
+	dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM);		\
+	printk("\n");
+
+#define DO_TESTCASE_2I(desc, name, nr)				\
+	DO_TESTCASE_1("hard-"desc, name##_hard, nr);		\
+	DO_TESTCASE_1("soft-"desc, name##_soft, nr);
+
+#define DO_TESTCASE_2IB(desc, name, nr)				\
+	DO_TESTCASE_1B("hard-"desc, name##_hard, nr);		\
+	DO_TESTCASE_1B("soft-"desc, name##_soft, nr);
+
+#define DO_TESTCASE_6I(desc, name, nr)				\
+	DO_TESTCASE_3("hard-"desc, name##_hard, nr);		\
+	DO_TESTCASE_3("soft-"desc, name##_soft, nr);
+
+#define DO_TESTCASE_6IRW(desc, name, nr)			\
+	DO_TESTCASE_3RW("hard-"desc, name##_hard, nr);		\
+	DO_TESTCASE_3RW("soft-"desc, name##_soft, nr);
+
+#define DO_TESTCASE_2x3(desc, name)				\
+	DO_TESTCASE_3(desc, name, 12);				\
+	DO_TESTCASE_3(desc, name, 21);
+
+#define DO_TESTCASE_2x6(desc, name)				\
+	DO_TESTCASE_6I(desc, name, 12);				\
+	DO_TESTCASE_6I(desc, name, 21);
+
+#define DO_TESTCASE_6x2(desc, name)				\
+	DO_TESTCASE_2I(desc, name, 123);			\
+	DO_TESTCASE_2I(desc, name, 132);			\
+	DO_TESTCASE_2I(desc, name, 213);			\
+	DO_TESTCASE_2I(desc, name, 231);			\
+	DO_TESTCASE_2I(desc, name, 312);			\
+	DO_TESTCASE_2I(desc, name, 321);
+
+#define DO_TESTCASE_6x2B(desc, name)				\
+	DO_TESTCASE_2IB(desc, name, 123);			\
+	DO_TESTCASE_2IB(desc, name, 132);			\
+	DO_TESTCASE_2IB(desc, name, 213);			\
+	DO_TESTCASE_2IB(desc, name, 231);			\
+	DO_TESTCASE_2IB(desc, name, 312);			\
+	DO_TESTCASE_2IB(desc, name, 321);
+
+#define DO_TESTCASE_6x6(desc, name)				\
+	DO_TESTCASE_6I(desc, name, 123);			\
+	DO_TESTCASE_6I(desc, name, 132);			\
+	DO_TESTCASE_6I(desc, name, 213);			\
+	DO_TESTCASE_6I(desc, name, 231);			\
+	DO_TESTCASE_6I(desc, name, 312);			\
+	DO_TESTCASE_6I(desc, name, 321);
+
+#define DO_TESTCASE_6x6RW(desc, name)				\
+	DO_TESTCASE_6IRW(desc, name, 123);			\
+	DO_TESTCASE_6IRW(desc, name, 132);			\
+	DO_TESTCASE_6IRW(desc, name, 213);			\
+	DO_TESTCASE_6IRW(desc, name, 231);			\
+	DO_TESTCASE_6IRW(desc, name, 312);			\
+	DO_TESTCASE_6IRW(desc, name, 321);
+
+
+void locking_selftest(void)
+{
+	/*
+	 * Got a locking failure before the selftest ran?
+	 */
+	if (!debug_locks) {
+		printk("----------------------------------\n");
+		printk("| Locking API testsuite disabled |\n");
+		printk("----------------------------------\n");
+		return;
+	}
+
+	/*
+	 * Run the testsuite:
+	 */
+	printk("------------------------\n");
+	printk("| Locking API testsuite:\n");
+	printk("----------------------------------------------------------------------------\n");
+	printk("                                 | spin |wlock |rlock |mutex | wsem | rsem |\n");
+	printk("  --------------------------------------------------------------------------\n");
+
+	init_shared_classes();
+	debug_locks_silent = !debug_locks_verbose;
+
+	DO_TESTCASE_6("A-A deadlock", AA);
+	DO_TESTCASE_6R("A-B-B-A deadlock", ABBA);
+	DO_TESTCASE_6R("A-B-B-C-C-A deadlock", ABBCCA);
+	DO_TESTCASE_6R("A-B-C-A-B-C deadlock", ABCABC);
+	DO_TESTCASE_6R("A-B-B-C-C-D-D-A deadlock", ABBCCDDA);
+	DO_TESTCASE_6R("A-B-C-D-B-D-D-A deadlock", ABCDBDDA);
+	DO_TESTCASE_6R("A-B-C-D-B-C-D-A deadlock", ABCDBCDA);
+	DO_TESTCASE_6("double unlock", double_unlock);
+	DO_TESTCASE_6("initialize held", init_held);
+	DO_TESTCASE_6_SUCCESS("bad unlock order", bad_unlock_order);
+
+	printk("  --------------------------------------------------------------------------\n");
+	print_testname("recursive read-lock");
+	printk("             |");
+	dotest(rlock_AA1, SUCCESS, LOCKTYPE_RWLOCK);
+	printk("             |");
+	dotest(rsem_AA1, FAILURE, LOCKTYPE_RWSEM);
+	printk("\n");
+
+	print_testname("recursive read-lock #2");
+	printk("             |");
+	dotest(rlock_AA1B, FAILURE, LOCKTYPE_RWLOCK);
+	printk("             |");
+	dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM);
+	printk("\n");
+
+	print_testname("mixed read-write-lock");
+	printk("             |");
+	dotest(rlock_AA2, FAILURE, LOCKTYPE_RWLOCK);
+	printk("             |");
+	dotest(rsem_AA2, FAILURE, LOCKTYPE_RWSEM);
+	printk("\n");
+
+	print_testname("mixed write-read-lock");
+	printk("             |");
+	dotest(rlock_AA3, FAILURE, LOCKTYPE_RWLOCK);
+	printk("             |");
+	dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM);
+	printk("\n");
+
+	printk("  --------------------------------------------------------------------------\n");
+
+	/*
+	 * irq-context testcases:
+	 */
+	DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1);
+	DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A);
+	DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B);
+	DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3);
+	DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4);
+	DO_TESTCASE_6x6RW("irq lock-inversion", irq_inversion);
+
+	DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
+//	DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
+
+	if (unexpected_testcase_failures) {
+		printk("-----------------------------------------------------------------\n");
+		debug_locks = 0;
+		printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n",
+			unexpected_testcase_failures, testcase_total);
+		printk("-----------------------------------------------------------------\n");
+	} else if (expected_testcase_failures && testcase_successes) {
+		printk("--------------------------------------------------------\n");
+		printk("%3d out of %3d testcases failed, as expected. |\n",
+			expected_testcase_failures, testcase_total);
+		printk("----------------------------------------------------\n");
+		debug_locks = 1;
+	} else if (expected_testcase_failures && !testcase_successes) {
+		printk("--------------------------------------------------------\n");
+		printk("All %3d testcases failed, as expected. |\n",
+			expected_testcase_failures);
+		printk("----------------------------------------\n");
+		debug_locks = 1;
+	} else {
+		printk("-------------------------------------------------------\n");
+		printk("Good, all %3d testcases passed! |\n",
+			testcase_successes);
+		printk("---------------------------------\n");
+		debug_locks = 1;
+	}
+	debug_locks_silent = 0;
+}
-- 
cgit v0.10.2


From fbb9ce9530fd9b66096d5187fa6a115d16d9746c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:50 -0700
Subject: [PATCH] lockdep: core

Do 'make oldconfig' and accept all the defaults for new config options -
reboot into the kernel and if everything goes well it should boot up fine and
you should have /proc/lockdep and /proc/lockdep_stats files.

Typically if the lock validator finds some problem it will print out
voluminous debug output that begins with "BUG: ..." and which syslog output
can be used by kernel developers to figure out the precise locking scenario.

What does the lock validator do?  It "observes" and maps all locking rules as
they occur dynamically (as triggered by the kernel's natural use of spinlocks,
rwlocks, mutexes and rwsems).  Whenever the lock validator subsystem detects a
new locking scenario, it validates this new rule against the existing set of
rules.  If this new rule is consistent with the existing set of rules then the
new rule is added transparently and the kernel continues as normal.  If the
new rule could create a deadlock scenario then this condition is printed out.

When determining validity of locking, all possible "deadlock scenarios" are
considered: assuming arbitrary number of CPUs, arbitrary irq context and task
context constellations, running arbitrary combinations of all the existing
locking scenarios.  In a typical system this means millions of separate
scenarios.  This is why we call it a "locking correctness" validator - for all
rules that are observed the lock validator proves it with mathematical
certainty that a deadlock could not occur (assuming that the lock validator
implementation itself is correct and its internal data structures are not
corrupted by some other kernel subsystem).  [see more details and conditionals
of this statement in include/linux/lockdep.h and
Documentation/lockdep-design.txt]

Furthermore, this "all possible scenarios" property of the validator also
enables the finding of complex, highly unlikely multi-CPU multi-context races
via single single-context rules, increasing the likelyhood of finding bugs
drastically.  In practical terms: the lock validator already found a bug in
the upstream kernel that could only occur on systems with 3 or more CPUs, and
which needed 3 very unlikely code sequences to occur at once on the 3 CPUs.
That bug was found and reported on a single-CPU system (!).  So in essence a
race will be found "piecemail-wise", triggering all the necessary components
for the race, without having to reproduce the race scenario itself!  In its
short existence the lock validator found and reported many bugs before they
actually caused a real deadlock.

To further increase the efficiency of the validator, the mapping is not per
"lock instance", but per "lock-class".  For example, all struct inode objects
in the kernel have inode->inotify_mutex.  If there are 10,000 inodes cached,
then there are 10,000 lock objects.  But ->inotify_mutex is a single "lock
type", and all locking activities that occur against ->inotify_mutex are
"unified" into this single lock-class.  The advantage of the lock-class
approach is that all historical ->inotify_mutex uses are mapped into a single
(and as narrow as possible) set of locking rules - regardless of how many
different tasks or inode structures it took to build this set of rules.  The
set of rules persist during the lifetime of the kernel.

To see the rough magnitude of checking that the lock validator does, here's a
portion of /proc/lockdep_stats, fresh after bootup:

 lock-classes:                            694 [max: 2048]
 direct dependencies:                  1598 [max: 8192]
 indirect dependencies:               17896
 all direct dependencies:             16206
 dependency chains:                    1910 [max: 8192]
 in-hardirq chains:                      17
 in-softirq chains:                     105
 in-process chains:                    1065
 stack-trace entries:                 38761 [max: 131072]
 combined max dependencies:         2033928
 hardirq-safe locks:                     24
 hardirq-unsafe locks:                  176
 softirq-safe locks:                     53
 softirq-unsafe locks:                  137
 irq-safe locks:                         59
 irq-unsafe locks:                      176

The lock validator has observed 1598 actual single-thread locking patterns,
and has validated all possible 2033928 distinct locking scenarios.

More details about the design of the lock validator can be found in
Documentation/lockdep-design.txt, which can also found at:

   http://redhat.com/~mingo/lockdep-patches/lockdep-design.txt

[bunk@stusta.de: cleanups]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index b1d4332..50d8b57 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -3,6 +3,7 @@
 
 #include <linux/preempt.h>
 #include <linux/smp_lock.h>
+#include <linux/lockdep.h>
 #include <asm/hardirq.h>
 #include <asm/system.h>
 
@@ -122,7 +123,7 @@ static inline void account_system_vtime(struct task_struct *tsk)
  */
 extern void irq_exit(void);
 
-#define nmi_enter()		irq_enter()
-#define nmi_exit()		__irq_exit()
+#define nmi_enter()		do { lockdep_off(); irq_enter(); } while (0)
+#define nmi_exit()		do { __irq_exit(); lockdep_on(); } while (0)
 
 #endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 444a3ae..60aac2c 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -4,6 +4,7 @@
 #include <linux/file.h>
 #include <linux/rcupdate.h>
 #include <linux/irqflags.h>
+#include <linux/lockdep.h>
 
 #define INIT_FDTABLE \
 {							\
@@ -126,6 +127,7 @@ extern struct group_info init_groups;
 	.fs_excl	= ATOMIC_INIT(0),				\
 	.pi_lock	= SPIN_LOCK_UNLOCKED,				\
 	INIT_TRACE_IRQFLAGS						\
+	INIT_LOCKDEP							\
 }
 
 
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
new file mode 100644
index 0000000..80ec7a4
--- /dev/null
+++ b/include/linux/lockdep.h
@@ -0,0 +1,347 @@
+/*
+ * Runtime locking correctness validator
+ *
+ *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * see Documentation/lockdep-design.txt for more details.
+ */
+#ifndef __LINUX_LOCKDEP_H
+#define __LINUX_LOCKDEP_H
+
+#include <linux/linkage.h>
+#include <linux/list.h>
+#include <linux/debug_locks.h>
+#include <linux/stacktrace.h>
+
+#ifdef CONFIG_LOCKDEP
+
+/*
+ * Lock-class usage-state bits:
+ */
+enum lock_usage_bit
+{
+	LOCK_USED = 0,
+	LOCK_USED_IN_HARDIRQ,
+	LOCK_USED_IN_SOFTIRQ,
+	LOCK_ENABLED_SOFTIRQS,
+	LOCK_ENABLED_HARDIRQS,
+	LOCK_USED_IN_HARDIRQ_READ,
+	LOCK_USED_IN_SOFTIRQ_READ,
+	LOCK_ENABLED_SOFTIRQS_READ,
+	LOCK_ENABLED_HARDIRQS_READ,
+	LOCK_USAGE_STATES
+};
+
+/*
+ * Usage-state bitmasks:
+ */
+#define LOCKF_USED			(1 << LOCK_USED)
+#define LOCKF_USED_IN_HARDIRQ		(1 << LOCK_USED_IN_HARDIRQ)
+#define LOCKF_USED_IN_SOFTIRQ		(1 << LOCK_USED_IN_SOFTIRQ)
+#define LOCKF_ENABLED_HARDIRQS		(1 << LOCK_ENABLED_HARDIRQS)
+#define LOCKF_ENABLED_SOFTIRQS		(1 << LOCK_ENABLED_SOFTIRQS)
+
+#define LOCKF_ENABLED_IRQS (LOCKF_ENABLED_HARDIRQS | LOCKF_ENABLED_SOFTIRQS)
+#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
+
+#define LOCKF_USED_IN_HARDIRQ_READ	(1 << LOCK_USED_IN_HARDIRQ_READ)
+#define LOCKF_USED_IN_SOFTIRQ_READ	(1 << LOCK_USED_IN_SOFTIRQ_READ)
+#define LOCKF_ENABLED_HARDIRQS_READ	(1 << LOCK_ENABLED_HARDIRQS_READ)
+#define LOCKF_ENABLED_SOFTIRQS_READ	(1 << LOCK_ENABLED_SOFTIRQS_READ)
+
+#define LOCKF_ENABLED_IRQS_READ \
+		(LOCKF_ENABLED_HARDIRQS_READ | LOCKF_ENABLED_SOFTIRQS_READ)
+#define LOCKF_USED_IN_IRQ_READ \
+		(LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
+
+#define MAX_LOCKDEP_SUBCLASSES		8UL
+
+/*
+ * Lock-classes are keyed via unique addresses, by embedding the
+ * lockclass-key into the kernel (or module) .data section. (For
+ * static locks we use the lock address itself as the key.)
+ */
+struct lockdep_subclass_key {
+	char __one_byte;
+} __attribute__ ((__packed__));
+
+struct lock_class_key {
+	struct lockdep_subclass_key	subkeys[MAX_LOCKDEP_SUBCLASSES];
+};
+
+/*
+ * The lock-class itself:
+ */
+struct lock_class {
+	/*
+	 * class-hash:
+	 */
+	struct list_head		hash_entry;
+
+	/*
+	 * global list of all lock-classes:
+	 */
+	struct list_head		lock_entry;
+
+	struct lockdep_subclass_key	*key;
+	unsigned int			subclass;
+
+	/*
+	 * IRQ/softirq usage tracking bits:
+	 */
+	unsigned long			usage_mask;
+	struct stack_trace		usage_traces[LOCK_USAGE_STATES];
+
+	/*
+	 * These fields represent a directed graph of lock dependencies,
+	 * to every node we attach a list of "forward" and a list of
+	 * "backward" graph nodes.
+	 */
+	struct list_head		locks_after, locks_before;
+
+	/*
+	 * Generation counter, when doing certain classes of graph walking,
+	 * to ensure that we check one node only once:
+	 */
+	unsigned int			version;
+
+	/*
+	 * Statistics counter:
+	 */
+	unsigned long			ops;
+
+	const char			*name;
+	int				name_version;
+};
+
+/*
+ * Map the lock object (the lock instance) to the lock-class object.
+ * This is embedded into specific lock instances:
+ */
+struct lockdep_map {
+	struct lock_class_key		*key;
+	struct lock_class		*class[MAX_LOCKDEP_SUBCLASSES];
+	const char			*name;
+};
+
+/*
+ * Every lock has a list of other locks that were taken after it.
+ * We only grow the list, never remove from it:
+ */
+struct lock_list {
+	struct list_head		entry;
+	struct lock_class		*class;
+	struct stack_trace		trace;
+};
+
+/*
+ * We record lock dependency chains, so that we can cache them:
+ */
+struct lock_chain {
+	struct list_head		entry;
+	u64				chain_key;
+};
+
+struct held_lock {
+	/*
+	 * One-way hash of the dependency chain up to this point. We
+	 * hash the hashes step by step as the dependency chain grows.
+	 *
+	 * We use it for dependency-caching and we skip detection
+	 * passes and dependency-updates if there is a cache-hit, so
+	 * it is absolutely critical for 100% coverage of the validator
+	 * to have a unique key value for every unique dependency path
+	 * that can occur in the system, to make a unique hash value
+	 * as likely as possible - hence the 64-bit width.
+	 *
+	 * The task struct holds the current hash value (initialized
+	 * with zero), here we store the previous hash value:
+	 */
+	u64				prev_chain_key;
+	struct lock_class		*class;
+	unsigned long			acquire_ip;
+	struct lockdep_map		*instance;
+
+	/*
+	 * The lock-stack is unified in that the lock chains of interrupt
+	 * contexts nest ontop of process context chains, but we 'separate'
+	 * the hashes by starting with 0 if we cross into an interrupt
+	 * context, and we also keep do not add cross-context lock
+	 * dependencies - the lock usage graph walking covers that area
+	 * anyway, and we'd just unnecessarily increase the number of
+	 * dependencies otherwise. [Note: hardirq and softirq contexts
+	 * are separated from each other too.]
+	 *
+	 * The following field is used to detect when we cross into an
+	 * interrupt context:
+	 */
+	int				irq_context;
+	int				trylock;
+	int				read;
+	int				check;
+	int				hardirqs_off;
+};
+
+/*
+ * Initialization, self-test and debugging-output methods:
+ */
+extern void lockdep_init(void);
+extern void lockdep_info(void);
+extern void lockdep_reset(void);
+extern void lockdep_reset_lock(struct lockdep_map *lock);
+extern void lockdep_free_key_range(void *start, unsigned long size);
+
+extern void lockdep_off(void);
+extern void lockdep_on(void);
+extern int lockdep_internal(void);
+
+/*
+ * These methods are used by specific locking variants (spinlocks,
+ * rwlocks, mutexes and rwsems) to pass init/acquire/release events
+ * to lockdep:
+ */
+
+extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
+			     struct lock_class_key *key);
+
+/*
+ * Reinitialize a lock key - for cases where there is special locking or
+ * special initialization of locks so that the validator gets the scope
+ * of dependencies wrong: they are either too broad (they need a class-split)
+ * or they are too narrow (they suffer from a false class-split):
+ */
+#define lockdep_set_class(lock, key) \
+		lockdep_init_map(&(lock)->dep_map, #key, key)
+#define lockdep_set_class_and_name(lock, key, name) \
+		lockdep_init_map(&(lock)->dep_map, name, key)
+
+/*
+ * Acquire a lock.
+ *
+ * Values for "read":
+ *
+ *   0: exclusive (write) acquire
+ *   1: read-acquire (no recursion allowed)
+ *   2: read-acquire with same-instance recursion allowed
+ *
+ * Values for check:
+ *
+ *   0: disabled
+ *   1: simple checks (freeing, held-at-exit-time, etc.)
+ *   2: full validation
+ */
+extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
+			 int trylock, int read, int check, unsigned long ip);
+
+extern void lock_release(struct lockdep_map *lock, int nested,
+			 unsigned long ip);
+
+# define INIT_LOCKDEP				.lockdep_recursion = 0,
+
+#else /* !LOCKDEP */
+
+static inline void lockdep_off(void)
+{
+}
+
+static inline void lockdep_on(void)
+{
+}
+
+static inline int lockdep_internal(void)
+{
+	return 0;
+}
+
+# define lock_acquire(l, s, t, r, c, i)		do { } while (0)
+# define lock_release(l, n, i)			do { } while (0)
+# define lockdep_init()				do { } while (0)
+# define lockdep_info()				do { } while (0)
+# define lockdep_init_map(lock, name, key)	do { (void)(key); } while (0)
+# define lockdep_set_class(lock, key)		do { (void)(key); } while (0)
+# define lockdep_set_class_and_name(lock, key, name) \
+		do { (void)(key); } while (0)
+# define INIT_LOCKDEP
+# define lockdep_reset()		do { debug_locks = 1; } while (0)
+# define lockdep_free_key_range(start, size)	do { } while (0)
+/*
+ * The class key takes no space if lockdep is disabled:
+ */
+struct lock_class_key { };
+#endif /* !LOCKDEP */
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+extern void early_boot_irqs_off(void);
+extern void early_boot_irqs_on(void);
+#else
+# define early_boot_irqs_off()			do { } while (0)
+# define early_boot_irqs_on()			do { } while (0)
+#endif
+
+/*
+ * For trivial one-depth nesting of a lock-class, the following
+ * global define can be used. (Subsystems with multiple levels
+ * of nesting should define their own lock-nesting subclasses.)
+ */
+#define SINGLE_DEPTH_NESTING			1
+
+/*
+ * Map the dependency ops to NOP or to real lockdep ops, depending
+ * on the per lock-class debug mode:
+ */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# ifdef CONFIG_PROVE_LOCKING
+#  define spin_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 2, i)
+# else
+#  define spin_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 1, i)
+# endif
+# define spin_release(l, n, i)			lock_release(l, n, i)
+#else
+# define spin_acquire(l, s, t, i)		do { } while (0)
+# define spin_release(l, n, i)			do { } while (0)
+#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# ifdef CONFIG_PROVE_LOCKING
+#  define rwlock_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 2, i)
+#  define rwlock_acquire_read(l, s, t, i)	lock_acquire(l, s, t, 2, 2, i)
+# else
+#  define rwlock_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 1, i)
+#  define rwlock_acquire_read(l, s, t, i)	lock_acquire(l, s, t, 2, 1, i)
+# endif
+# define rwlock_release(l, n, i)		lock_release(l, n, i)
+#else
+# define rwlock_acquire(l, s, t, i)		do { } while (0)
+# define rwlock_acquire_read(l, s, t, i)	do { } while (0)
+# define rwlock_release(l, n, i)		do { } while (0)
+#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# ifdef CONFIG_PROVE_LOCKING
+#  define mutex_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 2, i)
+# else
+#  define mutex_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 1, i)
+# endif
+# define mutex_release(l, n, i)			lock_release(l, n, i)
+#else
+# define mutex_acquire(l, s, t, i)		do { } while (0)
+# define mutex_release(l, n, i)			do { } while (0)
+#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# ifdef CONFIG_PROVE_LOCKING
+#  define rwsem_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 2, i)
+#  define rwsem_acquire_read(l, s, t, i)	lock_acquire(l, s, t, 1, 2, i)
+# else
+#  define rwsem_acquire(l, s, t, i)		lock_acquire(l, s, t, 0, 1, i)
+#  define rwsem_acquire_read(l, s, t, i)	lock_acquire(l, s, t, 1, 1, i)
+# endif
+# define rwsem_release(l, n, i)			lock_release(l, n, i)
+#else
+# define rwsem_acquire(l, s, t, i)		do { } while (0)
+# define rwsem_acquire_read(l, s, t, i)		do { } while (0)
+# define rwsem_release(l, n, i)			do { } while (0)
+#endif
+
+#endif /* __LINUX_LOCKDEP_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ad7a890..8ebddba 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -886,6 +886,13 @@ struct task_struct {
 	int hardirq_context;
 	int softirq_context;
 #endif
+#ifdef CONFIG_LOCKDEP
+# define MAX_LOCK_DEPTH 30UL
+	u64 curr_chain_key;
+	int lockdep_depth;
+	struct held_lock held_locks[MAX_LOCK_DEPTH];
+	unsigned int lockdep_recursion;
+#endif
 
 /* journalling filesystem info */
 	void *journal_info;
diff --git a/init/main.c b/init/main.c
index fc73e1c..fc473d4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -48,6 +48,7 @@
 #include <linux/unwind.h>
 #include <linux/buffer_head.h>
 #include <linux/debug_locks.h>
+#include <linux/lockdep.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -457,6 +458,15 @@ asmlinkage void __init start_kernel(void)
 
 	smp_setup_processor_id();
 
+	/*
+	 * Need to run as early as possible, to initialize the
+	 * lockdep hash:
+	 */
+	lockdep_init();
+
+	local_irq_disable();
+	early_boot_irqs_off();
+
 /*
  * Interrupts are still disabled. Do necessary setups, then
  * enable them
@@ -502,6 +512,7 @@ asmlinkage void __init start_kernel(void)
 	profile_init();
 	if (!irqs_disabled())
 		printk("start_kernel(): bug: interrupts were enabled early\n");
+	early_boot_irqs_on();
 	local_irq_enable();
 
 	/*
@@ -512,6 +523,9 @@ asmlinkage void __init start_kernel(void)
 	console_init();
 	if (panic_later)
 		panic(panic_later, panic_param);
+
+	lockdep_info();
+
 	/*
 	 * Need to run this when irqs are enabled, because it wants
 	 * to self-test [hard/soft]-irqs on/off lock inversion bugs
diff --git a/kernel/Makefile b/kernel/Makefile
index e7fd20e..049aa79 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -13,6 +13,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
+obj-$(CONFIG_LOCKDEP) += lockdep.o
 obj-$(CONFIG_FUTEX) += futex.o
 ifeq ($(CONFIG_COMPAT),y)
 obj-$(CONFIG_FUTEX) += futex_compat.o
diff --git a/kernel/fork.c b/kernel/fork.c
index b7db7fb..7f48abd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1061,6 +1061,11 @@ static task_t *copy_process(unsigned long clone_flags,
 	p->hardirq_context = 0;
 	p->softirq_context = 0;
 #endif
+#ifdef CONFIG_LOCKDEP
+	p->lockdep_depth = 0; /* no locks held yet */
+	p->curr_chain_key = 0;
+	p->lockdep_recursion = 0;
+#endif
 
 	rt_mutex_init_task(p);
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index c911c6e..4e46143 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -410,6 +410,12 @@ int request_irq(unsigned int irq,
 	struct irqaction *action;
 	int retval;
 
+#ifdef CONFIG_LOCKDEP
+	/*
+	 * Lockdep wants atomic interrupt handlers:
+	 */
+	irqflags |= SA_INTERRUPT;
+#endif
 	/*
 	 * Sanity-check: shared interrupts must pass in a real dev-ID,
 	 * otherwise we'll have trouble later trying to figure out
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
new file mode 100644
index 0000000..dd05809
--- /dev/null
+++ b/kernel/lockdep.c
@@ -0,0 +1,2703 @@
+/*
+ * kernel/lockdep.c
+ *
+ * Runtime locking correctness validator
+ *
+ * Started by Ingo Molnar:
+ *
+ *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * this code maps all the lock dependencies as they occur in a live kernel
+ * and will warn about the following classes of locking bugs:
+ *
+ * - lock inversion scenarios
+ * - circular lock dependencies
+ * - hardirq/softirq safe/unsafe locking bugs
+ *
+ * Bugs are reported even if the current locking scenario does not cause
+ * any deadlock at this point.
+ *
+ * I.e. if anytime in the past two locks were taken in a different order,
+ * even if it happened for another task, even if those were different
+ * locks (but of the same class as this lock), this code will detect it.
+ *
+ * Thanks to Arjan van de Ven for coming up with the initial idea of
+ * mapping lock dependencies runtime.
+ */
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/kallsyms.h>
+#include <linux/interrupt.h>
+#include <linux/stacktrace.h>
+#include <linux/debug_locks.h>
+#include <linux/irqflags.h>
+
+#include <asm/sections.h>
+
+#include "lockdep_internals.h"
+
+/*
+ * hash_lock: protects the lockdep hashes and class/list/hash allocators.
+ *
+ * This is one of the rare exceptions where it's justified
+ * to use a raw spinlock - we really dont want the spinlock
+ * code to recurse back into the lockdep code.
+ */
+static raw_spinlock_t hash_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+static int lockdep_initialized;
+
+unsigned long nr_list_entries;
+static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
+
+/*
+ * Allocate a lockdep entry. (assumes hash_lock held, returns
+ * with NULL on failure)
+ */
+static struct lock_list *alloc_list_entry(void)
+{
+	if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) {
+		__raw_spin_unlock(&hash_lock);
+		debug_locks_off();
+		printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
+		printk("turning off the locking correctness validator.\n");
+		return NULL;
+	}
+	return list_entries + nr_list_entries++;
+}
+
+/*
+ * All data structures here are protected by the global debug_lock.
+ *
+ * Mutex key structs only get allocated, once during bootup, and never
+ * get freed - this significantly simplifies the debugging code.
+ */
+unsigned long nr_lock_classes;
+static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
+
+/*
+ * We keep a global list of all lock classes. The list only grows,
+ * never shrinks. The list is only accessed with the lockdep
+ * spinlock lock held.
+ */
+LIST_HEAD(all_lock_classes);
+
+/*
+ * The lockdep classes are in a hash-table as well, for fast lookup:
+ */
+#define CLASSHASH_BITS		(MAX_LOCKDEP_KEYS_BITS - 1)
+#define CLASSHASH_SIZE		(1UL << CLASSHASH_BITS)
+#define CLASSHASH_MASK		(CLASSHASH_SIZE - 1)
+#define __classhashfn(key)	((((unsigned long)key >> CLASSHASH_BITS) + (unsigned long)key) & CLASSHASH_MASK)
+#define classhashentry(key)	(classhash_table + __classhashfn((key)))
+
+static struct list_head classhash_table[CLASSHASH_SIZE];
+
+unsigned long nr_lock_chains;
+static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS];
+
+/*
+ * We put the lock dependency chains into a hash-table as well, to cache
+ * their existence:
+ */
+#define CHAINHASH_BITS		(MAX_LOCKDEP_CHAINS_BITS-1)
+#define CHAINHASH_SIZE		(1UL << CHAINHASH_BITS)
+#define CHAINHASH_MASK		(CHAINHASH_SIZE - 1)
+#define __chainhashfn(chain) \
+		(((chain >> CHAINHASH_BITS) + chain) & CHAINHASH_MASK)
+#define chainhashentry(chain)	(chainhash_table + __chainhashfn((chain)))
+
+static struct list_head chainhash_table[CHAINHASH_SIZE];
+
+/*
+ * The hash key of the lock dependency chains is a hash itself too:
+ * it's a hash of all locks taken up to that lock, including that lock.
+ * It's a 64-bit hash, because it's important for the keys to be
+ * unique.
+ */
+#define iterate_chain_key(key1, key2) \
+	(((key1) << MAX_LOCKDEP_KEYS_BITS/2) ^ \
+	((key1) >> (64-MAX_LOCKDEP_KEYS_BITS/2)) ^ \
+	(key2))
+
+void lockdep_off(void)
+{
+	current->lockdep_recursion++;
+}
+
+EXPORT_SYMBOL(lockdep_off);
+
+void lockdep_on(void)
+{
+	current->lockdep_recursion--;
+}
+
+EXPORT_SYMBOL(lockdep_on);
+
+int lockdep_internal(void)
+{
+	return current->lockdep_recursion != 0;
+}
+
+EXPORT_SYMBOL(lockdep_internal);
+
+/*
+ * Debugging switches:
+ */
+
+#define VERBOSE			0
+#ifdef VERBOSE
+# define VERY_VERBOSE		0
+#endif
+
+#if VERBOSE
+# define HARDIRQ_VERBOSE	1
+# define SOFTIRQ_VERBOSE	1
+#else
+# define HARDIRQ_VERBOSE	0
+# define SOFTIRQ_VERBOSE	0
+#endif
+
+#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE
+/*
+ * Quick filtering for interesting events:
+ */
+static int class_filter(struct lock_class *class)
+{
+	if (class->name_version == 1 &&
+			!strcmp(class->name, "&rl->lock"))
+		return 1;
+	if (class->name_version == 1 &&
+			!strcmp(class->name, "&ni->mrec_lock"))
+		return 1;
+	if (class->name_version == 1 &&
+			!strcmp(class->name, "mft_ni_runlist_lock"))
+		return 1;
+	if (class->name_version == 1 &&
+			!strcmp(class->name, "mft_ni_mrec_lock"))
+		return 1;
+	if (class->name_version == 1 &&
+			!strcmp(class->name, "&vol->lcnbmp_lock"))
+		return 1;
+	return 0;
+}
+#endif
+
+static int verbose(struct lock_class *class)
+{
+#if VERBOSE
+	return class_filter(class);
+#endif
+	return 0;
+}
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+static int hardirq_verbose(struct lock_class *class)
+{
+#if HARDIRQ_VERBOSE
+	return class_filter(class);
+#endif
+	return 0;
+}
+
+static int softirq_verbose(struct lock_class *class)
+{
+#if SOFTIRQ_VERBOSE
+	return class_filter(class);
+#endif
+	return 0;
+}
+
+#endif
+
+/*
+ * Stack-trace: tightly packed array of stack backtrace
+ * addresses. Protected by the hash_lock.
+ */
+unsigned long nr_stack_trace_entries;
+static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
+
+static int save_trace(struct stack_trace *trace)
+{
+	trace->nr_entries = 0;
+	trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
+	trace->entries = stack_trace + nr_stack_trace_entries;
+
+	save_stack_trace(trace, NULL, 0, 3);
+
+	trace->max_entries = trace->nr_entries;
+
+	nr_stack_trace_entries += trace->nr_entries;
+	if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES))
+		return 0;
+
+	if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) {
+		__raw_spin_unlock(&hash_lock);
+		if (debug_locks_off()) {
+			printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n");
+			printk("turning off the locking correctness validator.\n");
+			dump_stack();
+		}
+		return 0;
+	}
+
+	return 1;
+}
+
+unsigned int nr_hardirq_chains;
+unsigned int nr_softirq_chains;
+unsigned int nr_process_chains;
+unsigned int max_lockdep_depth;
+unsigned int max_recursion_depth;
+
+#ifdef CONFIG_DEBUG_LOCKDEP
+/*
+ * We cannot printk in early bootup code. Not even early_printk()
+ * might work. So we mark any initialization errors and printk
+ * about it later on, in lockdep_info().
+ */
+static int lockdep_init_error;
+
+/*
+ * Various lockdep statistics:
+ */
+atomic_t chain_lookup_hits;
+atomic_t chain_lookup_misses;
+atomic_t hardirqs_on_events;
+atomic_t hardirqs_off_events;
+atomic_t redundant_hardirqs_on;
+atomic_t redundant_hardirqs_off;
+atomic_t softirqs_on_events;
+atomic_t softirqs_off_events;
+atomic_t redundant_softirqs_on;
+atomic_t redundant_softirqs_off;
+atomic_t nr_unused_locks;
+atomic_t nr_cyclic_checks;
+atomic_t nr_cyclic_check_recursions;
+atomic_t nr_find_usage_forwards_checks;
+atomic_t nr_find_usage_forwards_recursions;
+atomic_t nr_find_usage_backwards_checks;
+atomic_t nr_find_usage_backwards_recursions;
+# define debug_atomic_inc(ptr)		atomic_inc(ptr)
+# define debug_atomic_dec(ptr)		atomic_dec(ptr)
+# define debug_atomic_read(ptr)		atomic_read(ptr)
+#else
+# define debug_atomic_inc(ptr)		do { } while (0)
+# define debug_atomic_dec(ptr)		do { } while (0)
+# define debug_atomic_read(ptr)		0
+#endif
+
+/*
+ * Locking printouts:
+ */
+
+static const char *usage_str[] =
+{
+	[LOCK_USED] =			"initial-use ",
+	[LOCK_USED_IN_HARDIRQ] =	"in-hardirq-W",
+	[LOCK_USED_IN_SOFTIRQ] =	"in-softirq-W",
+	[LOCK_ENABLED_SOFTIRQS] =	"softirq-on-W",
+	[LOCK_ENABLED_HARDIRQS] =	"hardirq-on-W",
+	[LOCK_USED_IN_HARDIRQ_READ] =	"in-hardirq-R",
+	[LOCK_USED_IN_SOFTIRQ_READ] =	"in-softirq-R",
+	[LOCK_ENABLED_SOFTIRQS_READ] =	"softirq-on-R",
+	[LOCK_ENABLED_HARDIRQS_READ] =	"hardirq-on-R",
+};
+
+const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
+{
+	unsigned long offs, size;
+	char *modname;
+
+	return kallsyms_lookup((unsigned long)key, &size, &offs, &modname, str);
+}
+
+void
+get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4)
+{
+	*c1 = '.', *c2 = '.', *c3 = '.', *c4 = '.';
+
+	if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
+		*c1 = '+';
+	else
+		if (class->usage_mask & LOCKF_ENABLED_HARDIRQS)
+			*c1 = '-';
+
+	if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
+		*c2 = '+';
+	else
+		if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS)
+			*c2 = '-';
+
+	if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
+		*c3 = '-';
+	if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) {
+		*c3 = '+';
+		if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
+			*c3 = '?';
+	}
+
+	if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
+		*c4 = '-';
+	if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) {
+		*c4 = '+';
+		if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
+			*c4 = '?';
+	}
+}
+
+static void print_lock_name(struct lock_class *class)
+{
+	char str[128], c1, c2, c3, c4;
+	const char *name;
+
+	get_usage_chars(class, &c1, &c2, &c3, &c4);
+
+	name = class->name;
+	if (!name) {
+		name = __get_key_name(class->key, str);
+		printk(" (%s", name);
+	} else {
+		printk(" (%s", name);
+		if (class->name_version > 1)
+			printk("#%d", class->name_version);
+		if (class->subclass)
+			printk("/%d", class->subclass);
+	}
+	printk("){%c%c%c%c}", c1, c2, c3, c4);
+}
+
+static void print_lockdep_cache(struct lockdep_map *lock)
+{
+	const char *name;
+	char str[128];
+
+	name = lock->name;
+	if (!name)
+		name = __get_key_name(lock->key->subkeys, str);
+
+	printk("%s", name);
+}
+
+static void print_lock(struct held_lock *hlock)
+{
+	print_lock_name(hlock->class);
+	printk(", at: ");
+	print_ip_sym(hlock->acquire_ip);
+}
+
+static void lockdep_print_held_locks(struct task_struct *curr)
+{
+	int i, depth = curr->lockdep_depth;
+
+	if (!depth) {
+		printk("no locks held by %s/%d.\n", curr->comm, curr->pid);
+		return;
+	}
+	printk("%d lock%s held by %s/%d:\n",
+		depth, depth > 1 ? "s" : "", curr->comm, curr->pid);
+
+	for (i = 0; i < depth; i++) {
+		printk(" #%d: ", i);
+		print_lock(curr->held_locks + i);
+	}
+}
+/*
+ * Helper to print a nice hierarchy of lock dependencies:
+ */
+static void print_spaces(int nr)
+{
+	int i;
+
+	for (i = 0; i < nr; i++)
+		printk("  ");
+}
+
+static void print_lock_class_header(struct lock_class *class, int depth)
+{
+	int bit;
+
+	print_spaces(depth);
+	printk("->");
+	print_lock_name(class);
+	printk(" ops: %lu", class->ops);
+	printk(" {\n");
+
+	for (bit = 0; bit < LOCK_USAGE_STATES; bit++) {
+		if (class->usage_mask & (1 << bit)) {
+			int len = depth;
+
+			print_spaces(depth);
+			len += printk("   %s", usage_str[bit]);
+			len += printk(" at:\n");
+			print_stack_trace(class->usage_traces + bit, len);
+		}
+	}
+	print_spaces(depth);
+	printk(" }\n");
+
+	print_spaces(depth);
+	printk(" ... key      at: ");
+	print_ip_sym((unsigned long)class->key);
+}
+
+/*
+ * printk all lock dependencies starting at <entry>:
+ */
+static void print_lock_dependencies(struct lock_class *class, int depth)
+{
+	struct lock_list *entry;
+
+	if (DEBUG_LOCKS_WARN_ON(depth >= 20))
+		return;
+
+	print_lock_class_header(class, depth);
+
+	list_for_each_entry(entry, &class->locks_after, entry) {
+		DEBUG_LOCKS_WARN_ON(!entry->class);
+		print_lock_dependencies(entry->class, depth + 1);
+
+		print_spaces(depth);
+		printk(" ... acquired at:\n");
+		print_stack_trace(&entry->trace, 2);
+		printk("\n");
+	}
+}
+
+/*
+ * Add a new dependency to the head of the list:
+ */
+static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
+			    struct list_head *head, unsigned long ip)
+{
+	struct lock_list *entry;
+	/*
+	 * Lock not present yet - get a new dependency struct and
+	 * add it to the list:
+	 */
+	entry = alloc_list_entry();
+	if (!entry)
+		return 0;
+
+	entry->class = this;
+	save_trace(&entry->trace);
+
+	/*
+	 * Since we never remove from the dependency list, the list can
+	 * be walked lockless by other CPUs, it's only allocation
+	 * that must be protected by the spinlock. But this also means
+	 * we must make new entries visible only once writes to the
+	 * entry become visible - hence the RCU op:
+	 */
+	list_add_tail_rcu(&entry->entry, head);
+
+	return 1;
+}
+
+/*
+ * Recursive, forwards-direction lock-dependency checking, used for
+ * both noncyclic checking and for hardirq-unsafe/softirq-unsafe
+ * checking.
+ *
+ * (to keep the stackframe of the recursive functions small we
+ *  use these global variables, and we also mark various helper
+ *  functions as noinline.)
+ */
+static struct held_lock *check_source, *check_target;
+
+/*
+ * Print a dependency chain entry (this is only done when a deadlock
+ * has been detected):
+ */
+static noinline int
+print_circular_bug_entry(struct lock_list *target, unsigned int depth)
+{
+	if (debug_locks_silent)
+		return 0;
+	printk("\n-> #%u", depth);
+	print_lock_name(target->class);
+	printk(":\n");
+	print_stack_trace(&target->trace, 6);
+
+	return 0;
+}
+
+/*
+ * When a circular dependency is detected, print the
+ * header first:
+ */
+static noinline int
+print_circular_bug_header(struct lock_list *entry, unsigned int depth)
+{
+	struct task_struct *curr = current;
+
+	__raw_spin_unlock(&hash_lock);
+	debug_locks_off();
+	if (debug_locks_silent)
+		return 0;
+
+	printk("\n=======================================================\n");
+	printk(  "[ INFO: possible circular locking dependency detected ]\n");
+	printk(  "-------------------------------------------------------\n");
+	printk("%s/%d is trying to acquire lock:\n",
+		curr->comm, curr->pid);
+	print_lock(check_source);
+	printk("\nbut task is already holding lock:\n");
+	print_lock(check_target);
+	printk("\nwhich lock already depends on the new lock.\n\n");
+	printk("\nthe existing dependency chain (in reverse order) is:\n");
+
+	print_circular_bug_entry(entry, depth);
+
+	return 0;
+}
+
+static noinline int print_circular_bug_tail(void)
+{
+	struct task_struct *curr = current;
+	struct lock_list this;
+
+	if (debug_locks_silent)
+		return 0;
+
+	this.class = check_source->class;
+	save_trace(&this.trace);
+	print_circular_bug_entry(&this, 0);
+
+	printk("\nother info that might help us debug this:\n\n");
+	lockdep_print_held_locks(curr);
+
+	printk("\nstack backtrace:\n");
+	dump_stack();
+
+	return 0;
+}
+
+static int noinline print_infinite_recursion_bug(void)
+{
+	__raw_spin_unlock(&hash_lock);
+	DEBUG_LOCKS_WARN_ON(1);
+
+	return 0;
+}
+
+/*
+ * Prove that the dependency graph starting at <entry> can not
+ * lead to <target>. Print an error and return 0 if it does.
+ */
+static noinline int
+check_noncircular(struct lock_class *source, unsigned int depth)
+{
+	struct lock_list *entry;
+
+	debug_atomic_inc(&nr_cyclic_check_recursions);
+	if (depth > max_recursion_depth)
+		max_recursion_depth = depth;
+	if (depth >= 20)
+		return print_infinite_recursion_bug();
+	/*
+	 * Check this lock's dependency list:
+	 */
+	list_for_each_entry(entry, &source->locks_after, entry) {
+		if (entry->class == check_target->class)
+			return print_circular_bug_header(entry, depth+1);
+		debug_atomic_inc(&nr_cyclic_checks);
+		if (!check_noncircular(entry->class, depth+1))
+			return print_circular_bug_entry(entry, depth+1);
+	}
+	return 1;
+}
+
+static int very_verbose(struct lock_class *class)
+{
+#if VERY_VERBOSE
+	return class_filter(class);
+#endif
+	return 0;
+}
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+/*
+ * Forwards and backwards subgraph searching, for the purposes of
+ * proving that two subgraphs can be connected by a new dependency
+ * without creating any illegal irq-safe -> irq-unsafe lock dependency.
+ */
+static enum lock_usage_bit find_usage_bit;
+static struct lock_class *forwards_match, *backwards_match;
+
+/*
+ * Find a node in the forwards-direction dependency sub-graph starting
+ * at <source> that matches <find_usage_bit>.
+ *
+ * Return 2 if such a node exists in the subgraph, and put that node
+ * into <forwards_match>.
+ *
+ * Return 1 otherwise and keep <forwards_match> unchanged.
+ * Return 0 on error.
+ */
+static noinline int
+find_usage_forwards(struct lock_class *source, unsigned int depth)
+{
+	struct lock_list *entry;
+	int ret;
+
+	if (depth > max_recursion_depth)
+		max_recursion_depth = depth;
+	if (depth >= 20)
+		return print_infinite_recursion_bug();
+
+	debug_atomic_inc(&nr_find_usage_forwards_checks);
+	if (source->usage_mask & (1 << find_usage_bit)) {
+		forwards_match = source;
+		return 2;
+	}
+
+	/*
+	 * Check this lock's dependency list:
+	 */
+	list_for_each_entry(entry, &source->locks_after, entry) {
+		debug_atomic_inc(&nr_find_usage_forwards_recursions);
+		ret = find_usage_forwards(entry->class, depth+1);
+		if (ret == 2 || ret == 0)
+			return ret;
+	}
+	return 1;
+}
+
+/*
+ * Find a node in the backwards-direction dependency sub-graph starting
+ * at <source> that matches <find_usage_bit>.
+ *
+ * Return 2 if such a node exists in the subgraph, and put that node
+ * into <backwards_match>.
+ *
+ * Return 1 otherwise and keep <backwards_match> unchanged.
+ * Return 0 on error.
+ */
+static noinline int
+find_usage_backwards(struct lock_class *source, unsigned int depth)
+{
+	struct lock_list *entry;
+	int ret;
+
+	if (depth > max_recursion_depth)
+		max_recursion_depth = depth;
+	if (depth >= 20)
+		return print_infinite_recursion_bug();
+
+	debug_atomic_inc(&nr_find_usage_backwards_checks);
+	if (source->usage_mask & (1 << find_usage_bit)) {
+		backwards_match = source;
+		return 2;
+	}
+
+	/*
+	 * Check this lock's dependency list:
+	 */
+	list_for_each_entry(entry, &source->locks_before, entry) {
+		debug_atomic_inc(&nr_find_usage_backwards_recursions);
+		ret = find_usage_backwards(entry->class, depth+1);
+		if (ret == 2 || ret == 0)
+			return ret;
+	}
+	return 1;
+}
+
+static int
+print_bad_irq_dependency(struct task_struct *curr,
+			 struct held_lock *prev,
+			 struct held_lock *next,
+			 enum lock_usage_bit bit1,
+			 enum lock_usage_bit bit2,
+			 const char *irqclass)
+{
+	__raw_spin_unlock(&hash_lock);
+	debug_locks_off();
+	if (debug_locks_silent)
+		return 0;
+
+	printk("\n======================================================\n");
+	printk(  "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
+		irqclass, irqclass);
+	printk(  "------------------------------------------------------\n");
+	printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
+		curr->comm, curr->pid,
+		curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
+		curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
+		curr->hardirqs_enabled,
+		curr->softirqs_enabled);
+	print_lock(next);
+
+	printk("\nand this task is already holding:\n");
+	print_lock(prev);
+	printk("which would create a new lock dependency:\n");
+	print_lock_name(prev->class);
+	printk(" ->");
+	print_lock_name(next->class);
+	printk("\n");
+
+	printk("\nbut this new dependency connects a %s-irq-safe lock:\n",
+		irqclass);
+	print_lock_name(backwards_match);
+	printk("\n... which became %s-irq-safe at:\n", irqclass);
+
+	print_stack_trace(backwards_match->usage_traces + bit1, 1);
+
+	printk("\nto a %s-irq-unsafe lock:\n", irqclass);
+	print_lock_name(forwards_match);
+	printk("\n... which became %s-irq-unsafe at:\n", irqclass);
+	printk("...");
+
+	print_stack_trace(forwards_match->usage_traces + bit2, 1);
+
+	printk("\nother info that might help us debug this:\n\n");
+	lockdep_print_held_locks(curr);
+
+	printk("\nthe %s-irq-safe lock's dependencies:\n", irqclass);
+	print_lock_dependencies(backwards_match, 0);
+
+	printk("\nthe %s-irq-unsafe lock's dependencies:\n", irqclass);
+	print_lock_dependencies(forwards_match, 0);
+
+	printk("\nstack backtrace:\n");
+	dump_stack();
+
+	return 0;
+}
+
+static int
+check_usage(struct task_struct *curr, struct held_lock *prev,
+	    struct held_lock *next, enum lock_usage_bit bit_backwards,
+	    enum lock_usage_bit bit_forwards, const char *irqclass)
+{
+	int ret;
+
+	find_usage_bit = bit_backwards;
+	/* fills in <backwards_match> */
+	ret = find_usage_backwards(prev->class, 0);
+	if (!ret || ret == 1)
+		return ret;
+
+	find_usage_bit = bit_forwards;
+	ret = find_usage_forwards(next->class, 0);
+	if (!ret || ret == 1)
+		return ret;
+	/* ret == 2 */
+	return print_bad_irq_dependency(curr, prev, next,
+			bit_backwards, bit_forwards, irqclass);
+}
+
+#endif
+
+static int
+print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
+		   struct held_lock *next)
+{
+	debug_locks_off();
+	__raw_spin_unlock(&hash_lock);
+	if (debug_locks_silent)
+		return 0;
+
+	printk("\n=============================================\n");
+	printk(  "[ INFO: possible recursive locking detected ]\n");
+	printk(  "---------------------------------------------\n");
+	printk("%s/%d is trying to acquire lock:\n",
+		curr->comm, curr->pid);
+	print_lock(next);
+	printk("\nbut task is already holding lock:\n");
+	print_lock(prev);
+
+	printk("\nother info that might help us debug this:\n");
+	lockdep_print_held_locks(curr);
+
+	printk("\nstack backtrace:\n");
+	dump_stack();
+
+	return 0;
+}
+
+/*
+ * Check whether we are holding such a class already.
+ *
+ * (Note that this has to be done separately, because the graph cannot
+ * detect such classes of deadlocks.)
+ *
+ * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read
+ */
+static int
+check_deadlock(struct task_struct *curr, struct held_lock *next,
+	       struct lockdep_map *next_instance, int read)
+{
+	struct held_lock *prev;
+	int i;
+
+	for (i = 0; i < curr->lockdep_depth; i++) {
+		prev = curr->held_locks + i;
+		if (prev->class != next->class)
+			continue;
+		/*
+		 * Allow read-after-read recursion of the same
+		 * lock instance (i.e. read_lock(lock)+read_lock(lock)):
+		 */
+		if ((read == 2) && prev->read &&
+				(prev->instance == next_instance))
+			return 2;
+		return print_deadlock_bug(curr, prev, next);
+	}
+	return 1;
+}
+
+/*
+ * There was a chain-cache miss, and we are about to add a new dependency
+ * to a previous lock. We recursively validate the following rules:
+ *
+ *  - would the adding of the <prev> -> <next> dependency create a
+ *    circular dependency in the graph? [== circular deadlock]
+ *
+ *  - does the new prev->next dependency connect any hardirq-safe lock
+ *    (in the full backwards-subgraph starting at <prev>) with any
+ *    hardirq-unsafe lock (in the full forwards-subgraph starting at
+ *    <next>)? [== illegal lock inversion with hardirq contexts]
+ *
+ *  - does the new prev->next dependency connect any softirq-safe lock
+ *    (in the full backwards-subgraph starting at <prev>) with any
+ *    softirq-unsafe lock (in the full forwards-subgraph starting at
+ *    <next>)? [== illegal lock inversion with softirq contexts]
+ *
+ * any of these scenarios could lead to a deadlock.
+ *
+ * Then if all the validations pass, we add the forwards and backwards
+ * dependency.
+ */
+static int
+check_prev_add(struct task_struct *curr, struct held_lock *prev,
+	       struct held_lock *next)
+{
+	struct lock_list *entry;
+	int ret;
+
+	/*
+	 * Prove that the new <prev> -> <next> dependency would not
+	 * create a circular dependency in the graph. (We do this by
+	 * forward-recursing into the graph starting at <next>, and
+	 * checking whether we can reach <prev>.)
+	 *
+	 * We are using global variables to control the recursion, to
+	 * keep the stackframe size of the recursive functions low:
+	 */
+	check_source = next;
+	check_target = prev;
+	if (!(check_noncircular(next->class, 0)))
+		return print_circular_bug_tail();
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	/*
+	 * Prove that the new dependency does not connect a hardirq-safe
+	 * lock with a hardirq-unsafe lock - to achieve this we search
+	 * the backwards-subgraph starting at <prev>, and the
+	 * forwards-subgraph starting at <next>:
+	 */
+	if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ,
+					LOCK_ENABLED_HARDIRQS, "hard"))
+		return 0;
+
+	/*
+	 * Prove that the new dependency does not connect a hardirq-safe-read
+	 * lock with a hardirq-unsafe lock - to achieve this we search
+	 * the backwards-subgraph starting at <prev>, and the
+	 * forwards-subgraph starting at <next>:
+	 */
+	if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ,
+					LOCK_ENABLED_HARDIRQS, "hard-read"))
+		return 0;
+
+	/*
+	 * Prove that the new dependency does not connect a softirq-safe
+	 * lock with a softirq-unsafe lock - to achieve this we search
+	 * the backwards-subgraph starting at <prev>, and the
+	 * forwards-subgraph starting at <next>:
+	 */
+	if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ,
+					LOCK_ENABLED_SOFTIRQS, "soft"))
+		return 0;
+	/*
+	 * Prove that the new dependency does not connect a softirq-safe-read
+	 * lock with a softirq-unsafe lock - to achieve this we search
+	 * the backwards-subgraph starting at <prev>, and the
+	 * forwards-subgraph starting at <next>:
+	 */
+	if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ,
+					LOCK_ENABLED_SOFTIRQS, "soft"))
+		return 0;
+#endif
+	/*
+	 * For recursive read-locks we do all the dependency checks,
+	 * but we dont store read-triggered dependencies (only
+	 * write-triggered dependencies). This ensures that only the
+	 * write-side dependencies matter, and that if for example a
+	 * write-lock never takes any other locks, then the reads are
+	 * equivalent to a NOP.
+	 */
+	if (next->read == 2 || prev->read == 2)
+		return 1;
+	/*
+	 * Is the <prev> -> <next> dependency already present?
+	 *
+	 * (this may occur even though this is a new chain: consider
+	 *  e.g. the L1 -> L2 -> L3 -> L4 and the L5 -> L1 -> L2 -> L3
+	 *  chains - the second one will be new, but L1 already has
+	 *  L2 added to its dependency list, due to the first chain.)
+	 */
+	list_for_each_entry(entry, &prev->class->locks_after, entry) {
+		if (entry->class == next->class)
+			return 2;
+	}
+
+	/*
+	 * Ok, all validations passed, add the new lock
+	 * to the previous lock's dependency list:
+	 */
+	ret = add_lock_to_list(prev->class, next->class,
+			       &prev->class->locks_after, next->acquire_ip);
+	if (!ret)
+		return 0;
+	/*
+	 * Return value of 2 signals 'dependency already added',
+	 * in that case we dont have to add the backlink either.
+	 */
+	if (ret == 2)
+		return 2;
+	ret = add_lock_to_list(next->class, prev->class,
+			       &next->class->locks_before, next->acquire_ip);
+
+	/*
+	 * Debugging printouts:
+	 */
+	if (verbose(prev->class) || verbose(next->class)) {
+		__raw_spin_unlock(&hash_lock);
+		printk("\n new dependency: ");
+		print_lock_name(prev->class);
+		printk(" => ");
+		print_lock_name(next->class);
+		printk("\n");
+		dump_stack();
+		__raw_spin_lock(&hash_lock);
+	}
+	return 1;
+}
+
+/*
+ * Add the dependency to all directly-previous locks that are 'relevant'.
+ * The ones that are relevant are (in increasing distance from curr):
+ * all consecutive trylock entries and the final non-trylock entry - or
+ * the end of this context's lock-chain - whichever comes first.
+ */
+static int
+check_prevs_add(struct task_struct *curr, struct held_lock *next)
+{
+	int depth = curr->lockdep_depth;
+	struct held_lock *hlock;
+
+	/*
+	 * Debugging checks.
+	 *
+	 * Depth must not be zero for a non-head lock:
+	 */
+	if (!depth)
+		goto out_bug;
+	/*
+	 * At least two relevant locks must exist for this
+	 * to be a head:
+	 */
+	if (curr->held_locks[depth].irq_context !=
+			curr->held_locks[depth-1].irq_context)
+		goto out_bug;
+
+	for (;;) {
+		hlock = curr->held_locks + depth-1;
+		/*
+		 * Only non-recursive-read entries get new dependencies
+		 * added:
+		 */
+		if (hlock->read != 2) {
+			check_prev_add(curr, hlock, next);
+			/*
+			 * Stop after the first non-trylock entry,
+			 * as non-trylock entries have added their
+			 * own direct dependencies already, so this
+			 * lock is connected to them indirectly:
+			 */
+			if (!hlock->trylock)
+				break;
+		}
+		depth--;
+		/*
+		 * End of lock-stack?
+		 */
+		if (!depth)
+			break;
+		/*
+		 * Stop the search if we cross into another context:
+		 */
+		if (curr->held_locks[depth].irq_context !=
+				curr->held_locks[depth-1].irq_context)
+			break;
+	}
+	return 1;
+out_bug:
+	__raw_spin_unlock(&hash_lock);
+	DEBUG_LOCKS_WARN_ON(1);
+
+	return 0;
+}
+
+
+/*
+ * Is this the address of a static object:
+ */
+static int static_obj(void *obj)
+{
+	unsigned long start = (unsigned long) &_stext,
+		      end   = (unsigned long) &_end,
+		      addr  = (unsigned long) obj;
+#ifdef CONFIG_SMP
+	int i;
+#endif
+
+	/*
+	 * static variable?
+	 */
+	if ((addr >= start) && (addr < end))
+		return 1;
+
+#ifdef CONFIG_SMP
+	/*
+	 * percpu var?
+	 */
+	for_each_possible_cpu(i) {
+		start = (unsigned long) &__per_cpu_start + per_cpu_offset(i);
+		end   = (unsigned long) &__per_cpu_end   + per_cpu_offset(i);
+
+		if ((addr >= start) && (addr < end))
+			return 1;
+	}
+#endif
+
+	/*
+	 * module var?
+	 */
+	return is_module_address(addr);
+}
+
+/*
+ * To make lock name printouts unique, we calculate a unique
+ * class->name_version generation counter:
+ */
+static int count_matching_names(struct lock_class *new_class)
+{
+	struct lock_class *class;
+	int count = 0;
+
+	if (!new_class->name)
+		return 0;
+
+	list_for_each_entry(class, &all_lock_classes, lock_entry) {
+		if (new_class->key - new_class->subclass == class->key)
+			return class->name_version;
+		if (class->name && !strcmp(class->name, new_class->name))
+			count = max(count, class->name_version);
+	}
+
+	return count + 1;
+}
+
+extern void __error_too_big_MAX_LOCKDEP_SUBCLASSES(void);
+
+/*
+ * Register a lock's class in the hash-table, if the class is not present
+ * yet. Otherwise we look it up. We cache the result in the lock object
+ * itself, so actual lookup of the hash should be once per lock object.
+ */
+static inline struct lock_class *
+register_lock_class(struct lockdep_map *lock, unsigned int subclass)
+{
+	struct lockdep_subclass_key *key;
+	struct list_head *hash_head;
+	struct lock_class *class;
+
+#ifdef CONFIG_DEBUG_LOCKDEP
+	/*
+	 * If the architecture calls into lockdep before initializing
+	 * the hashes then we'll warn about it later. (we cannot printk
+	 * right now)
+	 */
+	if (unlikely(!lockdep_initialized)) {
+		lockdep_init();
+		lockdep_init_error = 1;
+	}
+#endif
+
+	/*
+	 * Static locks do not have their class-keys yet - for them the key
+	 * is the lock object itself:
+	 */
+	if (unlikely(!lock->key))
+		lock->key = (void *)lock;
+
+	/*
+	 * NOTE: the class-key must be unique. For dynamic locks, a static
+	 * lock_class_key variable is passed in through the mutex_init()
+	 * (or spin_lock_init()) call - which acts as the key. For static
+	 * locks we use the lock object itself as the key.
+	 */
+	if (sizeof(struct lock_class_key) > sizeof(struct lock_class))
+		__error_too_big_MAX_LOCKDEP_SUBCLASSES();
+
+	key = lock->key->subkeys + subclass;
+
+	hash_head = classhashentry(key);
+
+	/*
+	 * We can walk the hash lockfree, because the hash only
+	 * grows, and we are careful when adding entries to the end:
+	 */
+	list_for_each_entry(class, hash_head, hash_entry)
+		if (class->key == key)
+			goto out_set;
+
+	/*
+	 * Debug-check: all keys must be persistent!
+ 	 */
+	if (!static_obj(lock->key)) {
+		debug_locks_off();
+		printk("INFO: trying to register non-static key.\n");
+		printk("the code is fine but needs lockdep annotation.\n");
+		printk("turning off the locking correctness validator.\n");
+		dump_stack();
+
+		return NULL;
+	}
+
+	__raw_spin_lock(&hash_lock);
+	/*
+	 * We have to do the hash-walk again, to avoid races
+	 * with another CPU:
+	 */
+	list_for_each_entry(class, hash_head, hash_entry)
+		if (class->key == key)
+			goto out_unlock_set;
+	/*
+	 * Allocate a new key from the static array, and add it to
+	 * the hash:
+	 */
+	if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
+		__raw_spin_unlock(&hash_lock);
+		debug_locks_off();
+		printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
+		printk("turning off the locking correctness validator.\n");
+		return NULL;
+	}
+	class = lock_classes + nr_lock_classes++;
+	debug_atomic_inc(&nr_unused_locks);
+	class->key = key;
+	class->name = lock->name;
+	class->subclass = subclass;
+	INIT_LIST_HEAD(&class->lock_entry);
+	INIT_LIST_HEAD(&class->locks_before);
+	INIT_LIST_HEAD(&class->locks_after);
+	class->name_version = count_matching_names(class);
+	/*
+	 * We use RCU's safe list-add method to make
+	 * parallel walking of the hash-list safe:
+	 */
+	list_add_tail_rcu(&class->hash_entry, hash_head);
+
+	if (verbose(class)) {
+		__raw_spin_unlock(&hash_lock);
+		printk("\nnew class %p: %s", class->key, class->name);
+		if (class->name_version > 1)
+			printk("#%d", class->name_version);
+		printk("\n");
+		dump_stack();
+		__raw_spin_lock(&hash_lock);
+	}
+out_unlock_set:
+	__raw_spin_unlock(&hash_lock);
+
+out_set:
+	lock->class[subclass] = class;
+
+	DEBUG_LOCKS_WARN_ON(class->subclass != subclass);
+
+	return class;
+}
+
+/*
+ * Look up a dependency chain. If the key is not present yet then
+ * add it and return 0 - in this case the new dependency chain is
+ * validated. If the key is already hashed, return 1.
+ */
+static inline int lookup_chain_cache(u64 chain_key)
+{
+	struct list_head *hash_head = chainhashentry(chain_key);
+	struct lock_chain *chain;
+
+	DEBUG_LOCKS_WARN_ON(!irqs_disabled());
+	/*
+	 * We can walk it lock-free, because entries only get added
+	 * to the hash:
+	 */
+	list_for_each_entry(chain, hash_head, entry) {
+		if (chain->chain_key == chain_key) {
+cache_hit:
+			debug_atomic_inc(&chain_lookup_hits);
+			/*
+			 * In the debugging case, force redundant checking
+			 * by returning 1:
+			 */
+#ifdef CONFIG_DEBUG_LOCKDEP
+			__raw_spin_lock(&hash_lock);
+			return 1;
+#endif
+			return 0;
+		}
+	}
+	/*
+	 * Allocate a new chain entry from the static array, and add
+	 * it to the hash:
+	 */
+	__raw_spin_lock(&hash_lock);
+	/*
+	 * We have to walk the chain again locked - to avoid duplicates:
+	 */
+	list_for_each_entry(chain, hash_head, entry) {
+		if (chain->chain_key == chain_key) {
+			__raw_spin_unlock(&hash_lock);
+			goto cache_hit;
+		}
+	}
+	if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
+		__raw_spin_unlock(&hash_lock);
+		debug_locks_off();
+		printk("BUG: MAX_LOCKDEP_CHAINS too low!\n");
+		printk("turning off the locking correctness validator.\n");
+		return 0;
+	}
+	chain = lock_chains + nr_lock_chains++;
+	chain->chain_key = chain_key;
+	list_add_tail_rcu(&chain->entry, hash_head);
+	debug_atomic_inc(&chain_lookup_misses);
+#ifdef CONFIG_TRACE_IRQFLAGS
+	if (current->hardirq_context)
+		nr_hardirq_chains++;
+	else {
+		if (current->softirq_context)
+			nr_softirq_chains++;
+		else
+			nr_process_chains++;
+	}
+#else
+	nr_process_chains++;
+#endif
+
+	return 1;
+}
+
+/*
+ * We are building curr_chain_key incrementally, so double-check
+ * it from scratch, to make sure that it's done correctly:
+ */
+static void check_chain_key(struct task_struct *curr)
+{
+#ifdef CONFIG_DEBUG_LOCKDEP
+	struct held_lock *hlock, *prev_hlock = NULL;
+	unsigned int i, id;
+	u64 chain_key = 0;
+
+	for (i = 0; i < curr->lockdep_depth; i++) {
+		hlock = curr->held_locks + i;
+		if (chain_key != hlock->prev_chain_key) {
+			debug_locks_off();
+			printk("hm#1, depth: %u [%u], %016Lx != %016Lx\n",
+				curr->lockdep_depth, i,
+				(unsigned long long)chain_key,
+				(unsigned long long)hlock->prev_chain_key);
+			WARN_ON(1);
+			return;
+		}
+		id = hlock->class - lock_classes;
+		DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS);
+		if (prev_hlock && (prev_hlock->irq_context !=
+							hlock->irq_context))
+			chain_key = 0;
+		chain_key = iterate_chain_key(chain_key, id);
+		prev_hlock = hlock;
+	}
+	if (chain_key != curr->curr_chain_key) {
+		debug_locks_off();
+		printk("hm#2, depth: %u [%u], %016Lx != %016Lx\n",
+			curr->lockdep_depth, i,
+			(unsigned long long)chain_key,
+			(unsigned long long)curr->curr_chain_key);
+		WARN_ON(1);
+	}
+#endif
+}
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+/*
+ * print irq inversion bug:
+ */
+static int
+print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
+			struct held_lock *this, int forwards,
+			const char *irqclass)
+{
+	__raw_spin_unlock(&hash_lock);
+	debug_locks_off();
+	if (debug_locks_silent)
+		return 0;
+
+	printk("\n=========================================================\n");
+	printk(  "[ INFO: possible irq lock inversion dependency detected ]\n");
+	printk(  "---------------------------------------------------------\n");
+	printk("%s/%d just changed the state of lock:\n",
+		curr->comm, curr->pid);
+	print_lock(this);
+	if (forwards)
+		printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass);
+	else
+		printk("but this lock was taken by another, %s-irq-safe lock in the past:\n", irqclass);
+	print_lock_name(other);
+	printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
+
+	printk("\nother info that might help us debug this:\n");
+	lockdep_print_held_locks(curr);
+
+	printk("\nthe first lock's dependencies:\n");
+	print_lock_dependencies(this->class, 0);
+
+	printk("\nthe second lock's dependencies:\n");
+	print_lock_dependencies(other, 0);
+
+	printk("\nstack backtrace:\n");
+	dump_stack();
+
+	return 0;
+}
+
+/*
+ * Prove that in the forwards-direction subgraph starting at <this>
+ * there is no lock matching <mask>:
+ */
+static int
+check_usage_forwards(struct task_struct *curr, struct held_lock *this,
+		     enum lock_usage_bit bit, const char *irqclass)
+{
+	int ret;
+
+	find_usage_bit = bit;
+	/* fills in <forwards_match> */
+	ret = find_usage_forwards(this->class, 0);
+	if (!ret || ret == 1)
+		return ret;
+
+	return print_irq_inversion_bug(curr, forwards_match, this, 1, irqclass);
+}
+
+/*
+ * Prove that in the backwards-direction subgraph starting at <this>
+ * there is no lock matching <mask>:
+ */
+static int
+check_usage_backwards(struct task_struct *curr, struct held_lock *this,
+		      enum lock_usage_bit bit, const char *irqclass)
+{
+	int ret;
+
+	find_usage_bit = bit;
+	/* fills in <backwards_match> */
+	ret = find_usage_backwards(this->class, 0);
+	if (!ret || ret == 1)
+		return ret;
+
+	return print_irq_inversion_bug(curr, backwards_match, this, 0, irqclass);
+}
+
+static inline void print_irqtrace_events(struct task_struct *curr)
+{
+	printk("irq event stamp: %u\n", curr->irq_events);
+	printk("hardirqs last  enabled at (%u): ", curr->hardirq_enable_event);
+	print_ip_sym(curr->hardirq_enable_ip);
+	printk("hardirqs last disabled at (%u): ", curr->hardirq_disable_event);
+	print_ip_sym(curr->hardirq_disable_ip);
+	printk("softirqs last  enabled at (%u): ", curr->softirq_enable_event);
+	print_ip_sym(curr->softirq_enable_ip);
+	printk("softirqs last disabled at (%u): ", curr->softirq_disable_event);
+	print_ip_sym(curr->softirq_disable_ip);
+}
+
+#else
+static inline void print_irqtrace_events(struct task_struct *curr)
+{
+}
+#endif
+
+static int
+print_usage_bug(struct task_struct *curr, struct held_lock *this,
+		enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit)
+{
+	__raw_spin_unlock(&hash_lock);
+	debug_locks_off();
+	if (debug_locks_silent)
+		return 0;
+
+	printk("\n=================================\n");
+	printk(  "[ INFO: inconsistent lock state ]\n");
+	printk(  "---------------------------------\n");
+
+	printk("inconsistent {%s} -> {%s} usage.\n",
+		usage_str[prev_bit], usage_str[new_bit]);
+
+	printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
+		curr->comm, curr->pid,
+		trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
+		trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
+		trace_hardirqs_enabled(curr),
+		trace_softirqs_enabled(curr));
+	print_lock(this);
+
+	printk("{%s} state was registered at:\n", usage_str[prev_bit]);
+	print_stack_trace(this->class->usage_traces + prev_bit, 1);
+
+	print_irqtrace_events(curr);
+	printk("\nother info that might help us debug this:\n");
+	lockdep_print_held_locks(curr);
+
+	printk("\nstack backtrace:\n");
+	dump_stack();
+
+	return 0;
+}
+
+/*
+ * Print out an error if an invalid bit is set:
+ */
+static inline int
+valid_state(struct task_struct *curr, struct held_lock *this,
+	    enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
+{
+	if (unlikely(this->class->usage_mask & (1 << bad_bit)))
+		return print_usage_bug(curr, this, bad_bit, new_bit);
+	return 1;
+}
+
+#define STRICT_READ_CHECKS	1
+
+/*
+ * Mark a lock with a usage bit, and validate the state transition:
+ */
+static int mark_lock(struct task_struct *curr, struct held_lock *this,
+		     enum lock_usage_bit new_bit, unsigned long ip)
+{
+	unsigned int new_mask = 1 << new_bit, ret = 1;
+
+	/*
+	 * If already set then do not dirty the cacheline,
+	 * nor do any checks:
+	 */
+	if (likely(this->class->usage_mask & new_mask))
+		return 1;
+
+	__raw_spin_lock(&hash_lock);
+	/*
+	 * Make sure we didnt race:
+	 */
+	if (unlikely(this->class->usage_mask & new_mask)) {
+		__raw_spin_unlock(&hash_lock);
+		return 1;
+	}
+
+	this->class->usage_mask |= new_mask;
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	if (new_bit == LOCK_ENABLED_HARDIRQS ||
+			new_bit == LOCK_ENABLED_HARDIRQS_READ)
+		ip = curr->hardirq_enable_ip;
+	else if (new_bit == LOCK_ENABLED_SOFTIRQS ||
+			new_bit == LOCK_ENABLED_SOFTIRQS_READ)
+		ip = curr->softirq_enable_ip;
+#endif
+	if (!save_trace(this->class->usage_traces + new_bit))
+		return 0;
+
+	switch (new_bit) {
+#ifdef CONFIG_TRACE_IRQFLAGS
+	case LOCK_USED_IN_HARDIRQ:
+		if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
+			return 0;
+		if (!valid_state(curr, this, new_bit,
+				 LOCK_ENABLED_HARDIRQS_READ))
+			return 0;
+		/*
+		 * just marked it hardirq-safe, check that this lock
+		 * took no hardirq-unsafe lock in the past:
+		 */
+		if (!check_usage_forwards(curr, this,
+					  LOCK_ENABLED_HARDIRQS, "hard"))
+			return 0;
+#if STRICT_READ_CHECKS
+		/*
+		 * just marked it hardirq-safe, check that this lock
+		 * took no hardirq-unsafe-read lock in the past:
+		 */
+		if (!check_usage_forwards(curr, this,
+				LOCK_ENABLED_HARDIRQS_READ, "hard-read"))
+			return 0;
+#endif
+		if (hardirq_verbose(this->class))
+			ret = 2;
+		break;
+	case LOCK_USED_IN_SOFTIRQ:
+		if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
+			return 0;
+		if (!valid_state(curr, this, new_bit,
+				 LOCK_ENABLED_SOFTIRQS_READ))
+			return 0;
+		/*
+		 * just marked it softirq-safe, check that this lock
+		 * took no softirq-unsafe lock in the past:
+		 */
+		if (!check_usage_forwards(curr, this,
+					  LOCK_ENABLED_SOFTIRQS, "soft"))
+			return 0;
+#if STRICT_READ_CHECKS
+		/*
+		 * just marked it softirq-safe, check that this lock
+		 * took no softirq-unsafe-read lock in the past:
+		 */
+		if (!check_usage_forwards(curr, this,
+				LOCK_ENABLED_SOFTIRQS_READ, "soft-read"))
+			return 0;
+#endif
+		if (softirq_verbose(this->class))
+			ret = 2;
+		break;
+	case LOCK_USED_IN_HARDIRQ_READ:
+		if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
+			return 0;
+		/*
+		 * just marked it hardirq-read-safe, check that this lock
+		 * took no hardirq-unsafe lock in the past:
+		 */
+		if (!check_usage_forwards(curr, this,
+					  LOCK_ENABLED_HARDIRQS, "hard"))
+			return 0;
+		if (hardirq_verbose(this->class))
+			ret = 2;
+		break;
+	case LOCK_USED_IN_SOFTIRQ_READ:
+		if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
+			return 0;
+		/*
+		 * just marked it softirq-read-safe, check that this lock
+		 * took no softirq-unsafe lock in the past:
+		 */
+		if (!check_usage_forwards(curr, this,
+					  LOCK_ENABLED_SOFTIRQS, "soft"))
+			return 0;
+		if (softirq_verbose(this->class))
+			ret = 2;
+		break;
+	case LOCK_ENABLED_HARDIRQS:
+		if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
+			return 0;
+		if (!valid_state(curr, this, new_bit,
+				 LOCK_USED_IN_HARDIRQ_READ))
+			return 0;
+		/*
+		 * just marked it hardirq-unsafe, check that no hardirq-safe
+		 * lock in the system ever took it in the past:
+		 */
+		if (!check_usage_backwards(curr, this,
+					   LOCK_USED_IN_HARDIRQ, "hard"))
+			return 0;
+#if STRICT_READ_CHECKS
+		/*
+		 * just marked it hardirq-unsafe, check that no
+		 * hardirq-safe-read lock in the system ever took
+		 * it in the past:
+		 */
+		if (!check_usage_backwards(curr, this,
+				   LOCK_USED_IN_HARDIRQ_READ, "hard-read"))
+			return 0;
+#endif
+		if (hardirq_verbose(this->class))
+			ret = 2;
+		break;
+	case LOCK_ENABLED_SOFTIRQS:
+		if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
+			return 0;
+		if (!valid_state(curr, this, new_bit,
+				 LOCK_USED_IN_SOFTIRQ_READ))
+			return 0;
+		/*
+		 * just marked it softirq-unsafe, check that no softirq-safe
+		 * lock in the system ever took it in the past:
+		 */
+		if (!check_usage_backwards(curr, this,
+					   LOCK_USED_IN_SOFTIRQ, "soft"))
+			return 0;
+#if STRICT_READ_CHECKS
+		/*
+		 * just marked it softirq-unsafe, check that no
+		 * softirq-safe-read lock in the system ever took
+		 * it in the past:
+		 */
+		if (!check_usage_backwards(curr, this,
+				   LOCK_USED_IN_SOFTIRQ_READ, "soft-read"))
+			return 0;
+#endif
+		if (softirq_verbose(this->class))
+			ret = 2;
+		break;
+	case LOCK_ENABLED_HARDIRQS_READ:
+		if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
+			return 0;
+#if STRICT_READ_CHECKS
+		/*
+		 * just marked it hardirq-read-unsafe, check that no
+		 * hardirq-safe lock in the system ever took it in the past:
+		 */
+		if (!check_usage_backwards(curr, this,
+					   LOCK_USED_IN_HARDIRQ, "hard"))
+			return 0;
+#endif
+		if (hardirq_verbose(this->class))
+			ret = 2;
+		break;
+	case LOCK_ENABLED_SOFTIRQS_READ:
+		if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
+			return 0;
+#if STRICT_READ_CHECKS
+		/*
+		 * just marked it softirq-read-unsafe, check that no
+		 * softirq-safe lock in the system ever took it in the past:
+		 */
+		if (!check_usage_backwards(curr, this,
+					   LOCK_USED_IN_SOFTIRQ, "soft"))
+			return 0;
+#endif
+		if (softirq_verbose(this->class))
+			ret = 2;
+		break;
+#endif
+	case LOCK_USED:
+		/*
+		 * Add it to the global list of classes:
+		 */
+		list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes);
+		debug_atomic_dec(&nr_unused_locks);
+		break;
+	default:
+		debug_locks_off();
+		WARN_ON(1);
+		return 0;
+	}
+
+	__raw_spin_unlock(&hash_lock);
+
+	/*
+	 * We must printk outside of the hash_lock:
+	 */
+	if (ret == 2) {
+		printk("\nmarked lock as {%s}:\n", usage_str[new_bit]);
+		print_lock(this);
+		print_irqtrace_events(curr);
+		dump_stack();
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+/*
+ * Mark all held locks with a usage bit:
+ */
+static int
+mark_held_locks(struct task_struct *curr, int hardirq, unsigned long ip)
+{
+	enum lock_usage_bit usage_bit;
+	struct held_lock *hlock;
+	int i;
+
+	for (i = 0; i < curr->lockdep_depth; i++) {
+		hlock = curr->held_locks + i;
+
+		if (hardirq) {
+			if (hlock->read)
+				usage_bit = LOCK_ENABLED_HARDIRQS_READ;
+			else
+				usage_bit = LOCK_ENABLED_HARDIRQS;
+		} else {
+			if (hlock->read)
+				usage_bit = LOCK_ENABLED_SOFTIRQS_READ;
+			else
+				usage_bit = LOCK_ENABLED_SOFTIRQS;
+		}
+		if (!mark_lock(curr, hlock, usage_bit, ip))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Debugging helper: via this flag we know that we are in
+ * 'early bootup code', and will warn about any invalid irqs-on event:
+ */
+static int early_boot_irqs_enabled;
+
+void early_boot_irqs_off(void)
+{
+	early_boot_irqs_enabled = 0;
+}
+
+void early_boot_irqs_on(void)
+{
+	early_boot_irqs_enabled = 1;
+}
+
+/*
+ * Hardirqs will be enabled:
+ */
+void trace_hardirqs_on(void)
+{
+	struct task_struct *curr = current;
+	unsigned long ip;
+
+	if (unlikely(!debug_locks || current->lockdep_recursion))
+		return;
+
+	if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled)))
+		return;
+
+	if (unlikely(curr->hardirqs_enabled)) {
+		debug_atomic_inc(&redundant_hardirqs_on);
+		return;
+	}
+	/* we'll do an OFF -> ON transition: */
+	curr->hardirqs_enabled = 1;
+	ip = (unsigned long) __builtin_return_address(0);
+
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+		return;
+	if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
+		return;
+	/*
+	 * We are going to turn hardirqs on, so set the
+	 * usage bit for all held locks:
+	 */
+	if (!mark_held_locks(curr, 1, ip))
+		return;
+	/*
+	 * If we have softirqs enabled, then set the usage
+	 * bit for all held locks. (disabled hardirqs prevented
+	 * this bit from being set before)
+	 */
+	if (curr->softirqs_enabled)
+		if (!mark_held_locks(curr, 0, ip))
+			return;
+
+	curr->hardirq_enable_ip = ip;
+	curr->hardirq_enable_event = ++curr->irq_events;
+	debug_atomic_inc(&hardirqs_on_events);
+}
+
+EXPORT_SYMBOL(trace_hardirqs_on);
+
+/*
+ * Hardirqs were disabled:
+ */
+void trace_hardirqs_off(void)
+{
+	struct task_struct *curr = current;
+
+	if (unlikely(!debug_locks || current->lockdep_recursion))
+		return;
+
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+		return;
+
+	if (curr->hardirqs_enabled) {
+		/*
+		 * We have done an ON -> OFF transition:
+		 */
+		curr->hardirqs_enabled = 0;
+		curr->hardirq_disable_ip = _RET_IP_;
+		curr->hardirq_disable_event = ++curr->irq_events;
+		debug_atomic_inc(&hardirqs_off_events);
+	} else
+		debug_atomic_inc(&redundant_hardirqs_off);
+}
+
+EXPORT_SYMBOL(trace_hardirqs_off);
+
+/*
+ * Softirqs will be enabled:
+ */
+void trace_softirqs_on(unsigned long ip)
+{
+	struct task_struct *curr = current;
+
+	if (unlikely(!debug_locks))
+		return;
+
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+		return;
+
+	if (curr->softirqs_enabled) {
+		debug_atomic_inc(&redundant_softirqs_on);
+		return;
+	}
+
+	/*
+	 * We'll do an OFF -> ON transition:
+	 */
+	curr->softirqs_enabled = 1;
+	curr->softirq_enable_ip = ip;
+	curr->softirq_enable_event = ++curr->irq_events;
+	debug_atomic_inc(&softirqs_on_events);
+	/*
+	 * We are going to turn softirqs on, so set the
+	 * usage bit for all held locks, if hardirqs are
+	 * enabled too:
+	 */
+	if (curr->hardirqs_enabled)
+		mark_held_locks(curr, 0, ip);
+}
+
+/*
+ * Softirqs were disabled:
+ */
+void trace_softirqs_off(unsigned long ip)
+{
+	struct task_struct *curr = current;
+
+	if (unlikely(!debug_locks))
+		return;
+
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+		return;
+
+	if (curr->softirqs_enabled) {
+		/*
+		 * We have done an ON -> OFF transition:
+		 */
+		curr->softirqs_enabled = 0;
+		curr->softirq_disable_ip = ip;
+		curr->softirq_disable_event = ++curr->irq_events;
+		debug_atomic_inc(&softirqs_off_events);
+		DEBUG_LOCKS_WARN_ON(!softirq_count());
+	} else
+		debug_atomic_inc(&redundant_softirqs_off);
+}
+
+#endif
+
+/*
+ * Initialize a lock instance's lock-class mapping info:
+ */
+void lockdep_init_map(struct lockdep_map *lock, const char *name,
+		      struct lock_class_key *key)
+{
+	if (unlikely(!debug_locks))
+		return;
+
+	if (DEBUG_LOCKS_WARN_ON(!key))
+		return;
+	if (DEBUG_LOCKS_WARN_ON(!name))
+		return;
+	/*
+	 * Sanity check, the lock-class key must be persistent:
+	 */
+	if (!static_obj(key)) {
+		printk("BUG: key %p not in .data!\n", key);
+		DEBUG_LOCKS_WARN_ON(1);
+		return;
+	}
+	lock->name = name;
+	lock->key = key;
+	memset(lock->class, 0, sizeof(lock->class[0])*MAX_LOCKDEP_SUBCLASSES);
+}
+
+EXPORT_SYMBOL_GPL(lockdep_init_map);
+
+/*
+ * This gets called for every mutex_lock*()/spin_lock*() operation.
+ * We maintain the dependency maps and validate the locking attempt:
+ */
+static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
+			  int trylock, int read, int check, int hardirqs_off,
+			  unsigned long ip)
+{
+	struct task_struct *curr = current;
+	struct held_lock *hlock;
+	struct lock_class *class;
+	unsigned int depth, id;
+	int chain_head = 0;
+	u64 chain_key;
+
+	if (unlikely(!debug_locks))
+		return 0;
+
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+		return 0;
+
+	if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
+		debug_locks_off();
+		printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n");
+		printk("turning off the locking correctness validator.\n");
+		return 0;
+	}
+
+	class = lock->class[subclass];
+	/* not cached yet? */
+	if (unlikely(!class)) {
+		class = register_lock_class(lock, subclass);
+		if (!class)
+			return 0;
+	}
+	debug_atomic_inc((atomic_t *)&class->ops);
+	if (very_verbose(class)) {
+		printk("\nacquire class [%p] %s", class->key, class->name);
+		if (class->name_version > 1)
+			printk("#%d", class->name_version);
+		printk("\n");
+		dump_stack();
+	}
+
+	/*
+	 * Add the lock to the list of currently held locks.
+	 * (we dont increase the depth just yet, up until the
+	 * dependency checks are done)
+	 */
+	depth = curr->lockdep_depth;
+	if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH))
+		return 0;
+
+	hlock = curr->held_locks + depth;
+
+	hlock->class = class;
+	hlock->acquire_ip = ip;
+	hlock->instance = lock;
+	hlock->trylock = trylock;
+	hlock->read = read;
+	hlock->check = check;
+	hlock->hardirqs_off = hardirqs_off;
+
+	if (check != 2)
+		goto out_calc_hash;
+#ifdef CONFIG_TRACE_IRQFLAGS
+	/*
+	 * If non-trylock use in a hardirq or softirq context, then
+	 * mark the lock as used in these contexts:
+	 */
+	if (!trylock) {
+		if (read) {
+			if (curr->hardirq_context)
+				if (!mark_lock(curr, hlock,
+						LOCK_USED_IN_HARDIRQ_READ, ip))
+					return 0;
+			if (curr->softirq_context)
+				if (!mark_lock(curr, hlock,
+						LOCK_USED_IN_SOFTIRQ_READ, ip))
+					return 0;
+		} else {
+			if (curr->hardirq_context)
+				if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ, ip))
+					return 0;
+			if (curr->softirq_context)
+				if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ, ip))
+					return 0;
+		}
+	}
+	if (!hardirqs_off) {
+		if (read) {
+			if (!mark_lock(curr, hlock,
+					LOCK_ENABLED_HARDIRQS_READ, ip))
+				return 0;
+			if (curr->softirqs_enabled)
+				if (!mark_lock(curr, hlock,
+						LOCK_ENABLED_SOFTIRQS_READ, ip))
+					return 0;
+		} else {
+			if (!mark_lock(curr, hlock,
+					LOCK_ENABLED_HARDIRQS, ip))
+				return 0;
+			if (curr->softirqs_enabled)
+				if (!mark_lock(curr, hlock,
+						LOCK_ENABLED_SOFTIRQS, ip))
+					return 0;
+		}
+	}
+#endif
+	/* mark it as used: */
+	if (!mark_lock(curr, hlock, LOCK_USED, ip))
+		return 0;
+out_calc_hash:
+	/*
+	 * Calculate the chain hash: it's the combined has of all the
+	 * lock keys along the dependency chain. We save the hash value
+	 * at every step so that we can get the current hash easily
+	 * after unlock. The chain hash is then used to cache dependency
+	 * results.
+	 *
+	 * The 'key ID' is what is the most compact key value to drive
+	 * the hash, not class->key.
+	 */
+	id = class - lock_classes;
+	if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+		return 0;
+
+	chain_key = curr->curr_chain_key;
+	if (!depth) {
+		if (DEBUG_LOCKS_WARN_ON(chain_key != 0))
+			return 0;
+		chain_head = 1;
+	}
+
+	hlock->prev_chain_key = chain_key;
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	/*
+	 * Keep track of points where we cross into an interrupt context:
+	 */
+	hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) +
+				curr->softirq_context;
+	if (depth) {
+		struct held_lock *prev_hlock;
+
+		prev_hlock = curr->held_locks + depth-1;
+		/*
+		 * If we cross into another context, reset the
+		 * hash key (this also prevents the checking and the
+		 * adding of the dependency to 'prev'):
+		 */
+		if (prev_hlock->irq_context != hlock->irq_context) {
+			chain_key = 0;
+			chain_head = 1;
+		}
+	}
+#endif
+	chain_key = iterate_chain_key(chain_key, id);
+	curr->curr_chain_key = chain_key;
+
+	/*
+	 * Trylock needs to maintain the stack of held locks, but it
+	 * does not add new dependencies, because trylock can be done
+	 * in any order.
+	 *
+	 * We look up the chain_key and do the O(N^2) check and update of
+	 * the dependencies only if this is a new dependency chain.
+	 * (If lookup_chain_cache() returns with 1 it acquires
+	 * hash_lock for us)
+	 */
+	if (!trylock && (check == 2) && lookup_chain_cache(chain_key)) {
+		/*
+		 * Check whether last held lock:
+		 *
+		 * - is irq-safe, if this lock is irq-unsafe
+		 * - is softirq-safe, if this lock is hardirq-unsafe
+		 *
+		 * And check whether the new lock's dependency graph
+		 * could lead back to the previous lock.
+		 *
+		 * any of these scenarios could lead to a deadlock. If
+		 * All validations
+		 */
+		int ret = check_deadlock(curr, hlock, lock, read);
+
+		if (!ret)
+			return 0;
+		/*
+		 * Mark recursive read, as we jump over it when
+		 * building dependencies (just like we jump over
+		 * trylock entries):
+		 */
+		if (ret == 2)
+			hlock->read = 2;
+		/*
+		 * Add dependency only if this lock is not the head
+		 * of the chain, and if it's not a secondary read-lock:
+		 */
+		if (!chain_head && ret != 2)
+			if (!check_prevs_add(curr, hlock))
+				return 0;
+		__raw_spin_unlock(&hash_lock);
+	}
+	curr->lockdep_depth++;
+	check_chain_key(curr);
+	if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
+		debug_locks_off();
+		printk("BUG: MAX_LOCK_DEPTH too low!\n");
+		printk("turning off the locking correctness validator.\n");
+		return 0;
+	}
+	if (unlikely(curr->lockdep_depth > max_lockdep_depth))
+		max_lockdep_depth = curr->lockdep_depth;
+
+	return 1;
+}
+
+static int
+print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
+			   unsigned long ip)
+{
+	if (!debug_locks_off())
+		return 0;
+	if (debug_locks_silent)
+		return 0;
+
+	printk("\n=====================================\n");
+	printk(  "[ BUG: bad unlock balance detected! ]\n");
+	printk(  "-------------------------------------\n");
+	printk("%s/%d is trying to release lock (",
+		curr->comm, curr->pid);
+	print_lockdep_cache(lock);
+	printk(") at:\n");
+	print_ip_sym(ip);
+	printk("but there are no more locks to release!\n");
+	printk("\nother info that might help us debug this:\n");
+	lockdep_print_held_locks(curr);
+
+	printk("\nstack backtrace:\n");
+	dump_stack();
+
+	return 0;
+}
+
+/*
+ * Common debugging checks for both nested and non-nested unlock:
+ */
+static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
+			unsigned long ip)
+{
+	if (unlikely(!debug_locks))
+		return 0;
+	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+		return 0;
+
+	if (curr->lockdep_depth <= 0)
+		return print_unlock_inbalance_bug(curr, lock, ip);
+
+	return 1;
+}
+
+/*
+ * Remove the lock to the list of currently held locks in a
+ * potentially non-nested (out of order) manner. This is a
+ * relatively rare operation, as all the unlock APIs default
+ * to nested mode (which uses lock_release()):
+ */
+static int
+lock_release_non_nested(struct task_struct *curr,
+			struct lockdep_map *lock, unsigned long ip)
+{
+	struct held_lock *hlock, *prev_hlock;
+	unsigned int depth;
+	int i;
+
+	/*
+	 * Check whether the lock exists in the current stack
+	 * of held locks:
+	 */
+	depth = curr->lockdep_depth;
+	if (DEBUG_LOCKS_WARN_ON(!depth))
+		return 0;
+
+	prev_hlock = NULL;
+	for (i = depth-1; i >= 0; i--) {
+		hlock = curr->held_locks + i;
+		/*
+		 * We must not cross into another context:
+		 */
+		if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
+			break;
+		if (hlock->instance == lock)
+			goto found_it;
+		prev_hlock = hlock;
+	}
+	return print_unlock_inbalance_bug(curr, lock, ip);
+
+found_it:
+	/*
+	 * We have the right lock to unlock, 'hlock' points to it.
+	 * Now we remove it from the stack, and add back the other
+	 * entries (if any), recalculating the hash along the way:
+	 */
+	curr->lockdep_depth = i;
+	curr->curr_chain_key = hlock->prev_chain_key;
+
+	for (i++; i < depth; i++) {
+		hlock = curr->held_locks + i;
+		if (!__lock_acquire(hlock->instance,
+			hlock->class->subclass, hlock->trylock,
+				hlock->read, hlock->check, hlock->hardirqs_off,
+				hlock->acquire_ip))
+			return 0;
+	}
+
+	if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1))
+		return 0;
+	return 1;
+}
+
+/*
+ * Remove the lock to the list of currently held locks - this gets
+ * called on mutex_unlock()/spin_unlock*() (or on a failed
+ * mutex_lock_interruptible()). This is done for unlocks that nest
+ * perfectly. (i.e. the current top of the lock-stack is unlocked)
+ */
+static int lock_release_nested(struct task_struct *curr,
+			       struct lockdep_map *lock, unsigned long ip)
+{
+	struct held_lock *hlock;
+	unsigned int depth;
+
+	/*
+	 * Pop off the top of the lock stack:
+	 */
+	depth = curr->lockdep_depth - 1;
+	hlock = curr->held_locks + depth;
+
+	/*
+	 * Is the unlock non-nested:
+	 */
+	if (hlock->instance != lock)
+		return lock_release_non_nested(curr, lock, ip);
+	curr->lockdep_depth--;
+
+	if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0)))
+		return 0;
+
+	curr->curr_chain_key = hlock->prev_chain_key;
+
+#ifdef CONFIG_DEBUG_LOCKDEP
+	hlock->prev_chain_key = 0;
+	hlock->class = NULL;
+	hlock->acquire_ip = 0;
+	hlock->irq_context = 0;
+#endif
+	return 1;
+}
+
+/*
+ * Remove the lock to the list of currently held locks - this gets
+ * called on mutex_unlock()/spin_unlock*() (or on a failed
+ * mutex_lock_interruptible()). This is done for unlocks that nest
+ * perfectly. (i.e. the current top of the lock-stack is unlocked)
+ */
+static void
+__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
+{
+	struct task_struct *curr = current;
+
+	if (!check_unlock(curr, lock, ip))
+		return;
+
+	if (nested) {
+		if (!lock_release_nested(curr, lock, ip))
+			return;
+	} else {
+		if (!lock_release_non_nested(curr, lock, ip))
+			return;
+	}
+
+	check_chain_key(curr);
+}
+
+/*
+ * Check whether we follow the irq-flags state precisely:
+ */
+static void check_flags(unsigned long flags)
+{
+#if defined(CONFIG_DEBUG_LOCKDEP) && defined(CONFIG_TRACE_IRQFLAGS)
+	if (!debug_locks)
+		return;
+
+	if (irqs_disabled_flags(flags))
+		DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled);
+	else
+		DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled);
+
+	/*
+	 * We dont accurately track softirq state in e.g.
+	 * hardirq contexts (such as on 4KSTACKS), so only
+	 * check if not in hardirq contexts:
+	 */
+	if (!hardirq_count()) {
+		if (softirq_count())
+			DEBUG_LOCKS_WARN_ON(current->softirqs_enabled);
+		else
+			DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
+	}
+
+	if (!debug_locks)
+		print_irqtrace_events(current);
+#endif
+}
+
+/*
+ * We are not always called with irqs disabled - do that here,
+ * and also avoid lockdep recursion:
+ */
+void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
+		  int trylock, int read, int check, unsigned long ip)
+{
+	unsigned long flags;
+
+	if (unlikely(current->lockdep_recursion))
+		return;
+
+	raw_local_irq_save(flags);
+	check_flags(flags);
+
+	current->lockdep_recursion = 1;
+	__lock_acquire(lock, subclass, trylock, read, check,
+		       irqs_disabled_flags(flags), ip);
+	current->lockdep_recursion = 0;
+	raw_local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL_GPL(lock_acquire);
+
+void lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
+{
+	unsigned long flags;
+
+	if (unlikely(current->lockdep_recursion))
+		return;
+
+	raw_local_irq_save(flags);
+	check_flags(flags);
+	current->lockdep_recursion = 1;
+	__lock_release(lock, nested, ip);
+	current->lockdep_recursion = 0;
+	raw_local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL_GPL(lock_release);
+
+/*
+ * Used by the testsuite, sanitize the validator state
+ * after a simulated failure:
+ */
+
+void lockdep_reset(void)
+{
+	unsigned long flags;
+
+	raw_local_irq_save(flags);
+	current->curr_chain_key = 0;
+	current->lockdep_depth = 0;
+	current->lockdep_recursion = 0;
+	memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock));
+	nr_hardirq_chains = 0;
+	nr_softirq_chains = 0;
+	nr_process_chains = 0;
+	debug_locks = 1;
+	raw_local_irq_restore(flags);
+}
+
+static void zap_class(struct lock_class *class)
+{
+	int i;
+
+	/*
+	 * Remove all dependencies this lock is
+	 * involved in:
+	 */
+	for (i = 0; i < nr_list_entries; i++) {
+		if (list_entries[i].class == class)
+			list_del_rcu(&list_entries[i].entry);
+	}
+	/*
+	 * Unhash the class and remove it from the all_lock_classes list:
+	 */
+	list_del_rcu(&class->hash_entry);
+	list_del_rcu(&class->lock_entry);
+
+}
+
+static inline int within(void *addr, void *start, unsigned long size)
+{
+	return addr >= start && addr < start + size;
+}
+
+void lockdep_free_key_range(void *start, unsigned long size)
+{
+	struct lock_class *class, *next;
+	struct list_head *head;
+	unsigned long flags;
+	int i;
+
+	raw_local_irq_save(flags);
+	__raw_spin_lock(&hash_lock);
+
+	/*
+	 * Unhash all classes that were created by this module:
+	 */
+	for (i = 0; i < CLASSHASH_SIZE; i++) {
+		head = classhash_table + i;
+		if (list_empty(head))
+			continue;
+		list_for_each_entry_safe(class, next, head, hash_entry)
+			if (within(class->key, start, size))
+				zap_class(class);
+	}
+
+	__raw_spin_unlock(&hash_lock);
+	raw_local_irq_restore(flags);
+}
+
+void lockdep_reset_lock(struct lockdep_map *lock)
+{
+	struct lock_class *class, *next, *entry;
+	struct list_head *head;
+	unsigned long flags;
+	int i, j;
+
+	raw_local_irq_save(flags);
+	__raw_spin_lock(&hash_lock);
+
+	/*
+	 * Remove all classes this lock has:
+	 */
+	for (i = 0; i < CLASSHASH_SIZE; i++) {
+		head = classhash_table + i;
+		if (list_empty(head))
+			continue;
+		list_for_each_entry_safe(class, next, head, hash_entry) {
+			for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) {
+				entry = lock->class[j];
+				if (class == entry) {
+					zap_class(class);
+					lock->class[j] = NULL;
+					break;
+				}
+			}
+		}
+	}
+
+	/*
+	 * Debug check: in the end all mapped classes should
+	 * be gone.
+	 */
+	for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) {
+		entry = lock->class[j];
+		if (!entry)
+			continue;
+		__raw_spin_unlock(&hash_lock);
+		DEBUG_LOCKS_WARN_ON(1);
+		raw_local_irq_restore(flags);
+		return;
+	}
+
+	__raw_spin_unlock(&hash_lock);
+	raw_local_irq_restore(flags);
+}
+
+void __init lockdep_init(void)
+{
+	int i;
+
+	/*
+	 * Some architectures have their own start_kernel()
+	 * code which calls lockdep_init(), while we also
+	 * call lockdep_init() from the start_kernel() itself,
+	 * and we want to initialize the hashes only once:
+	 */
+	if (lockdep_initialized)
+		return;
+
+	for (i = 0; i < CLASSHASH_SIZE; i++)
+		INIT_LIST_HEAD(classhash_table + i);
+
+	for (i = 0; i < CHAINHASH_SIZE; i++)
+		INIT_LIST_HEAD(chainhash_table + i);
+
+	lockdep_initialized = 1;
+}
+
+void __init lockdep_info(void)
+{
+	printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
+
+	printk("... MAX_LOCKDEP_SUBCLASSES:    %lu\n", MAX_LOCKDEP_SUBCLASSES);
+	printk("... MAX_LOCK_DEPTH:          %lu\n", MAX_LOCK_DEPTH);
+	printk("... MAX_LOCKDEP_KEYS:        %lu\n", MAX_LOCKDEP_KEYS);
+	printk("... CLASSHASH_SIZE:           %lu\n", CLASSHASH_SIZE);
+	printk("... MAX_LOCKDEP_ENTRIES:     %lu\n", MAX_LOCKDEP_ENTRIES);
+	printk("... MAX_LOCKDEP_CHAINS:      %lu\n", MAX_LOCKDEP_CHAINS);
+	printk("... CHAINHASH_SIZE:          %lu\n", CHAINHASH_SIZE);
+
+	printk(" memory used by lock dependency info: %lu kB\n",
+		(sizeof(struct lock_class) * MAX_LOCKDEP_KEYS +
+		sizeof(struct list_head) * CLASSHASH_SIZE +
+		sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES +
+		sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS +
+		sizeof(struct list_head) * CHAINHASH_SIZE) / 1024);
+
+	printk(" per task-struct memory footprint: %lu bytes\n",
+		sizeof(struct held_lock) * MAX_LOCK_DEPTH);
+
+#ifdef CONFIG_DEBUG_LOCKDEP
+	if (lockdep_init_error)
+		printk("WARNING: lockdep init error! Arch code didnt call lockdep_init() early enough?\n");
+#endif
+}
+
+static inline int in_range(const void *start, const void *addr, const void *end)
+{
+	return addr >= start && addr <= end;
+}
+
+static void
+print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
+		     const void *mem_to)
+{
+	if (!debug_locks_off())
+		return;
+	if (debug_locks_silent)
+		return;
+
+	printk("\n=========================\n");
+	printk(  "[ BUG: held lock freed! ]\n");
+	printk(  "-------------------------\n");
+	printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
+		curr->comm, curr->pid, mem_from, mem_to-1);
+	lockdep_print_held_locks(curr);
+
+	printk("\nstack backtrace:\n");
+	dump_stack();
+}
+
+/*
+ * Called when kernel memory is freed (or unmapped), or if a lock
+ * is destroyed or reinitialized - this code checks whether there is
+ * any held lock in the memory range of <from> to <to>:
+ */
+void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
+{
+	const void *mem_to = mem_from + mem_len, *lock_from, *lock_to;
+	struct task_struct *curr = current;
+	struct held_lock *hlock;
+	unsigned long flags;
+	int i;
+
+	if (unlikely(!debug_locks))
+		return;
+
+	local_irq_save(flags);
+	for (i = 0; i < curr->lockdep_depth; i++) {
+		hlock = curr->held_locks + i;
+
+		lock_from = (void *)hlock->instance;
+		lock_to = (void *)(hlock->instance + 1);
+
+		if (!in_range(mem_from, lock_from, mem_to) &&
+					!in_range(mem_from, lock_to, mem_to))
+			continue;
+
+		print_freed_lock_bug(curr, mem_from, mem_to);
+		break;
+	}
+	local_irq_restore(flags);
+}
+
+static void print_held_locks_bug(struct task_struct *curr)
+{
+	if (!debug_locks_off())
+		return;
+	if (debug_locks_silent)
+		return;
+
+	printk("\n=====================================\n");
+	printk(  "[ BUG: lock held at task exit time! ]\n");
+	printk(  "-------------------------------------\n");
+	printk("%s/%d is exiting with locks still held!\n",
+		curr->comm, curr->pid);
+	lockdep_print_held_locks(curr);
+
+	printk("\nstack backtrace:\n");
+	dump_stack();
+}
+
+void debug_check_no_locks_held(struct task_struct *task)
+{
+	if (unlikely(task->lockdep_depth > 0))
+		print_held_locks_bug(task);
+}
+
+void debug_show_all_locks(void)
+{
+	struct task_struct *g, *p;
+	int count = 10;
+	int unlock = 1;
+
+	printk("\nShowing all locks held in the system:\n");
+
+	/*
+	 * Here we try to get the tasklist_lock as hard as possible,
+	 * if not successful after 2 seconds we ignore it (but keep
+	 * trying). This is to enable a debug printout even if a
+	 * tasklist_lock-holding task deadlocks or crashes.
+	 */
+retry:
+	if (!read_trylock(&tasklist_lock)) {
+		if (count == 10)
+			printk("hm, tasklist_lock locked, retrying... ");
+		if (count) {
+			count--;
+			printk(" #%d", 10-count);
+			mdelay(200);
+			goto retry;
+		}
+		printk(" ignoring it.\n");
+		unlock = 0;
+	}
+	if (count != 10)
+		printk(" locked it.\n");
+
+	do_each_thread(g, p) {
+		if (p->lockdep_depth)
+			lockdep_print_held_locks(p);
+		if (!unlock)
+			if (read_trylock(&tasklist_lock))
+				unlock = 1;
+	} while_each_thread(g, p);
+
+	printk("\n");
+	printk("=============================================\n\n");
+
+	if (unlock)
+		read_unlock(&tasklist_lock);
+}
+
+EXPORT_SYMBOL_GPL(debug_show_all_locks);
+
+void debug_show_held_locks(struct task_struct *task)
+{
+	lockdep_print_held_locks(task);
+}
+
+EXPORT_SYMBOL_GPL(debug_show_held_locks);
+
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
new file mode 100644
index 0000000..0d355f2
--- /dev/null
+++ b/kernel/lockdep_internals.h
@@ -0,0 +1,78 @@
+/*
+ * kernel/lockdep_internals.h
+ *
+ * Runtime locking correctness validator
+ *
+ * lockdep subsystem internal functions and variables.
+ */
+
+/*
+ * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
+ * we track.
+ *
+ * We use the per-lock dependency maps in two ways: we grow it by adding
+ * every to-be-taken lock to all currently held lock's own dependency
+ * table (if it's not there yet), and we check it for lock order
+ * conflicts and deadlocks.
+ */
+#define MAX_LOCKDEP_ENTRIES	8192UL
+
+#define MAX_LOCKDEP_KEYS_BITS	11
+#define MAX_LOCKDEP_KEYS	(1UL << MAX_LOCKDEP_KEYS_BITS)
+
+#define MAX_LOCKDEP_CHAINS_BITS	13
+#define MAX_LOCKDEP_CHAINS	(1UL << MAX_LOCKDEP_CHAINS_BITS)
+
+/*
+ * Stack-trace: tightly packed array of stack backtrace
+ * addresses. Protected by the hash_lock.
+ */
+#define MAX_STACK_TRACE_ENTRIES	131072UL
+
+extern struct list_head all_lock_classes;
+
+extern void
+get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4);
+
+extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str);
+
+extern unsigned long nr_lock_classes;
+extern unsigned long nr_list_entries;
+extern unsigned long nr_lock_chains;
+extern unsigned long nr_stack_trace_entries;
+
+extern unsigned int nr_hardirq_chains;
+extern unsigned int nr_softirq_chains;
+extern unsigned int nr_process_chains;
+extern unsigned int max_lockdep_depth;
+extern unsigned int max_recursion_depth;
+
+#ifdef CONFIG_DEBUG_LOCKDEP
+/*
+ * Various lockdep statistics:
+ */
+extern atomic_t chain_lookup_hits;
+extern atomic_t chain_lookup_misses;
+extern atomic_t hardirqs_on_events;
+extern atomic_t hardirqs_off_events;
+extern atomic_t redundant_hardirqs_on;
+extern atomic_t redundant_hardirqs_off;
+extern atomic_t softirqs_on_events;
+extern atomic_t softirqs_off_events;
+extern atomic_t redundant_softirqs_on;
+extern atomic_t redundant_softirqs_off;
+extern atomic_t nr_unused_locks;
+extern atomic_t nr_cyclic_checks;
+extern atomic_t nr_cyclic_check_recursions;
+extern atomic_t nr_find_usage_forwards_checks;
+extern atomic_t nr_find_usage_forwards_recursions;
+extern atomic_t nr_find_usage_backwards_checks;
+extern atomic_t nr_find_usage_backwards_recursions;
+# define debug_atomic_inc(ptr)		atomic_inc(ptr)
+# define debug_atomic_dec(ptr)		atomic_dec(ptr)
+# define debug_atomic_read(ptr)		atomic_read(ptr)
+#else
+# define debug_atomic_inc(ptr)		do { } while (0)
+# define debug_atomic_dec(ptr)		do { } while (0)
+# define debug_atomic_read(ptr)		0
+#endif
diff --git a/kernel/module.c b/kernel/module.c
index 0351625..35e1b1f 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1121,6 +1121,9 @@ static void free_module(struct module *mod)
 	if (mod->percpu)
 		percpu_modfree(mod->percpu);
 
+	/* Free lock-classes: */
+	lockdep_free_key_range(mod->module_core, mod->core_size);
+
 	/* Finally, free the core (containing the module structure) */
 	module_free(mod, mod->module_core);
 }
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 16021b0..16c2e98 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -48,7 +48,7 @@ config DEBUG_KERNEL
 config LOG_BUF_SHIFT
 	int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL
 	range 12 21
-	default 17 if S390
+	default 17 if S390 || LOCKDEP
 	default 16 if X86_NUMAQ || IA64
 	default 15 if SMP
 	default 14
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 5cd05f20..9bdc8d4 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -15,6 +15,7 @@
 #include <linux/sched.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/lockdep.h>
 #include <linux/spinlock.h>
 #include <linux/kallsyms.h>
 #include <linux/interrupt.h>
@@ -889,9 +890,6 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
 #include "locking-selftest-softirq.h"
 // GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft)
 
-#define lockdep_reset()
-#define lockdep_reset_lock(x)
-
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define I_SPINLOCK(x)	lockdep_reset_lock(&lock_##x.dep_map)
 # define I_RWLOCK(x)	lockdep_reset_lock(&rwlock_##x.dep_map)
-- 
cgit v0.10.2


From 6c9076ec9cd448f43bbda871352a7067f456ee26 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:51 -0700
Subject: [PATCH] lockdep: allow read_lock() recursion of same class

From: Ingo Molnar <mingo@elte.hu>

lockdep so far only allowed read-recursion for the same lock instance.
This is enough in the overwhelming majority of cases, but a hostap case
triggered and reported by Miles Lane relies on same-class
different-instance recursion.  So we relax the restriction on read-lock
recursion.

(This change does not allow rwsem read-recursion, which is still
forbidden.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index dd05809..f32ca78 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -843,10 +843,9 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
 			continue;
 		/*
 		 * Allow read-after-read recursion of the same
-		 * lock instance (i.e. read_lock(lock)+read_lock(lock)):
+		 * lock class (i.e. read_lock(lock)+read_lock(lock)):
 		 */
-		if ((read == 2) && prev->read &&
-				(prev->instance == next_instance))
+		if ((read == 2) && prev->read)
 			return 2;
 		return print_deadlock_bug(curr, prev, next);
 	}
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 9bdc8d4..7945787 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -248,7 +248,7 @@ GENERATE_TESTCASE(AA_rsem)
 
 /*
  * Special-case for read-locking, they are
- * allowed to recurse on the same lock instance:
+ * allowed to recurse on the same lock class:
  */
 static void rlock_AA1(void)
 {
@@ -259,7 +259,7 @@ static void rlock_AA1(void)
 static void rlock_AA1B(void)
 {
 	RL(X1);
-	RL(X2); // this one should fail
+	RL(X2); // this one should NOT fail
 }
 
 static void rsem_AA1(void)
@@ -1132,7 +1132,7 @@ void locking_selftest(void)
 	init_shared_classes();
 	debug_locks_silent = !debug_locks_verbose;
 
-	DO_TESTCASE_6("A-A deadlock", AA);
+	DO_TESTCASE_6R("A-A deadlock", AA);
 	DO_TESTCASE_6R("A-B-B-A deadlock", ABBA);
 	DO_TESTCASE_6R("A-B-B-C-C-A deadlock", ABBCCA);
 	DO_TESTCASE_6R("A-B-C-A-B-C deadlock", ABCABC);
@@ -1153,7 +1153,7 @@ void locking_selftest(void)
 
 	print_testname("recursive read-lock #2");
 	printk("             |");
-	dotest(rlock_AA1B, FAILURE, LOCKTYPE_RWLOCK);
+	dotest(rlock_AA1B, SUCCESS, LOCKTYPE_RWLOCK);
 	printk("             |");
 	dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM);
 	printk("\n");
-- 
cgit v0.10.2


From f3e97da38e1d69d24195d76f96b912323f5ee30c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:52 -0700
Subject: [PATCH] lockdep: design docs

Lock validator design documentation.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt
new file mode 100644
index 0000000..00d9360
--- /dev/null
+++ b/Documentation/lockdep-design.txt
@@ -0,0 +1,197 @@
+Runtime locking correctness validator
+=====================================
+
+started by Ingo Molnar <mingo@redhat.com>
+additions by Arjan van de Ven <arjan@linux.intel.com>
+
+Lock-class
+----------
+
+The basic object the validator operates upon is a 'class' of locks.
+
+A class of locks is a group of locks that are logically the same with
+respect to locking rules, even if the locks may have multiple (possibly
+tens of thousands of) instantiations. For example a lock in the inode
+struct is one class, while each inode has its own instantiation of that
+lock class.
+
+The validator tracks the 'state' of lock-classes, and it tracks
+dependencies between different lock-classes. The validator maintains a
+rolling proof that the state and the dependencies are correct.
+
+Unlike an lock instantiation, the lock-class itself never goes away: when
+a lock-class is used for the first time after bootup it gets registered,
+and all subsequent uses of that lock-class will be attached to this
+lock-class.
+
+State
+-----
+
+The validator tracks lock-class usage history into 5 separate state bits:
+
+- 'ever held in hardirq context'                    [ == hardirq-safe   ]
+- 'ever held in softirq context'                    [ == softirq-safe   ]
+- 'ever held with hardirqs enabled'                 [ == hardirq-unsafe ]
+- 'ever held with softirqs and hardirqs enabled'    [ == softirq-unsafe ]
+
+- 'ever used'                                       [ == !unused        ]
+
+Single-lock state rules:
+------------------------
+
+A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The
+following states are exclusive, and only one of them is allowed to be
+set for any lock-class:
+
+ <hardirq-safe> and <hardirq-unsafe>
+ <softirq-safe> and <softirq-unsafe>
+
+The validator detects and reports lock usage that violate these
+single-lock state rules.
+
+Multi-lock dependency rules:
+----------------------------
+
+The same lock-class must not be acquired twice, because this could lead
+to lock recursion deadlocks.
+
+Furthermore, two locks may not be taken in different order:
+
+ <L1> -> <L2>
+ <L2> -> <L1>
+
+because this could lead to lock inversion deadlocks. (The validator
+finds such dependencies in arbitrary complexity, i.e. there can be any
+other locking sequence between the acquire-lock operations, the
+validator will still track all dependencies between locks.)
+
+Furthermore, the following usage based lock dependencies are not allowed
+between any two lock-classes:
+
+   <hardirq-safe>   ->  <hardirq-unsafe>
+   <softirq-safe>   ->  <softirq-unsafe>
+
+The first rule comes from the fact the a hardirq-safe lock could be
+taken by a hardirq context, interrupting a hardirq-unsafe lock - and
+thus could result in a lock inversion deadlock. Likewise, a softirq-safe
+lock could be taken by an softirq context, interrupting a softirq-unsafe
+lock.
+
+The above rules are enforced for any locking sequence that occurs in the
+kernel: when acquiring a new lock, the validator checks whether there is
+any rule violation between the new lock and any of the held locks.
+
+When a lock-class changes its state, the following aspects of the above
+dependency rules are enforced:
+
+- if a new hardirq-safe lock is discovered, we check whether it
+  took any hardirq-unsafe lock in the past.
+
+- if a new softirq-safe lock is discovered, we check whether it took
+  any softirq-unsafe lock in the past.
+
+- if a new hardirq-unsafe lock is discovered, we check whether any
+  hardirq-safe lock took it in the past.
+
+- if a new softirq-unsafe lock is discovered, we check whether any
+  softirq-safe lock took it in the past.
+
+(Again, we do these checks too on the basis that an interrupt context
+could interrupt _any_ of the irq-unsafe or hardirq-unsafe locks, which
+could lead to a lock inversion deadlock - even if that lock scenario did
+not trigger in practice yet.)
+
+Exception: Nested data dependencies leading to nested locking
+-------------------------------------------------------------
+
+There are a few cases where the Linux kernel acquires more than one
+instance of the same lock-class. Such cases typically happen when there
+is some sort of hierarchy within objects of the same type. In these
+cases there is an inherent "natural" ordering between the two objects
+(defined by the properties of the hierarchy), and the kernel grabs the
+locks in this fixed order on each of the objects.
+
+An example of such an object hieararchy that results in "nested locking"
+is that of a "whole disk" block-dev object and a "partition" block-dev
+object; the partition is "part of" the whole device and as long as one
+always takes the whole disk lock as a higher lock than the partition
+lock, the lock ordering is fully correct. The validator does not
+automatically detect this natural ordering, as the locking rule behind
+the ordering is not static.
+
+In order to teach the validator about this correct usage model, new
+versions of the various locking primitives were added that allow you to
+specify a "nesting level". An example call, for the block device mutex,
+looks like this:
+
+enum bdev_bd_mutex_lock_class
+{
+       BD_MUTEX_NORMAL,
+       BD_MUTEX_WHOLE,
+       BD_MUTEX_PARTITION
+};
+
+ mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION);
+
+In this case the locking is done on a bdev object that is known to be a
+partition.
+
+The validator treats a lock that is taken in such a nested fasion as a
+separate (sub)class for the purposes of validation.
+
+Note: When changing code to use the _nested() primitives, be careful and
+check really thoroughly that the hiearchy is correctly mapped; otherwise
+you can get false positives or false negatives.
+
+Proof of 100% correctness:
+--------------------------
+
+The validator achieves perfect, mathematical 'closure' (proof of locking
+correctness) in the sense that for every simple, standalone single-task
+locking sequence that occured at least once during the lifetime of the
+kernel, the validator proves it with a 100% certainty that no
+combination and timing of these locking sequences can cause any class of
+lock related deadlock. [*]
+
+I.e. complex multi-CPU and multi-task locking scenarios do not have to
+occur in practice to prove a deadlock: only the simple 'component'
+locking chains have to occur at least once (anytime, in any
+task/context) for the validator to be able to prove correctness. (For
+example, complex deadlocks that would normally need more than 3 CPUs and
+a very unlikely constellation of tasks, irq-contexts and timings to
+occur, can be detected on a plain, lightly loaded single-CPU system as
+well!)
+
+This radically decreases the complexity of locking related QA of the
+kernel: what has to be done during QA is to trigger as many "simple"
+single-task locking dependencies in the kernel as possible, at least
+once, to prove locking correctness - instead of having to trigger every
+possible combination of locking interaction between CPUs, combined with
+every possible hardirq and softirq nesting scenario (which is impossible
+to do in practice).
+
+[*] assuming that the validator itself is 100% correct, and no other
+    part of the system corrupts the state of the validator in any way.
+    We also assume that all NMI/SMM paths [which could interrupt
+    even hardirq-disabled codepaths] are correct and do not interfere
+    with the validator. We also assume that the 64-bit 'chain hash'
+    value is unique for every lock-chain in the system. Also, lock
+    recursion must not be higher than 20.
+
+Performance:
+------------
+
+The above rules require _massive_ amounts of runtime checking. If we did
+that for every lock taken and for every irqs-enable event, it would
+render the system practically unusably slow. The complexity of checking
+is O(N^2), so even with just a few hundred lock-classes we'd have to do
+tens of thousands of checks for every event.
+
+This problem is solved by checking any given 'locking scenario' (unique
+sequence of locks taken after each other) only once. A simple stack of
+held locks is maintained, and a lightweight 64-bit hash value is
+calculated, which hash is unique for every lock chain. The hash value,
+when the chain is validated for the first time, is then put into a hash
+table, which hash-table can be checked in a lockfree manner. If the
+locking chain occurs again later on, the hash table tells us that we
+dont have to validate the chain again.
-- 
cgit v0.10.2


From a8f24a3978c5f82419e1c90dc90460731204f46f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:52 -0700
Subject: [PATCH] lockdep: procfs

Lock validator /proc/lockdep and /proc/lockdep_stats support.
(FIXME: should go into debugfs)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/Makefile b/kernel/Makefile
index 049aa79..3dd994e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -14,6 +14,9 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
 obj-$(CONFIG_LOCKDEP) += lockdep.o
+ifeq ($(CONFIG_PROC_FS),y)
+obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
+endif
 obj-$(CONFIG_FUTEX) += futex.o
 ifeq ($(CONFIG_COMPAT),y)
 obj-$(CONFIG_FUTEX) += futex_compat.o
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
new file mode 100644
index 0000000..f6e72ea
--- /dev/null
+++ b/kernel/lockdep_proc.c
@@ -0,0 +1,345 @@
+/*
+ * kernel/lockdep_proc.c
+ *
+ * Runtime locking correctness validator
+ *
+ * Started by Ingo Molnar:
+ *
+ *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Code for /proc/lockdep and /proc/lockdep_stats:
+ *
+ */
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/kallsyms.h>
+#include <linux/debug_locks.h>
+
+#include "lockdep_internals.h"
+
+static void *l_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct lock_class *class = v;
+
+	(*pos)++;
+
+	if (class->lock_entry.next != &all_lock_classes)
+		class = list_entry(class->lock_entry.next, struct lock_class,
+				  lock_entry);
+	else
+		class = NULL;
+	m->private = class;
+
+	return class;
+}
+
+static void *l_start(struct seq_file *m, loff_t *pos)
+{
+	struct lock_class *class = m->private;
+
+	if (&class->lock_entry == all_lock_classes.next)
+		seq_printf(m, "all lock classes:\n");
+
+	return class;
+}
+
+static void l_stop(struct seq_file *m, void *v)
+{
+}
+
+static unsigned long count_forward_deps(struct lock_class *class)
+{
+	struct lock_list *entry;
+	unsigned long ret = 1;
+
+	/*
+	 * Recurse this class's dependency list:
+	 */
+	list_for_each_entry(entry, &class->locks_after, entry)
+		ret += count_forward_deps(entry->class);
+
+	return ret;
+}
+
+static unsigned long count_backward_deps(struct lock_class *class)
+{
+	struct lock_list *entry;
+	unsigned long ret = 1;
+
+	/*
+	 * Recurse this class's dependency list:
+	 */
+	list_for_each_entry(entry, &class->locks_before, entry)
+		ret += count_backward_deps(entry->class);
+
+	return ret;
+}
+
+static int l_show(struct seq_file *m, void *v)
+{
+	unsigned long nr_forward_deps, nr_backward_deps;
+	struct lock_class *class = m->private;
+	char str[128], c1, c2, c3, c4;
+	const char *name;
+
+	seq_printf(m, "%p", class->key);
+#ifdef CONFIG_DEBUG_LOCKDEP
+	seq_printf(m, " OPS:%8ld", class->ops);
+#endif
+	nr_forward_deps = count_forward_deps(class);
+	seq_printf(m, " FD:%5ld", nr_forward_deps);
+
+	nr_backward_deps = count_backward_deps(class);
+	seq_printf(m, " BD:%5ld", nr_backward_deps);
+
+	get_usage_chars(class, &c1, &c2, &c3, &c4);
+	seq_printf(m, " %c%c%c%c", c1, c2, c3, c4);
+
+	name = class->name;
+	if (!name) {
+		name = __get_key_name(class->key, str);
+		seq_printf(m, ": %s", name);
+	} else{
+		seq_printf(m, ": %s", name);
+		if (class->name_version > 1)
+			seq_printf(m, "#%d", class->name_version);
+		if (class->subclass)
+			seq_printf(m, "/%d", class->subclass);
+	}
+	seq_puts(m, "\n");
+
+	return 0;
+}
+
+static struct seq_operations lockdep_ops = {
+	.start	= l_start,
+	.next	= l_next,
+	.stop	= l_stop,
+	.show	= l_show,
+};
+
+static int lockdep_open(struct inode *inode, struct file *file)
+{
+	int res = seq_open(file, &lockdep_ops);
+	if (!res) {
+		struct seq_file *m = file->private_data;
+
+		if (!list_empty(&all_lock_classes))
+			m->private = list_entry(all_lock_classes.next,
+					struct lock_class, lock_entry);
+		else
+			m->private = NULL;
+	}
+	return res;
+}
+
+static struct file_operations proc_lockdep_operations = {
+	.open		= lockdep_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static void lockdep_stats_debug_show(struct seq_file *m)
+{
+#ifdef CONFIG_DEBUG_LOCKDEP
+	unsigned int hi1 = debug_atomic_read(&hardirqs_on_events),
+		     hi2 = debug_atomic_read(&hardirqs_off_events),
+		     hr1 = debug_atomic_read(&redundant_hardirqs_on),
+		     hr2 = debug_atomic_read(&redundant_hardirqs_off),
+		     si1 = debug_atomic_read(&softirqs_on_events),
+		     si2 = debug_atomic_read(&softirqs_off_events),
+		     sr1 = debug_atomic_read(&redundant_softirqs_on),
+		     sr2 = debug_atomic_read(&redundant_softirqs_off);
+
+	seq_printf(m, " chain lookup misses:           %11u\n",
+		debug_atomic_read(&chain_lookup_misses));
+	seq_printf(m, " chain lookup hits:             %11u\n",
+		debug_atomic_read(&chain_lookup_hits));
+	seq_printf(m, " cyclic checks:                 %11u\n",
+		debug_atomic_read(&nr_cyclic_checks));
+	seq_printf(m, " cyclic-check recursions:       %11u\n",
+		debug_atomic_read(&nr_cyclic_check_recursions));
+	seq_printf(m, " find-mask forwards checks:     %11u\n",
+		debug_atomic_read(&nr_find_usage_forwards_checks));
+	seq_printf(m, " find-mask forwards recursions: %11u\n",
+		debug_atomic_read(&nr_find_usage_forwards_recursions));
+	seq_printf(m, " find-mask backwards checks:    %11u\n",
+		debug_atomic_read(&nr_find_usage_backwards_checks));
+	seq_printf(m, " find-mask backwards recursions:%11u\n",
+		debug_atomic_read(&nr_find_usage_backwards_recursions));
+
+	seq_printf(m, " hardirq on events:             %11u\n", hi1);
+	seq_printf(m, " hardirq off events:            %11u\n", hi2);
+	seq_printf(m, " redundant hardirq ons:         %11u\n", hr1);
+	seq_printf(m, " redundant hardirq offs:        %11u\n", hr2);
+	seq_printf(m, " softirq on events:             %11u\n", si1);
+	seq_printf(m, " softirq off events:            %11u\n", si2);
+	seq_printf(m, " redundant softirq ons:         %11u\n", sr1);
+	seq_printf(m, " redundant softirq offs:        %11u\n", sr2);
+#endif
+}
+
+static int lockdep_stats_show(struct seq_file *m, void *v)
+{
+	struct lock_class *class;
+	unsigned long nr_unused = 0, nr_uncategorized = 0,
+		      nr_irq_safe = 0, nr_irq_unsafe = 0,
+		      nr_softirq_safe = 0, nr_softirq_unsafe = 0,
+		      nr_hardirq_safe = 0, nr_hardirq_unsafe = 0,
+		      nr_irq_read_safe = 0, nr_irq_read_unsafe = 0,
+		      nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0,
+		      nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0,
+		      sum_forward_deps = 0, factor = 0;
+
+	list_for_each_entry(class, &all_lock_classes, lock_entry) {
+
+		if (class->usage_mask == 0)
+			nr_unused++;
+		if (class->usage_mask == LOCKF_USED)
+			nr_uncategorized++;
+		if (class->usage_mask & LOCKF_USED_IN_IRQ)
+			nr_irq_safe++;
+		if (class->usage_mask & LOCKF_ENABLED_IRQS)
+			nr_irq_unsafe++;
+		if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
+			nr_softirq_safe++;
+		if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS)
+			nr_softirq_unsafe++;
+		if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
+			nr_hardirq_safe++;
+		if (class->usage_mask & LOCKF_ENABLED_HARDIRQS)
+			nr_hardirq_unsafe++;
+		if (class->usage_mask & LOCKF_USED_IN_IRQ_READ)
+			nr_irq_read_safe++;
+		if (class->usage_mask & LOCKF_ENABLED_IRQS_READ)
+			nr_irq_read_unsafe++;
+		if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ)
+			nr_softirq_read_safe++;
+		if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
+			nr_softirq_read_unsafe++;
+		if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ)
+			nr_hardirq_read_safe++;
+		if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
+			nr_hardirq_read_unsafe++;
+
+		sum_forward_deps += count_forward_deps(class);
+	}
+#ifdef CONFIG_LOCKDEP_DEBUG
+	DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
+#endif
+	seq_printf(m, " lock-classes:                  %11lu [max: %lu]\n",
+			nr_lock_classes, MAX_LOCKDEP_KEYS);
+	seq_printf(m, " direct dependencies:           %11lu [max: %lu]\n",
+			nr_list_entries, MAX_LOCKDEP_ENTRIES);
+	seq_printf(m, " indirect dependencies:         %11lu\n",
+			sum_forward_deps);
+
+	/*
+	 * Total number of dependencies:
+	 *
+	 * All irq-safe locks may nest inside irq-unsafe locks,
+	 * plus all the other known dependencies:
+	 */
+	seq_printf(m, " all direct dependencies:       %11lu\n",
+			nr_irq_unsafe * nr_irq_safe +
+			nr_hardirq_unsafe * nr_hardirq_safe +
+			nr_list_entries);
+
+	/*
+	 * Estimated factor between direct and indirect
+	 * dependencies:
+	 */
+	if (nr_list_entries)
+		factor = sum_forward_deps / nr_list_entries;
+
+	seq_printf(m, " dependency chains:             %11lu [max: %lu]\n",
+			nr_lock_chains, MAX_LOCKDEP_CHAINS);
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	seq_printf(m, " in-hardirq chains:             %11u\n",
+			nr_hardirq_chains);
+	seq_printf(m, " in-softirq chains:             %11u\n",
+			nr_softirq_chains);
+#endif
+	seq_printf(m, " in-process chains:             %11u\n",
+			nr_process_chains);
+	seq_printf(m, " stack-trace entries:           %11lu [max: %lu]\n",
+			nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES);
+	seq_printf(m, " combined max dependencies:     %11u\n",
+			(nr_hardirq_chains + 1) *
+			(nr_softirq_chains + 1) *
+			(nr_process_chains + 1)
+	);
+	seq_printf(m, " hardirq-safe locks:            %11lu\n",
+			nr_hardirq_safe);
+	seq_printf(m, " hardirq-unsafe locks:          %11lu\n",
+			nr_hardirq_unsafe);
+	seq_printf(m, " softirq-safe locks:            %11lu\n",
+			nr_softirq_safe);
+	seq_printf(m, " softirq-unsafe locks:          %11lu\n",
+			nr_softirq_unsafe);
+	seq_printf(m, " irq-safe locks:                %11lu\n",
+			nr_irq_safe);
+	seq_printf(m, " irq-unsafe locks:              %11lu\n",
+			nr_irq_unsafe);
+
+	seq_printf(m, " hardirq-read-safe locks:       %11lu\n",
+			nr_hardirq_read_safe);
+	seq_printf(m, " hardirq-read-unsafe locks:     %11lu\n",
+			nr_hardirq_read_unsafe);
+	seq_printf(m, " softirq-read-safe locks:       %11lu\n",
+			nr_softirq_read_safe);
+	seq_printf(m, " softirq-read-unsafe locks:     %11lu\n",
+			nr_softirq_read_unsafe);
+	seq_printf(m, " irq-read-safe locks:           %11lu\n",
+			nr_irq_read_safe);
+	seq_printf(m, " irq-read-unsafe locks:         %11lu\n",
+			nr_irq_read_unsafe);
+
+	seq_printf(m, " uncategorized locks:           %11lu\n",
+			nr_uncategorized);
+	seq_printf(m, " unused locks:                  %11lu\n",
+			nr_unused);
+	seq_printf(m, " max locking depth:             %11u\n",
+			max_lockdep_depth);
+	seq_printf(m, " max recursion depth:           %11u\n",
+			max_recursion_depth);
+	lockdep_stats_debug_show(m);
+	seq_printf(m, " debug_locks:                   %11u\n",
+			debug_locks);
+
+	return 0;
+}
+
+static int lockdep_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, lockdep_stats_show, NULL);
+}
+
+static struct file_operations proc_lockdep_stats_operations = {
+	.open		= lockdep_stats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int __init lockdep_proc_init(void)
+{
+	struct proc_dir_entry *entry;
+
+	entry = create_proc_entry("lockdep", S_IRUSR, NULL);
+	if (entry)
+		entry->proc_fops = &proc_lockdep_operations;
+
+	entry = create_proc_entry("lockdep_stats", S_IRUSR, NULL);
+	if (entry)
+		entry->proc_fops = &proc_lockdep_stats_operations;
+
+	return 0;
+}
+
+__initcall(lockdep_proc_init);
+
-- 
cgit v0.10.2


From 4ea2176dfa714882e88180b474e4cbcd888b70af Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:53 -0700
Subject: [PATCH] lockdep: prove rwsem locking correctness

Use the lock validator framework to prove rwsem locking correctness.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-i386/rwsem.h b/include/asm-i386/rwsem.h
index 558804e..2f07601 100644
--- a/include/asm-i386/rwsem.h
+++ b/include/asm-i386/rwsem.h
@@ -40,6 +40,7 @@
 
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/lockdep.h>
 
 struct rwsem_waiter;
 
@@ -61,21 +62,34 @@ struct rw_semaphore {
 #define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
 	spinlock_t		wait_lock;
 	struct list_head	wait_list;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
 };
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
+#else
+# define __RWSEM_DEP_MAP_INIT(lockname)
+#endif
+
+
 #define __RWSEM_INITIALIZER(name) \
 { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \
-	}
+	__RWSEM_DEP_MAP_INIT(name) }
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
 
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
-	sem->count = RWSEM_UNLOCKED_VALUE;
-	spin_lock_init(&sem->wait_lock);
-	INIT_LIST_HEAD(&sem->wait_list);
-}
+extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
+			 struct lock_class_key *key);
+
+#define init_rwsem(sem)						\
+do {								\
+	static struct lock_class_key __key;			\
+								\
+	__init_rwsem((sem), #sem, &__key);			\
+} while (0)
 
 /*
  * lock for reading
@@ -128,7 +142,7 @@ LOCK_PREFIX	"  cmpxchgl  %2,%0\n\t"
 /*
  * lock for writing
  */
-static inline void __down_write(struct rw_semaphore *sem)
+static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
 {
 	int tmp;
 
@@ -152,6 +166,11 @@ LOCK_PREFIX	"  xadd      %%edx,(%%eax)\n\t" /* subtract 0x0000ffff, returns the
 		: "memory", "cc");
 }
 
+static inline void __down_write(struct rw_semaphore *sem)
+{
+	__down_write_nested(sem, 0);
+}
+
 /*
  * trylock for writing -- returns 1 if successful, 0 if contention
  */
diff --git a/include/asm-s390/rwsem.h b/include/asm-s390/rwsem.h
index 0422a08..13ec169 100644
--- a/include/asm-s390/rwsem.h
+++ b/include/asm-s390/rwsem.h
@@ -61,6 +61,9 @@ struct rw_semaphore {
 	signed long		count;
 	spinlock_t		wait_lock;
 	struct list_head	wait_list;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	dep_map;
+#endif
 };
 
 #ifndef __s390x__
@@ -80,8 +83,16 @@ struct rw_semaphore {
 /*
  * initialisation
  */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
+#else
+# define __RWSEM_DEP_MAP_INIT(lockname)
+#endif
+
 #define __RWSEM_INITIALIZER(name) \
-{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) }
+{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \
+  __RWSEM_DEP_MAP_INIT(name) }
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
@@ -93,6 +104,17 @@ static inline void init_rwsem(struct rw_semaphore *sem)
 	INIT_LIST_HEAD(&sem->wait_list);
 }
 
+extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
+			 struct lock_class_key *key);
+
+#define init_rwsem(sem)				\
+do {						\
+	static struct lock_class_key __key;	\
+						\
+	__init_rwsem((sem), #sem, &__key);	\
+} while (0)
+
+
 /*
  * lock for reading
  */
@@ -155,7 +177,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
 /*
  * lock for writing
  */
-static inline void __down_write(struct rw_semaphore *sem)
+static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
 {
 	signed long old, new, tmp;
 
@@ -181,6 +203,11 @@ static inline void __down_write(struct rw_semaphore *sem)
 		rwsem_down_write_failed(sem);
 }
 
+static inline void __down_write(struct rw_semaphore *sem)
+{
+	__down_write_nested(sem, 0);
+}
+
 /*
  * trylock for writing -- returns 1 if successful, 0 if contention
  */
diff --git a/include/asm-s390/semaphore.h b/include/asm-s390/semaphore.h
index 702cf43..32cdc69 100644
--- a/include/asm-s390/semaphore.h
+++ b/include/asm-s390/semaphore.h
@@ -37,7 +37,8 @@ struct semaphore {
 
 static inline void sema_init (struct semaphore *sem, int val)
 {
-	*sem = (struct semaphore) __SEMAPHORE_INITIALIZER((*sem),val);
+	atomic_set(&sem->count, val);
+	init_waitqueue_head(&sem->wait);
 }
 
 static inline void init_MUTEX (struct semaphore *sem)
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index d68afcc..ae1fcad 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -32,18 +32,37 @@ struct rw_semaphore {
 	__s32			activity;
 	spinlock_t		wait_lock;
 	struct list_head	wait_list;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
 };
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
+#else
+# define __RWSEM_DEP_MAP_INIT(lockname)
+#endif
+
 #define __RWSEM_INITIALIZER(name) \
-{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) }
+{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
 
-extern void FASTCALL(init_rwsem(struct rw_semaphore *sem));
+extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
+			 struct lock_class_key *key);
+
+#define init_rwsem(sem)						\
+do {								\
+	static struct lock_class_key __key;			\
+								\
+	__init_rwsem((sem), #sem, &__key);			\
+} while (0)
+
 extern void FASTCALL(__down_read(struct rw_semaphore *sem));
 extern int FASTCALL(__down_read_trylock(struct rw_semaphore *sem));
 extern void FASTCALL(__down_write(struct rw_semaphore *sem));
+extern void FASTCALL(__down_write_nested(struct rw_semaphore *sem, int subclass));
 extern int FASTCALL(__down_write_trylock(struct rw_semaphore *sem));
 extern void FASTCALL(__up_read(struct rw_semaphore *sem));
 extern void FASTCALL(__up_write(struct rw_semaphore *sem));
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 9358153..658afb3 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -27,64 +27,55 @@ struct rw_semaphore;
 /*
  * lock for reading
  */
-static inline void down_read(struct rw_semaphore *sem)
-{
-	might_sleep();
-	__down_read(sem);
-}
+extern void down_read(struct rw_semaphore *sem);
 
 /*
  * trylock for reading -- returns 1 if successful, 0 if contention
  */
-static inline int down_read_trylock(struct rw_semaphore *sem)
-{
-	int ret;
-	ret = __down_read_trylock(sem);
-	return ret;
-}
+extern int down_read_trylock(struct rw_semaphore *sem);
 
 /*
  * lock for writing
  */
-static inline void down_write(struct rw_semaphore *sem)
-{
-	might_sleep();
-	__down_write(sem);
-}
+extern void down_write(struct rw_semaphore *sem);
 
 /*
  * trylock for writing -- returns 1 if successful, 0 if contention
  */
-static inline int down_write_trylock(struct rw_semaphore *sem)
-{
-	int ret;
-	ret = __down_write_trylock(sem);
-	return ret;
-}
+extern int down_write_trylock(struct rw_semaphore *sem);
 
 /*
  * release a read lock
  */
-static inline void up_read(struct rw_semaphore *sem)
-{
-	__up_read(sem);
-}
+extern void up_read(struct rw_semaphore *sem);
 
 /*
  * release a write lock
  */
-static inline void up_write(struct rw_semaphore *sem)
-{
-	__up_write(sem);
-}
+extern void up_write(struct rw_semaphore *sem);
 
 /*
  * downgrade write lock to read lock
  */
-static inline void downgrade_write(struct rw_semaphore *sem)
-{
-	__downgrade_write(sem);
-}
+extern void downgrade_write(struct rw_semaphore *sem);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+/*
+ * nested locking:
+ */
+extern void down_read_nested(struct rw_semaphore *sem, int subclass);
+extern void down_write_nested(struct rw_semaphore *sem, int subclass);
+/*
+ * Take/release a lock when not the owner will release it:
+ */
+extern void down_read_non_owner(struct rw_semaphore *sem);
+extern void up_read_non_owner(struct rw_semaphore *sem);
+#else
+# define down_read_nested(sem, subclass)		down_read(sem)
+# define down_write_nested(sem, subclass)	down_write(sem)
+# define down_read_non_owner(sem)		down_read(sem)
+# define up_read_non_owner(sem)			up_read(sem)
+#endif
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_RWSEM_H */
diff --git a/kernel/Makefile b/kernel/Makefile
index 3dd994e..df6ef32 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
-	    hrtimer.o
+	    hrtimer.o rwsem.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
index 790a99b..291ded5 100644
--- a/kernel/rwsem.c
+++ b/kernel/rwsem.c
@@ -103,3 +103,45 @@ void downgrade_write(struct rw_semaphore *sem)
 }
 
 EXPORT_SYMBOL(downgrade_write);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+void down_read_nested(struct rw_semaphore *sem, int subclass)
+{
+	might_sleep();
+	rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
+
+	__down_read(sem);
+}
+
+EXPORT_SYMBOL(down_read_nested);
+
+void down_read_non_owner(struct rw_semaphore *sem)
+{
+	might_sleep();
+
+	__down_read(sem);
+}
+
+EXPORT_SYMBOL(down_read_non_owner);
+
+void down_write_nested(struct rw_semaphore *sem, int subclass)
+{
+	might_sleep();
+	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
+
+	__down_write_nested(sem, subclass);
+}
+
+EXPORT_SYMBOL(down_write_nested);
+
+void up_read_non_owner(struct rw_semaphore *sem)
+{
+	__up_read(sem);
+}
+
+EXPORT_SYMBOL(up_read_non_owner);
+
+#endif
+
+
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index 03b6097..db4fed7 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -20,8 +20,16 @@ struct rwsem_waiter {
 /*
  * initialise the semaphore
  */
-void fastcall init_rwsem(struct rw_semaphore *sem)
+void __init_rwsem(struct rw_semaphore *sem, const char *name,
+		  struct lock_class_key *key)
 {
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	/*
+	 * Make sure we are not reinitializing a held semaphore:
+	 */
+	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
+	lockdep_init_map(&sem->dep_map, name, key);
+#endif
 	sem->activity = 0;
 	spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
@@ -183,7 +191,7 @@ int fastcall __down_read_trylock(struct rw_semaphore *sem)
  * get a write lock on the semaphore
  * - we increment the waiting count anyway to indicate an exclusive lock
  */
-void fastcall __sched __down_write(struct rw_semaphore *sem)
+void fastcall __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
 {
 	struct rwsem_waiter waiter;
 	struct task_struct *tsk;
@@ -223,6 +231,11 @@ void fastcall __sched __down_write(struct rw_semaphore *sem)
 	;
 }
 
+void fastcall __sched __down_write(struct rw_semaphore *sem)
+{
+	__down_write_nested(sem, 0);
+}
+
 /*
  * trylock for writing -- returns 1 if successful, 0 if contention
  */
@@ -292,9 +305,10 @@ void fastcall __downgrade_write(struct rw_semaphore *sem)
 	spin_unlock_irqrestore(&sem->wait_lock, flags);
 }
 
-EXPORT_SYMBOL(init_rwsem);
+EXPORT_SYMBOL(__init_rwsem);
 EXPORT_SYMBOL(__down_read);
 EXPORT_SYMBOL(__down_read_trylock);
+EXPORT_SYMBOL(__down_write_nested);
 EXPORT_SYMBOL(__down_write);
 EXPORT_SYMBOL(__down_write_trylock);
 EXPORT_SYMBOL(__up_read);
diff --git a/lib/rwsem.c b/lib/rwsem.c
index bae5972..b322421 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -8,6 +8,26 @@
 #include <linux/init.h>
 #include <linux/module.h>
 
+/*
+ * Initialize an rwsem:
+ */
+void __init_rwsem(struct rw_semaphore *sem, const char *name,
+		  struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	/*
+	 * Make sure we are not reinitializing a held semaphore:
+	 */
+	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
+	lockdep_init_map(&sem->dep_map, name, key);
+#endif
+	sem->count = RWSEM_UNLOCKED_VALUE;
+	spin_lock_init(&sem->wait_lock);
+	INIT_LIST_HEAD(&sem->wait_list);
+}
+
+EXPORT_SYMBOL(__init_rwsem);
+
 struct rwsem_waiter {
 	struct list_head list;
 	struct task_struct *task;
-- 
cgit v0.10.2


From 8a25d5debff2daee280e83e09d8c25d67c26a972 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:54 -0700
Subject: [PATCH] lockdep: prove spinlock rwlock locking correctness

Use the lock validator framework to prove spinlock and rwlock locking
correctness.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h
index 7e29b51..87c40f8 100644
--- a/include/asm-i386/spinlock.h
+++ b/include/asm-i386/spinlock.h
@@ -68,6 +68,12 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 		"=m" (lock->slock) : : "memory");
 }
 
+/*
+ * It is easier for the lock validator if interrupts are not re-enabled
+ * in the middle of a lock-acquire. This is a performance feature anyway
+ * so we turn it off:
+ */
+#ifndef CONFIG_PROVE_LOCKING
 static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
 {
 	alternative_smp(
@@ -75,6 +81,7 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla
 		__raw_spin_lock_string_up,
 		"=m" (lock->slock) : "r" (flags) : "memory");
 }
+#endif
 
 static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 {
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index ae23bee..31473db 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -82,14 +82,40 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
 /*
  * Pull the __raw*() functions/declarations (UP-nondebug doesnt need them):
  */
-#if defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
 # include <asm/spinlock.h>
 #else
 # include <linux/spinlock_up.h>
 #endif
 
-#define spin_lock_init(lock)	do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0)
-#define rwlock_init(lock)	do { *(lock) = RW_LOCK_UNLOCKED; } while (0)
+#ifdef CONFIG_DEBUG_SPINLOCK
+  extern void __spin_lock_init(spinlock_t *lock, const char *name,
+			       struct lock_class_key *key);
+# define spin_lock_init(lock)					\
+do {								\
+	static struct lock_class_key __key;			\
+								\
+	__spin_lock_init((lock), #lock, &__key);		\
+} while (0)
+
+#else
+# define spin_lock_init(lock)					\
+	do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0)
+#endif
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+  extern void __rwlock_init(rwlock_t *lock, const char *name,
+			    struct lock_class_key *key);
+# define rwlock_init(lock)					\
+do {								\
+	static struct lock_class_key __key;			\
+								\
+	__rwlock_init((lock), #lock, &__key);			\
+} while (0)
+#else
+# define rwlock_init(lock)					\
+	do { *(lock) = RW_LOCK_UNLOCKED; } while (0)
+#endif
 
 #define spin_is_locked(lock)	__raw_spin_is_locked(&(lock)->raw_lock)
 
@@ -113,7 +139,6 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
 #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock)
  extern int _raw_spin_trylock(spinlock_t *lock);
  extern void _raw_spin_unlock(spinlock_t *lock);
-
  extern void _raw_read_lock(rwlock_t *lock);
  extern int _raw_read_trylock(rwlock_t *lock);
  extern void _raw_read_unlock(rwlock_t *lock);
@@ -121,17 +146,17 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
  extern int _raw_write_trylock(rwlock_t *lock);
  extern void _raw_write_unlock(rwlock_t *lock);
 #else
-# define _raw_spin_unlock(lock)		__raw_spin_unlock(&(lock)->raw_lock)
-# define _raw_spin_trylock(lock)	__raw_spin_trylock(&(lock)->raw_lock)
 # define _raw_spin_lock(lock)		__raw_spin_lock(&(lock)->raw_lock)
 # define _raw_spin_lock_flags(lock, flags) \
 		__raw_spin_lock_flags(&(lock)->raw_lock, *(flags))
+# define _raw_spin_trylock(lock)	__raw_spin_trylock(&(lock)->raw_lock)
+# define _raw_spin_unlock(lock)		__raw_spin_unlock(&(lock)->raw_lock)
 # define _raw_read_lock(rwlock)		__raw_read_lock(&(rwlock)->raw_lock)
-# define _raw_write_lock(rwlock)	__raw_write_lock(&(rwlock)->raw_lock)
-# define _raw_read_unlock(rwlock)	__raw_read_unlock(&(rwlock)->raw_lock)
-# define _raw_write_unlock(rwlock)	__raw_write_unlock(&(rwlock)->raw_lock)
 # define _raw_read_trylock(rwlock)	__raw_read_trylock(&(rwlock)->raw_lock)
+# define _raw_read_unlock(rwlock)	__raw_read_unlock(&(rwlock)->raw_lock)
+# define _raw_write_lock(rwlock)	__raw_write_lock(&(rwlock)->raw_lock)
 # define _raw_write_trylock(rwlock)	__raw_write_trylock(&(rwlock)->raw_lock)
+# define _raw_write_unlock(rwlock)	__raw_write_unlock(&(rwlock)->raw_lock)
 #endif
 
 #define read_can_lock(rwlock)		__raw_read_can_lock(&(rwlock)->raw_lock)
@@ -147,6 +172,13 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
 #define write_trylock(lock)		__cond_lock(_write_trylock(lock))
 
 #define spin_lock(lock)			_spin_lock(lock)
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass)
+#else
+# define spin_lock_nested(lock, subclass) _spin_lock(lock)
+#endif
+
 #define write_lock(lock)		_write_lock(lock)
 #define read_lock(lock)			_read_lock(lock)
 
@@ -172,21 +204,18 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
 /*
  * We inline the unlock functions in the nondebug case:
  */
-#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP)
+#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \
+	!defined(CONFIG_SMP)
 # define spin_unlock(lock)		_spin_unlock(lock)
 # define read_unlock(lock)		_read_unlock(lock)
 # define write_unlock(lock)		_write_unlock(lock)
-#else
-# define spin_unlock(lock)		__raw_spin_unlock(&(lock)->raw_lock)
-# define read_unlock(lock)		__raw_read_unlock(&(lock)->raw_lock)
-# define write_unlock(lock)		__raw_write_unlock(&(lock)->raw_lock)
-#endif
-
-#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP)
 # define spin_unlock_irq(lock)		_spin_unlock_irq(lock)
 # define read_unlock_irq(lock)		_read_unlock_irq(lock)
 # define write_unlock_irq(lock)		_write_unlock_irq(lock)
 #else
+# define spin_unlock(lock)		__raw_spin_unlock(&(lock)->raw_lock)
+# define read_unlock(lock)		__raw_read_unlock(&(lock)->raw_lock)
+# define write_unlock(lock)		__raw_write_unlock(&(lock)->raw_lock)
 # define spin_unlock_irq(lock) \
     do { __raw_spin_unlock(&(lock)->raw_lock); local_irq_enable(); } while (0)
 # define read_unlock_irq(lock) \
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 78e6989..b2c4f82 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -20,6 +20,8 @@ int in_lock_functions(unsigned long addr);
 #define assert_spin_locked(x)	BUG_ON(!spin_is_locked(x))
 
 void __lockfunc _spin_lock(spinlock_t *lock)		__acquires(spinlock_t);
+void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
+							__acquires(spinlock_t);
 void __lockfunc _read_lock(rwlock_t *lock)		__acquires(rwlock_t);
 void __lockfunc _write_lock(rwlock_t *lock)		__acquires(rwlock_t);
 void __lockfunc _spin_lock_bh(spinlock_t *lock)		__acquires(spinlock_t);
diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h
index cd81cee..67faa04 100644
--- a/include/linux/spinlock_api_up.h
+++ b/include/linux/spinlock_api_up.h
@@ -49,6 +49,7 @@
   do { local_irq_restore(flags); __UNLOCK(lock); } while (0)
 
 #define _spin_lock(lock)			__LOCK(lock)
+#define _spin_lock_nested(lock, subclass)	__LOCK(lock)
 #define _read_lock(lock)			__LOCK(lock)
 #define _write_lock(lock)			__LOCK(lock)
 #define _spin_lock_bh(lock)			__LOCK_BH(lock)
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index f5d4ed7..dc5fb69 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -9,6 +9,8 @@
  * Released under the General Public License (GPL).
  */
 
+#include <linux/lockdep.h>
+
 #if defined(CONFIG_SMP)
 # include <asm/spinlock_types.h>
 #else
@@ -24,6 +26,9 @@ typedef struct {
 	unsigned int magic, owner_cpu;
 	void *owner;
 #endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
 } spinlock_t;
 
 #define SPINLOCK_MAGIC		0xdead4ead
@@ -37,28 +42,47 @@ typedef struct {
 	unsigned int magic, owner_cpu;
 	void *owner;
 #endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
 } rwlock_t;
 
 #define RWLOCK_MAGIC		0xdeaf1eed
 
 #define SPINLOCK_OWNER_INIT	((void *)-1L)
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define SPIN_DEP_MAP_INIT(lockname)	.dep_map = { .name = #lockname }
+#else
+# define SPIN_DEP_MAP_INIT(lockname)
+#endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define RW_DEP_MAP_INIT(lockname)	.dep_map = { .name = #lockname }
+#else
+# define RW_DEP_MAP_INIT(lockname)
+#endif
+
 #ifdef CONFIG_DEBUG_SPINLOCK
 # define __SPIN_LOCK_UNLOCKED(lockname)					\
 	(spinlock_t)	{	.raw_lock = __RAW_SPIN_LOCK_UNLOCKED,	\
 				.magic = SPINLOCK_MAGIC,		\
 				.owner = SPINLOCK_OWNER_INIT,		\
-				.owner_cpu = -1 }
+				.owner_cpu = -1,			\
+				SPIN_DEP_MAP_INIT(lockname) }
 #define __RW_LOCK_UNLOCKED(lockname)					\
 	(rwlock_t)	{	.raw_lock = __RAW_RW_LOCK_UNLOCKED,	\
 				.magic = RWLOCK_MAGIC,			\
 				.owner = SPINLOCK_OWNER_INIT,		\
-				.owner_cpu = -1 }
+				.owner_cpu = -1,			\
+				RW_DEP_MAP_INIT(lockname) }
 #else
 # define __SPIN_LOCK_UNLOCKED(lockname) \
-	(spinlock_t)	{	.raw_lock = __RAW_SPIN_LOCK_UNLOCKED }
+	(spinlock_t)	{	.raw_lock = __RAW_SPIN_LOCK_UNLOCKED,	\
+				SPIN_DEP_MAP_INIT(lockname) }
 #define __RW_LOCK_UNLOCKED(lockname) \
-	(rwlock_t)	{	.raw_lock = __RAW_RW_LOCK_UNLOCKED }
+	(rwlock_t)	{	.raw_lock = __RAW_RW_LOCK_UNLOCKED,	\
+				RW_DEP_MAP_INIT(lockname) }
 #endif
 
 #define SPIN_LOCK_UNLOCKED	__SPIN_LOCK_UNLOCKED(old_style_spin_init)
diff --git a/include/linux/spinlock_types_up.h b/include/linux/spinlock_types_up.h
index 04135b0..27644af 100644
--- a/include/linux/spinlock_types_up.h
+++ b/include/linux/spinlock_types_up.h
@@ -12,10 +12,14 @@
  * Released under the General Public License (GPL).
  */
 
-#ifdef CONFIG_DEBUG_SPINLOCK
+#if defined(CONFIG_DEBUG_SPINLOCK) || \
+	defined(CONFIG_DEBUG_LOCK_ALLOC)
 
 typedef struct {
 	volatile unsigned int slock;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
 } raw_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED { 1 }
@@ -30,6 +34,9 @@ typedef struct { } raw_spinlock_t;
 
 typedef struct {
 	/* no debug version on UP */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
 } raw_rwlock_t;
 
 #define __RAW_RW_LOCK_UNLOCKED { }
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 31accf2..ea54c4c 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -18,7 +18,6 @@
  */
 
 #ifdef CONFIG_DEBUG_SPINLOCK
-
 #define __raw_spin_is_locked(x)		((x)->slock == 0)
 
 static inline void __raw_spin_lock(raw_spinlock_t *lock)
diff --git a/kernel/Makefile b/kernel/Makefile
index df6ef32..47dbcd5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
 obj-$(CONFIG_SMP) += cpu.o spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
+obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
diff --git a/kernel/sched.c b/kernel/sched.c
index 9118299..ae4db01 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -308,6 +308,13 @@ static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
 	/* this is a valid case when another task releases the spinlock */
 	rq->lock.owner = current;
 #endif
+	/*
+	 * If we are tracking spinlock dependencies then we have to
+	 * fix up the runqueue lock - which gets 'carried over' from
+	 * prev into current:
+	 */
+	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
+
 	spin_unlock_irq(&rq->lock);
 }
 
@@ -1778,6 +1785,7 @@ task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next)
 		WARN_ON(rq->prev_mm);
 		rq->prev_mm = oldmm;
 	}
+	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
 
 	/* Here we just switch the register state and the stack. */
 	switch_to(prev, next, prev);
@@ -4384,6 +4392,7 @@ asmlinkage long sys_sched_yield(void)
 	 * no need to preempt or enable interrupts:
 	 */
 	__release(rq->lock);
+	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
 	_raw_spin_unlock(&rq->lock);
 	preempt_enable_no_resched();
 
@@ -4447,6 +4456,7 @@ int cond_resched_lock(spinlock_t *lock)
 		spin_lock(lock);
 	}
 	if (need_resched() && __resched_legal()) {
+		spin_release(&lock->dep_map, 1, _THIS_IP_);
 		_raw_spin_unlock(lock);
 		preempt_enable_no_resched();
 		__cond_resched();
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index b31e54e..bfd6ad9 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -13,6 +13,7 @@
 #include <linux/preempt.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
+#include <linux/debug_locks.h>
 #include <linux/module.h>
 
 /*
@@ -29,8 +30,10 @@ EXPORT_SYMBOL(generic__raw_read_trylock);
 int __lockfunc _spin_trylock(spinlock_t *lock)
 {
 	preempt_disable();
-	if (_raw_spin_trylock(lock))
+	if (_raw_spin_trylock(lock)) {
+		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
 		return 1;
+	}
 	
 	preempt_enable();
 	return 0;
@@ -40,8 +43,10 @@ EXPORT_SYMBOL(_spin_trylock);
 int __lockfunc _read_trylock(rwlock_t *lock)
 {
 	preempt_disable();
-	if (_raw_read_trylock(lock))
+	if (_raw_read_trylock(lock)) {
+		rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_);
 		return 1;
+	}
 
 	preempt_enable();
 	return 0;
@@ -51,19 +56,28 @@ EXPORT_SYMBOL(_read_trylock);
 int __lockfunc _write_trylock(rwlock_t *lock)
 {
 	preempt_disable();
-	if (_raw_write_trylock(lock))
+	if (_raw_write_trylock(lock)) {
+		rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_);
 		return 1;
+	}
 
 	preempt_enable();
 	return 0;
 }
 EXPORT_SYMBOL(_write_trylock);
 
-#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP)
+/*
+ * If lockdep is enabled then we use the non-preemption spin-ops
+ * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
+ * not re-enabled during lock-acquire (which the preempt-spin-ops do):
+ */
+#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \
+	defined(CONFIG_PROVE_LOCKING)
 
 void __lockfunc _read_lock(rwlock_t *lock)
 {
 	preempt_disable();
+	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_read_lock(lock);
 }
 EXPORT_SYMBOL(_read_lock);
@@ -74,7 +88,17 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
 
 	local_irq_save(flags);
 	preempt_disable();
+	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+	/*
+	 * On lockdep we dont want the hand-coded irq-enable of
+	 * _raw_spin_lock_flags() code, because lockdep assumes
+	 * that interrupts are not re-enabled during lock-acquire:
+	 */
+#ifdef CONFIG_PROVE_LOCKING
+	_raw_spin_lock(lock);
+#else
 	_raw_spin_lock_flags(lock, &flags);
+#endif
 	return flags;
 }
 EXPORT_SYMBOL(_spin_lock_irqsave);
@@ -83,6 +107,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock)
 {
 	local_irq_disable();
 	preempt_disable();
+	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_spin_lock(lock);
 }
 EXPORT_SYMBOL(_spin_lock_irq);
@@ -91,6 +116,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock)
 {
 	local_bh_disable();
 	preempt_disable();
+	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_spin_lock(lock);
 }
 EXPORT_SYMBOL(_spin_lock_bh);
@@ -101,6 +127,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
 
 	local_irq_save(flags);
 	preempt_disable();
+	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_read_lock(lock);
 	return flags;
 }
@@ -110,6 +137,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock)
 {
 	local_irq_disable();
 	preempt_disable();
+	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_read_lock(lock);
 }
 EXPORT_SYMBOL(_read_lock_irq);
@@ -118,6 +146,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock)
 {
 	local_bh_disable();
 	preempt_disable();
+	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_read_lock(lock);
 }
 EXPORT_SYMBOL(_read_lock_bh);
@@ -128,6 +157,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
 
 	local_irq_save(flags);
 	preempt_disable();
+	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_write_lock(lock);
 	return flags;
 }
@@ -137,6 +167,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock)
 {
 	local_irq_disable();
 	preempt_disable();
+	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_write_lock(lock);
 }
 EXPORT_SYMBOL(_write_lock_irq);
@@ -145,6 +176,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock)
 {
 	local_bh_disable();
 	preempt_disable();
+	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_write_lock(lock);
 }
 EXPORT_SYMBOL(_write_lock_bh);
@@ -152,6 +184,7 @@ EXPORT_SYMBOL(_write_lock_bh);
 void __lockfunc _spin_lock(spinlock_t *lock)
 {
 	preempt_disable();
+	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_spin_lock(lock);
 }
 
@@ -160,6 +193,7 @@ EXPORT_SYMBOL(_spin_lock);
 void __lockfunc _write_lock(rwlock_t *lock)
 {
 	preempt_disable();
+	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
 	_raw_write_lock(lock);
 }
 
@@ -255,8 +289,22 @@ BUILD_LOCK_OPS(write, rwlock);
 
 #endif /* CONFIG_PREEMPT */
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
+{
+	preempt_disable();
+	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+	_raw_spin_lock(lock);
+}
+
+EXPORT_SYMBOL(_spin_lock_nested);
+
+#endif
+
 void __lockfunc _spin_unlock(spinlock_t *lock)
 {
+	spin_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_spin_unlock(lock);
 	preempt_enable();
 }
@@ -264,6 +312,7 @@ EXPORT_SYMBOL(_spin_unlock);
 
 void __lockfunc _write_unlock(rwlock_t *lock)
 {
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_write_unlock(lock);
 	preempt_enable();
 }
@@ -271,6 +320,7 @@ EXPORT_SYMBOL(_write_unlock);
 
 void __lockfunc _read_unlock(rwlock_t *lock)
 {
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_read_unlock(lock);
 	preempt_enable();
 }
@@ -278,6 +328,7 @@ EXPORT_SYMBOL(_read_unlock);
 
 void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
 {
+	spin_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_spin_unlock(lock);
 	local_irq_restore(flags);
 	preempt_enable();
@@ -286,6 +337,7 @@ EXPORT_SYMBOL(_spin_unlock_irqrestore);
 
 void __lockfunc _spin_unlock_irq(spinlock_t *lock)
 {
+	spin_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_spin_unlock(lock);
 	local_irq_enable();
 	preempt_enable();
@@ -294,14 +346,16 @@ EXPORT_SYMBOL(_spin_unlock_irq);
 
 void __lockfunc _spin_unlock_bh(spinlock_t *lock)
 {
+	spin_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_spin_unlock(lock);
 	preempt_enable_no_resched();
-	local_bh_enable();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
 }
 EXPORT_SYMBOL(_spin_unlock_bh);
 
 void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_read_unlock(lock);
 	local_irq_restore(flags);
 	preempt_enable();
@@ -310,6 +364,7 @@ EXPORT_SYMBOL(_read_unlock_irqrestore);
 
 void __lockfunc _read_unlock_irq(rwlock_t *lock)
 {
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_read_unlock(lock);
 	local_irq_enable();
 	preempt_enable();
@@ -318,14 +373,16 @@ EXPORT_SYMBOL(_read_unlock_irq);
 
 void __lockfunc _read_unlock_bh(rwlock_t *lock)
 {
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_read_unlock(lock);
 	preempt_enable_no_resched();
-	local_bh_enable();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
 }
 EXPORT_SYMBOL(_read_unlock_bh);
 
 void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
 {
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_write_unlock(lock);
 	local_irq_restore(flags);
 	preempt_enable();
@@ -334,6 +391,7 @@ EXPORT_SYMBOL(_write_unlock_irqrestore);
 
 void __lockfunc _write_unlock_irq(rwlock_t *lock)
 {
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_write_unlock(lock);
 	local_irq_enable();
 	preempt_enable();
@@ -342,9 +400,10 @@ EXPORT_SYMBOL(_write_unlock_irq);
 
 void __lockfunc _write_unlock_bh(rwlock_t *lock)
 {
+	rwlock_release(&lock->dep_map, 1, _RET_IP_);
 	_raw_write_unlock(lock);
 	preempt_enable_no_resched();
-	local_bh_enable();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
 }
 EXPORT_SYMBOL(_write_unlock_bh);
 
@@ -352,11 +411,13 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
 {
 	local_bh_disable();
 	preempt_disable();
-	if (_raw_spin_trylock(lock))
+	if (_raw_spin_trylock(lock)) {
+		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
 		return 1;
+	}
 
 	preempt_enable_no_resched();
-	local_bh_enable();
+	local_bh_enable_ip((unsigned long)__builtin_return_address(0));
 	return 0;
 }
 EXPORT_SYMBOL(_spin_trylock_bh);
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index e713e86..e0fdfdd 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -177,7 +177,12 @@ static inline void __lock_kernel(void)
 
 static inline void __unlock_kernel(void)
 {
-	spin_unlock(&kernel_flag);
+	/*
+	 * the BKL is not covered by lockdep, so we open-code the
+	 * unlocking sequence (and thus avoid the dep-chain ops):
+	 */
+	_raw_spin_unlock(&kernel_flag);
+	preempt_enable();
 }
 
 /*
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 3de2ccf..3d9c4dc 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -12,6 +12,42 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 
+void __spin_lock_init(spinlock_t *lock, const char *name,
+		      struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	/*
+	 * Make sure we are not reinitializing a held lock:
+	 */
+	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+	lockdep_init_map(&lock->dep_map, name, key);
+#endif
+	lock->raw_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+	lock->magic = SPINLOCK_MAGIC;
+	lock->owner = SPINLOCK_OWNER_INIT;
+	lock->owner_cpu = -1;
+}
+
+EXPORT_SYMBOL(__spin_lock_init);
+
+void __rwlock_init(rwlock_t *lock, const char *name,
+		   struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	/*
+	 * Make sure we are not reinitializing a held lock:
+	 */
+	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+	lockdep_init_map(&lock->dep_map, name, key);
+#endif
+	lock->raw_lock = (raw_rwlock_t) __RAW_RW_LOCK_UNLOCKED;
+	lock->magic = RWLOCK_MAGIC;
+	lock->owner = SPINLOCK_OWNER_INIT;
+	lock->owner_cpu = -1;
+}
+
+EXPORT_SYMBOL(__rwlock_init);
+
 static void spin_bug(spinlock_t *lock, const char *msg)
 {
 	struct task_struct *owner = NULL;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index da44fab..283a722 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -205,7 +205,8 @@ __u8 ip_tos2prio[16] = {
 struct rt_hash_bucket {
 	struct rtable	*chain;
 };
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
+	defined(CONFIG_PROVE_LOCKING)
 /*
  * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
  * The size of this table is a power of two and depends on the number of CPUS.
-- 
cgit v0.10.2


From ef5d4707b9065c0cf8a69fa3716893f3b75201ba Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:55 -0700
Subject: [PATCH] lockdep: prove mutex locking correctness

Use the lock validator framework to prove mutex locking correctness.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h
index 70a2609..2537285 100644
--- a/include/linux/mutex-debug.h
+++ b/include/linux/mutex-debug.h
@@ -2,6 +2,7 @@
 #define __LINUX_MUTEX_DEBUG_H
 
 #include <linux/linkage.h>
+#include <linux/lockdep.h>
 
 /*
  * Mutexes - debugging helpers:
@@ -10,7 +11,12 @@
 #define __DEBUG_MUTEX_INITIALIZER(lockname)				\
 	, .magic = &lockname
 
-#define mutex_init(sem)		__mutex_init(sem, __FILE__":"#sem)
+#define mutex_init(mutex)						\
+do {									\
+	static struct lock_class_key __key;				\
+									\
+	__mutex_init((mutex), #mutex, &__key);				\
+} while (0)
 
 extern void FASTCALL(mutex_destroy(struct mutex *lock));
 
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index caafecd..27c48da 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -13,6 +13,7 @@
 #include <linux/list.h>
 #include <linux/spinlock_types.h>
 #include <linux/linkage.h>
+#include <linux/lockdep.h>
 
 #include <asm/atomic.h>
 
@@ -53,6 +54,9 @@ struct mutex {
 	const char 		*name;
 	void			*magic;
 #endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	dep_map;
+#endif
 };
 
 /*
@@ -72,20 +76,34 @@ struct mutex_waiter {
 # include <linux/mutex-debug.h>
 #else
 # define __DEBUG_MUTEX_INITIALIZER(lockname)
-# define mutex_init(mutex)			__mutex_init(mutex, NULL)
+# define mutex_init(mutex) \
+do {							\
+	static struct lock_class_key __key;		\
+							\
+	__mutex_init((mutex), #mutex, &__key);		\
+} while (0)
 # define mutex_destroy(mutex)				do { } while (0)
 #endif
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
+		, .dep_map = { .name = #lockname }
+#else
+# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
+#endif
+
 #define __MUTEX_INITIALIZER(lockname) \
 		{ .count = ATOMIC_INIT(1) \
 		, .wait_lock = SPIN_LOCK_UNLOCKED \
 		, .wait_list = LIST_HEAD_INIT(lockname.wait_list) \
-		__DEBUG_MUTEX_INITIALIZER(lockname) }
+		__DEBUG_MUTEX_INITIALIZER(lockname) \
+		__DEP_MAP_MUTEX_INITIALIZER(lockname) }
 
 #define DEFINE_MUTEX(mutexname) \
 	struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
 
-extern void fastcall __mutex_init(struct mutex *lock, const char *name);
+extern void __mutex_init(struct mutex *lock, const char *name,
+			 struct lock_class_key *key);
 
 /***
  * mutex_is_locked - is the mutex locked
@@ -104,6 +122,13 @@ static inline int fastcall mutex_is_locked(struct mutex *lock)
  */
 extern void fastcall mutex_lock(struct mutex *lock);
 extern int fastcall mutex_lock_interruptible(struct mutex *lock);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
+#else
+# define mutex_lock_nested(lock, subclass) mutex_lock(lock)
+#endif
+
 /*
  * NOTE: mutex_trylock() follows the spin_trylock() convention,
  *       not the down_trylock() convention!
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index 5569766..e3203c6 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -83,12 +83,16 @@ void debug_mutex_unlock(struct mutex *lock)
 	DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
 }
 
-void debug_mutex_init(struct mutex *lock, const char *name)
+void debug_mutex_init(struct mutex *lock, const char *name,
+		      struct lock_class_key *key)
 {
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
 	/*
 	 * Make sure we are not reinitializing a held lock:
 	 */
 	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+	lockdep_init_map(&lock->dep_map, name, key);
+#endif
 	lock->owner = NULL;
 	lock->magic = lock;
 }
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 43a50c1..8c71cf7 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -39,13 +39,14 @@
  *
  * It is not allowed to initialize an already locked mutex.
  */
-__always_inline void fastcall __mutex_init(struct mutex *lock, const char *name)
+void
+__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 {
 	atomic_set(&lock->count, 1);
 	spin_lock_init(&lock->wait_lock);
 	INIT_LIST_HEAD(&lock->wait_list);
 
-	debug_mutex_init(lock, name);
+	debug_mutex_init(lock, name, key);
 }
 
 EXPORT_SYMBOL(__mutex_init);
@@ -131,6 +132,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
 	spin_lock_mutex(&lock->wait_lock, flags);
 
 	debug_mutex_lock_common(lock, &waiter);
+	mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
 	debug_mutex_add_waiter(lock, &waiter, task->thread_info);
 
 	/* add waiting tasks to the end of the waitqueue (FIFO): */
@@ -158,6 +160,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
 		if (unlikely(state == TASK_INTERRUPTIBLE &&
 						signal_pending(task))) {
 			mutex_remove_waiter(lock, &waiter, task->thread_info);
+			mutex_release(&lock->dep_map, 1, _RET_IP_);
 			spin_unlock_mutex(&lock->wait_lock, flags);
 
 			debug_mutex_free_waiter(&waiter);
@@ -194,16 +197,28 @@ __mutex_lock_slowpath(atomic_t *lock_count)
 	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
 }
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void __sched
+mutex_lock_nested(struct mutex *lock, unsigned int subclass)
+{
+	might_sleep();
+	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
+}
+
+EXPORT_SYMBOL_GPL(mutex_lock_nested);
+#endif
+
 /*
  * Release the lock, slowpath:
  */
 static fastcall inline void
-__mutex_unlock_common_slowpath(atomic_t *lock_count)
+__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
 {
 	struct mutex *lock = container_of(lock_count, struct mutex, count);
 	unsigned long flags;
 
 	spin_lock_mutex(&lock->wait_lock, flags);
+	mutex_release(&lock->dep_map, nested, _RET_IP_);
 	debug_mutex_unlock(lock);
 
 	/*
@@ -236,7 +251,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count)
 static fastcall noinline void
 __mutex_unlock_slowpath(atomic_t *lock_count)
 {
-	__mutex_unlock_common_slowpath(lock_count);
+	__mutex_unlock_common_slowpath(lock_count, 1);
 }
 
 /*
@@ -287,9 +302,10 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
 	spin_lock_mutex(&lock->wait_lock, flags);
 
 	prev = atomic_xchg(&lock->count, -1);
-	if (likely(prev == 1))
+	if (likely(prev == 1)) {
 		debug_mutex_set_owner(lock, current_thread_info());
-
+		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+	}
 	/* Set it back to 0 if there are no waiters: */
 	if (likely(list_empty(&lock->wait_list)))
 		atomic_set(&lock->count, 0);
diff --git a/kernel/mutex.h b/kernel/mutex.h
index aeb2d91..a075daf 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -22,7 +22,7 @@
 #define debug_mutex_free_waiter(waiter)			do { } while (0)
 #define debug_mutex_add_waiter(lock, waiter, ti)	do { } while (0)
 #define debug_mutex_unlock(lock)			do { } while (0)
-#define debug_mutex_init(lock, name)			do { } while (0)
+#define debug_mutex_init(lock, name, key)		do { } while (0)
 
 static inline void
 debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
-- 
cgit v0.10.2


From 4d9f34ad366a7935be3d6e7cca90805e6b7a692d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:55 -0700
Subject: [PATCH] lockdep: kconfig

Offer the following lock validation options:

 CONFIG_PROVE_LOCKING

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 16c2e98..e5889b1 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -134,7 +134,7 @@ config RT_MUTEX_TESTER
 	  This option enables a rt-mutex tester.
 
 config DEBUG_SPINLOCK
-	bool "Spinlock debugging"
+	bool "Spinlock and rw-lock debugging: basic checks"
 	depends on DEBUG_KERNEL
 	help
 	  Say Y here and build SMP to catch missing spinlock initialization
@@ -142,8 +142,102 @@ config DEBUG_SPINLOCK
 	  best used in conjunction with the NMI watchdog so that spinlock
 	  deadlocks are also debuggable.
 
+config DEBUG_MUTEXES
+	bool "Mutex debugging: basic checks"
+	depends on DEBUG_KERNEL
+	help
+	 This feature allows mutex semantics violations to be detected and
+	 reported.
+
+config DEBUG_RWSEMS
+	bool "RW-sem debugging: basic checks"
+	depends on DEBUG_KERNEL
+	help
+	 This feature allows read-write semaphore semantics violations to
+	 be detected and reported.
+
+config DEBUG_LOCK_ALLOC
+	bool "Lock debugging: detect incorrect freeing of live locks"
+	depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
+	select DEBUG_SPINLOCK
+	select DEBUG_MUTEXES
+	select DEBUG_RWSEMS
+	select LOCKDEP
+	help
+	 This feature will check whether any held lock (spinlock, rwlock,
+	 mutex or rwsem) is incorrectly freed by the kernel, via any of the
+	 memory-freeing routines (kfree(), kmem_cache_free(), free_pages(),
+	 vfree(), etc.), whether a live lock is incorrectly reinitialized via
+	 spin_lock_init()/mutex_init()/etc., or whether there is any lock
+	 held during task exit.
+
+config PROVE_LOCKING
+	bool "Lock debugging: prove locking correctness"
+	depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
+	select LOCKDEP
+	select DEBUG_SPINLOCK
+	select DEBUG_MUTEXES
+	select DEBUG_RWSEMS
+	select DEBUG_LOCK_ALLOC
+	default n
+	help
+	 This feature enables the kernel to prove that all locking
+	 that occurs in the kernel runtime is mathematically
+	 correct: that under no circumstance could an arbitrary (and
+	 not yet triggered) combination of observed locking
+	 sequences (on an arbitrary number of CPUs, running an
+	 arbitrary number of tasks and interrupt contexts) cause a
+	 deadlock.
+
+	 In short, this feature enables the kernel to report locking
+	 related deadlocks before they actually occur.
+
+	 The proof does not depend on how hard and complex a
+	 deadlock scenario would be to trigger: how many
+	 participant CPUs, tasks and irq-contexts would be needed
+	 for it to trigger. The proof also does not depend on
+	 timing: if a race and a resulting deadlock is possible
+	 theoretically (no matter how unlikely the race scenario
+	 is), it will be proven so and will immediately be
+	 reported by the kernel (once the event is observed that
+	 makes the deadlock theoretically possible).
+
+	 If a deadlock is impossible (i.e. the locking rules, as
+	 observed by the kernel, are mathematically correct), the
+	 kernel reports nothing.
+
+	 NOTE: this feature can also be enabled for rwlocks, mutexes
+	 and rwsems - in which case all dependencies between these
+	 different locking variants are observed and mapped too, and
+	 the proof of observed correctness is also maintained for an
+	 arbitrary combination of these separate locking variants.
+
+	 For more details, see Documentation/lockdep-design.txt.
+
+config LOCKDEP
+	bool
+	depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
+	select STACKTRACE
+	select FRAME_POINTER
+	select KALLSYMS
+	select KALLSYMS_ALL
+
+config DEBUG_LOCKDEP
+	bool "Lock dependency engine debugging"
+	depends on LOCKDEP
+	help
+	  If you say Y here, the lock dependency engine will do
+	  additional runtime checks to debug itself, at the price
+	  of more runtime overhead.
+
+config TRACE_IRQFLAGS
+	bool
+	default y
+	depends on TRACE_IRQFLAGS_SUPPORT
+	depends on PROVE_LOCKING
+
 config DEBUG_SPINLOCK_SLEEP
-	bool "Sleep-inside-spinlock checking"
+	bool "Spinlock debugging: sleep-inside-spinlock checking"
 	depends on DEBUG_KERNEL
 	help
 	  If you say Y here, various routines which may sleep will become very
-- 
cgit v0.10.2


From 8c64580d522542e50f2ba689316128b7fbae497b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:56 -0700
Subject: [PATCH] lockdep: print all lock classes on SysRQ-D

Print all lock-classes on SysRq-D.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index e31f079..ee3ca8f 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -147,12 +147,13 @@ static struct sysrq_key_op sysrq_mountro_op = {
 	.enable_mask	= SYSRQ_ENABLE_REMOUNT,
 };
 
-#ifdef CONFIG_DEBUG_MUTEXES
+#ifdef CONFIG_LOCKDEP
 static void sysrq_handle_showlocks(int key, struct pt_regs *pt_regs,
 				struct tty_struct *tty)
 {
 	debug_show_all_locks();
 }
+
 static struct sysrq_key_op sysrq_showlocks_op = {
 	.handler	= sysrq_handle_showlocks,
 	.help_msg	= "show-all-locks(D)",
-- 
cgit v0.10.2


From 2148270cd2ebe0d05e4289b7c77b1435c45481bf Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:57 -0700
Subject: [PATCH] lockdep: x86_64 early init

x86_64 uses spinlocks very early - earlier than start_kernel().  So call
lockdep_init() from the arch setup code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index e6a71c9..36647ce 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -85,6 +85,11 @@ void __init x86_64_start_kernel(char * real_mode_data)
 	clear_bss();
 
 	/*
+	 * This must be called really, really early:
+	 */
+	lockdep_init();
+
+	/*
 	 * switch to init_level4_pgt from boot_level4_pgt
 	 */
 	memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
-- 
cgit v0.10.2


From 3047e99ede366298c6233d1870d12a520d4d92f3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:57 -0700
Subject: [PATCH] lockdep: x86 smp alternatives workaround

Disable SMP alternatives fixups (the patching in of NOPs on 1-CPU systems) if
the lock validator is enabled: there is a binutils section handling bug that
causes corrupted instructions when UP instructions are patched in.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 7b421b3..28ab806 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -303,6 +303,16 @@ void alternatives_smp_switch(int smp)
 	struct smp_alt_module *mod;
 	unsigned long flags;
 
+#ifdef CONFIG_LOCKDEP
+	/*
+	 * A not yet fixed binutils section handling bug prevents
+	 * alternatives-replacement from working reliably, so turn
+	 * it off:
+	 */
+	printk("lockdep: not fixing up alternatives.\n");
+	return;
+#endif
+
 	if (no_replacement || smp_alt_once)
 		return;
 	BUG_ON(!smp && (num_online_cpus() > 1));
-- 
cgit v0.10.2


From a0f1ccfd8d37457a6d8a9e01acebeefcdfcc306e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:58 -0700
Subject: [PATCH] lockdep: do not recurse in printk

Make printk()-ing from within the lock validation code safer by using the
lockdep-recursion counter.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/printk.c b/kernel/printk.c
index 39ae24d..bdba5d8 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -518,7 +518,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 		zap_locks();
 
 	/* This stops the holder of console_sem just where we want him */
-	spin_lock_irqsave(&logbuf_lock, flags);
+	local_irq_save(flags);
+	lockdep_off();
+	spin_lock(&logbuf_lock);
 	printk_cpu = smp_processor_id();
 
 	/* Emit the output into the temporary buffer */
@@ -588,7 +590,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 		 */
 		console_locked = 1;
 		printk_cpu = UINT_MAX;
-		spin_unlock_irqrestore(&logbuf_lock, flags);
+		spin_unlock(&logbuf_lock);
 
 		/*
 		 * Console drivers may assume that per-cpu resources have
@@ -604,6 +606,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 			console_locked = 0;
 			up(&console_sem);
 		}
+		lockdep_on();
+		local_irq_restore(flags);
 	} else {
 		/*
 		 * Someone else owns the drivers.  We drop the spinlock, which
@@ -611,7 +615,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 		 * console drivers with the output which we just produced.
 		 */
 		printk_cpu = UINT_MAX;
-		spin_unlock_irqrestore(&logbuf_lock, flags);
+		spin_unlock(&logbuf_lock);
+		lockdep_on();
+		local_irq_restore(flags);
 	}
 
 	preempt_enable();
@@ -809,8 +815,15 @@ void release_console_sem(void)
 	console_may_schedule = 0;
 	up(&console_sem);
 	spin_unlock_irqrestore(&logbuf_lock, flags);
-	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
-		wake_up_interruptible(&log_wait);
+	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) {
+		/*
+		 * If we printk from within the lock dependency code,
+		 * from within the scheduler code, then do not lock
+		 * up due to self-recursion:
+		 */
+		if (!lockdep_internal())
+			wake_up_interruptible(&log_wait);
+	}
 }
 EXPORT_SYMBOL(release_console_sem);
 
-- 
cgit v0.10.2


From 6205120044bb75ca06019491d1aa0e727fdd35be Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:24:59 -0700
Subject: [PATCH] lockdep: fix RT_HASH_LOCK_SZ

On lockdep we have a quite big spinlock_t, so keep the size down.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 283a722..2dc6dbb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -210,17 +210,22 @@ struct rt_hash_bucket {
 /*
  * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
  * The size of this table is a power of two and depends on the number of CPUS.
+ * (on lockdep we have a quite big spinlock_t, so keep the size down there)
  */
-#if NR_CPUS >= 32
-#define RT_HASH_LOCK_SZ	4096
-#elif NR_CPUS >= 16
-#define RT_HASH_LOCK_SZ	2048
-#elif NR_CPUS >= 8
-#define RT_HASH_LOCK_SZ	1024
-#elif NR_CPUS >= 4
-#define RT_HASH_LOCK_SZ	512
+#ifdef CONFIG_LOCKDEP
+# define RT_HASH_LOCK_SZ	256
 #else
-#define RT_HASH_LOCK_SZ	256
+# if NR_CPUS >= 32
+#  define RT_HASH_LOCK_SZ	4096
+# elif NR_CPUS >= 16
+#  define RT_HASH_LOCK_SZ	2048
+# elif NR_CPUS >= 8
+#  define RT_HASH_LOCK_SZ	1024
+# elif NR_CPUS >= 4
+#  define RT_HASH_LOCK_SZ	512
+# else
+#  define RT_HASH_LOCK_SZ	256
+# endif
 #endif
 
 static spinlock_t	*rt_hash_locks;
-- 
cgit v0.10.2


From 8e9ccae6ee2bb19b7752b7d412e1bbbaf28e7fe1 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 3 Jul 2006 00:25:00 -0700
Subject: [PATCH] lockdep: s390 turn validator off in machine-check handler

Machine checks on s390 are always enabled (except in the machine check handler
itself).  Therefore use lockdep_off()/on() in the machine check handler to
avoid deadlocks in the lock validator.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Arjan van de Ven <arjan@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index 432136f..ffb3677 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -378,6 +378,8 @@ s390_do_machine_check(struct pt_regs *regs)
 	struct mcck_struct *mcck;
 	int umode;
 
+	lockdep_off();
+
 	mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
 	mcck = &__get_cpu_var(cpu_mcck);
 	umode = user_mode(regs);
@@ -482,6 +484,7 @@ s390_do_machine_check(struct pt_regs *regs)
 		mcck->warning = 1;
 		set_thread_flag(TIF_MCCK_PENDING);
 	}
+	lockdep_on();
 }
 
 /*
-- 
cgit v0.10.2


From cbbf437a8de3192739334eb8cae7c9acee38590f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:00 -0700
Subject: [PATCH] lockdep: enable on i386

Enable LOCKDEP_SUPPORT on i386.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 076a72c..daa75ce 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -18,6 +18,10 @@ config GENERIC_TIME
 	bool
 	default y
 
+config LOCKDEP_SUPPORT
+	bool
+	default y
+
 config STACKTRACE_SUPPORT
 	bool
 	default y
-- 
cgit v0.10.2


From 1e9505279a8a2303d8feef4e464e6f48ea644f03 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:01 -0700
Subject: [PATCH] lockdep: enable on x86_64

Enable LOCKDEP_SUPPORT on x86_64.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 445f436..28df7d8 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -24,6 +24,10 @@ config X86
 	bool
 	default y
 
+config LOCKDEP_SUPPORT
+	bool
+	default y
+
 config STACKTRACE_SUPPORT
 	bool
 	default y
-- 
cgit v0.10.2


From 2b105ff9cccea5533ad518bb37dc9b43a322f7b1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:02 -0700
Subject: [PATCH] lockdep: enable on s390

Enable LOCKDEP_SUPPORT on s390.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 5b28da4..224fbff 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -7,6 +7,10 @@ config MMU
 	bool
 	default y
 
+config LOCKDEP_SUPPORT
+	bool
+	default y
+
 config STACKTRACE_SUPPORT
 	bool
 	default y
-- 
cgit v0.10.2


From d8aa905b429700e8b6b6b301a8ac2d4a24f2c19b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:02 -0700
Subject: [PATCH] lockdep: annotate direct io

Teach special (rwsem-in-irq) locking code to the lock validator.  Has no
effect on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 538fb04..5981e17f 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -220,7 +220,8 @@ static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
 	if (dio->end_io && dio->result)
 		dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private);
 	if (dio->lock_type == DIO_LOCKING)
-		up_read(&dio->inode->i_alloc_sem);
+		/* lockdep: non-owner release */
+		up_read_non_owner(&dio->inode->i_alloc_sem);
 }
 
 /*
@@ -1261,7 +1262,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 		}
 
 		if (dio_lock_type == DIO_LOCKING)
-			down_read(&inode->i_alloc_sem);
+			/* lockdep: not the owner will release it */
+			down_read_non_owner(&inode->i_alloc_sem);
 	}
 
 	/*
-- 
cgit v0.10.2


From 13e83599d282ddfd544600df9db5ab343ac4662f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:03 -0700
Subject: [PATCH] lockdep: annotate serial

Teach special (dual-initialized) locking code to the lock validator.  Has no
effect on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index c54af877..9583180 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -49,6 +49,12 @@
  */
 static DEFINE_MUTEX(port_mutex);
 
+/*
+ * lockdep: port->lock is initialized in two places, but we
+ *          want only one lock-class:
+ */
+static struct lock_class_key port_lock_key;
+
 #define HIGH_BITS_OFFSET	((sizeof(long)-sizeof(int))*8)
 
 #define uart_users(state)	((state)->count + ((state)->info ? (state)->info->blocked_open : 0))
@@ -1865,6 +1871,7 @@ uart_set_options(struct uart_port *port, struct console *co,
 	 * early.
 	 */
 	spin_lock_init(&port->lock);
+	lockdep_set_class(&port->lock, &port_lock_key);
 
 	memset(&termios, 0, sizeof(struct termios));
 
@@ -2247,8 +2254,10 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *port)
 	 * If this port is a console, then the spinlock is already
 	 * initialised.
 	 */
-	if (!(uart_console(port) && (port->cons->flags & CON_ENABLED)))
+	if (!(uart_console(port) && (port->cons->flags & CON_ENABLED))) {
 		spin_lock_init(&port->lock);
+		lockdep_set_class(&port->lock, &port_lock_key);
+	}
 
 	uart_configure_port(drv, state, port);
 
-- 
cgit v0.10.2


From a90b9c05df3c1e58eaedc28795d0f5abd896c098 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:04 -0700
Subject: [PATCH] lockdep: annotate dcache

Teach special (recursive) locking code to the lock validator.  Has no effect
on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/dcache.c b/fs/dcache.c
index bec4de1..1b4a3a3 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1339,10 +1339,10 @@ void d_move(struct dentry * dentry, struct dentry * target)
 	 */
 	if (target < dentry) {
 		spin_lock(&target->d_lock);
-		spin_lock(&dentry->d_lock);
+		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 	} else {
 		spin_lock(&dentry->d_lock);
-		spin_lock(&target->d_lock);
+		spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
 	}
 
 	/* Move the dentry to the target hash queue, if on different bucket */
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 0dd1610..471781f 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -114,6 +114,18 @@ struct dentry {
 	unsigned char d_iname[DNAME_INLINE_LEN_MIN];	/* small names */
 };
 
+/*
+ * dentry->d_lock spinlock nesting subclasses:
+ *
+ * 0: normal
+ * 1: nested
+ */
+enum dentry_d_lock_class
+{
+	DENTRY_D_LOCK_NORMAL, /* implicitly used by plain spin_lock() APIs. */
+	DENTRY_D_LOCK_NESTED
+};
+
 struct dentry_operations {
 	int (*d_revalidate)(struct dentry *, struct nameidata *);
 	int (*d_hash) (struct dentry *, struct qstr *);
-- 
cgit v0.10.2


From f2eace23e924bd3f05aedea4fc505eb5508d2d93 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:05 -0700
Subject: [PATCH] lockdep: annotate i_mutex

Teach special (recursive) locking code to the lock validator.  Has no effect
on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index e47e3a8..cf11196 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -200,7 +200,7 @@ static void update_sb(struct super_block *sb)
 	if (!root)
 		return;
 
-	mutex_lock(&root->d_inode->i_mutex);
+	mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT);
 
 	list_for_each_entry(bus, &root->d_subdirs, d_u.d_child) {
 		if (bus->d_inode) {
diff --git a/fs/namei.c b/fs/namei.c
index c784e8b..c9750d7 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1423,7 +1423,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 	struct dentry *p;
 
 	if (p1 == p2) {
-		mutex_lock(&p1->d_inode->i_mutex);
+		mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
 		return NULL;
 	}
 
@@ -1431,22 +1431,22 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 
 	for (p = p1; p->d_parent != p; p = p->d_parent) {
 		if (p->d_parent == p2) {
-			mutex_lock(&p2->d_inode->i_mutex);
-			mutex_lock(&p1->d_inode->i_mutex);
+			mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
+			mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
 			return p;
 		}
 	}
 
 	for (p = p2; p->d_parent != p; p = p->d_parent) {
 		if (p->d_parent == p1) {
-			mutex_lock(&p1->d_inode->i_mutex);
-			mutex_lock(&p2->d_inode->i_mutex);
+			mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+			mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
 			return p;
 		}
 	}
 
-	mutex_lock(&p1->d_inode->i_mutex);
-	mutex_lock(&p2->d_inode->i_mutex);
+	mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+	mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
 	return NULL;
 }
 
@@ -1751,7 +1751,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
 {
 	struct dentry *dentry = ERR_PTR(-EEXIST);
 
-	mutex_lock(&nd->dentry->d_inode->i_mutex);
+	mutex_lock_nested(&nd->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	/*
 	 * Yucky last component or no last component at all?
 	 * (foo/., foo/.., /////)
@@ -2008,7 +2008,7 @@ static long do_rmdir(int dfd, const char __user *pathname)
 			error = -EBUSY;
 			goto exit1;
 	}
-	mutex_lock(&nd.dentry->d_inode->i_mutex);
+	mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_hash(&nd);
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
@@ -2082,7 +2082,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
 	error = -EISDIR;
 	if (nd.last_type != LAST_NORM)
 		goto exit1;
-	mutex_lock(&nd.dentry->d_inode->i_mutex);
+	mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_hash(&nd);
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e04a5cf..05ded9e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -543,6 +543,25 @@ struct inode {
 };
 
 /*
+ * inode->i_mutex nesting subclasses for the lock validator:
+ *
+ * 0: the object of the current VFS operation
+ * 1: parent
+ * 2: child/target
+ * 3: quota file
+ *
+ * The locking order between these classes is
+ * parent -> child -> normal -> quota
+ */
+enum inode_i_mutex_lock_class
+{
+	I_MUTEX_NORMAL,
+	I_MUTEX_PARENT,
+	I_MUTEX_CHILD,
+	I_MUTEX_QUOTA
+};
+
+/*
  * NOTE: in a 32bit arch with a preemptable kernel and
  * an UP compile the i_size_read/write must be atomic
  * with respect to the local cpu (unlike with preempt disabled),
-- 
cgit v0.10.2


From 8b8f319fc7f4ab59f567d6a401a62659b3d37007 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:05 -0700
Subject: [PATCH] lockdep: annotate futex

Teach special (recursive) locking code to the lock validator.  Introduces
double_lock_hb() to unify double- hash-bucket-lock taking.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/futex.c b/kernel/futex.c
index 15caf93..1dc98e4 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -607,6 +607,22 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
 }
 
 /*
+ * Express the locking dependencies for lockdep:
+ */
+static inline void
+double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
+{
+	if (hb1 <= hb2) {
+		spin_lock(&hb1->lock);
+		if (hb1 < hb2)
+			spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
+	} else { /* hb1 > hb2 */
+		spin_lock(&hb2->lock);
+		spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
+	}
+}
+
+/*
  * Wake up all waiters hashed on the physical page that is mapped
  * to this virtual address:
  */
@@ -674,11 +690,7 @@ retryfull:
 	hb2 = hash_futex(&key2);
 
 retry:
-	if (hb1 < hb2)
-		spin_lock(&hb1->lock);
-	spin_lock(&hb2->lock);
-	if (hb1 > hb2)
-		spin_lock(&hb1->lock);
+	double_lock_hb(hb1, hb2);
 
 	op_ret = futex_atomic_op_inuser(op, uaddr2);
 	if (unlikely(op_ret < 0)) {
@@ -787,11 +799,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
 	hb1 = hash_futex(&key1);
 	hb2 = hash_futex(&key2);
 
-	if (hb1 < hb2)
-		spin_lock(&hb1->lock);
-	spin_lock(&hb2->lock);
-	if (hb1 > hb2)
-		spin_lock(&hb1->lock);
+	double_lock_hb(hb1, hb2);
 
 	if (likely(cmpval != NULL)) {
 		u32 curval;
-- 
cgit v0.10.2


From 243c7621aac4ed1aa79524c9a1cecf7c05a28124 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:06 -0700
Subject: [PATCH] lockdep: annotate genirq

Teach special (recursive) locking code to the lock validator.  Has no effect
on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 80ec7a4..316e0fb 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -270,6 +270,12 @@ static inline int lockdep_internal(void)
 struct lock_class_key { };
 #endif /* !LOCKDEP */
 
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS)
+extern void early_init_irq_lock_class(void);
+#else
+# define early_init_irq_lock_class()		do { } while (0)
+#endif
+
 #ifdef CONFIG_TRACE_IRQFLAGS
 extern void early_boot_irqs_off(void);
 extern void early_boot_irqs_on(void);
diff --git a/init/main.c b/init/main.c
index fc473d4..628b8e9 100644
--- a/init/main.c
+++ b/init/main.c
@@ -466,6 +466,7 @@ asmlinkage void __init start_kernel(void)
 
 	local_irq_disable();
 	early_boot_irqs_off();
+	early_init_irq_lock_class();
 
 /*
  * Interrupts are still disabled. Do necessary setups, then
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index aeb6e39..a7b497e 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -249,3 +249,19 @@ out:
 	return 1;
 }
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+/*
+ * lockdep: we want to handle all irq_desc locks as a single lock-class:
+ */
+static struct lock_class_key irq_desc_lock_class;
+
+void early_init_irq_lock_class(void)
+{
+	int i;
+
+	for (i = 0; i < NR_IRQS; i++)
+		lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
+}
+
+#endif
-- 
cgit v0.10.2


From eb4542b98c81e22e08587b747b21986a45360999 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:07 -0700
Subject: [PATCH] lockdep: annotate waitqueues

Create one lock class for all waitqueue locks in the kernel.  Has no effect on
non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/wait.h b/include/linux/wait.h
index bc4f389..794be7a 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -77,9 +77,15 @@ struct task_struct;
 #define __WAIT_BIT_KEY_INITIALIZER(word, bit)				\
 	{ .flags = word, .bit_nr = bit, }
 
+/*
+ * lockdep: we want one lock-class for all waitqueue locks.
+ */
+extern struct lock_class_key waitqueue_lock_key;
+
 static inline void init_waitqueue_head(wait_queue_head_t *q)
 {
 	spin_lock_init(&q->lock);
+	lockdep_set_class(&q->lock, &waitqueue_lock_key);
 	INIT_LIST_HEAD(&q->task_list);
 }
 
diff --git a/kernel/wait.c b/kernel/wait.c
index 5985d86..a1d57ae 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -10,6 +10,10 @@
 #include <linux/wait.h>
 #include <linux/hash.h>
 
+struct lock_class_key waitqueue_lock_key;
+
+EXPORT_SYMBOL(waitqueue_lock_key);
+
 void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
 {
 	unsigned long flags;
-- 
cgit v0.10.2


From f20dc5f7c1adf1c4b68b7672d6f2002cb824e636 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:08 -0700
Subject: [PATCH] lockdep: annotate mm

Teach special (recursive) locking code to the lock validator.  Has no effect
on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/mm/memory.c b/mm/memory.c
index 7e2a4b1..c1e14c9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -503,7 +503,7 @@ again:
 		return -ENOMEM;
 	src_pte = pte_offset_map_nested(src_pmd, addr);
 	src_ptl = pte_lockptr(src_mm, src_pmd);
-	spin_lock(src_ptl);
+	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 
 	do {
 		/*
diff --git a/mm/mremap.c b/mm/mremap.c
index 1903bdf..7c15cf3 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -97,7 +97,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
  	new_pte = pte_offset_map_nested(new_pmd, new_addr);
 	new_ptl = pte_lockptr(mm, new_pmd);
 	if (new_ptl != old_ptl)
-		spin_lock(new_ptl);
+		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
 
 	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
 				   new_pte++, new_addr += PAGE_SIZE) {
-- 
cgit v0.10.2


From 3aceafc1e2596f1c2c4e053126561f00b68e3a1a Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 3 Jul 2006 00:25:08 -0700
Subject: [PATCH] lockdep: annotate serio

The PS/2 code has a natural device order and there is a one level recursion in
this device order in terms of the cmd_mutex; annotate this explicit recursion
as ok.

Has no effect on non-lockdep kernels.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Dmitry Torokhov <dtor_core@ameritech.net>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c
index 79c97f9..61a6f97 100644
--- a/drivers/input/serio/libps2.c
+++ b/drivers/input/serio/libps2.c
@@ -177,7 +177,7 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command)
 		return -1;
 	}
 
-	mutex_lock(&ps2dev->cmd_mutex);
+	mutex_lock_nested(&ps2dev->cmd_mutex, SINGLE_DEPTH_NESTING);
 
 	serio_pause_rx(ps2dev->serio);
 	ps2dev->flags = command == PS2_CMD_GETID ? PS2_FLAG_WAITID : 0;
-- 
cgit v0.10.2


From 06825ba3553151eea24206bc53d4fc3de49e0ab1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:09 -0700
Subject: [PATCH] lockdep: annotate skb_queue_head_init

Teach special (multi-initialized) locking code to the lock validator.  Has no
effect on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 57d7d49..3597b4f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -604,9 +604,12 @@ static inline __u32 skb_queue_len(const struct sk_buff_head *list_)
 	return list_->qlen;
 }
 
+extern struct lock_class_key skb_queue_lock_key;
+
 static inline void skb_queue_head_init(struct sk_buff_head *list)
 {
 	spin_lock_init(&list->lock);
+	lockdep_set_class(&list->lock, &skb_queue_lock_key);
 	list->prev = list->next = (struct sk_buff *)list;
 	list->qlen = 0;
 }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7cfbdb2..44f6a18 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -71,6 +71,13 @@ static kmem_cache_t *skbuff_head_cache __read_mostly;
 static kmem_cache_t *skbuff_fclone_cache __read_mostly;
 
 /*
+ * lockdep: lock class key used by skb_queue_head_init():
+ */
+struct lock_class_key skb_queue_lock_key;
+
+EXPORT_SYMBOL(skb_queue_lock_key);
+
+/*
  *	Keep out-of-line to prevent kernel bloat.
  *	__builtin_return_address is not used because it is not always
  *	reliable.
-- 
cgit v0.10.2


From d730e882a15c38de02b63a063be636b2ff9e9ed1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:10 -0700
Subject: [PATCH] lockdep: annotate timer base locks

Split the per-CPU timer base locks up into separate lock classes, because they
are used recursively.

Has no effect on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/timer.c b/kernel/timer.c
index 4dd9a10..b761898 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1559,6 +1559,13 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
 	return 0;
 }
 
+/*
+ * lockdep: we want to track each per-CPU base as a separate lock-class,
+ * but timer-bases are kmalloc()-ed, so we need to attach separate
+ * keys to them:
+ */
+static struct lock_class_key base_lock_keys[NR_CPUS];
+
 static int __devinit init_timers_cpu(int cpu)
 {
 	int j;
@@ -1594,6 +1601,8 @@ static int __devinit init_timers_cpu(int cpu)
 	}
 
 	spin_lock_init(&base->lock);
+	lockdep_set_class(&base->lock, base_lock_keys + cpu);
+
 	for (j = 0; j < TVN_SIZE; j++) {
 		INIT_LIST_HEAD(base->tv5.vec + j);
 		INIT_LIST_HEAD(base->tv4.vec + j);
-- 
cgit v0.10.2


From fcb993712f231a4faea8393513d1276170679107 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:10 -0700
Subject: [PATCH] lockdep: annotate scheduler runqueue locks

Teach per-CPU runqueue locks and recursive locking code to the lock validator.
 Has no effect on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/sched.c b/kernel/sched.c
index ae4db01..f4778d1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -266,6 +266,7 @@ struct runqueue {
 	unsigned long ttwu_cnt;
 	unsigned long ttwu_local;
 #endif
+	struct lock_class_key rq_lock_key;
 };
 
 static DEFINE_PER_CPU(struct runqueue, runqueues);
@@ -6656,6 +6657,7 @@ void __init sched_init(void)
 
 		rq = cpu_rq(i);
 		spin_lock_init(&rq->lock);
+		lockdep_set_class(&rq->lock, &rq->rq_lock_key);
 		rq->nr_running = 0;
 		rq->active = rq->arrays;
 		rq->expired = rq->arrays + 1;
-- 
cgit v0.10.2


From 543655244866b8ec648fea1eb9c32a35ffba5721 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:11 -0700
Subject: [PATCH] lockdep: annotate hrtimer base locks

Teach special (recursive) locking code to the lock validator.  Has no effect
on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 07d7305..e4bccbc 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -91,6 +91,7 @@ struct hrtimer_base {
 	ktime_t			(*get_softirq_time)(void);
 	struct hrtimer		*curr_timer;
 	ktime_t			softirq_time;
+	struct lock_class_key lock_key;
 };
 
 /*
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 8d3dc29..617304c 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -782,8 +782,10 @@ static void __devinit init_hrtimers_cpu(int cpu)
 	struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu);
 	int i;
 
-	for (i = 0; i < MAX_HRTIMER_BASES; i++, base++)
+	for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) {
 		spin_lock_init(&base->lock);
+		lockdep_set_class(&base->lock, &base->lock_key);
+	}
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-- 
cgit v0.10.2


From da21f24dd73954c2ed0cd39a698e2c9916c05d71 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:12 -0700
Subject: [PATCH] lockdep: annotate sock_lock_init()

Teach special (multi-initialized, per-address-family) locking code to the lock
validator.  Has no effect on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/net/sock.h b/include/net/sock.h
index 7b3d6b8..83805fe 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -80,8 +80,12 @@ typedef struct {
 	wait_queue_head_t	wq;
 } socket_lock_t;
 
+extern struct lock_class_key af_family_keys[AF_MAX];
+
 #define sock_lock_init(__sk) \
 do {	spin_lock_init(&((__sk)->sk_lock.slock)); \
+	lockdep_set_class(&(__sk)->sk_lock.slock, \
+			  af_family_keys + (__sk)->sk_family); \
 	(__sk)->sk_lock.owner = NULL; \
 	init_waitqueue_head(&((__sk)->sk_lock.wq)); \
 } while(0)
diff --git a/net/core/sock.c b/net/core/sock.c
index 533b931..0b4d5d2 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -129,6 +129,18 @@
 #include <net/tcp.h>
 #endif
 
+/*
+ * Each address family might have different locking rules, so we have
+ * one slock key per address family:
+ */
+struct lock_class_key af_family_keys[AF_MAX];
+
+/*
+ * sk_callback_lock locking rules are per-address-family,
+ * so split the lock classes by using a per-AF key:
+ */
+static struct lock_class_key af_callback_keys[AF_MAX];
+
 /* Take into consideration the size of the struct sk_buff overhead in the
  * determination of these values, since that is non-constant across
  * platforms.  This makes socket queueing behavior and performance
@@ -848,6 +860,8 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 
 		rwlock_init(&newsk->sk_dst_lock);
 		rwlock_init(&newsk->sk_callback_lock);
+		lockdep_set_class(&newsk->sk_callback_lock,
+				   af_callback_keys + newsk->sk_family);
 
 		newsk->sk_dst_cache	= NULL;
 		newsk->sk_wmem_queued	= 0;
@@ -1422,6 +1436,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	rwlock_init(&sk->sk_dst_lock);
 	rwlock_init(&sk->sk_callback_lock);
+	lockdep_set_class(&sk->sk_callback_lock,
+			   af_callback_keys + sk->sk_family);
 
 	sk->sk_state_change	=	sock_def_wakeup;
 	sk->sk_data_ready	=	sock_def_readable;
-- 
cgit v0.10.2


From a09785a2414afb261d9f719d544742af4300df22 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:12 -0700
Subject: [PATCH] lockdep: annotate af_unix locking

Teach special (recursive) locking code to the lock validator.  Also splits
af_unix's sk_receive_queue.lock class from the other networking skb-queue
locks.  Has no effect on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 5ba72d9..2fec827 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -67,6 +67,9 @@ struct unix_skb_parms {
 #define unix_state_rlock(s)	spin_lock(&unix_sk(s)->lock)
 #define unix_state_runlock(s)	spin_unlock(&unix_sk(s)->lock)
 #define unix_state_wlock(s)	spin_lock(&unix_sk(s)->lock)
+#define unix_state_wlock_nested(s) \
+				spin_lock_nested(&unix_sk(s)->lock, \
+				SINGLE_DEPTH_NESTING)
 #define unix_state_wunlock(s)	spin_unlock(&unix_sk(s)->lock)
 
 #ifdef __KERNEL__
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index aca6501..e9a287b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -565,6 +565,14 @@ static struct proto unix_proto = {
 	.obj_size = sizeof(struct unix_sock),
 };
 
+/*
+ * AF_UNIX sockets do not interact with hardware, hence they
+ * dont trigger interrupts - so it's safe for them to have
+ * bh-unsafe locking for their sk_receive_queue.lock. Split off
+ * this special lock-class by reinitializing the spinlock key:
+ */
+static struct lock_class_key af_unix_sk_receive_queue_lock_key;
+
 static struct sock * unix_create1(struct socket *sock)
 {
 	struct sock *sk = NULL;
@@ -580,6 +588,8 @@ static struct sock * unix_create1(struct socket *sock)
 	atomic_inc(&unix_nr_socks);
 
 	sock_init_data(sock,sk);
+	lockdep_set_class(&sk->sk_receive_queue.lock,
+				&af_unix_sk_receive_queue_lock_key);
 
 	sk->sk_write_space	= unix_write_space;
 	sk->sk_max_ack_backlog	= sysctl_unix_max_dgram_qlen;
@@ -1045,7 +1055,7 @@ restart:
 		goto out_unlock;
 	}
 
-	unix_state_wlock(sk);
+	unix_state_wlock_nested(sk);
 
 	if (sk->sk_state != st) {
 		unix_state_wunlock(sk);
-- 
cgit v0.10.2


From c63661848581a9842dfc72d9a400285dd284fc47 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:13 -0700
Subject: [PATCH] lockdep: annotate bh_lock_sock()

Teach special (recursive) locking code to the lock validator.  Has no effect
on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/net/sock.h b/include/net/sock.h
index 83805fe..0969fb6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -751,6 +751,9 @@ extern void FASTCALL(release_sock(struct sock *sk));
 
 /* BH context may only use the following locking interface. */
 #define bh_lock_sock(__sk)	spin_lock(&((__sk)->sk_lock.slock))
+#define bh_lock_sock_nested(__sk) \
+				spin_lock_nested(&((__sk)->sk_lock.slock), \
+				SINGLE_DEPTH_NESTING)
 #define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))
 
 extern struct sock		*sk_alloc(int family,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8237172..5a886e6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1090,7 +1090,7 @@ process:
 
 	skb->dev = NULL;
 
-	bh_lock_sock(sk);
+	bh_lock_sock_nested(sk);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
 #ifdef CONFIG_NET_DMA
-- 
cgit v0.10.2


From d378834840907326ac9d448056d957d13cc3718f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:14 -0700
Subject: [PATCH] lockdep: annotate ieee1394 skb-queue-head locking

ieee1394 reuses the skb infrastructure of the networking code, and uses two
skb-head queues: ->pending_packet_queue and hpsbpkt_queue.  The latter is used
in the usual fashion: processed from a kernel thread.  The other one,
->pending_packet_queue is also processed from hardirq context (f.e.  in
hpsb_bus_reset()), which is not what the networking code usually does (which
completes from softirq or process context).  This locking assymetry can be
totally correct if done carefully, but it can also be dangerous if networking
helper functions are reused, which could assume traditional networking use.

It would probably be more robust to push this completion into a workqueue -
but technically the code can be 100% correct, and lockdep has to be taught
about it.  The solution is to split the ->pending_packet_queue skb-head->lock
class from the networking lock-class by using a private lock-validator key.

Has no effect on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Stefan Richter <stefanr@s5r6.in-berlin.de>
Cc: Jody McIntyre <scjody@modernduck.com>
Cc: Ben Collins <bcollins@debian.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/ieee1394/hosts.c b/drivers/ieee1394/hosts.c
index 2c66928..4feead4 100644
--- a/drivers/ieee1394/hosts.c
+++ b/drivers/ieee1394/hosts.c
@@ -107,6 +107,14 @@ static int alloc_hostnum_cb(struct hpsb_host *host, void *__data)
  */
 static DEFINE_MUTEX(host_num_alloc);
 
+/*
+ * The pending_packet_queue is special in that it's processed
+ * from hardirq context too (such as hpsb_bus_reset()). Hence
+ * split the lock class from the usual networking skb-head
+ * lock class by using a separate key for it:
+ */
+static struct lock_class_key pending_packet_queue_key;
+
 struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra,
 				  struct device *dev)
 {
@@ -128,6 +136,8 @@ struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra,
 	h->driver = drv;
 
 	skb_queue_head_init(&h->pending_packet_queue);
+	lockdep_set_class(&h->pending_packet_queue.lock,
+			   &pending_packet_queue_key);
 	INIT_LIST_HEAD(&h->addr_space);
 
 	for (i = 2; i < 16; i++)
-- 
cgit v0.10.2


From ad33945175bed649ca5fe0881269db005bbb449a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:15 -0700
Subject: [PATCH] lockdep: annotate ->mmap_sem

Teach special (recursive) locking code to the lock validator.  Has no effect
on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/fork.c b/kernel/fork.c
index 7f48abd..54953d8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -193,7 +193,10 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 
 	down_write(&oldmm->mmap_sem);
 	flush_cache_mm(oldmm);
-	down_write(&mm->mmap_sem);
+	/*
+	 * Not linked in yet - no deadlock potential:
+	 */
+	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
 
 	mm->locked_vm = 0;
 	mm->mmap = NULL;
-- 
cgit v0.10.2


From c6573c2904231279de0584787d6214f19a128d0b Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Mon, 3 Jul 2006 00:25:16 -0700
Subject: [PATCH] lockdep: annotate sunrpc code

Add i_mutex ordering annotations to the sunrpc rpc_pipe code.  This code has 3
levels of i_mutex hierarchy in some cases: parent dir, client dir and file
inside client dir; the i_mutex ordering is I_MUTEX_PARENT -> I_MUTEX_CHILD ->
I_MUTEX_NORMAL

This patch applies this ordering annotation to the various functions.  This is
in line with the VFS expected ordering where it is always OK to lock a child
after locking a parent; the sunrpc code is very diligent in doing this
correctly.

Has no effect on non-lockdep kernels.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 6db6006..dc6cb93 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -515,7 +515,7 @@ rpc_depopulate(struct dentry *parent)
 	struct dentry *dentry, *dvec[10];
 	int n = 0;
 
-	mutex_lock(&dir->i_mutex);
+	mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD);
 repeat:
 	spin_lock(&dcache_lock);
 	list_for_each_safe(pos, next, &parent->d_subdirs) {
@@ -631,7 +631,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd)
 	if ((error = rpc_lookup_parent(path, nd)) != 0)
 		return ERR_PTR(error);
 	dir = nd->dentry->d_inode;
-	mutex_lock(&dir->i_mutex);
+	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_one_len(nd->last.name, nd->dentry, nd->last.len);
 	if (IS_ERR(dentry))
 		goto out_err;
@@ -693,7 +693,7 @@ rpc_rmdir(char *path)
 	if ((error = rpc_lookup_parent(path, &nd)) != 0)
 		return error;
 	dir = nd.dentry->d_inode;
-	mutex_lock(&dir->i_mutex);
+	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len);
 	if (IS_ERR(dentry)) {
 		error = PTR_ERR(dentry);
@@ -754,7 +754,7 @@ rpc_unlink(char *path)
 	if ((error = rpc_lookup_parent(path, &nd)) != 0)
 		return error;
 	dir = nd.dentry->d_inode;
-	mutex_lock(&dir->i_mutex);
+	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len);
 	if (IS_ERR(dentry)) {
 		error = PTR_ERR(dentry);
-- 
cgit v0.10.2


From 59345374742ee6673c2d04b0fa8c888e881b7209 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:18 -0700
Subject: [PATCH] lockdep: annotate NTFS locking rules

NTFS uses lots of type-opaque objects which acquire their true identity
runtime - so the lock validator needs to be helped in a couple of places to
figure out object types.

Many thanks to Anton Altaparmakov for giving lots of explanations about NTFS
locking rules.

Has no effect on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Anton Altaparmakov <aia21@cantab.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 4c86b7e..d313f35 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -367,6 +367,12 @@ static void ntfs_destroy_extent_inode(ntfs_inode *ni)
 	kmem_cache_free(ntfs_inode_cache, ni);
 }
 
+/*
+ * The attribute runlist lock has separate locking rules from the
+ * normal runlist lock, so split the two lock-classes:
+ */
+static struct lock_class_key attr_list_rl_lock_class;
+
 /**
  * __ntfs_init_inode - initialize ntfs specific part of an inode
  * @sb:		super block of mounted volume
@@ -394,6 +400,8 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
 	ni->attr_list_size = 0;
 	ni->attr_list = NULL;
 	ntfs_init_runlist(&ni->attr_list_rl);
+	lockdep_set_class(&ni->attr_list_rl.lock,
+				&attr_list_rl_lock_class);
 	ni->itype.index.bmp_ino = NULL;
 	ni->itype.index.block_size = 0;
 	ni->itype.index.vcn_size = 0;
@@ -405,6 +413,13 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
 	ni->ext.base_ntfs_ino = NULL;
 }
 
+/*
+ * Extent inodes get MFT-mapped in a nested way, while the base inode
+ * is still mapped. Teach this nesting to the lock validator by creating
+ * a separate class for nested inode's mrec_lock's:
+ */
+static struct lock_class_key extent_inode_mrec_lock_key;
+
 inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
 		unsigned long mft_no)
 {
@@ -413,6 +428,7 @@ inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
 	ntfs_debug("Entering.");
 	if (likely(ni != NULL)) {
 		__ntfs_init_inode(sb, ni);
+		lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key);
 		ni->mft_no = mft_no;
 		ni->type = AT_UNUSED;
 		ni->name = NULL;
@@ -1722,6 +1738,15 @@ err_out:
 	return err;
 }
 
+/*
+ * The MFT inode has special locking, so teach the lock validator
+ * about this by splitting off the locking rules of the MFT from
+ * the locking rules of other inodes. The MFT inode can never be
+ * accessed from the VFS side (or even internally), only by the
+ * map_mft functions.
+ */
+static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key;
+
 /**
  * ntfs_read_inode_mount - special read_inode for mount time use only
  * @vi:		inode to read
@@ -2148,6 +2173,14 @@ int ntfs_read_inode_mount(struct inode *vi)
 	ntfs_attr_put_search_ctx(ctx);
 	ntfs_debug("Done.");
 	ntfs_free(m);
+
+	/*
+	 * Split the locking rules of the MFT inode from the
+	 * locking rules of other inodes:
+	 */
+	lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key);
+	lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key);
+
 	return 0;
 
 em_put_err_out:
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 0e14ace..74e0ee8 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1724,6 +1724,14 @@ upcase_failed:
 	return FALSE;
 }
 
+/*
+ * The lcn and mft bitmap inodes are NTFS-internal inodes with
+ * their own special locking rules:
+ */
+static struct lock_class_key
+	lcnbmp_runlist_lock_key, lcnbmp_mrec_lock_key,
+	mftbmp_runlist_lock_key, mftbmp_mrec_lock_key;
+
 /**
  * load_system_files - open the system files using normal functions
  * @vol:	ntfs super block describing device whose system files to load
@@ -1780,6 +1788,10 @@ static BOOL load_system_files(ntfs_volume *vol)
 		ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute.");
 		goto iput_mirr_err_out;
 	}
+	lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->runlist.lock,
+			   &mftbmp_runlist_lock_key);
+	lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->mrec_lock,
+			   &mftbmp_mrec_lock_key);
 	/* Read upcase table and setup @vol->upcase and @vol->upcase_len. */
 	if (!load_and_init_upcase(vol))
 		goto iput_mftbmp_err_out;
@@ -1802,6 +1814,11 @@ static BOOL load_system_files(ntfs_volume *vol)
 			iput(vol->lcnbmp_ino);
 		goto bitmap_failed;
 	}
+	lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->runlist.lock,
+			   &lcnbmp_runlist_lock_key);
+	lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->mrec_lock,
+			   &lcnbmp_mrec_lock_key);
+
 	NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino));
 	if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) {
 		iput(vol->lcnbmp_ino);
@@ -2743,6 +2760,17 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 	struct inode *tmp_ino;
 	int blocksize, result;
 
+	/*
+	 * We do a pretty difficult piece of bootstrap by reading the
+	 * MFT (and other metadata) from disk into memory. We'll only
+	 * release this metadata during umount, so the locking patterns
+	 * observed during bootstrap do not count. So turn off the
+	 * observation of locking patterns (strictly for this context
+	 * only) while mounting NTFS. [The validator is still active
+	 * otherwise, even for this context: it will for example record
+	 * lock class registrations.]
+	 */
+	lockdep_off();
 	ntfs_debug("Entering.");
 #ifndef NTFS_RW
 	sb->s_flags |= MS_RDONLY;
@@ -2754,6 +2782,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 		if (!silent)
 			ntfs_error(sb, "Allocation of NTFS volume structure "
 					"failed. Aborting mount...");
+		lockdep_on();
 		return -ENOMEM;
 	}
 	/* Initialize ntfs_volume structure. */
@@ -2940,6 +2969,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 		mutex_unlock(&ntfs_lock);
 		sb->s_export_op = &ntfs_export_ops;
 		lock_kernel();
+		lockdep_on();
 		return 0;
 	}
 	ntfs_error(sb, "Failed to allocate root directory.");
@@ -3059,6 +3089,7 @@ err_out_now:
 	sb->s_fs_info = NULL;
 	kfree(vol);
 	ntfs_debug("Failed, returning -EINVAL.");
+	lockdep_on();
 	return -EINVAL;
 }
 
-- 
cgit v0.10.2


From 5c81a4197de38411fe3e27f8593fff73a5d6b868 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 3 Jul 2006 00:25:20 -0700
Subject: [PATCH] lockdep: annotate the quota code

The quota code plays interesting games with the lock ordering; to quote Jan:

| i_mutex of inode containing quota file is acquired after all other
| quota locks. i_mutex of all other inodes is acquired before quota
| locks. Quota code makes sure (by resetting inode operations and
| setting special flag on inode) that noone tries to enter quota code
| while holding i_mutex on a quota file...

The good news is that all of this special case i_mutex grabbing happens in the
(per filesystem) low level quota write function.  For this special case we
need a new I_MUTEX_* nesting level, since this just entirely outside any of
the regular VFS locking rules for i_mutex.  I trust Jan on his blue eyes that
this is not ever going to deadlock; and based on that the patch below is what
it takes to inform lockdep of these very interesting new locking rules.

The new locking rule for the I_MUTEX_QUOTA nesting level is that this is the
deepest possible level of nesting for i_mutex, and that this only should be
used in quota write (and possibly read) function of filesystems.  This makes
the lock ordering of the I_MUTEX_* levels:

I_MUTEX_PARENT -> I_MUTEX_CHILD -> I_MUTEX_NORMAL -> I_MUTEX_QUOTA

Has no effect on non-lockdep kernels.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Jan Kara <jack@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 9f43879..f2702cd 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1157,7 +1157,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
 	struct buffer_head tmp_bh;
 	struct buffer_head *bh;
 
-	mutex_lock(&inode->i_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index f2dd713..813d589 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2614,7 +2614,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
 	struct buffer_head *bh;
 	handle_t *handle = journal_current_handle();
 
-	mutex_lock(&inode->i_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 28eb3c8..5567328 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2203,7 +2203,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
 	size_t towrite = len;
 	struct buffer_head tmp_bh, *bh;
 
-	mutex_lock(&inode->i_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 		    sb->s_blocksize - offset : towrite;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 19a9972..992ee0b 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1326,7 +1326,7 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type,
 	size_t towrite = len;
 	struct buffer_head *bh;
 
-	mutex_lock(&inode->i_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
-- 
cgit v0.10.2


From 8e7795ef6b5fd117b22b87ea7d501de2526a25f7 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 3 Jul 2006 00:25:21 -0700
Subject: [PATCH] lockdep: annotate USBFS

In usbfs's fs_remove_file() function, the aim is to remove a file or
directory from usbfs. This is done by first taking the i_mutex of the
parent directory of this file/dir via
  mutex_lock(&parent->d_inode->i_mutex);
and then to call either usbfs_rmdir() for a directory or usbfs_unlink()
for a file. Both these functions then take the i_mutex for the
to-be-removed object themselves:
  mutex_lock(&inode->i_mutex);

This is a classical parent->child locking order relationship that the VFS uses
all over the place; the VFS locking rule is "you need to take the parent
first".  This patch annotates the usbfs code to make this explicit and thus
informs the lockdep code that those two locks indeed have this relationship.

The rules for unlink that we already use in the VFS for unlink are to use
I_MUTEX_PARENT for the parent directory, and a normal mutex for the file
itself; this patch follows that convention.

Has no effect on non-lockdep kernels.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index cf11196..f48c3db 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -527,7 +527,7 @@ static void fs_remove_file (struct dentry *dentry)
 	if (!parent || !parent->d_inode)
 		return;
 
-	mutex_lock(&parent->d_inode->i_mutex);
+	mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_PARENT);
 	if (usbfs_positive(dentry)) {
 		if (dentry->d_inode) {
 			if (S_ISDIR(dentry->d_inode->i_mode))
-- 
cgit v0.10.2


From d8371f0481fd1ad2701081d37815b0bb5d236b92 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:21 -0700
Subject: [PATCH] lockdep: annotate sound/core/seq/seq_ports.c

Teach special (recursive) locking code to the lock validator.  Has no effect
on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Jaroslav Kysela <perex@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c
index d467b4f..8c64b58 100644
--- a/sound/core/seq/seq_ports.c
+++ b/sound/core/seq/seq_ports.c
@@ -514,7 +514,7 @@ int snd_seq_port_connect(struct snd_seq_client *connector,
 	atomic_set(&subs->ref_count, 2);
 
 	down_write(&src->list_mutex);
-	down_write(&dest->list_mutex);
+	down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING);
 
 	exclusive = info->flags & SNDRV_SEQ_PORT_SUBS_EXCLUSIVE ? 1 : 0;
 	err = -EBUSY;
@@ -587,7 +587,7 @@ int snd_seq_port_disconnect(struct snd_seq_client *connector,
 	unsigned long flags;
 
 	down_write(&src->list_mutex);
-	down_write(&dest->list_mutex);
+	down_write_nested(&dest->list_mutex, SINGLE_DEPTH_NESTING);
 
 	/* look for the connection */
 	list_for_each(p, &src->list_head) {
-- 
cgit v0.10.2


From 933a2efc59513551dcfa7b814752dc581bd3c60b Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 3 Jul 2006 00:25:22 -0700
Subject: [PATCH] lockdep: annotate sound/core/seq/seq_device.c

The ops structure has complex locking rules, where not all ops are equal, some
are subordinate on others for some complex sound cards.  This requires for
lockdep checking that each individual reg_mutex is considered in separation
for its locking rules.

Has no effect on non-lockdep kernels.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Jaroslav Kysela <perex@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/core/seq/seq_device.c b/sound/core/seq/seq_device.c
index d812dc8..4260de9 100644
--- a/sound/core/seq/seq_device.c
+++ b/sound/core/seq/seq_device.c
@@ -380,6 +380,12 @@ static struct ops_list * create_driver(char *id)
 	/* set up driver entry */
 	strlcpy(ops->id, id, sizeof(ops->id));
 	mutex_init(&ops->reg_mutex);
+	/*
+	 * The ->reg_mutex locking rules are per-driver, so we create
+	 * separate per-driver lock classes:
+	 */
+	lockdep_set_class(&ops->reg_mutex, (struct lock_class_key *)id);
+
 	ops->driver = DRIVER_EMPTY;
 	INIT_LIST_HEAD(&ops->dev_list);
 	/* lock this instance */
-- 
cgit v0.10.2


From e745165c6d235c03e9fa0e57984ca6fd3d3b2c50 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:23 -0700
Subject: [PATCH] lockdep: annotate 8390.c disable_irq()

8390.c knows that ei_local->page_lock can only be used by an irq context that
it disabled - and can hence take the ->page_lock without disabling hardirqs.
Teach lockdep about this.

Has no effect on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jeff Garzik <jeff@garzik.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/net/8390.c b/drivers/net/8390.c
index 86be96a..d2935ae 100644
--- a/drivers/net/8390.c
+++ b/drivers/net/8390.c
@@ -249,7 +249,7 @@ void ei_tx_timeout(struct net_device *dev)
 
 	/* Ugly but a reset can be slow, yet must be protected */
 		
-	disable_irq_nosync(dev->irq);
+	disable_irq_nosync_lockdep(dev->irq);
 	spin_lock(&ei_local->page_lock);
 		
 	/* Try to restart the card.  Perhaps the user has fixed something. */
@@ -257,7 +257,7 @@ void ei_tx_timeout(struct net_device *dev)
 	NS8390_init(dev, 1);
 		
 	spin_unlock(&ei_local->page_lock);
-	enable_irq(dev->irq);
+	enable_irq_lockdep(dev->irq);
 	netif_wake_queue(dev);
 }
     
-- 
cgit v0.10.2


From 0a9da4bd8eb30fb3f36e841c2cc72e426a17bbe2 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:24 -0700
Subject: [PATCH] lockdep: annotate 3c59x.c disable_irq()

3c59x.c's vortex_timer() function knows that vp->lock can only be used by an
irq context that it disabled - and can hence take the vp->lock without
disabling hardirqs.  Teach lockdep about this.

Has no effect on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index 8ab03b4..2819de7 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -1897,7 +1897,7 @@ vortex_timer(unsigned long data)
 		printk(KERN_DEBUG "dev->watchdog_timeo=%d\n", dev->watchdog_timeo);
 	}
 
-	disable_irq(dev->irq);
+	disable_irq_lockdep(dev->irq);
 	old_window = ioread16(ioaddr + EL3_CMD) >> 13;
 	EL3WINDOW(4);
 	media_status = ioread16(ioaddr + Wn4_Media);
@@ -1978,7 +1978,7 @@ leave_media_alone:
 			 dev->name, media_tbl[dev->if_port].name);
 
 	EL3WINDOW(old_window);
-	enable_irq(dev->irq);
+	enable_irq_lockdep(dev->irq);
 	mod_timer(&vp->timer, RUN_AT(next_tick));
 	if (vp->deferred)
 		iowrite16(FakeIntr, ioaddr + EL3_CMD);
-- 
cgit v0.10.2


From 366c7f554e888e51b8395f9b07b273fe775c7ff3 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:25 -0700
Subject: [PATCH] lockdep: annotate enable_in_hardirq()

Make use of local_irq_enable_in_hardirq() API to annotate places that enable
hardirqs in hardirq context.

Has no effect on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index a76e931..2dd928a 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -107,7 +107,7 @@ int nmi_active;
 static __init void nmi_cpu_busy(void *data)
 {
 	volatile int *endflag = data;
-	local_irq_enable();
+	local_irq_enable_in_hardirq();
 	/* Intentionally don't use cpu_relax here. This is
 	   to make sure that the performance counter really ticks,
 	   even if there is a simulator or similar that catches the
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 476c1472..5baa0c7 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -127,7 +127,7 @@ void __cpuinit nmi_watchdog_default(void)
 static __init void nmi_cpu_busy(void *data)
 {
 	volatile int *endflag = data;
-	local_irq_enable();
+	local_irq_enable_in_hardirq();
 	/* Intentionally don't use cpu_relax here. This is
 	   to make sure that the performance counter really ticks,
 	   even if there is a simulator or similar that catches the
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 6ca3476..adbe9f7 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -838,7 +838,7 @@ static ide_startstop_t idefloppy_pc_intr (ide_drive_t *drive)
 			"transferred\n", pc->actually_transferred);
 		clear_bit(PC_DMA_IN_PROGRESS, &pc->flags);
 
-		local_irq_enable();
+		local_irq_enable_in_hardirq();
 
 		if (status.b.check || test_bit(PC_DMA_ERROR, &pc->flags)) {
 			/* Error detected */
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 7dba999..d6bf1ec 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -693,7 +693,7 @@ static ide_startstop_t drive_cmd_intr (ide_drive_t *drive)
 	u8 stat = hwif->INB(IDE_STATUS_REG);
 	int retries = 10;
 
-	local_irq_enable();
+	local_irq_enable_in_hardirq();
 	if ((stat & DRQ_STAT) && args && args[3]) {
 		u8 io_32bit = drive->io_32bit;
 		drive->io_32bit = 0;
@@ -1286,7 +1286,7 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
 		if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq)
 			disable_irq_nosync(hwif->irq);
 		spin_unlock(&ide_lock);
-		local_irq_enable();
+		local_irq_enable_in_hardirq();
 			/* allow other IRQs while we start this request */
 		startstop = start_request(drive, rq);
 		spin_lock_irq(&ide_lock);
@@ -1631,7 +1631,7 @@ irqreturn_t ide_intr (int irq, void *dev_id, struct pt_regs *regs)
 	spin_unlock(&ide_lock);
 
 	if (drive->unmask)
-		local_irq_enable();
+		local_irq_enable_in_hardirq();
 	/* service this interrupt, may set handler for next interrupt */
 	startstop = handler(drive);
 	spin_lock_irq(&ide_lock);
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 04547eb..97a9244 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -222,7 +222,7 @@ ide_startstop_t task_no_data_intr (ide_drive_t *drive)
 	ide_hwif_t *hwif	= HWIF(drive);
 	u8 stat;
 
-	local_irq_enable();
+	local_irq_enable_in_hardirq();
 	if (!OK_STAT(stat = hwif->INB(IDE_STATUS_REG),READY_STAT,BAD_STAT)) {
 		return ide_error(drive, "task_no_data_intr", stat);
 		/* calls ide_end_drive_cmd */
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 285316c..dc7abef 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1359,7 +1359,7 @@ extern struct semaphore ide_cfg_sem;
  * ide_drive_t->hwif: constant, no locking
  */
 
-#define local_irq_set(flags)	do { local_save_flags((flags)); local_irq_enable(); } while (0)
+#define local_irq_set(flags)	do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0)
 
 extern struct bus_type ide_bus_type;
 
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index a7b497e..fc4e906 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -132,7 +132,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
 	handle_dynamic_tick(action);
 
 	if (!(action->flags & IRQF_DISABLED))
-		local_irq_enable();
+		local_irq_enable_in_hardirq();
 
 	do {
 		ret = action->handler(irq, action->dev_id, regs);
-- 
cgit v0.10.2


From 60be6b9a41cb0da0df7a9f11486da56baebf04cd Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:26 -0700
Subject: [PATCH] lockdep: annotate on-stack completions

lockdep needs to have the waitqueue lock initialized for on-stack waitqueues
implicitly initialized by DECLARE_COMPLETION().  Annotate on-stack completions
accordingly.

Has no effect on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 9705a6a..b7c7059 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -775,6 +775,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
 	};
 	DECLARE_WORK(work, do_fork_idle, &c_idle);
 
+	lockdep_set_class(&c_idle.done.wait.lock, &waitqueue_lock_key);
+
 	/* allocate memory for gdts of secondary cpus. Hotplug is considered */
 	if (!cpu_gdt_descr[cpu].address &&
 		!(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 5813d63..ab17c72 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -2516,7 +2516,7 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
 int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
 		   struct request *rq, int at_head)
 {
-	DECLARE_COMPLETION(wait);
+	DECLARE_COMPLETION_ONSTACK(wait);
 	char sense[SCSI_SENSE_BUFFERSIZE];
 	int err = 0;
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index d6bf1ec..fb67952 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -1705,7 +1705,7 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
 {
 	unsigned long flags;
 	ide_hwgroup_t *hwgroup = HWGROUP(drive);
-	DECLARE_COMPLETION(wait);
+	DECLARE_COMPLETION_ONSTACK(wait);
 	int where = ELEVATOR_INSERT_BACK, err;
 	int must_wait = (action == ide_wait || action == ide_head_wait);
 
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 82caba4..1c960ac 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -1001,7 +1001,7 @@ unsigned ata_exec_internal(struct ata_device *dev,
 	struct ata_queued_cmd *qc;
 	unsigned int tag, preempted_tag;
 	u32 preempted_sactive, preempted_qc_active;
-	DECLARE_COMPLETION(wait);
+	DECLARE_COMPLETION_ONSTACK(wait);
 	unsigned long flags;
 	unsigned int err_mask;
 	int rc;
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index ed1cdf6..146298a 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -510,7 +510,7 @@ static void spi_complete(void *arg)
  */
 int spi_sync(struct spi_device *spi, struct spi_message *message)
 {
-	DECLARE_COMPLETION(done);
+	DECLARE_COMPLETION_ONSTACK(done);
 	int status;
 
 	message->complete = spi_complete;
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 1b7157a..1d32def 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -233,7 +233,7 @@ static void __call_usermodehelper(void *data)
 int call_usermodehelper_keys(char *path, char **argv, char **envp,
 			     struct key *session_keyring, int wait)
 {
-	DECLARE_COMPLETION(done);
+	DECLARE_COMPLETION_ONSTACK(done);
 	struct subprocess_info sub_info = {
 		.complete	= &done,
 		.path		= path,
-- 
cgit v0.10.2


From 91ebe2a9320db7195d1e25152b5d158fc66dc133 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 3 Jul 2006 00:25:26 -0700
Subject: [PATCH] lockdep: annotate qeth driver

Annotate the qeth driver which uses a private skb-queue-head that is safely
used in hardirq context too.

Has no effect on non-lockdep kernels.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Jeff Garzik <jeff@garzik.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index 36733b9..8e8963f 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -84,6 +84,8 @@ static debug_info_t *qeth_dbf_qerr = NULL;
 
 DEFINE_PER_CPU(char[256], qeth_dbf_txt_buf);
 
+static struct lock_class_key qdio_out_skb_queue_key;
+
 /**
  * some more definitions and declarations
  */
@@ -3229,6 +3231,9 @@ qeth_alloc_qdio_buffers(struct qeth_card *card)
 				&card->qdio.out_qs[i]->qdio_bufs[j];
 			skb_queue_head_init(&card->qdio.out_qs[i]->bufs[j].
 					    skb_list);
+			lockdep_set_class(
+				&card->qdio.out_qs[i]->bufs[j].skb_list.lock,
+				&qdio_out_skb_queue_key);
 			INIT_LIST_HEAD(&card->qdio.out_qs[i]->bufs[j].ctx_list);
 		}
 	}
@@ -5272,6 +5277,7 @@ qeth_free_vlan_buffer(struct qeth_card *card, struct qeth_qdio_out_buffer *buf,
 	struct sk_buff_head tmp_list;
 
 	skb_queue_head_init(&tmp_list);
+	lockdep_set_class(&tmp_list.lock, &qdio_out_skb_queue_key);
 	for(i = 0; i < QETH_MAX_BUFFER_ELEMENTS(card); ++i){
 		while ((skb = skb_dequeue(&buf->skb_list))){
 			if (vlan_tx_tag_present(skb) &&
-- 
cgit v0.10.2


From cf51624999e56c88154b5f7d451a265db6aabff7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:27 -0700
Subject: [PATCH] lockdep: annotate ->s_lock

Teach special (per-filesystem) locking code to the lock validator.

Minimal effect on non-lockdep kernels: one extra parameter to alloc_super().

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/super.c b/fs/super.c
index 9b780c4..5a4fe8b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -53,7 +53,7 @@ DEFINE_SPINLOCK(sb_lock);
  *	Allocates and initializes a new &struct super_block.  alloc_super()
  *	returns a pointer new superblock or %NULL if allocation had failed.
  */
-static struct super_block *alloc_super(void)
+static struct super_block *alloc_super(struct file_system_type *type)
 {
 	struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
 	static struct super_operations default_op;
@@ -72,6 +72,12 @@ static struct super_block *alloc_super(void)
 		INIT_LIST_HEAD(&s->s_inodes);
 		init_rwsem(&s->s_umount);
 		mutex_init(&s->s_lock);
+		/*
+		 * The locking rules for s_lock are up to the
+		 * filesystem. For example ext3fs has different
+		 * lock ordering than usbfs:
+		 */
+		lockdep_set_class(&s->s_lock, &type->s_lock_key);
 		down_write(&s->s_umount);
 		s->s_count = S_BIAS;
 		atomic_set(&s->s_active, 1);
@@ -295,7 +301,7 @@ retry:
 	}
 	if (!s) {
 		spin_unlock(&sb_lock);
-		s = alloc_super();
+		s = alloc_super(type);
 		if (!s)
 			return ERR_PTR(-ENOMEM);
 		goto retry;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 05ded9e..0a3ea52 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1295,6 +1295,7 @@ struct file_system_type {
 	struct module *owner;
 	struct file_system_type * next;
 	struct list_head fs_supers;
+	struct lock_class_key s_lock_key;
 };
 
 extern int get_sb_bdev(struct file_system_type *fs_type,
-- 
cgit v0.10.2


From 897c6ff9568bcb102ffc6b465ebe1def0cba829d Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Mon, 3 Jul 2006 00:25:28 -0700
Subject: [PATCH] lockdep: annotate sb ->s_umount

The s_umount rwsem needs to be classified as per-superblock since it's
perfectly legit to keep multiple of those recursively in the VFS locking
rules.

Has no effect on non-lockdep kernels.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/super.c b/fs/super.c
index 5a4fe8b..6d4e817 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -72,6 +72,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		INIT_LIST_HEAD(&s->s_inodes);
 		init_rwsem(&s->s_umount);
 		mutex_init(&s->s_lock);
+		lockdep_set_class(&s->s_umount, &type->s_umount_key);
 		/*
 		 * The locking rules for s_lock are up to the
 		 * filesystem. For example ext3fs has different
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0a3ea52..e26de6805 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1296,6 +1296,7 @@ struct file_system_type {
 	struct file_system_type * next;
 	struct list_head fs_supers;
 	struct lock_class_key s_lock_key;
+	struct lock_class_key s_umount_key;
 };
 
 extern int get_sb_bdev(struct file_system_type *fs_type,
-- 
cgit v0.10.2


From 2b2d5493e10051694ae3a57ea6a153e3cb4d4488 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:28 -0700
Subject: [PATCH] lockdep: annotate SLAB code

Teach special (recursive) locking code to the lock validator.  Has no effect
on non-lockdep kernels.

Fix initialize-locks-via-memcpy assumptions.

Effects on non-lockdep kernels: the subclass nesting parameter is passed into
cache_free_alien() and __cache_free(), and turns one internal
kmem_cache_free() call into an open-coded __cache_free() call.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Christoph Lameter <clameter@engr.sgi.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/mm/slab.c b/mm/slab.c
index 3936af3..85c2e03 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1021,7 +1021,8 @@ static void drain_alien_cache(struct kmem_cache *cachep,
 	}
 }
 
-static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
+static inline int cache_free_alien(struct kmem_cache *cachep, void *objp,
+				   int nesting)
 {
 	struct slab *slabp = virt_to_slab(objp);
 	int nodeid = slabp->nodeid;
@@ -1039,7 +1040,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 	STATS_INC_NODEFREES(cachep);
 	if (l3->alien && l3->alien[nodeid]) {
 		alien = l3->alien[nodeid];
-		spin_lock(&alien->lock);
+		spin_lock_nested(&alien->lock, nesting);
 		if (unlikely(alien->avail == alien->limit)) {
 			STATS_INC_ACOVERFLOW(cachep);
 			__drain_alien_cache(cachep, alien, nodeid);
@@ -1068,7 +1069,8 @@ static inline void free_alien_cache(struct array_cache **ac_ptr)
 {
 }
 
-static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
+static inline int cache_free_alien(struct kmem_cache *cachep, void *objp,
+				   int nesting)
 {
 	return 0;
 }
@@ -1272,6 +1274,11 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
 
 	local_irq_disable();
 	memcpy(ptr, list, sizeof(struct kmem_list3));
+	/*
+	 * Do not assume that spinlocks can be initialized via memcpy:
+	 */
+	spin_lock_init(&ptr->list_lock);
+
 	MAKE_ALL_LISTS(cachep, ptr, nodeid);
 	cachep->nodelists[nodeid] = ptr;
 	local_irq_enable();
@@ -1398,7 +1405,7 @@ void __init kmem_cache_init(void)
 	}
 	/* 4) Replace the bootstrap head arrays */
 	{
-		void *ptr;
+		struct array_cache *ptr;
 
 		ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
 
@@ -1406,6 +1413,11 @@ void __init kmem_cache_init(void)
 		BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
 		memcpy(ptr, cpu_cache_get(&cache_cache),
 		       sizeof(struct arraycache_init));
+		/*
+		 * Do not assume that spinlocks can be initialized via memcpy:
+		 */
+		spin_lock_init(&ptr->lock);
+
 		cache_cache.array[smp_processor_id()] = ptr;
 		local_irq_enable();
 
@@ -1416,6 +1428,11 @@ void __init kmem_cache_init(void)
 		       != &initarray_generic.cache);
 		memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
 		       sizeof(struct arraycache_init));
+		/*
+		 * Do not assume that spinlocks can be initialized via memcpy:
+		 */
+		spin_lock_init(&ptr->lock);
+
 		malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
 		    ptr;
 		local_irq_enable();
@@ -1743,6 +1760,8 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
 }
 #endif
 
+static void __cache_free(struct kmem_cache *cachep, void *objp, int nesting);
+
 /**
  * slab_destroy - destroy and release all objects in a slab
  * @cachep: cache pointer being destroyed
@@ -1766,8 +1785,17 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
 		call_rcu(&slab_rcu->head, kmem_rcu_free);
 	} else {
 		kmem_freepages(cachep, addr);
-		if (OFF_SLAB(cachep))
-			kmem_cache_free(cachep->slabp_cache, slabp);
+		if (OFF_SLAB(cachep)) {
+			unsigned long flags;
+
+			/*
+		 	 * lockdep: we may nest inside an already held
+			 * ac->lock, so pass in a nesting flag:
+			 */
+			local_irq_save(flags);
+			__cache_free(cachep->slabp_cache, slabp, 1);
+			local_irq_restore(flags);
+		}
 	}
 }
 
@@ -3072,7 +3100,16 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
 		if (slabp->inuse == 0) {
 			if (l3->free_objects > l3->free_limit) {
 				l3->free_objects -= cachep->num;
+				/*
+				 * It is safe to drop the lock. The slab is
+				 * no longer linked to the cache. cachep
+				 * cannot disappear - we are using it and
+				 * all destruction of caches must be
+				 * serialized properly by the user.
+				 */
+				spin_unlock(&l3->list_lock);
 				slab_destroy(cachep, slabp);
+				spin_lock(&l3->list_lock);
 			} else {
 				list_add(&slabp->list, &l3->slabs_free);
 			}
@@ -3098,7 +3135,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
 #endif
 	check_irq_off();
 	l3 = cachep->nodelists[node];
-	spin_lock(&l3->list_lock);
+	spin_lock_nested(&l3->list_lock, SINGLE_DEPTH_NESTING);
 	if (l3->shared) {
 		struct array_cache *shared_array = l3->shared;
 		int max = shared_array->limit - shared_array->avail;
@@ -3141,14 +3178,14 @@ free_done:
  * Release an obj back to its cache. If the obj has a constructed state, it must
  * be in this state _before_ it is released.  Called with disabled ints.
  */
-static inline void __cache_free(struct kmem_cache *cachep, void *objp)
+static void __cache_free(struct kmem_cache *cachep, void *objp, int nesting)
 {
 	struct array_cache *ac = cpu_cache_get(cachep);
 
 	check_irq_off();
 	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
-	if (cache_free_alien(cachep, objp))
+	if (cache_free_alien(cachep, objp, nesting))
 		return;
 
 	if (likely(ac->avail < ac->limit)) {
@@ -3387,7 +3424,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 	BUG_ON(virt_to_cache(objp) != cachep);
 
 	local_irq_save(flags);
-	__cache_free(cachep, objp);
+	__cache_free(cachep, objp, 0);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(kmem_cache_free);
@@ -3412,7 +3449,7 @@ void kfree(const void *objp)
 	kfree_debugcheck(objp);
 	c = virt_to_cache(objp);
 	debug_check_no_locks_freed(objp, obj_size(c));
-	__cache_free(c, (void *)objp);
+	__cache_free(c, (void *)objp, 0);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(kfree);
-- 
cgit v0.10.2


From 663d440eaa496db903cc58be04b9b602ba45e43b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:33 -0700
Subject: [PATCH] lockdep: annotate blkdev nesting

Teach special (recursive) locking code to the lock validator.

Effects on non-lockdep kernels:

- the introduction of the following function variants:

  extern struct block_device *open_partition_by_devnum(dev_t, unsigned);

  extern int blkdev_put_partition(struct block_device *);

  static int
  blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags);

 which on non-lockdep are the same as open_by_devnum(), blkdev_put()
 and blkdev_get().

- a subclass parameter to do_open(). [unused on non-lockdep]

- a subclass parameter to __blkdev_put(), which is a new internal
  function for the main blkdev_put*() functions. [parameter unused
  on non-lockdep kernels, except for two sanity check WARN_ON()s]

these functions carry no semantical difference - they only express
object dependencies towards the lockdep subsystem.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 2fe32c2..e4e1613 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1404,7 +1404,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
 	struct block_device *bdev;
 	char b[BDEVNAME_SIZE];
 
-	bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+	bdev = open_partition_by_devnum(dev, FMODE_READ|FMODE_WRITE);
 	if (IS_ERR(bdev)) {
 		printk(KERN_ERR "md: could not open %s.\n",
 			__bdevname(dev, b));
@@ -1414,7 +1414,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
 	if (err) {
 		printk(KERN_ERR "md: could not bd_claim %s.\n",
 			bdevname(bdev, b));
-		blkdev_put(bdev);
+		blkdev_put_partition(bdev);
 		return err;
 	}
 	rdev->bdev = bdev;
@@ -1428,7 +1428,7 @@ static void unlock_rdev(mdk_rdev_t *rdev)
 	if (!bdev)
 		MD_BUG();
 	bd_release(bdev);
-	blkdev_put(bdev);
+	blkdev_put_partition(bdev);
 }
 
 void md_autodetect_dev(dev_t dev);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9633a49..3753457 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -739,7 +739,7 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
 	if (!bo)
 		return -ENOMEM;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
 	res = bd_claim(bdev, holder);
 	if (res || !add_bd_holder(bdev, bo))
 		free_bd_holder(bo);
@@ -764,7 +764,7 @@ static void bd_release_from_kobject(struct block_device *bdev,
 	if (!kobj)
 		return;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
 	bd_release(bdev);
 	if ((bo = del_bd_holder(bdev, kobj)))
 		free_bd_holder(bo);
@@ -822,6 +822,22 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode)
 
 EXPORT_SYMBOL(open_by_devnum);
 
+static int
+blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags);
+
+struct block_device *open_partition_by_devnum(dev_t dev, unsigned mode)
+{
+	struct block_device *bdev = bdget(dev);
+	int err = -ENOMEM;
+	int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY;
+	if (bdev)
+		err = blkdev_get_partition(bdev, mode, flags);
+	return err ? ERR_PTR(err) : bdev;
+}
+
+EXPORT_SYMBOL(open_partition_by_devnum);
+
+
 /*
  * This routine checks whether a removable media has been changed,
  * and invalidates all buffer-cache-entries in that case. This
@@ -868,7 +884,11 @@ void bd_set_size(struct block_device *bdev, loff_t size)
 }
 EXPORT_SYMBOL(bd_set_size);
 
-static int do_open(struct block_device *bdev, struct file *file)
+static int
+blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags);
+
+static int
+do_open(struct block_device *bdev, struct file *file, unsigned int subclass)
 {
 	struct module *owner = NULL;
 	struct gendisk *disk;
@@ -885,7 +905,8 @@ static int do_open(struct block_device *bdev, struct file *file)
 	}
 	owner = disk->fops->owner;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, subclass);
+
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
 		bdev->bd_contains = bdev;
@@ -912,11 +933,11 @@ static int do_open(struct block_device *bdev, struct file *file)
 			ret = -ENOMEM;
 			if (!whole)
 				goto out_first;
-			ret = blkdev_get(whole, file->f_mode, file->f_flags);
+			ret = blkdev_get_whole(whole, file->f_mode, file->f_flags);
 			if (ret)
 				goto out_first;
 			bdev->bd_contains = whole;
-			mutex_lock(&whole->bd_mutex);
+			mutex_lock_nested(&whole->bd_mutex, BD_MUTEX_WHOLE);
 			whole->bd_part_count++;
 			p = disk->part[part - 1];
 			bdev->bd_inode->i_data.backing_dev_info =
@@ -944,7 +965,8 @@ static int do_open(struct block_device *bdev, struct file *file)
 			if (bdev->bd_invalidated)
 				rescan_partitions(bdev->bd_disk, bdev);
 		} else {
-			mutex_lock(&bdev->bd_contains->bd_mutex);
+			mutex_lock_nested(&bdev->bd_contains->bd_mutex,
+					  BD_MUTEX_PARTITION);
 			bdev->bd_contains->bd_part_count++;
 			mutex_unlock(&bdev->bd_contains->bd_mutex);
 		}
@@ -985,11 +1007,49 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags)
 	fake_file.f_dentry = &fake_dentry;
 	fake_dentry.d_inode = bdev->bd_inode;
 
-	return do_open(bdev, &fake_file);
+	return do_open(bdev, &fake_file, BD_MUTEX_NORMAL);
 }
 
 EXPORT_SYMBOL(blkdev_get);
 
+static int
+blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags)
+{
+	/*
+	 * This crockload is due to bad choice of ->open() type.
+	 * It will go away.
+	 * For now, block device ->open() routine must _not_
+	 * examine anything in 'inode' argument except ->i_rdev.
+	 */
+	struct file fake_file = {};
+	struct dentry fake_dentry = {};
+	fake_file.f_mode = mode;
+	fake_file.f_flags = flags;
+	fake_file.f_dentry = &fake_dentry;
+	fake_dentry.d_inode = bdev->bd_inode;
+
+	return do_open(bdev, &fake_file, BD_MUTEX_WHOLE);
+}
+
+static int
+blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags)
+{
+	/*
+	 * This crockload is due to bad choice of ->open() type.
+	 * It will go away.
+	 * For now, block device ->open() routine must _not_
+	 * examine anything in 'inode' argument except ->i_rdev.
+	 */
+	struct file fake_file = {};
+	struct dentry fake_dentry = {};
+	fake_file.f_mode = mode;
+	fake_file.f_flags = flags;
+	fake_file.f_dentry = &fake_dentry;
+	fake_dentry.d_inode = bdev->bd_inode;
+
+	return do_open(bdev, &fake_file, BD_MUTEX_PARTITION);
+}
+
 static int blkdev_open(struct inode * inode, struct file * filp)
 {
 	struct block_device *bdev;
@@ -1005,7 +1065,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 
 	bdev = bd_acquire(inode);
 
-	res = do_open(bdev, filp);
+	res = do_open(bdev, filp, BD_MUTEX_NORMAL);
 	if (res)
 		return res;
 
@@ -1019,13 +1079,13 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	return res;
 }
 
-int blkdev_put(struct block_device *bdev)
+static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
 {
 	int ret = 0;
 	struct inode *bd_inode = bdev->bd_inode;
 	struct gendisk *disk = bdev->bd_disk;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, subclass);
 	lock_kernel();
 	if (!--bdev->bd_openers) {
 		sync_blockdev(bdev);
@@ -1035,7 +1095,8 @@ int blkdev_put(struct block_device *bdev)
 		if (disk->fops->release)
 			ret = disk->fops->release(bd_inode, NULL);
 	} else {
-		mutex_lock(&bdev->bd_contains->bd_mutex);
+		mutex_lock_nested(&bdev->bd_contains->bd_mutex,
+				  subclass + 1);
 		bdev->bd_contains->bd_part_count--;
 		mutex_unlock(&bdev->bd_contains->bd_mutex);
 	}
@@ -1051,9 +1112,8 @@ int blkdev_put(struct block_device *bdev)
 		}
 		bdev->bd_disk = NULL;
 		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
-		if (bdev != bdev->bd_contains) {
-			blkdev_put(bdev->bd_contains);
-		}
+		if (bdev != bdev->bd_contains)
+			__blkdev_put(bdev->bd_contains, subclass + 1);
 		bdev->bd_contains = NULL;
 	}
 	unlock_kernel();
@@ -1062,8 +1122,20 @@ int blkdev_put(struct block_device *bdev)
 	return ret;
 }
 
+int blkdev_put(struct block_device *bdev)
+{
+	return __blkdev_put(bdev, BD_MUTEX_NORMAL);
+}
+
 EXPORT_SYMBOL(blkdev_put);
 
+int blkdev_put_partition(struct block_device *bdev)
+{
+	return __blkdev_put(bdev, BD_MUTEX_PARTITION);
+}
+
+EXPORT_SYMBOL(blkdev_put_partition);
+
 static int blkdev_close(struct inode * inode, struct file * filp)
 {
 	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e26de6805..134b320 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -436,6 +436,21 @@ struct block_device {
 };
 
 /*
+ * bdev->bd_mutex nesting subclasses for the lock validator:
+ *
+ * 0: normal
+ * 1: 'whole'
+ * 2: 'partition'
+ */
+enum bdev_bd_mutex_lock_class
+{
+	BD_MUTEX_NORMAL,
+	BD_MUTEX_WHOLE,
+	BD_MUTEX_PARTITION
+};
+
+
+/*
  * Radix-tree tags, for tagging dirty and writeback pages within the pagecache
  * radix trees
  */
@@ -1425,6 +1440,7 @@ extern void bd_set_size(struct block_device *, loff_t size);
 extern void bd_forget(struct inode *inode);
 extern void bdput(struct block_device *);
 extern struct block_device *open_by_devnum(dev_t, unsigned);
+extern struct block_device *open_partition_by_devnum(dev_t, unsigned);
 extern const struct file_operations def_blk_fops;
 extern const struct address_space_operations def_blk_aops;
 extern const struct file_operations def_chr_fops;
@@ -1435,6 +1451,7 @@ extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long);
 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
 extern int blkdev_get(struct block_device *, mode_t, unsigned);
 extern int blkdev_put(struct block_device *);
+extern int blkdev_put_partition(struct block_device *);
 extern int bd_claim(struct block_device *, void *);
 extern void bd_release(struct block_device *);
 #ifdef CONFIG_SYSFS
-- 
cgit v0.10.2


From 5dd8d1e9eb8b51041505966fe96d081ecbe86efe Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 3 Jul 2006 00:25:33 -0700
Subject: [PATCH] lockdep: annotate vlan net device as being a special class

vlan network devices have devices nesting below it, and are a special
"super class" of normal network devices; split their locks off into a
separate class since they always nest.

[deweerdt@free.fr: fix possible null-pointer deref]
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Frederik Deweerdt <frederik.deweerdt@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 3948949..458031b 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -364,6 +364,14 @@ static void vlan_transfer_operstate(const struct net_device *dev, struct net_dev
 	}
 }
 
+/*
+ * vlan network devices have devices nesting below it, and are a special
+ * "super class" of normal network devices; split their locks off into a
+ * separate class since they always nest.
+ */
+static struct lock_class_key vlan_netdev_xmit_lock_key;
+
+
 /*  Attach a VLAN device to a mac address (ie Ethernet Card).
  *  Returns the device that was created, or NULL if there was
  *  an error of some kind.
@@ -460,6 +468,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
 		    
 	new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name,
 			       vlan_setup);
+
 	if (new_dev == NULL)
 		goto out_unlock;
 
@@ -518,6 +527,8 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
 	if (register_netdevice(new_dev))
 		goto out_free_newdev;
 
+	lockdep_set_class(&new_dev->_xmit_lock, &vlan_netdev_xmit_lock_key);
+
 	new_dev->iflink = real_dev->ifindex;
 	vlan_transfer_operstate(real_dev, new_dev);
 	linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */
-- 
cgit v0.10.2


From 0afffc723c8041a005134099847ac2a2fd0316a0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:35 -0700
Subject: [PATCH] lockdep: annotate on-stack completions, mmc

lockdep needs to have the waitqueue lock initialized for on-stack
waitqueues implicitly initialized by DECLARE_COMPLETION().

Annotate mmc_wait_for_req()'s on-stack completion accordingly.

Has no effect on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c
index 247ff2f..33525bd 100644
--- a/drivers/mmc/mmc.c
+++ b/drivers/mmc/mmc.c
@@ -128,7 +128,7 @@ static void mmc_wait_done(struct mmc_request *mrq)
 
 int mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq)
 {
-	DECLARE_COMPLETION(complete);
+	DECLARE_COMPLETION_ONSTACK(complete);
 
 	mrq->done_data = &complete;
 	mrq->done = mmc_wait_done;
-- 
cgit v0.10.2


From a5b5bb9a053a973c23b867738c074acb3e80c0a0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:35 -0700
Subject: [PATCH] lockdep: annotate sk_locks

Teach sk_lock semantics to the lock validator.  In the softirq path the
slock has mutex_trylock()+mutex_unlock() semantics, in the process context
sock_lock() case it has mutex_lock()/mutex_unlock() semantics.

Thus we treat sock_owned_by_user() flagged areas as an exclusion area too,
not just those areas covered by a held sk_lock.slock.

Effect on non-lockdep kernels: minimal, sk_lock_sock_init() has been turned
into an inline function.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/net/sock.h b/include/net/sock.h
index 0969fb6..324b3ea 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -44,6 +44,7 @@
 #include <linux/timer.h>
 #include <linux/cache.h>
 #include <linux/module.h>
+#include <linux/lockdep.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>	/* struct sk_buff */
 #include <linux/security.h>
@@ -78,18 +79,17 @@ typedef struct {
 	spinlock_t		slock;
 	struct sock_iocb	*owner;
 	wait_queue_head_t	wq;
+	/*
+	 * We express the mutex-alike socket_lock semantics
+	 * to the lock validator by explicitly managing
+	 * the slock as a lock variant (in addition to
+	 * the slock itself):
+	 */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
 } socket_lock_t;
 
-extern struct lock_class_key af_family_keys[AF_MAX];
-
-#define sock_lock_init(__sk) \
-do {	spin_lock_init(&((__sk)->sk_lock.slock)); \
-	lockdep_set_class(&(__sk)->sk_lock.slock, \
-			  af_family_keys + (__sk)->sk_family); \
-	(__sk)->sk_lock.owner = NULL; \
-	init_waitqueue_head(&((__sk)->sk_lock.wq)); \
-} while(0)
-
 struct sock;
 struct proto;
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 0b4d5d2..51fcfbc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -133,7 +133,42 @@
  * Each address family might have different locking rules, so we have
  * one slock key per address family:
  */
-struct lock_class_key af_family_keys[AF_MAX];
+static struct lock_class_key af_family_keys[AF_MAX];
+static struct lock_class_key af_family_slock_keys[AF_MAX];
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+/*
+ * Make lock validator output more readable. (we pre-construct these
+ * strings build-time, so that runtime initialization of socket
+ * locks is fast):
+ */
+static const char *af_family_key_strings[AF_MAX+1] = {
+  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
+  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
+  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
+  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
+  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
+  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
+  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
+  "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
+  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
+  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
+  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX"
+};
+static const char *af_family_slock_key_strings[AF_MAX+1] = {
+  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
+  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
+  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
+  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
+  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
+  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
+  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
+  "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
+  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
+  "slock-27"       , "slock-28"          , "slock-29"          ,
+  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_MAX"
+};
+#endif
 
 /*
  * sk_callback_lock locking rules are per-address-family,
@@ -249,9 +284,16 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb)
 	skb->dev = NULL;
 
 	bh_lock_sock(sk);
-	if (!sock_owned_by_user(sk))
+	if (!sock_owned_by_user(sk)) {
+		/*
+		 * trylock + unlock semantics:
+		 */
+		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
+
 		rc = sk->sk_backlog_rcv(sk, skb);
-	else
+
+		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+	} else
 		sk_add_backlog(sk, skb);
 	bh_unlock_sock(sk);
 out:
@@ -761,6 +803,33 @@ lenout:
   	return 0;
 }
 
+/*
+ * Initialize an sk_lock.
+ *
+ * (We also register the sk_lock with the lock validator.)
+ */
+static void inline sock_lock_init(struct sock *sk)
+{
+	spin_lock_init(&sk->sk_lock.slock);
+	sk->sk_lock.owner = NULL;
+	init_waitqueue_head(&sk->sk_lock.wq);
+	/*
+	 * Make sure we are not reinitializing a held lock:
+	 */
+	debug_check_no_locks_freed((void *)&sk->sk_lock, sizeof(sk->sk_lock));
+
+	/*
+	 * Mark both the sk_lock and the sk_lock.slock as a
+	 * per-address-family lock class:
+	 */
+	lockdep_set_class_and_name(&sk->sk_lock.slock,
+				   af_family_slock_keys + sk->sk_family,
+				   af_family_slock_key_strings[sk->sk_family]);
+	lockdep_init_map(&sk->sk_lock.dep_map,
+			 af_family_key_strings[sk->sk_family],
+			 af_family_keys + sk->sk_family);
+}
+
 /**
  *	sk_alloc - All socket objects are allocated here
  *	@family: protocol family
@@ -1465,24 +1534,34 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 void fastcall lock_sock(struct sock *sk)
 {
 	might_sleep();
-	spin_lock_bh(&(sk->sk_lock.slock));
+	spin_lock_bh(&sk->sk_lock.slock);
 	if (sk->sk_lock.owner)
 		__lock_sock(sk);
 	sk->sk_lock.owner = (void *)1;
-	spin_unlock_bh(&(sk->sk_lock.slock));
+	spin_unlock(&sk->sk_lock.slock);
+	/*
+	 * The sk_lock has mutex_lock() semantics here:
+	 */
+	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+	local_bh_enable();
 }
 
 EXPORT_SYMBOL(lock_sock);
 
 void fastcall release_sock(struct sock *sk)
 {
-	spin_lock_bh(&(sk->sk_lock.slock));
+	/*
+	 * The sk_lock has mutex_unlock() semantics:
+	 */
+	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
+
+	spin_lock_bh(&sk->sk_lock.slock);
 	if (sk->sk_backlog.tail)
 		__release_sock(sk);
 	sk->sk_lock.owner = NULL;
-        if (waitqueue_active(&(sk->sk_lock.wq)))
-		wake_up(&(sk->sk_lock.wq));
-	spin_unlock_bh(&(sk->sk_lock.slock));
+	if (waitqueue_active(&sk->sk_lock.wq))
+		wake_up(&sk->sk_lock.wq);
+	spin_unlock_bh(&sk->sk_lock.slock);
 }
 EXPORT_SYMBOL(release_sock);
 
-- 
cgit v0.10.2


From cd11acdd8542cb0c0fa7cd86590b1ba79d7e263a Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Mon, 3 Jul 2006 00:25:36 -0700
Subject: [PATCH] lockdep: annotate hostap netdev ->xmit_lock

On Fri, 2006-06-30 at 15:45 -0700, Miles Lane wrote:
> Okay, I rebuilt my kernel with your combo patch applied.
> Then, I inserted my US Robotics USR2210 PCMCIA wifi card,
> ran "pccardutil eject", popped out the card and then inserted
> a Compaq iPaq wifi card.  This triggered the following.
>
> [ INFO: possible circular locking dependency detected ]
> -------------------------------------------------------
> syslogd/1886 is trying to acquire lock:
>  (&dev->queue_lock){-+..}, at: [<c11a50b5>] dev_queue_xmit+0x120/0x24b
>
> but task is already holding lock:
>  (&dev->_xmit_lock){-+..}, at: [<c11a5118>] dev_queue_xmit+0x183/0x24b
>
> which lock already depends on the new lock.

ok this appears to be hostap playing games... it has 2 network devices
for one piece of hardware and one calls the other via the networking
layer; there is thankfully a natural ordering between the two, so just
making the slave one a separate type ought to make this work.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: "John W. Linville" <linville@tuxdriver.com>
Cc: Jeff Garzik <jeff@garzik.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c
index b764cfd..dafaa5f 100644
--- a/drivers/net/wireless/hostap/hostap_hw.c
+++ b/drivers/net/wireless/hostap/hostap_hw.c
@@ -3095,6 +3095,14 @@ static void prism2_clear_set_tim_queue(local_info_t *local)
 }
 
 
+/*
+ * HostAP uses two layers of net devices, where the inner
+ * layer gets called all the time from the outer layer.
+ * This is a natural nesting, which needs a split lock type.
+ */
+static struct lock_class_key hostap_netdev_xmit_lock_key;
+
+
 static struct net_device *
 prism2_init_local_data(struct prism2_helper_functions *funcs, int card_idx,
 		       struct device *sdev)
@@ -3259,6 +3267,8 @@ while (0)
 	SET_NETDEV_DEV(dev, sdev);
 	if (ret >= 0)
 		ret = register_netdevice(dev);
+
+	lockdep_set_class(&dev->_xmit_lock, &hostap_netdev_xmit_lock_key);
 	rtnl_unlock();
 	if (ret < 0) {
 		printk(KERN_WARNING "%s: register netdevice failed!\n",
-- 
cgit v0.10.2


From 479ceddd7baf3b387665c4d69a7398918b201ad0 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 3 Jul 2006 00:25:37 -0700
Subject: [PATCH] forcedeth: typecast cleanup

Someone went nuts in there.

Cc: Ayaz Abdulla <aabdulla@nvidia.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jeff Garzik <jeff@garzik.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 3c90003..92740c6 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -2761,22 +2761,22 @@ static void nv_do_nic_poll(unsigned long data)
 	pci_push(base);
 
 	if (!using_multi_irqs(dev)) {
-		nv_nic_irq((int) 0, (void *) data, (struct pt_regs *) NULL);
+		nv_nic_irq(0, dev, NULL);
 		if (np->msi_flags & NV_MSI_X_ENABLED)
 			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
 		else
 			enable_irq(dev->irq);
 	} else {
 		if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) {
-			nv_nic_irq_rx((int) 0, (void *) data, (struct pt_regs *) NULL);
+			nv_nic_irq_rx(0, dev, NULL);
 			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
 		}
 		if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) {
-			nv_nic_irq_tx((int) 0, (void *) data, (struct pt_regs *) NULL);
+			nv_nic_irq_tx(0, dev, NULL);
 			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
 		}
 		if (np->nic_poll_irq & NVREG_IRQ_OTHER) {
-			nv_nic_irq_other((int) 0, (void *) data, (struct pt_regs *) NULL);
+			nv_nic_irq_other(0, dev, NULL);
 			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
 		}
 	}
-- 
cgit v0.10.2


From 8688cfcebf09b84385b5e2c461ae08fcde8a5d18 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:39 -0700
Subject: [PATCH] lockdep: annotate forcedeth.c disable_irq()

nv_do_nic_poll() is called from timer softirqs, which has interrupts enabled,
but np->lock might also be taken by some other interrupt context.

The driver does disable_irq() to get around this problem, so annotate the
disable_irq()/enable_irq() calls for lockdep.

Has no effect on non-lockdep kernels.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Ayaz Abdulla <aabdulla@nvidia.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Jeff Garzik <jeff@garzik.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 92740c6..037d870 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -2735,21 +2735,21 @@ static void nv_do_nic_poll(unsigned long data)
 
 	if (!using_multi_irqs(dev)) {
 		if (np->msi_flags & NV_MSI_X_ENABLED)
-			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
+			disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
 		else
-			disable_irq(dev->irq);
+			disable_irq_lockdep(dev->irq);
 		mask = np->irqmask;
 	} else {
 		if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) {
-			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
+			disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
 			mask |= NVREG_IRQ_RX_ALL;
 		}
 		if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) {
-			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
+			disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
 			mask |= NVREG_IRQ_TX_ALL;
 		}
 		if (np->nic_poll_irq & NVREG_IRQ_OTHER) {
-			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
+			disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
 			mask |= NVREG_IRQ_OTHER;
 		}
 	}
@@ -2763,21 +2763,21 @@ static void nv_do_nic_poll(unsigned long data)
 	if (!using_multi_irqs(dev)) {
 		nv_nic_irq(0, dev, NULL);
 		if (np->msi_flags & NV_MSI_X_ENABLED)
-			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
+			enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
 		else
-			enable_irq(dev->irq);
+			enable_irq_lockdep(dev->irq);
 	} else {
 		if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) {
 			nv_nic_irq_rx(0, dev, NULL);
-			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
+			enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
 		}
 		if (np->nic_poll_irq & NVREG_IRQ_TX_ALL) {
 			nv_nic_irq_tx(0, dev, NULL);
-			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
+			enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
 		}
 		if (np->nic_poll_irq & NVREG_IRQ_OTHER) {
 			nv_nic_irq_other(0, dev, NULL);
-			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
+			enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_OTHER].vector);
 		}
 	}
 }
-- 
cgit v0.10.2


From 829035fd709119d9def124a6d40b94d317573e6f Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 3 Jul 2006 00:25:40 -0700
Subject: [PATCH] lockdep: irqtrace subsystem, move account_system_vtime()
 calls into kernel/softirq.c

At the moment, powerpc and s390 have their own versions of do_softirq which
include local_bh_disable() and __local_bh_enable() calls.  They end up
calling __do_softirq (in kernel/softirq.c) which also does
local_bh_disable/enable.

Apparently the two levels of disable/enable trigger a warning from some
validation code that Ingo is working on, and he would like to see the outer
level removed.  But to do that, we have to move the account_system_vtime
calls that are currently in the arch do_softirq() implementations for
powerpc and s390 into the generic __do_softirq() (this is a no-op for other
archs because account_system_vtime is defined to be an empty inline
function on all other archs).  This patch does that.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 027728b..e3774f6 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -424,13 +424,8 @@ void do_softirq(void)
 
 	local_irq_save(flags);
 
-	if (local_softirq_pending()) {
-		account_system_vtime(current);
-		local_bh_disable();
+	if (local_softirq_pending())
 		do_softirq_onstack();
-		account_system_vtime(current);
-		_local_bh_enable();
-	}
 
 	local_irq_restore(flags);
 }
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index e347190..1eef509 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -69,10 +69,6 @@ asmlinkage void do_softirq(void)
 
 	local_irq_save(flags);
 
-	account_system_vtime(current);
-
-	local_bh_disable();
-
 	if (local_softirq_pending()) {
 		/* Get current stack pointer. */
 		asm volatile("la %0,0(15)" : "=a" (old));
@@ -95,10 +91,6 @@ asmlinkage void do_softirq(void)
 			__do_softirq();
 	}
 
-	account_system_vtime(current);
-
-	_local_bh_enable();
-
 	local_irq_restore(flags);
 }
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 584609b..215541e 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -193,6 +193,8 @@ asmlinkage void __do_softirq(void)
 	int cpu;
 
 	pending = local_softirq_pending();
+	account_system_vtime(current);
+
 	__local_bh_disable((unsigned long)__builtin_return_address(0));
 	trace_softirq_enter();
 
@@ -224,6 +226,8 @@ restart:
 		wakeup_softirqd();
 
 	trace_softirq_exit();
+
+	account_system_vtime(current);
 	_local_bh_enable();
 }
 
-- 
cgit v0.10.2


From 48f24c4da1ee7f3f22289cb85e8b8a73e4df4db5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:40 -0700
Subject: [PATCH] sched: clean up fallout of recent changes

Clean up some of the impact of recent (and not so recent) scheduler
changes:

 - turning macros into nice inline functions
 - sanitizing and unifying variable definitions
 - whitespace, style consistency, 80-lines, comment correctness, spelling
   and curly braces police

Due to the macro hell and variable placement simplifications there's even 26
bytes of .text saved:

   text    data     bss     dec     hex filename
  25510    4153     192   29855    749f sched.o.before
  25484    4153     192   29829    7485 sched.o.after

[akpm@osdl.org: build fix]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/sched.c b/kernel/sched.c
index f4778d1..b032614 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -184,9 +184,6 @@ static inline unsigned int task_timeslice(task_t *p)
 	return static_prio_timeslice(p->static_prio);
 }
 
-#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran)	\
-				< (long long) (sd)->cache_hot_time)
-
 /*
  * These are the runqueue data structures:
  */
@@ -278,8 +275,8 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);
  * The domain tree of any CPU may only be accessed from within
  * preempt-disabled sections.
  */
-#define for_each_domain(cpu, domain) \
-for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent)
+#define for_each_domain(cpu, __sd) \
+	for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
 
 #define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
 #define this_rq()		(&__get_cpu_var(runqueues))
@@ -1039,6 +1036,7 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
 	req->task = p;
 	req->dest_cpu = dest_cpu;
 	list_add(&req->list, &rq->migration_queue);
+
 	return 1;
 }
 
@@ -1135,7 +1133,7 @@ static inline unsigned long cpu_avg_load_per_task(int cpu)
 	runqueue_t *rq = cpu_rq(cpu);
 	unsigned long n = rq->nr_running;
 
-	return n ?  rq->raw_weighted_load / n : SCHED_LOAD_SCALE;
+	return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE;
 }
 
 /*
@@ -1494,7 +1492,6 @@ int fastcall wake_up_process(task_t *p)
 	return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
 				 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
 }
-
 EXPORT_SYMBOL(wake_up_process);
 
 int fastcall wake_up_state(task_t *p, unsigned int state)
@@ -1867,6 +1864,15 @@ unsigned long nr_active(void)
 #ifdef CONFIG_SMP
 
 /*
+ * Is this task likely cache-hot:
+ */
+static inline int
+task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd)
+{
+	return (long long)(now - p->last_ran) < (long long)sd->cache_hot_time;
+}
+
+/*
  * double_rq_lock - safely lock two runqueues
  *
  * Note this does not disable interrupts like task_rq_lock,
@@ -2029,6 +2035,7 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
 }
 
 #define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio)
+
 /*
  * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
  * load from busiest to this_rq, as part of a balancing operation within
@@ -2041,11 +2048,10 @@ static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
 		      struct sched_domain *sd, enum idle_type idle,
 		      int *all_pinned)
 {
+	int idx, pulled = 0, pinned = 0, this_best_prio, best_prio,
+	    best_prio_seen, skip_for_load;
 	prio_array_t *array, *dst_array;
 	struct list_head *head, *curr;
-	int idx, pulled = 0, pinned = 0, this_best_prio, busiest_best_prio;
-	int busiest_best_prio_seen;
-	int skip_for_load; /* skip the task based on weighted load issues */
 	long rem_load_move;
 	task_t *tmp;
 
@@ -2055,15 +2061,15 @@ static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
 	rem_load_move = max_load_move;
 	pinned = 1;
 	this_best_prio = rq_best_prio(this_rq);
-	busiest_best_prio = rq_best_prio(busiest);
+	best_prio = rq_best_prio(busiest);
 	/*
 	 * Enable handling of the case where there is more than one task
 	 * with the best priority.   If the current running task is one
-	 * of those with prio==busiest_best_prio we know it won't be moved
+	 * of those with prio==best_prio we know it won't be moved
 	 * and therefore it's safe to override the skip (based on load) of
 	 * any task we find with that prio.
 	 */
-	busiest_best_prio_seen = busiest_best_prio == busiest->curr->prio;
+	best_prio_seen = best_prio == busiest->curr->prio;
 
 	/*
 	 * We first consider expired tasks. Those will likely not be
@@ -2110,10 +2116,11 @@ skip_queue:
 	 */
 	skip_for_load = tmp->load_weight > rem_load_move;
 	if (skip_for_load && idx < this_best_prio)
-		skip_for_load = !busiest_best_prio_seen && idx == busiest_best_prio;
+		skip_for_load = !best_prio_seen && idx == best_prio;
 	if (skip_for_load ||
 	    !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) {
-		busiest_best_prio_seen |= idx == busiest_best_prio;
+
+		best_prio_seen |= idx == best_prio;
 		if (curr != head)
 			goto skip_queue;
 		idx++;
@@ -2156,8 +2163,8 @@ out:
 
 /*
  * find_busiest_group finds and returns the busiest CPU group within the
- * domain. It calculates and returns the amount of weighted load which should be
- * moved to restore balance via the imbalance parameter.
+ * domain. It calculates and returns the amount of weighted load which
+ * should be moved to restore balance via the imbalance parameter.
  */
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
@@ -2279,7 +2286,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
  		 * capacity but still has some space to pick up some load
  		 * from other group and save more power
  		 */
- 		if (sum_nr_running <= group_capacity - 1)
+ 		if (sum_nr_running <= group_capacity - 1) {
  			if (sum_nr_running > leader_nr_running ||
  			    (sum_nr_running == leader_nr_running &&
  			     first_cpu(group->cpumask) >
@@ -2287,7 +2294,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
  				group_leader = group;
  				leader_nr_running = sum_nr_running;
  			}
-
+		}
 group_next:
 #endif
 		group = group->next;
@@ -2342,8 +2349,7 @@ group_next:
 	 * moved
 	 */
 	if (*imbalance < busiest_load_per_task) {
-		unsigned long pwr_now, pwr_move;
-		unsigned long tmp;
+		unsigned long tmp, pwr_now, pwr_move;
 		unsigned int imbn;
 
 small_imbalance:
@@ -2415,22 +2421,23 @@ ret:
 /*
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
-static runqueue_t *find_busiest_queue(struct sched_group *group,
-	enum idle_type idle, unsigned long imbalance)
+static runqueue_t *
+find_busiest_queue(struct sched_group *group, enum idle_type idle,
+		   unsigned long imbalance)
 {
+	runqueue_t *busiest = NULL, *rq;
 	unsigned long max_load = 0;
-	runqueue_t *busiest = NULL, *rqi;
 	int i;
 
 	for_each_cpu_mask(i, group->cpumask) {
-		rqi = cpu_rq(i);
+		rq = cpu_rq(i);
 
-		if (rqi->nr_running == 1 && rqi->raw_weighted_load > imbalance)
+		if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
 			continue;
 
-		if (rqi->raw_weighted_load > max_load) {
-			max_load = rqi->raw_weighted_load;
-			busiest = rqi;
+		if (rq->raw_weighted_load > max_load) {
+			max_load = rq->raw_weighted_load;
+			busiest = rq;
 		}
 	}
 
@@ -2443,7 +2450,11 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
  */
 #define MAX_PINNED_INTERVAL	512
 
-#define minus_1_or_zero(n) ((n) > 0 ? (n) - 1 : 0)
+static inline unsigned long minus_1_or_zero(unsigned long n)
+{
+	return n > 0 ? n - 1 : 0;
+}
+
 /*
  * Check this_cpu to ensure it is balanced within domain. Attempt to move
  * tasks if there is an imbalance.
@@ -2453,12 +2464,10 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
 static int load_balance(int this_cpu, runqueue_t *this_rq,
 			struct sched_domain *sd, enum idle_type idle)
 {
+	int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
 	struct sched_group *group;
-	runqueue_t *busiest;
 	unsigned long imbalance;
-	int nr_moved, all_pinned = 0;
-	int active_balance = 0;
-	int sd_idle = 0;
+	runqueue_t *busiest;
 
 	if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
 	    !sched_smt_power_savings)
@@ -2492,8 +2501,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
 		 */
 		double_rq_lock(this_rq, busiest);
 		nr_moved = move_tasks(this_rq, this_cpu, busiest,
-					minus_1_or_zero(busiest->nr_running),
-					imbalance, sd, idle, &all_pinned);
+				      minus_1_or_zero(busiest->nr_running),
+				      imbalance, sd, idle, &all_pinned);
 		double_rq_unlock(this_rq, busiest);
 
 		/* All tasks on this runqueue were pinned by CPU affinity */
@@ -2566,7 +2575,8 @@ out_one_pinned:
 			(sd->balance_interval < sd->max_interval))
 		sd->balance_interval *= 2;
 
-	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
+	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
+			!sched_smt_power_savings)
 		return -1;
 	return 0;
 }
@@ -2578,8 +2588,8 @@ out_one_pinned:
  * Called from schedule when this_rq is about to become idle (NEWLY_IDLE).
  * this_rq is locked.
  */
-static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
-				struct sched_domain *sd)
+static int
+load_balance_newidle(int this_cpu, runqueue_t *this_rq, struct sched_domain *sd)
 {
 	struct sched_group *group;
 	runqueue_t *busiest = NULL;
@@ -2628,9 +2638,11 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
 
 out_balanced:
 	schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
-	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
+	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
+					!sched_smt_power_savings)
 		return -1;
 	sd->nr_balance_failed = 0;
+
 	return 0;
 }
 
@@ -2644,10 +2656,9 @@ static void idle_balance(int this_cpu, runqueue_t *this_rq)
 
 	for_each_domain(this_cpu, sd) {
 		if (sd->flags & SD_BALANCE_NEWIDLE) {
-			if (load_balance_newidle(this_cpu, this_rq, sd)) {
-				/* We've pulled tasks over so stop searching */
+			/* If we've pulled tasks over stop searching: */
+			if (load_balance_newidle(this_cpu, this_rq, sd))
 				break;
-			}
 		}
 	}
 }
@@ -2666,8 +2677,8 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
 	runqueue_t *target_rq;
 	int target_cpu = busiest_rq->push_cpu;
 
+	/* Is there any task to move? */
 	if (busiest_rq->nr_running <= 1)
-		/* no task to move */
 		return;
 
 	target_rq = cpu_rq(target_cpu);
@@ -2685,21 +2696,20 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
 	/* Search for an sd spanning us and the target CPU. */
 	for_each_domain(target_cpu, sd) {
 		if ((sd->flags & SD_LOAD_BALANCE) &&
-			cpu_isset(busiest_cpu, sd->span))
+		    cpu_isset(busiest_cpu, sd->span))
 				break;
 	}
 
-	if (unlikely(sd == NULL))
-		goto out;
-
-	schedstat_inc(sd, alb_cnt);
+	if (likely(sd)) {
+		schedstat_inc(sd, alb_cnt);
 
-	if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
-			RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE, NULL))
-		schedstat_inc(sd, alb_pushed);
-	else
-		schedstat_inc(sd, alb_failed);
-out:
+		if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
+			       RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE,
+			       NULL))
+			schedstat_inc(sd, alb_pushed);
+		else
+			schedstat_inc(sd, alb_failed);
+	}
 	spin_unlock(&target_rq->lock);
 }
 
@@ -2712,23 +2722,27 @@ out:
  * Balancing parameters are set up in arch_init_sched_domains.
  */
 
-/* Don't have all balancing operations going off at once */
-#define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS)
+/* Don't have all balancing operations going off at once: */
+static inline unsigned long cpu_offset(int cpu)
+{
+	return jiffies + cpu * HZ / NR_CPUS;
+}
 
-static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
-			   enum idle_type idle)
+static void
+rebalance_tick(int this_cpu, runqueue_t *this_rq, enum idle_type idle)
 {
-	unsigned long old_load, this_load;
-	unsigned long j = jiffies + CPU_OFFSET(this_cpu);
+	unsigned long this_load, interval, j = cpu_offset(this_cpu);
 	struct sched_domain *sd;
-	int i;
+	int i, scale;
 
 	this_load = this_rq->raw_weighted_load;
-	/* Update our load */
-	for (i = 0; i < 3; i++) {
-		unsigned long new_load = this_load;
-		int scale = 1 << i;
+
+	/* Update our load: */
+	for (i = 0, scale = 1; i < 3; i++, scale <<= 1) {
+		unsigned long old_load, new_load;
+
 		old_load = this_rq->cpu_load[i];
+		new_load = this_load;
 		/*
 		 * Round up the averaging division if load is increasing. This
 		 * prevents us from getting stuck on 9 if the load is 10, for
@@ -2740,8 +2754,6 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
 	}
 
 	for_each_domain(this_cpu, sd) {
-		unsigned long interval;
-
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
 
@@ -2782,6 +2794,7 @@ static inline void idle_balance(int cpu, runqueue_t *rq)
 static inline int wake_priority_sleeper(runqueue_t *rq)
 {
 	int ret = 0;
+
 #ifdef CONFIG_SCHED_SMT
 	spin_lock(&rq->lock);
 	/*
@@ -2805,25 +2818,26 @@ EXPORT_PER_CPU_SYMBOL(kstat);
  * This is called on clock ticks and on context switches.
  * Bank in p->sched_time the ns elapsed since the last tick or switch.
  */
-static inline void update_cpu_clock(task_t *p, runqueue_t *rq,
-				    unsigned long long now)
+static inline void
+update_cpu_clock(task_t *p, runqueue_t *rq, unsigned long long now)
 {
-	unsigned long long last = max(p->timestamp, rq->timestamp_last_tick);
-	p->sched_time += now - last;
+	p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick);
 }
 
 /*
  * Return current->sched_time plus any more ns on the sched_clock
  * that have not yet been banked.
  */
-unsigned long long current_sched_time(const task_t *tsk)
+unsigned long long current_sched_time(const task_t *p)
 {
 	unsigned long long ns;
 	unsigned long flags;
+
 	local_irq_save(flags);
-	ns = max(tsk->timestamp, task_rq(tsk)->timestamp_last_tick);
-	ns = tsk->sched_time + (sched_clock() - ns);
+	ns = max(p->timestamp, task_rq(p)->timestamp_last_tick);
+	ns = p->sched_time + sched_clock() - ns;
 	local_irq_restore(flags);
+
 	return ns;
 }
 
@@ -2837,11 +2851,16 @@ unsigned long long current_sched_time(const task_t *tsk)
  * increasing number of running tasks. We also ignore the interactivity
  * if a better static_prio task has expired:
  */
-#define EXPIRED_STARVING(rq) \
-	((STARVATION_LIMIT && ((rq)->expired_timestamp && \
-		(jiffies - (rq)->expired_timestamp >= \
-			STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
-			((rq)->curr->static_prio > (rq)->best_expired_prio))
+static inline int expired_starving(runqueue_t *rq)
+{
+	if (rq->curr->static_prio > rq->best_expired_prio)
+		return 1;
+	if (!STARVATION_LIMIT || !rq->expired_timestamp)
+		return 0;
+	if (jiffies - rq->expired_timestamp > STARVATION_LIMIT * rq->nr_running)
+		return 1;
+	return 0;
+}
 
 /*
  * Account user cpu time to a process.
@@ -2925,10 +2944,10 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
  */
 void scheduler_tick(void)
 {
+	unsigned long long now = sched_clock();
 	int cpu = smp_processor_id();
 	runqueue_t *rq = this_rq();
 	task_t *p = current;
-	unsigned long long now = sched_clock();
 
 	update_cpu_clock(p, rq, now);
 
@@ -2978,7 +2997,7 @@ void scheduler_tick(void)
 
 		if (!rq->expired_timestamp)
 			rq->expired_timestamp = jiffies;
-		if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
+		if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
 			enqueue_task(p, rq->expired);
 			if (p->static_prio < rq->best_expired_prio)
 				rq->best_expired_prio = p->static_prio;
@@ -3137,9 +3156,8 @@ unlock:
 static inline void wake_sleeping_dependent(int this_cpu)
 {
 }
-
-static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq,
-					task_t *p)
+static inline int
+dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p)
 {
 	return 0;
 }
@@ -3193,14 +3211,14 @@ static inline int interactive_sleep(enum sleep_type sleep_type)
  */
 asmlinkage void __sched schedule(void)
 {
-	long *switch_count;
-	task_t *prev, *next;
-	runqueue_t *rq;
-	prio_array_t *array;
 	struct list_head *queue;
 	unsigned long long now;
 	unsigned long run_time;
 	int cpu, idx, new_prio;
+	task_t *prev, *next;
+	prio_array_t *array;
+	long *switch_count;
+	runqueue_t *rq;
 
 	/*
 	 * Test if we are atomic.  Since do_exit() needs to call into
@@ -3353,7 +3371,6 @@ switch_tasks:
 	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
 		goto need_resched;
 }
-
 EXPORT_SYMBOL(schedule);
 
 #ifdef CONFIG_PREEMPT
@@ -3398,7 +3415,6 @@ need_resched:
 	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
 		goto need_resched;
 }
-
 EXPORT_SYMBOL(preempt_schedule);
 
 /*
@@ -3447,10 +3463,8 @@ need_resched:
 int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
 			  void *key)
 {
-	task_t *p = curr->private;
-	return try_to_wake_up(p, mode, sync);
+	return try_to_wake_up(curr->private, mode, sync);
 }
-
 EXPORT_SYMBOL(default_wake_function);
 
 /*
@@ -3468,13 +3482,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 	struct list_head *tmp, *next;
 
 	list_for_each_safe(tmp, next, &q->task_list) {
-		wait_queue_t *curr;
-		unsigned flags;
-		curr = list_entry(tmp, wait_queue_t, task_list);
-		flags = curr->flags;
+		wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
+		unsigned flags = curr->flags;
+
 		if (curr->func(curr, mode, sync, key) &&
-		    (flags & WQ_FLAG_EXCLUSIVE) &&
-		    !--nr_exclusive)
+				(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
 			break;
 	}
 }
@@ -3495,7 +3507,6 @@ void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
 	__wake_up_common(q, mode, nr_exclusive, 0, key);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
-
 EXPORT_SYMBOL(__wake_up);
 
 /*
@@ -3564,6 +3575,7 @@ EXPORT_SYMBOL(complete_all);
 void fastcall __sched wait_for_completion(struct completion *x)
 {
 	might_sleep();
+
 	spin_lock_irq(&x->wait.lock);
 	if (!x->done) {
 		DECLARE_WAITQUEUE(wait, current);
@@ -3708,7 +3720,6 @@ void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q)
 	schedule();
 	SLEEP_ON_TAIL
 }
-
 EXPORT_SYMBOL(interruptible_sleep_on);
 
 long fastcall __sched
@@ -3724,7 +3735,6 @@ interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
 
 	return timeout;
 }
-
 EXPORT_SYMBOL(interruptible_sleep_on_timeout);
 
 void fastcall __sched sleep_on(wait_queue_head_t *q)
@@ -3737,7 +3747,6 @@ void fastcall __sched sleep_on(wait_queue_head_t *q)
 	schedule();
 	SLEEP_ON_TAIL
 }
-
 EXPORT_SYMBOL(sleep_on);
 
 long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout)
@@ -3810,10 +3819,10 @@ void rt_mutex_setprio(task_t *p, int prio)
 
 void set_user_nice(task_t *p, long nice)
 {
+	int old_prio, delta;
 	unsigned long flags;
 	prio_array_t *array;
 	runqueue_t *rq;
-	int old_prio, delta;
 
 	if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
 		return;
@@ -3868,6 +3877,7 @@ int can_nice(const task_t *p, const int nice)
 {
 	/* convert nice value [19,-20] to rlimit style value [1,40] */
 	int nice_rlim = 20 - nice;
+
 	return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur ||
 		capable(CAP_SYS_NICE));
 }
@@ -3883,8 +3893,7 @@ int can_nice(const task_t *p, const int nice)
  */
 asmlinkage long sys_nice(int increment)
 {
-	int retval;
-	long nice;
+	long nice, retval;
 
 	/*
 	 * Setpriority might change our priority at the same moment.
@@ -3969,6 +3978,7 @@ static inline task_t *find_process_by_pid(pid_t pid)
 static void __setscheduler(struct task_struct *p, int policy, int prio)
 {
 	BUG_ON(p->array);
+
 	p->policy = policy;
 	p->rt_priority = prio;
 	p->normal_prio = normal_prio(p);
@@ -3992,8 +4002,7 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
 int sched_setscheduler(struct task_struct *p, int policy,
 		       struct sched_param *param)
 {
-	int retval;
-	int oldprio, oldpolicy = -1;
+	int retval, oldprio, oldpolicy = -1;
 	prio_array_t *array;
 	unsigned long flags;
 	runqueue_t *rq;
@@ -4495,7 +4504,6 @@ void __sched yield(void)
 	set_current_state(TASK_RUNNING);
 	sys_sched_yield();
 }
-
 EXPORT_SYMBOL(yield);
 
 /*
@@ -4513,7 +4521,6 @@ void __sched io_schedule(void)
 	schedule();
 	atomic_dec(&rq->nr_iowait);
 }
-
 EXPORT_SYMBOL(io_schedule);
 
 long __sched io_schedule_timeout(long timeout)
@@ -4615,19 +4622,22 @@ out_unlock:
 
 static inline struct task_struct *eldest_child(struct task_struct *p)
 {
-	if (list_empty(&p->children)) return NULL;
+	if (list_empty(&p->children))
+		return NULL;
 	return list_entry(p->children.next,struct task_struct,sibling);
 }
 
 static inline struct task_struct *older_sibling(struct task_struct *p)
 {
-	if (p->sibling.prev==&p->parent->children) return NULL;
+	if (p->sibling.prev==&p->parent->children)
+		return NULL;
 	return list_entry(p->sibling.prev,struct task_struct,sibling);
 }
 
 static inline struct task_struct *younger_sibling(struct task_struct *p)
 {
-	if (p->sibling.next==&p->parent->children) return NULL;
+	if (p->sibling.next==&p->parent->children)
+		return NULL;
 	return list_entry(p->sibling.next,struct task_struct,sibling);
 }
 
@@ -4786,9 +4796,9 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 int set_cpus_allowed(task_t *p, cpumask_t new_mask)
 {
 	unsigned long flags;
-	int ret = 0;
 	migration_req_t req;
 	runqueue_t *rq;
+	int ret = 0;
 
 	rq = task_rq_lock(p, &flags);
 	if (!cpus_intersects(new_mask, cpu_online_map)) {
@@ -4811,9 +4821,9 @@ int set_cpus_allowed(task_t *p, cpumask_t new_mask)
 	}
 out:
 	task_rq_unlock(rq, &flags);
+
 	return ret;
 }
-
 EXPORT_SYMBOL_GPL(set_cpus_allowed);
 
 /*
@@ -4874,8 +4884,8 @@ out:
  */
 static int migration_thread(void *data)
 {
-	runqueue_t *rq;
 	int cpu = (long)data;
+	runqueue_t *rq;
 
 	rq = cpu_rq(cpu);
 	BUG_ON(rq->migration_thread != current);
@@ -4932,7 +4942,7 @@ wait_to_die:
 
 #ifdef CONFIG_HOTPLUG_CPU
 /* Figure out where task on dead CPU should go, use force if neccessary. */
-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk)
+static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
 	runqueue_t *rq;
 	unsigned long flags;
@@ -4942,18 +4952,18 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk)
 restart:
 	/* On same node? */
 	mask = node_to_cpumask(cpu_to_node(dead_cpu));
-	cpus_and(mask, mask, tsk->cpus_allowed);
+	cpus_and(mask, mask, p->cpus_allowed);
 	dest_cpu = any_online_cpu(mask);
 
 	/* On any allowed CPU? */
 	if (dest_cpu == NR_CPUS)
-		dest_cpu = any_online_cpu(tsk->cpus_allowed);
+		dest_cpu = any_online_cpu(p->cpus_allowed);
 
 	/* No more Mr. Nice Guy. */
 	if (dest_cpu == NR_CPUS) {
-		rq = task_rq_lock(tsk, &flags);
-		cpus_setall(tsk->cpus_allowed);
-		dest_cpu = any_online_cpu(tsk->cpus_allowed);
+		rq = task_rq_lock(p, &flags);
+		cpus_setall(p->cpus_allowed);
+		dest_cpu = any_online_cpu(p->cpus_allowed);
 		task_rq_unlock(rq, &flags);
 
 		/*
@@ -4961,12 +4971,12 @@ restart:
 		 * kernel threads (both mm NULL), since they never
 		 * leave kernel.
 		 */
-		if (tsk->mm && printk_ratelimit())
+		if (p->mm && printk_ratelimit())
 			printk(KERN_INFO "process %d (%s) no "
 			       "longer affine to cpu%d\n",
-			       tsk->pid, tsk->comm, dead_cpu);
+			       p->pid, p->comm, dead_cpu);
 	}
-	if (!__migrate_task(tsk, dead_cpu, dest_cpu))
+	if (!__migrate_task(p, dead_cpu, dest_cpu))
 		goto restart;
 }
 
@@ -4993,48 +5003,51 @@ static void migrate_nr_uninterruptible(runqueue_t *rq_src)
 /* Run through task list and migrate tasks from the dead cpu. */
 static void migrate_live_tasks(int src_cpu)
 {
-	struct task_struct *tsk, *t;
+	struct task_struct *p, *t;
 
 	write_lock_irq(&tasklist_lock);
 
-	do_each_thread(t, tsk) {
-		if (tsk == current)
+	do_each_thread(t, p) {
+		if (p == current)
 			continue;
 
-		if (task_cpu(tsk) == src_cpu)
-			move_task_off_dead_cpu(src_cpu, tsk);
-	} while_each_thread(t, tsk);
+		if (task_cpu(p) == src_cpu)
+			move_task_off_dead_cpu(src_cpu, p);
+	} while_each_thread(t, p);
 
 	write_unlock_irq(&tasklist_lock);
 }
 
 /* Schedules idle task to be the next runnable task on current CPU.
  * It does so by boosting its priority to highest possible and adding it to
- * the _front_ of runqueue. Used by CPU offline code.
+ * the _front_ of the runqueue. Used by CPU offline code.
  */
 void sched_idle_next(void)
 {
-	int cpu = smp_processor_id();
-	runqueue_t *rq = this_rq();
+	int this_cpu = smp_processor_id();
+	runqueue_t *rq = cpu_rq(this_cpu);
 	struct task_struct *p = rq->idle;
 	unsigned long flags;
 
 	/* cpu has to be offline */
-	BUG_ON(cpu_online(cpu));
+	BUG_ON(cpu_online(this_cpu));
 
-	/* Strictly not necessary since rest of the CPUs are stopped by now
-	 * and interrupts disabled on current cpu.
+	/*
+	 * Strictly not necessary since rest of the CPUs are stopped by now
+	 * and interrupts disabled on the current cpu.
 	 */
 	spin_lock_irqsave(&rq->lock, flags);
 
 	__setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
-	/* Add idle task to _front_ of it's priority queue */
+
+	/* Add idle task to the _front_ of its priority queue: */
 	__activate_idle_task(p, rq);
 
 	spin_unlock_irqrestore(&rq->lock, flags);
 }
 
-/* Ensures that the idle task is using init_mm right before its cpu goes
+/*
+ * Ensures that the idle task is using init_mm right before its cpu goes
  * offline.
  */
 void idle_task_exit(void)
@@ -5048,17 +5061,17 @@ void idle_task_exit(void)
 	mmdrop(mm);
 }
 
-static void migrate_dead(unsigned int dead_cpu, task_t *tsk)
+static void migrate_dead(unsigned int dead_cpu, task_t *p)
 {
 	struct runqueue *rq = cpu_rq(dead_cpu);
 
 	/* Must be exiting, otherwise would be on tasklist. */
-	BUG_ON(tsk->exit_state != EXIT_ZOMBIE && tsk->exit_state != EXIT_DEAD);
+	BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
 
 	/* Cannot have done final schedule yet: would have vanished. */
-	BUG_ON(tsk->flags & PF_DEAD);
+	BUG_ON(p->flags & PF_DEAD);
 
-	get_task_struct(tsk);
+	get_task_struct(p);
 
 	/*
 	 * Drop lock around migration; if someone else moves it,
@@ -5066,21 +5079,22 @@ static void migrate_dead(unsigned int dead_cpu, task_t *tsk)
 	 * fine.
 	 */
 	spin_unlock_irq(&rq->lock);
-	move_task_off_dead_cpu(dead_cpu, tsk);
+	move_task_off_dead_cpu(dead_cpu, p);
 	spin_lock_irq(&rq->lock);
 
-	put_task_struct(tsk);
+	put_task_struct(p);
 }
 
 /* release_task() removes task from tasklist, so we won't find dead tasks. */
 static void migrate_dead_tasks(unsigned int dead_cpu)
 {
-	unsigned arr, i;
 	struct runqueue *rq = cpu_rq(dead_cpu);
+	unsigned int arr, i;
 
 	for (arr = 0; arr < 2; arr++) {
 		for (i = 0; i < MAX_PRIO; i++) {
 			struct list_head *list = &rq->arrays[arr].queue[i];
+
 			while (!list_empty(list))
 				migrate_dead(dead_cpu,
 					     list_entry(list->next, task_t,
@@ -5094,12 +5108,11 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
  * migration_call - callback that gets triggered when a CPU is added.
  * Here we can start up the necessary migration thread for the new CPU.
  */
-static int __cpuinit migration_call(struct notifier_block *nfb,
-			unsigned long action,
-			void *hcpu)
+static int __cpuinit
+migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
-	int cpu = (long)hcpu;
 	struct task_struct *p;
+	int cpu = (long)hcpu;
 	struct runqueue *rq;
 	unsigned long flags;
 
@@ -5116,10 +5129,12 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
 		task_rq_unlock(rq, &flags);
 		cpu_rq(cpu)->migration_thread = p;
 		break;
+
 	case CPU_ONLINE:
 		/* Strictly unneccessary, as first user will wake it. */
 		wake_up_process(cpu_rq(cpu)->migration_thread);
 		break;
+
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_UP_CANCELED:
 		if (!cpu_rq(cpu)->migration_thread)
@@ -5130,6 +5145,7 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
 		kthread_stop(cpu_rq(cpu)->migration_thread);
 		cpu_rq(cpu)->migration_thread = NULL;
 		break;
+
 	case CPU_DEAD:
 		migrate_live_tasks(cpu);
 		rq = cpu_rq(cpu);
@@ -5174,10 +5190,12 @@ static struct notifier_block __cpuinitdata migration_notifier = {
 int __init migration_init(void)
 {
 	void *cpu = (void *)(long)smp_processor_id();
-	/* Start one for boot CPU. */
+
+	/* Start one for the boot CPU: */
 	migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
 	migration_call(&migration_notifier, CPU_ONLINE, cpu);
 	register_cpu_notifier(&migration_notifier);
+
 	return 0;
 }
 #endif
@@ -5273,7 +5291,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 	} while (sd);
 }
 #else
-#define sched_domain_debug(sd, cpu) {}
+# define sched_domain_debug(sd, cpu) do { } while (0)
 #endif
 
 static int sd_degenerate(struct sched_domain *sd)
@@ -5299,8 +5317,8 @@ static int sd_degenerate(struct sched_domain *sd)
 	return 1;
 }
 
-static int sd_parent_degenerate(struct sched_domain *sd,
-						struct sched_domain *parent)
+static int
+sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 {
 	unsigned long cflags = sd->flags, pflags = parent->flags;
 
@@ -5595,8 +5613,8 @@ static void touch_cache(void *__cache, unsigned long __size)
 /*
  * Measure the cache-cost of one task migration. Returns in units of nsec.
  */
-static unsigned long long measure_one(void *cache, unsigned long size,
-				      int source, int target)
+static unsigned long long
+measure_one(void *cache, unsigned long size, int source, int target)
 {
 	cpumask_t mask, saved_mask;
 	unsigned long long t0, t1, t2, t3, cost;
@@ -5946,9 +5964,9 @@ static int find_next_best_node(int node, unsigned long *used_nodes)
  */
 static cpumask_t sched_domain_node_span(int node)
 {
-	int i;
-	cpumask_t span, nodemask;
 	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
+	cpumask_t span, nodemask;
+	int i;
 
 	cpus_clear(span);
 	bitmap_zero(used_nodes, MAX_NUMNODES);
@@ -5959,6 +5977,7 @@ static cpumask_t sched_domain_node_span(int node)
 
 	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
 		int next_node = find_next_best_node(node, used_nodes);
+
 		nodemask = node_to_cpumask(next_node);
 		cpus_or(span, span, nodemask);
 	}
@@ -5968,19 +5987,23 @@ static cpumask_t sched_domain_node_span(int node)
 #endif
 
 int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
+
 /*
- * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
- * can switch it on easily if needed.
+ * SMT sched-domains:
  */
 #ifdef CONFIG_SCHED_SMT
 static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
 static struct sched_group sched_group_cpus[NR_CPUS];
+
 static int cpu_to_cpu_group(int cpu)
 {
 	return cpu;
 }
 #endif
 
+/*
+ * multi-core sched-domains:
+ */
 #ifdef CONFIG_SCHED_MC
 static DEFINE_PER_CPU(struct sched_domain, core_domains);
 static struct sched_group *sched_group_core_bycpu[NR_CPUS];
@@ -6000,9 +6023,10 @@ static int cpu_to_core_group(int cpu)
 
 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
 static struct sched_group *sched_group_phys_bycpu[NR_CPUS];
+
 static int cpu_to_phys_group(int cpu)
 {
-#if defined(CONFIG_SCHED_MC)
+#ifdef CONFIG_SCHED_MC
 	cpumask_t mask = cpu_coregroup_map(cpu);
 	return first_cpu(mask);
 #elif defined(CONFIG_SCHED_SMT)
@@ -6548,6 +6572,7 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
 int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
 {
 	int err = 0;
+
 #ifdef CONFIG_SCHED_SMT
 	if (smt_capable())
 		err = sysfs_create_file(&cls->kset.kobj,
@@ -6567,7 +6592,8 @@ static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page)
 {
 	return sprintf(page, "%u\n", sched_mc_power_savings);
 }
-static ssize_t sched_mc_power_savings_store(struct sys_device *dev, const char *buf, size_t count)
+static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
+					    const char *buf, size_t count)
 {
 	return sched_power_savings_store(buf, count, 0);
 }
@@ -6580,7 +6606,8 @@ static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page)
 {
 	return sprintf(page, "%u\n", sched_smt_power_savings);
 }
-static ssize_t sched_smt_power_savings_store(struct sys_device *dev, const char *buf, size_t count)
+static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
+					     const char *buf, size_t count)
 {
 	return sched_power_savings_store(buf, count, 1);
 }
@@ -6642,6 +6669,7 @@ int in_sched_functions(unsigned long addr)
 {
 	/* Linker adds these: start and end of __sched functions */
 	extern char __sched_text_start[], __sched_text_end[];
+
 	return in_lock_functions(addr) ||
 		(addr >= (unsigned long)__sched_text_start
 		&& addr < (unsigned long)__sched_text_end);
@@ -6649,11 +6677,11 @@ int in_sched_functions(unsigned long addr)
 
 void __init sched_init(void)
 {
-	runqueue_t *rq;
 	int i, j, k;
 
 	for_each_possible_cpu(i) {
 		prio_array_t *array;
+		runqueue_t *rq;
 
 		rq = cpu_rq(i);
 		spin_lock_init(&rq->lock);
@@ -6704,7 +6732,7 @@ void __init sched_init(void)
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 void __might_sleep(char *file, int line)
 {
-#if defined(in_atomic)
+#ifdef in_atomic
 	static unsigned long prev_jiffy;	/* ratelimiting */
 
 	if ((in_atomic() || irqs_disabled()) &&
-- 
cgit v0.10.2


From 36c8b586896f60cb91a4fd526233190b34316baf Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:41 -0700
Subject: [PATCH] sched: cleanup, remove task_t, convert to struct task_struct

cleanup: remove task_t and convert all the uses to struct task_struct. I
introduced it for the scheduler anno and it was a mistake.

Conversion was mostly scripted, the result was reviewed and all
secondary whitespace and style impact (if any) was fixed up by hand.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 01c8c8b..41ebf51 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -474,7 +474,7 @@ out:
  */
 
 unsigned long
-thread_saved_pc(task_t *t)
+thread_saved_pc(struct task_struct *t)
 {
 	unsigned long base = (unsigned long)task_stack_page(t);
 	unsigned long fp, sp = task_thread_info(t)->pcb.ksp;
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index eb8e8dc..2fbe453 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -678,7 +678,7 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
  */
 
 static void
-ia64_mca_modify_comm(const task_t *previous_current)
+ia64_mca_modify_comm(const struct task_struct *previous_current)
 {
 	char *p, comm[sizeof(current->comm)];
 	if (previous_current->pid)
@@ -709,7 +709,7 @@ ia64_mca_modify_comm(const task_t *previous_current)
  * that we can do backtrace on the MCA/INIT handler code itself.
  */
 
-static task_t *
+static struct task_struct *
 ia64_mca_modify_original_stack(struct pt_regs *regs,
 		const struct switch_stack *sw,
 		struct ia64_sal_os_state *sos,
@@ -719,7 +719,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
 	ia64_va va;
 	extern char ia64_leave_kernel[];	/* Need asm address, not function descriptor */
 	const pal_min_state_area_t *ms = sos->pal_min_state;
-	task_t *previous_current;
+	struct task_struct *previous_current;
 	struct pt_regs *old_regs;
 	struct switch_stack *old_sw;
 	unsigned size = sizeof(struct pt_regs) +
@@ -1023,7 +1023,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
 	pal_processor_state_info_t *psp = (pal_processor_state_info_t *)
 		&sos->proc_state_param;
 	int recover, cpu = smp_processor_id();
-	task_t *previous_current;
+	struct task_struct *previous_current;
 	struct ia64_mca_notify_die nd =
 		{ .sos = sos, .monarch_cpu = &monarch_cpu };
 
@@ -1352,7 +1352,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
 {
 	static atomic_t slaves;
 	static atomic_t monarchs;
-	task_t *previous_current;
+	struct task_struct *previous_current;
 	int cpu = smp_processor_id();
 	struct ia64_mca_notify_die nd =
 		{ .sos = sos, .monarch_cpu = &monarch_cpu };
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index e1960979..6203ed4 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -124,7 +124,7 @@ extern void __devinit calibrate_delay (void);
 extern void start_ap (void);
 extern unsigned long ia64_iobase;
 
-task_t *task_for_booting_cpu;
+struct task_struct *task_for_booting_cpu;
 
 /*
  * State for each CPU
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index ecfd637..01e7fa8 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -65,7 +65,7 @@ need_resched:
 #endif
 
 FEXPORT(ret_from_fork)
-	jal	schedule_tail		# a0 = task_t *prev
+	jal	schedule_tail		# a0 = struct task_struct *prev
 
 FEXPORT(syscall_exit)
 	local_irq_disable		# make sure need_resched and
diff --git a/arch/mips/kernel/mips-mt.c b/arch/mips/kernel/mips-mt.c
index 02237a6..4dcc39f 100644
--- a/arch/mips/kernel/mips-mt.c
+++ b/arch/mips/kernel/mips-mt.c
@@ -47,7 +47,7 @@ unsigned long mt_fpemul_threshold = 0;
  * used in sys_sched_set/getaffinity() in kernel/sched.c, so
  * cloned here.
  */
-static inline task_t *find_process_by_pid(pid_t pid)
+static inline struct task_struct *find_process_by_pid(pid_t pid)
 {
 	return pid ? find_task_by_pid(pid) : current;
 }
@@ -62,7 +62,7 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len,
 	cpumask_t new_mask;
 	cpumask_t effective_mask;
 	int retval;
-	task_t *p;
+	struct task_struct *p;
 
 	if (len < sizeof(new_mask))
 		return -EINVAL;
@@ -127,7 +127,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
 	unsigned int real_len;
 	cpumask_t mask;
 	int retval;
-	task_t *p;
+	struct task_struct *p;
 
 	real_len = sizeof(mask);
 	if (len < real_len)
diff --git a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c
index a9c1443..8368c2d 100644
--- a/arch/um/kernel/tt/process_kern.c
+++ b/arch/um/kernel/tt/process_kern.c
@@ -119,7 +119,7 @@ void suspend_new_thread(int fd)
 		panic("read failed in suspend_new_thread, err = %d", -err);
 }
 
-void schedule_tail(task_t *prev);
+void schedule_tail(struct task_struct *prev);
 
 static void new_thread_handler(int sig)
 {
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 6fb7795..bfdb902 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -2336,7 +2336,7 @@ static int fionbio(struct file *file, int __user *p)
 
 static int tiocsctty(struct tty_struct *tty, int arg)
 {
-	task_t *p;
+	struct task_struct *p;
 
 	if (current->signal->leader &&
 	    (current->signal->session == tty->session))
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9c677bb..19ffb04 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -120,7 +120,7 @@ struct epoll_filefd {
  */
 struct wake_task_node {
 	struct list_head llink;
-	task_t *task;
+	struct task_struct *task;
 	wait_queue_head_t *wq;
 };
 
@@ -413,7 +413,7 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq)
 {
 	int wake_nests = 0;
 	unsigned long flags;
-	task_t *this_task = current;
+	struct task_struct *this_task = current;
 	struct list_head *lsthead = &psw->wake_task_list, *lnk;
 	struct wake_task_node *tncur;
 	struct wake_task_node tnode;
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h
index 8bc9869..8adcde0 100644
--- a/include/asm-ia64/thread_info.h
+++ b/include/asm-ia64/thread_info.h
@@ -68,7 +68,7 @@ struct thread_info {
 #define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET)
 
 #define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
-#define alloc_task_struct()	((task_t *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER))
+#define alloc_task_struct()	((struct task_struct *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER))
 #define free_task_struct(tsk)	free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER)
 
 #endif /* !__ASSEMBLY */
diff --git a/include/asm-m32r/system.h b/include/asm-m32r/system.h
index 66c4742f..311cebf 100644
--- a/include/asm-m32r/system.h
+++ b/include/asm-m32r/system.h
@@ -18,7 +18,7 @@
  * switch_to(prev, next) should switch from task `prev' to `next'
  * `prev' will never be the same as `next'.
  *
- * `next' and `prev' should be task_t, but it isn't always defined
+ * `next' and `prev' should be struct task_struct, but it isn't always defined
  */
 
 #define switch_to(prev, next, last)  do { \
diff --git a/include/asm-sh/system.h b/include/asm-sh/system.h
index b752e5c..ce2e606 100644
--- a/include/asm-sh/system.h
+++ b/include/asm-sh/system.h
@@ -12,7 +12,7 @@
  */
 
 #define switch_to(prev, next, last) do {				\
- task_t *__last;							\
+ struct task_struct *__last;						\
  register unsigned long *__ts1 __asm__ ("r1") = &prev->thread.sp;	\
  register unsigned long *__ts2 __asm__ ("r2") = &prev->thread.pc;	\
  register unsigned long *__ts4 __asm__ ("r4") = (unsigned long *)prev;	\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8ebddba..c2797f0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -184,11 +184,11 @@ extern unsigned long weighted_cpuload(const int cpu);
 extern rwlock_t tasklist_lock;
 extern spinlock_t mmlist_lock;
 
-typedef struct task_struct task_t;
+struct task_struct;
 
 extern void sched_init(void);
 extern void sched_init_smp(void);
-extern void init_idle(task_t *idle, int cpu);
+extern void init_idle(struct task_struct *idle, int cpu);
 
 extern cpumask_t nohz_cpu_mask;
 
@@ -383,7 +383,7 @@ struct signal_struct {
 	wait_queue_head_t	wait_chldexit;	/* for wait4() */
 
 	/* current thread group signal load-balancing target: */
-	task_t			*curr_target;
+	struct task_struct	*curr_target;
 
 	/* shared signal handling: */
 	struct sigpending	shared_pending;
@@ -699,7 +699,7 @@ extern int groups_search(struct group_info *group_info, gid_t grp);
     ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK])
 
 #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
-extern void prefetch_stack(struct task_struct*);
+extern void prefetch_stack(struct task_struct *t);
 #else
 static inline void prefetch_stack(struct task_struct *t) { }
 #endif
@@ -1031,9 +1031,9 @@ static inline void put_task_struct(struct task_struct *t)
 #define used_math() tsk_used_math(current)
 
 #ifdef CONFIG_SMP
-extern int set_cpus_allowed(task_t *p, cpumask_t new_mask);
+extern int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask);
 #else
-static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask)
+static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 {
 	if (!cpu_isset(0, new_mask))
 		return -EINVAL;
@@ -1042,7 +1042,8 @@ static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask)
 #endif
 
 extern unsigned long long sched_clock(void);
-extern unsigned long long current_sched_time(const task_t *current_task);
+extern unsigned long long
+current_sched_time(const struct task_struct *current_task);
 
 /* sched_exec is called by processes performing an exec */
 #ifdef CONFIG_SMP
@@ -1060,27 +1061,27 @@ static inline void idle_task_exit(void) {}
 extern void sched_idle_next(void);
 
 #ifdef CONFIG_RT_MUTEXES
-extern int rt_mutex_getprio(task_t *p);
-extern void rt_mutex_setprio(task_t *p, int prio);
-extern void rt_mutex_adjust_pi(task_t *p);
+extern int rt_mutex_getprio(struct task_struct *p);
+extern void rt_mutex_setprio(struct task_struct *p, int prio);
+extern void rt_mutex_adjust_pi(struct task_struct *p);
 #else
-static inline int rt_mutex_getprio(task_t *p)
+static inline int rt_mutex_getprio(struct task_struct *p)
 {
 	return p->normal_prio;
 }
 # define rt_mutex_adjust_pi(p)		do { } while (0)
 #endif
 
-extern void set_user_nice(task_t *p, long nice);
-extern int task_prio(const task_t *p);
-extern int task_nice(const task_t *p);
-extern int can_nice(const task_t *p, const int nice);
-extern int task_curr(const task_t *p);
+extern void set_user_nice(struct task_struct *p, long nice);
+extern int task_prio(const struct task_struct *p);
+extern int task_nice(const struct task_struct *p);
+extern int can_nice(const struct task_struct *p, const int nice);
+extern int task_curr(const struct task_struct *p);
 extern int idle_cpu(int cpu);
 extern int sched_setscheduler(struct task_struct *, int, struct sched_param *);
-extern task_t *idle_task(int cpu);
-extern task_t *curr_task(int cpu);
-extern void set_curr_task(int cpu, task_t *p);
+extern struct task_struct *idle_task(int cpu);
+extern struct task_struct *curr_task(int cpu);
+extern void set_curr_task(int cpu, struct task_struct *p);
 
 void yield(void);
 
@@ -1137,8 +1138,8 @@ extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
 #else
  static inline void kick_process(struct task_struct *tsk) { }
 #endif
-extern void FASTCALL(sched_fork(task_t * p, int clone_flags));
-extern void FASTCALL(sched_exit(task_t * p));
+extern void FASTCALL(sched_fork(struct task_struct * p, int clone_flags));
+extern void FASTCALL(sched_exit(struct task_struct * p));
 
 extern int in_group_p(gid_t);
 extern int in_egroup_p(gid_t);
@@ -1243,17 +1244,17 @@ extern NORET_TYPE void do_group_exit(int);
 extern void daemonize(const char *, ...);
 extern int allow_signal(int);
 extern int disallow_signal(int);
-extern task_t *child_reaper;
+extern struct task_struct *child_reaper;
 
 extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
 extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
-task_t *fork_idle(int);
+struct task_struct *fork_idle(int);
 
 extern void set_task_comm(struct task_struct *tsk, char *from);
 extern void get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
-extern void wait_task_inactive(task_t * p);
+extern void wait_task_inactive(struct task_struct * p);
 #else
 #define wait_task_inactive(p)	do { } while (0)
 #endif
@@ -1279,13 +1280,13 @@ extern void wait_task_inactive(task_t * p);
 /* de_thread depends on thread_group_leader not being a pid based check */
 #define thread_group_leader(p)	(p == p->group_leader)
 
-static inline task_t *next_thread(const task_t *p)
+static inline struct task_struct *next_thread(const struct task_struct *p)
 {
 	return list_entry(rcu_dereference(p->thread_group.next),
-				task_t, thread_group);
+			  struct task_struct, thread_group);
 }
 
-static inline int thread_group_empty(task_t *p)
+static inline int thread_group_empty(struct task_struct *p)
 {
 	return list_empty(&p->thread_group);
 }
diff --git a/kernel/capability.c b/kernel/capability.c
index 1a4d8a4..c7685ad 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -46,7 +46,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
      int ret = 0;
      pid_t pid;
      __u32 version;
-     task_t *target;
+     struct task_struct *target;
      struct __user_cap_data_struct data;
 
      if (get_user(version, &header->version))
@@ -96,7 +96,7 @@ static inline int cap_set_pg(int pgrp, kernel_cap_t *effective,
 			      kernel_cap_t *inheritable,
 			      kernel_cap_t *permitted)
 {
-	task_t *g, *target;
+	struct task_struct *g, *target;
 	int ret = -EPERM;
 	int found = 0;
 
@@ -128,7 +128,7 @@ static inline int cap_set_all(kernel_cap_t *effective,
 			       kernel_cap_t *inheritable,
 			       kernel_cap_t *permitted)
 {
-     task_t *g, *target;
+     struct task_struct *g, *target;
      int ret = -EPERM;
      int found = 0;
 
@@ -172,7 +172,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 {
      kernel_cap_t inheritable, permitted, effective;
      __u32 version;
-     task_t *target;
+     struct task_struct *target;
      int ret;
      pid_t pid;
 
diff --git a/kernel/exit.c b/kernel/exit.c
index c595db1..6664c08 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -134,8 +134,8 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 
 void release_task(struct task_struct * p)
 {
+	struct task_struct *leader;
 	int zap_leader;
-	task_t *leader;
 repeat:
 	atomic_dec(&p->user->processes);
 	write_lock_irq(&tasklist_lock);
@@ -209,7 +209,7 @@ out:
  *
  * "I ask you, have you ever known what it is to be an orphan?"
  */
-static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
+static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task)
 {
 	struct task_struct *p;
 	int ret = 1;
@@ -582,7 +582,8 @@ static void exit_mm(struct task_struct * tsk)
 	mmput(mm);
 }
 
-static inline void choose_new_parent(task_t *p, task_t *reaper)
+static inline void
+choose_new_parent(struct task_struct *p, struct task_struct *reaper)
 {
 	/*
 	 * Make sure we're not reparenting to ourselves and that
@@ -592,7 +593,8 @@ static inline void choose_new_parent(task_t *p, task_t *reaper)
 	p->real_parent = reaper;
 }
 
-static void reparent_thread(task_t *p, task_t *father, int traced)
+static void
+reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
 {
 	/* We don't want people slaying init.  */
 	if (p->exit_signal != -1)
@@ -656,8 +658,8 @@ static void reparent_thread(task_t *p, task_t *father, int traced)
  * group, and if no such member exists, give it to
  * the global child reaper process (ie "init")
  */
-static void forget_original_parent(struct task_struct * father,
-					  struct list_head *to_release)
+static void
+forget_original_parent(struct task_struct *father, struct list_head *to_release)
 {
 	struct task_struct *p, *reaper = father;
 	struct list_head *_p, *_n;
@@ -680,7 +682,7 @@ static void forget_original_parent(struct task_struct * father,
 	 */
 	list_for_each_safe(_p, _n, &father->children) {
 		int ptrace;
-		p = list_entry(_p,struct task_struct,sibling);
+		p = list_entry(_p, struct task_struct, sibling);
 
 		ptrace = p->ptrace;
 
@@ -709,7 +711,7 @@ static void forget_original_parent(struct task_struct * father,
 			list_add(&p->ptrace_list, to_release);
 	}
 	list_for_each_safe(_p, _n, &father->ptrace_children) {
-		p = list_entry(_p,struct task_struct,ptrace_list);
+		p = list_entry(_p, struct task_struct, ptrace_list);
 		choose_new_parent(p, reaper);
 		reparent_thread(p, father, 1);
 	}
@@ -829,7 +831,7 @@ static void exit_notify(struct task_struct *tsk)
 
 	list_for_each_safe(_p, _n, &ptrace_dead) {
 		list_del_init(_p);
-		t = list_entry(_p,struct task_struct,ptrace_list);
+		t = list_entry(_p, struct task_struct, ptrace_list);
 		release_task(t);
 	}
 
@@ -1010,7 +1012,7 @@ asmlinkage void sys_exit_group(int error_code)
 	do_group_exit((error_code & 0xff) << 8);
 }
 
-static int eligible_child(pid_t pid, int options, task_t *p)
+static int eligible_child(pid_t pid, int options, struct task_struct *p)
 {
 	if (pid > 0) {
 		if (p->pid != pid)
@@ -1051,12 +1053,13 @@ static int eligible_child(pid_t pid, int options, task_t *p)
 	return 1;
 }
 
-static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid,
+static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
 			       int why, int status,
 			       struct siginfo __user *infop,
 			       struct rusage __user *rusagep)
 {
 	int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0;
+
 	put_task_struct(p);
 	if (!retval)
 		retval = put_user(SIGCHLD, &infop->si_signo);
@@ -1081,7 +1084,7 @@ static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid,
  * the lock and this task is uninteresting.  If we return nonzero, we have
  * released the lock and the system call should return.
  */
-static int wait_task_zombie(task_t *p, int noreap,
+static int wait_task_zombie(struct task_struct *p, int noreap,
 			    struct siginfo __user *infop,
 			    int __user *stat_addr, struct rusage __user *ru)
 {
@@ -1243,8 +1246,8 @@ static int wait_task_zombie(task_t *p, int noreap,
  * the lock and this task is uninteresting.  If we return nonzero, we have
  * released the lock and the system call should return.
  */
-static int wait_task_stopped(task_t *p, int delayed_group_leader, int noreap,
-			     struct siginfo __user *infop,
+static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
+			     int noreap, struct siginfo __user *infop,
 			     int __user *stat_addr, struct rusage __user *ru)
 {
 	int retval, exit_code;
@@ -1358,7 +1361,7 @@ bail_ref:
  * the lock and this task is uninteresting.  If we return nonzero, we have
  * released the lock and the system call should return.
  */
-static int wait_task_continued(task_t *p, int noreap,
+static int wait_task_continued(struct task_struct *p, int noreap,
 			       struct siginfo __user *infop,
 			       int __user *stat_addr, struct rusage __user *ru)
 {
@@ -1444,7 +1447,7 @@ repeat:
 		int ret;
 
 		list_for_each(_p,&tsk->children) {
-			p = list_entry(_p,struct task_struct,sibling);
+			p = list_entry(_p, struct task_struct, sibling);
 
 			ret = eligible_child(pid, options, p);
 			if (!ret)
diff --git a/kernel/fork.c b/kernel/fork.c
index 54953d8..56e4e07 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -933,13 +933,13 @@ static inline void rt_mutex_init_task(struct task_struct *p)
  * parts of the process environment (as per the clone
  * flags). The actual kick-off is left to the caller.
  */
-static task_t *copy_process(unsigned long clone_flags,
-				 unsigned long stack_start,
-				 struct pt_regs *regs,
-				 unsigned long stack_size,
-				 int __user *parent_tidptr,
-				 int __user *child_tidptr,
-				 int pid)
+static struct task_struct *copy_process(unsigned long clone_flags,
+					unsigned long stack_start,
+					struct pt_regs *regs,
+					unsigned long stack_size,
+					int __user *parent_tidptr,
+					int __user *child_tidptr,
+					int pid)
 {
 	int retval;
 	struct task_struct *p = NULL;
@@ -1294,9 +1294,9 @@ struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
 	return regs;
 }
 
-task_t * __devinit fork_idle(int cpu)
+struct task_struct * __devinit fork_idle(int cpu)
 {
-	task_t *task;
+	struct task_struct *task;
 	struct pt_regs regs;
 
 	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 617304c..d17766d 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -669,7 +669,7 @@ static int hrtimer_wakeup(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, task_t *task)
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
 {
 	sl->timer.function = hrtimer_wakeup;
 	sl->task = task;
diff --git a/kernel/pid.c b/kernel/pid.c
index eeb836b..93e212f 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -218,7 +218,7 @@ struct pid * fastcall find_pid(int nr)
 	return NULL;
 }
 
-int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
+int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr)
 {
 	struct pid_link *link;
 	struct pid *pid;
@@ -233,7 +233,7 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
 	return 0;
 }
 
-void fastcall detach_pid(task_t *task, enum pid_type type)
+void fastcall detach_pid(struct task_struct *task, enum pid_type type)
 {
 	struct pid_link *link;
 	struct pid *pid;
@@ -267,7 +267,7 @@ struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
 /*
  * Must be called under rcu_read_lock() or with tasklist_lock read-held.
  */
-task_t *find_task_by_pid_type(int type, int nr)
+struct task_struct *find_task_by_pid_type(int type, int nr)
 {
 	return pid_task(find_pid(nr), type);
 }
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 335c5b9..9a111f7 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -28,7 +28,7 @@
  *
  * Must be called with the tasklist lock write-held.
  */
-void __ptrace_link(task_t *child, task_t *new_parent)
+void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
 {
 	BUG_ON(!list_empty(&child->ptrace_list));
 	if (child->parent == new_parent)
@@ -46,7 +46,7 @@ void __ptrace_link(task_t *child, task_t *new_parent)
  * TASK_TRACED, resume it now.
  * Requires that irqs be disabled.
  */
-void ptrace_untrace(task_t *child)
+void ptrace_untrace(struct task_struct *child)
 {
 	spin_lock(&child->sighand->siglock);
 	if (child->state == TASK_TRACED) {
@@ -65,7 +65,7 @@ void ptrace_untrace(task_t *child)
  *
  * Must be called with the tasklist lock write-held.
  */
-void __ptrace_unlink(task_t *child)
+void __ptrace_unlink(struct task_struct *child)
 {
 	BUG_ON(!child->ptrace);
 
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 353a853..0c1faa9 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -96,7 +96,7 @@ void deadlock_trace_off(void)
 	rt_trace_on = 0;
 }
 
-static void printk_task(task_t *p)
+static void printk_task(struct task_struct *p)
 {
 	if (p)
 		printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
@@ -231,7 +231,8 @@ void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
 	lock->name = name;
 }
 
-void rt_mutex_deadlock_account_lock(struct rt_mutex *lock, task_t *task)
+void
+rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task)
 {
 }
 
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index e82c2f8..494dac8 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -33,7 +33,7 @@ struct test_thread_data {
 };
 
 static struct test_thread_data thread_data[MAX_RT_TEST_THREADS];
-static task_t *threads[MAX_RT_TEST_THREADS];
+static struct task_struct *threads[MAX_RT_TEST_THREADS];
 static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES];
 
 enum test_opcodes {
@@ -361,8 +361,8 @@ static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf,
 static ssize_t sysfs_test_status(struct sys_device *dev, char *buf)
 {
 	struct test_thread_data *td;
+	struct task_struct *tsk;
 	char *curr = buf;
-	task_t *tsk;
 	int i;
 
 	td = container_of(dev, struct test_thread_data, sysdev);
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 91b699a..d2ef13b 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -157,7 +157,7 @@ int max_lock_depth = 1024;
  * Decreases task's usage by one - may thus free the task.
  * Returns 0 or -EDEADLK.
  */
-static int rt_mutex_adjust_prio_chain(task_t *task,
+static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 				      int deadlock_detect,
 				      struct rt_mutex *orig_lock,
 				      struct rt_mutex_waiter *orig_waiter,
@@ -282,6 +282,7 @@ static int rt_mutex_adjust_prio_chain(task_t *task,
 	spin_unlock_irqrestore(&task->pi_lock, flags);
  out_put_task:
 	put_task_struct(task);
+
 	return ret;
 }
 
@@ -403,10 +404,10 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 				   struct rt_mutex_waiter *waiter,
 				   int detect_deadlock)
 {
+	struct task_struct *owner = rt_mutex_owner(lock);
 	struct rt_mutex_waiter *top_waiter = waiter;
-	task_t *owner = rt_mutex_owner(lock);
-	int boost = 0, res;
 	unsigned long flags;
+	int boost = 0, res;
 
 	spin_lock_irqsave(&current->pi_lock, flags);
 	__rt_mutex_adjust_prio(current);
@@ -527,9 +528,9 @@ static void remove_waiter(struct rt_mutex *lock,
 			  struct rt_mutex_waiter *waiter)
 {
 	int first = (waiter == rt_mutex_top_waiter(lock));
-	int boost = 0;
-	task_t *owner = rt_mutex_owner(lock);
+	struct task_struct *owner = rt_mutex_owner(lock);
 	unsigned long flags;
+	int boost = 0;
 
 	spin_lock_irqsave(&current->pi_lock, flags);
 	plist_del(&waiter->list_entry, &lock->wait_list);
diff --git a/kernel/sched.c b/kernel/sched.c
index b032614..021b3121 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -179,7 +179,7 @@ static unsigned int static_prio_timeslice(int static_prio)
 		return SCALE_PRIO(DEF_TIMESLICE, static_prio);
 }
 
-static inline unsigned int task_timeslice(task_t *p)
+static inline unsigned int task_timeslice(struct task_struct *p)
 {
 	return static_prio_timeslice(p->static_prio);
 }
@@ -227,7 +227,7 @@ struct runqueue {
 
 	unsigned long expired_timestamp;
 	unsigned long long timestamp_last_tick;
-	task_t *curr, *idle;
+	struct task_struct *curr, *idle;
 	struct mm_struct *prev_mm;
 	prio_array_t *active, *expired, arrays[2];
 	int best_expired_prio;
@@ -240,7 +240,7 @@ struct runqueue {
 	int active_balance;
 	int push_cpu;
 
-	task_t *migration_thread;
+	struct task_struct *migration_thread;
 	struct list_head migration_queue;
 #endif
 
@@ -291,16 +291,16 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);
 #endif
 
 #ifndef __ARCH_WANT_UNLOCKED_CTXSW
-static inline int task_running(runqueue_t *rq, task_t *p)
+static inline int task_running(runqueue_t *rq, struct task_struct *p)
 {
 	return rq->curr == p;
 }
 
-static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
+static inline void prepare_lock_switch(runqueue_t *rq, struct task_struct *next)
 {
 }
 
-static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
+static inline void finish_lock_switch(runqueue_t *rq, struct task_struct *prev)
 {
 #ifdef CONFIG_DEBUG_SPINLOCK
 	/* this is a valid case when another task releases the spinlock */
@@ -317,7 +317,7 @@ static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
 }
 
 #else /* __ARCH_WANT_UNLOCKED_CTXSW */
-static inline int task_running(runqueue_t *rq, task_t *p)
+static inline int task_running(runqueue_t *rq, struct task_struct *p)
 {
 #ifdef CONFIG_SMP
 	return p->oncpu;
@@ -326,7 +326,7 @@ static inline int task_running(runqueue_t *rq, task_t *p)
 #endif
 }
 
-static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
+static inline void prepare_lock_switch(runqueue_t *rq, struct task_struct *next)
 {
 #ifdef CONFIG_SMP
 	/*
@@ -343,7 +343,7 @@ static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
 #endif
 }
 
-static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
+static inline void finish_lock_switch(runqueue_t *rq, struct task_struct *prev)
 {
 #ifdef CONFIG_SMP
 	/*
@@ -364,7 +364,7 @@ static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
  * __task_rq_lock - lock the runqueue a given task resides on.
  * Must be called interrupts disabled.
  */
-static inline runqueue_t *__task_rq_lock(task_t *p)
+static inline runqueue_t *__task_rq_lock(struct task_struct *p)
 	__acquires(rq->lock)
 {
 	struct runqueue *rq;
@@ -384,7 +384,7 @@ repeat_lock_task:
  * interrupts.  Note the ordering: we can safely lookup the task_rq without
  * explicitly disabling preemption.
  */
-static runqueue_t *task_rq_lock(task_t *p, unsigned long *flags)
+static runqueue_t *task_rq_lock(struct task_struct *p, unsigned long *flags)
 	__acquires(rq->lock)
 {
 	struct runqueue *rq;
@@ -541,7 +541,7 @@ static inline runqueue_t *this_rq_lock(void)
  * long it was from the *first* time it was queued to the time that it
  * finally hit a cpu.
  */
-static inline void sched_info_dequeued(task_t *t)
+static inline void sched_info_dequeued(struct task_struct *t)
 {
 	t->sched_info.last_queued = 0;
 }
@@ -551,7 +551,7 @@ static inline void sched_info_dequeued(task_t *t)
  * long it was waiting to run.  We also note when it began so that we
  * can keep stats on how long its timeslice is.
  */
-static void sched_info_arrive(task_t *t)
+static void sched_info_arrive(struct task_struct *t)
 {
 	unsigned long now = jiffies, diff = 0;
 	struct runqueue *rq = task_rq(t);
@@ -585,7 +585,7 @@ static void sched_info_arrive(task_t *t)
  * the timestamp if it is already not set.  It's assumed that
  * sched_info_dequeued() will clear that stamp when appropriate.
  */
-static inline void sched_info_queued(task_t *t)
+static inline void sched_info_queued(struct task_struct *t)
 {
 	if (!t->sched_info.last_queued)
 		t->sched_info.last_queued = jiffies;
@@ -595,7 +595,7 @@ static inline void sched_info_queued(task_t *t)
  * Called when a process ceases being the active-running process, either
  * voluntarily or involuntarily.  Now we can calculate how long we ran.
  */
-static inline void sched_info_depart(task_t *t)
+static inline void sched_info_depart(struct task_struct *t)
 {
 	struct runqueue *rq = task_rq(t);
 	unsigned long diff = jiffies - t->sched_info.last_arrival;
@@ -611,7 +611,8 @@ static inline void sched_info_depart(task_t *t)
  * their time slice.  (This may also be called when switching to or from
  * the idle task.)  We are only called when prev != next.
  */
-static inline void sched_info_switch(task_t *prev, task_t *next)
+static inline void
+sched_info_switch(struct task_struct *prev, struct task_struct *next)
 {
 	struct runqueue *rq = task_rq(prev);
 
@@ -683,7 +684,7 @@ static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)
  * Both properties are important to certain workloads.
  */
 
-static inline int __normal_prio(task_t *p)
+static inline int __normal_prio(struct task_struct *p)
 {
 	int bonus, prio;
 
@@ -719,7 +720,7 @@ static inline int __normal_prio(task_t *p)
 #define RTPRIO_TO_LOAD_WEIGHT(rp) \
 	(PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp))
 
-static void set_load_weight(task_t *p)
+static void set_load_weight(struct task_struct *p)
 {
 	if (has_rt_policy(p)) {
 #ifdef CONFIG_SMP
@@ -737,23 +738,25 @@ static void set_load_weight(task_t *p)
 		p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio);
 }
 
-static inline void inc_raw_weighted_load(runqueue_t *rq, const task_t *p)
+static inline void
+inc_raw_weighted_load(runqueue_t *rq, const struct task_struct *p)
 {
 	rq->raw_weighted_load += p->load_weight;
 }
 
-static inline void dec_raw_weighted_load(runqueue_t *rq, const task_t *p)
+static inline void
+dec_raw_weighted_load(runqueue_t *rq, const struct task_struct *p)
 {
 	rq->raw_weighted_load -= p->load_weight;
 }
 
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
+static inline void inc_nr_running(struct task_struct *p, runqueue_t *rq)
 {
 	rq->nr_running++;
 	inc_raw_weighted_load(rq, p);
 }
 
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
+static inline void dec_nr_running(struct task_struct *p, runqueue_t *rq)
 {
 	rq->nr_running--;
 	dec_raw_weighted_load(rq, p);
@@ -766,7 +769,7 @@ static inline void dec_nr_running(task_t *p, runqueue_t *rq)
  * setprio syscalls, and whenever the interactivity
  * estimator recalculates.
  */
-static inline int normal_prio(task_t *p)
+static inline int normal_prio(struct task_struct *p)
 {
 	int prio;
 
@@ -784,7 +787,7 @@ static inline int normal_prio(task_t *p)
  * interactivity modifiers. Will be RT if the task got
  * RT-boosted. If not then it returns p->normal_prio.
  */
-static int effective_prio(task_t *p)
+static int effective_prio(struct task_struct *p)
 {
 	p->normal_prio = normal_prio(p);
 	/*
@@ -800,7 +803,7 @@ static int effective_prio(task_t *p)
 /*
  * __activate_task - move a task to the runqueue.
  */
-static void __activate_task(task_t *p, runqueue_t *rq)
+static void __activate_task(struct task_struct *p, runqueue_t *rq)
 {
 	prio_array_t *target = rq->active;
 
@@ -813,7 +816,7 @@ static void __activate_task(task_t *p, runqueue_t *rq)
 /*
  * __activate_idle_task - move idle task to the _front_ of runqueue.
  */
-static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
+static inline void __activate_idle_task(struct task_struct *p, runqueue_t *rq)
 {
 	enqueue_task_head(p, rq->active);
 	inc_nr_running(p, rq);
@@ -823,7 +826,7 @@ static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
  * Recalculate p->normal_prio and p->prio after having slept,
  * updating the sleep-average too:
  */
-static int recalc_task_prio(task_t *p, unsigned long long now)
+static int recalc_task_prio(struct task_struct *p, unsigned long long now)
 {
 	/* Caller must always ensure 'now >= p->timestamp' */
 	unsigned long sleep_time = now - p->timestamp;
@@ -895,7 +898,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
  * Update all the scheduling statistics stuff. (sleep average
  * calculation, priority modifiers, etc.)
  */
-static void activate_task(task_t *p, runqueue_t *rq, int local)
+static void activate_task(struct task_struct *p, runqueue_t *rq, int local)
 {
 	unsigned long long now;
 
@@ -962,7 +965,7 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
 #endif
 
-static void resched_task(task_t *p)
+static void resched_task(struct task_struct *p)
 {
 	int cpu;
 
@@ -983,7 +986,7 @@ static void resched_task(task_t *p)
 		smp_send_reschedule(cpu);
 }
 #else
-static inline void resched_task(task_t *p)
+static inline void resched_task(struct task_struct *p)
 {
 	assert_spin_locked(&task_rq(p)->lock);
 	set_tsk_need_resched(p);
@@ -994,7 +997,7 @@ static inline void resched_task(task_t *p)
  * task_curr - is this task currently executing on a CPU?
  * @p: the task in question.
  */
-inline int task_curr(const task_t *p)
+inline int task_curr(const struct task_struct *p)
 {
 	return cpu_curr(task_cpu(p)) == p;
 }
@@ -1009,7 +1012,7 @@ unsigned long weighted_cpuload(const int cpu)
 typedef struct {
 	struct list_head list;
 
-	task_t *task;
+	struct task_struct *task;
 	int dest_cpu;
 
 	struct completion done;
@@ -1019,7 +1022,8 @@ typedef struct {
  * The task's runqueue lock must be held.
  * Returns true if you have to wait for migration thread.
  */
-static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
+static int
+migrate_task(struct task_struct *p, int dest_cpu, migration_req_t *req)
 {
 	runqueue_t *rq = task_rq(p);
 
@@ -1049,7 +1053,7 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
  * smp_call_function() if an IPI is sent by the same process we are
  * waiting to become inactive.
  */
-void wait_task_inactive(task_t *p)
+void wait_task_inactive(struct task_struct *p)
 {
 	unsigned long flags;
 	runqueue_t *rq;
@@ -1083,7 +1087,7 @@ repeat:
  * to another CPU then no harm is done and the purpose has been
  * achieved as well.
  */
-void kick_process(task_t *p)
+void kick_process(struct task_struct *p)
 {
 	int cpu;
 
@@ -1286,7 +1290,7 @@ nextlevel:
  * Returns the CPU we should wake onto.
  */
 #if defined(ARCH_HAS_SCHED_WAKE_IDLE)
-static int wake_idle(int cpu, task_t *p)
+static int wake_idle(int cpu, struct task_struct *p)
 {
 	cpumask_t tmp;
 	struct sched_domain *sd;
@@ -1309,7 +1313,7 @@ static int wake_idle(int cpu, task_t *p)
 	return cpu;
 }
 #else
-static inline int wake_idle(int cpu, task_t *p)
+static inline int wake_idle(int cpu, struct task_struct *p)
 {
 	return cpu;
 }
@@ -1329,7 +1333,7 @@ static inline int wake_idle(int cpu, task_t *p)
  *
  * returns failure only if the task is already active.
  */
-static int try_to_wake_up(task_t *p, unsigned int state, int sync)
+static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 {
 	int cpu, this_cpu, success = 0;
 	unsigned long flags;
@@ -1487,14 +1491,14 @@ out:
 	return success;
 }
 
-int fastcall wake_up_process(task_t *p)
+int fastcall wake_up_process(struct task_struct *p)
 {
 	return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
 				 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
 }
 EXPORT_SYMBOL(wake_up_process);
 
-int fastcall wake_up_state(task_t *p, unsigned int state)
+int fastcall wake_up_state(struct task_struct *p, unsigned int state)
 {
 	return try_to_wake_up(p, state, 0);
 }
@@ -1503,7 +1507,7 @@ int fastcall wake_up_state(task_t *p, unsigned int state)
  * Perform scheduler related setup for a newly forked process p.
  * p is forked by current.
  */
-void fastcall sched_fork(task_t *p, int clone_flags)
+void fastcall sched_fork(struct task_struct *p, int clone_flags)
 {
 	int cpu = get_cpu();
 
@@ -1571,7 +1575,7 @@ void fastcall sched_fork(task_t *p, int clone_flags)
  * that must be done for every newly created context, then puts the task
  * on the runqueue and wakes it.
  */
-void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
+void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 {
 	unsigned long flags;
 	int this_cpu, cpu;
@@ -1655,7 +1659,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
  * artificially, because any timeslice recovered here
  * was given away by the parent in the first place.)
  */
-void fastcall sched_exit(task_t *p)
+void fastcall sched_exit(struct task_struct *p)
 {
 	unsigned long flags;
 	runqueue_t *rq;
@@ -1689,7 +1693,7 @@ void fastcall sched_exit(task_t *p)
  * prepare_task_switch sets up locking and calls architecture specific
  * hooks.
  */
-static inline void prepare_task_switch(runqueue_t *rq, task_t *next)
+static inline void prepare_task_switch(runqueue_t *rq, struct task_struct *next)
 {
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
@@ -1710,7 +1714,7 @@ static inline void prepare_task_switch(runqueue_t *rq, task_t *next)
  * with the lock held can cause deadlocks; see schedule() for
  * details.)
  */
-static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
+static inline void finish_task_switch(runqueue_t *rq, struct task_struct *prev)
 	__releases(rq->lock)
 {
 	struct mm_struct *mm = rq->prev_mm;
@@ -1748,7 +1752,7 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
  * schedule_tail - first thing a freshly forked thread must call.
  * @prev: the thread we just switched away from.
  */
-asmlinkage void schedule_tail(task_t *prev)
+asmlinkage void schedule_tail(struct task_struct *prev)
 	__releases(rq->lock)
 {
 	runqueue_t *rq = this_rq();
@@ -1765,8 +1769,9 @@ asmlinkage void schedule_tail(task_t *prev)
  * context_switch - switch to the new MM and the new
  * thread's register state.
  */
-static inline
-task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next)
+static inline struct task_struct *
+context_switch(runqueue_t *rq, struct task_struct *prev,
+	       struct task_struct *next)
 {
 	struct mm_struct *mm = next->mm;
 	struct mm_struct *oldmm = prev->active_mm;
@@ -1937,7 +1942,7 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
  * allow dest_cpu, which will force the cpu onto dest_cpu.  Then
  * the cpu_allowed mask is restored.
  */
-static void sched_migrate_task(task_t *p, int dest_cpu)
+static void sched_migrate_task(struct task_struct *p, int dest_cpu)
 {
 	migration_req_t req;
 	runqueue_t *rq;
@@ -1952,11 +1957,13 @@ static void sched_migrate_task(task_t *p, int dest_cpu)
 	if (migrate_task(p, dest_cpu, &req)) {
 		/* Need to wait for migration thread (might exit: take ref). */
 		struct task_struct *mt = rq->migration_thread;
+
 		get_task_struct(mt);
 		task_rq_unlock(rq, &flags);
 		wake_up_process(mt);
 		put_task_struct(mt);
 		wait_for_completion(&req.done);
+
 		return;
 	}
 out:
@@ -1980,9 +1987,9 @@ void sched_exec(void)
  * pull_task - move a task from a remote runqueue to the local runqueue.
  * Both runqueues must be locked.
  */
-static
-void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
-	       runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
+static void pull_task(runqueue_t *src_rq, prio_array_t *src_array,
+		      struct task_struct *p, runqueue_t *this_rq,
+		      prio_array_t *this_array, int this_cpu)
 {
 	dequeue_task(p, src_array);
 	dec_nr_running(p, src_rq);
@@ -2003,7 +2010,7 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
 static
-int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
+int can_migrate_task(struct task_struct *p, runqueue_t *rq, int this_cpu,
 		     struct sched_domain *sd, enum idle_type idle,
 		     int *all_pinned)
 {
@@ -2052,8 +2059,8 @@ static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
 	    best_prio_seen, skip_for_load;
 	prio_array_t *array, *dst_array;
 	struct list_head *head, *curr;
+	struct task_struct *tmp;
 	long rem_load_move;
-	task_t *tmp;
 
 	if (max_nr_move == 0 || max_load_move == 0)
 		goto out;
@@ -2105,7 +2112,7 @@ skip_bitmap:
 	head = array->queue + idx;
 	curr = head->prev;
 skip_queue:
-	tmp = list_entry(curr, task_t, run_list);
+	tmp = list_entry(curr, struct task_struct, run_list);
 
 	curr = curr->prev;
 
@@ -2819,7 +2826,7 @@ EXPORT_PER_CPU_SYMBOL(kstat);
  * Bank in p->sched_time the ns elapsed since the last tick or switch.
  */
 static inline void
-update_cpu_clock(task_t *p, runqueue_t *rq, unsigned long long now)
+update_cpu_clock(struct task_struct *p, runqueue_t *rq, unsigned long long now)
 {
 	p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick);
 }
@@ -2828,7 +2835,7 @@ update_cpu_clock(task_t *p, runqueue_t *rq, unsigned long long now)
  * Return current->sched_time plus any more ns on the sched_clock
  * that have not yet been banked.
  */
-unsigned long long current_sched_time(const task_t *p)
+unsigned long long current_sched_time(const struct task_struct *p)
 {
 	unsigned long long ns;
 	unsigned long flags;
@@ -2945,9 +2952,9 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 void scheduler_tick(void)
 {
 	unsigned long long now = sched_clock();
+	struct task_struct *p = current;
 	int cpu = smp_processor_id();
 	runqueue_t *rq = this_rq();
-	task_t *p = current;
 
 	update_cpu_clock(p, rq, now);
 
@@ -3079,7 +3086,8 @@ static void wake_sleeping_dependent(int this_cpu)
  * utilize, if another task runs on a sibling. This models the
  * slowdown effect of other tasks running on siblings:
  */
-static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
+static inline unsigned long
+smt_slice(struct task_struct *p, struct sched_domain *sd)
 {
 	return p->time_slice * (100 - sd->per_cpu_gain) / 100;
 }
@@ -3090,7 +3098,8 @@ static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
  * acquire their lock. As we only trylock the normal locking order does not
  * need to be obeyed.
  */
-static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p)
+static int
+dependent_sleeper(int this_cpu, runqueue_t *this_rq, struct task_struct *p)
 {
 	struct sched_domain *tmp, *sd = NULL;
 	int ret = 0, i;
@@ -3110,8 +3119,8 @@ static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p)
 		return 0;
 
 	for_each_cpu_mask(i, sd->span) {
+		struct task_struct *smt_curr;
 		runqueue_t *smt_rq;
-		task_t *smt_curr;
 
 		if (i == this_cpu)
 			continue;
@@ -3157,7 +3166,7 @@ static inline void wake_sleeping_dependent(int this_cpu)
 {
 }
 static inline int
-dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p)
+dependent_sleeper(int this_cpu, runqueue_t *this_rq, struct task_struct *p)
 {
 	return 0;
 }
@@ -3211,11 +3220,11 @@ static inline int interactive_sleep(enum sleep_type sleep_type)
  */
 asmlinkage void __sched schedule(void)
 {
+	struct task_struct *prev, *next;
 	struct list_head *queue;
 	unsigned long long now;
 	unsigned long run_time;
 	int cpu, idx, new_prio;
-	task_t *prev, *next;
 	prio_array_t *array;
 	long *switch_count;
 	runqueue_t *rq;
@@ -3308,7 +3317,7 @@ need_resched_nonpreemptible:
 
 	idx = sched_find_first_bit(array->bitmap);
 	queue = array->queue + idx;
-	next = list_entry(queue->next, task_t, run_list);
+	next = list_entry(queue->next, struct task_struct, run_list);
 
 	if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
 		unsigned long long delta = now - next->timestamp;
@@ -3776,7 +3785,7 @@ EXPORT_SYMBOL(sleep_on_timeout);
  *
  * Used by the rt_mutex code to implement priority inheritance logic.
  */
-void rt_mutex_setprio(task_t *p, int prio)
+void rt_mutex_setprio(struct task_struct *p, int prio)
 {
 	unsigned long flags;
 	prio_array_t *array;
@@ -3817,7 +3826,7 @@ void rt_mutex_setprio(task_t *p, int prio)
 
 #endif
 
-void set_user_nice(task_t *p, long nice)
+void set_user_nice(struct task_struct *p, long nice)
 {
 	int old_prio, delta;
 	unsigned long flags;
@@ -3873,7 +3882,7 @@ EXPORT_SYMBOL(set_user_nice);
  * @p: task
  * @nice: nice value
  */
-int can_nice(const task_t *p, const int nice)
+int can_nice(const struct task_struct *p, const int nice)
 {
 	/* convert nice value [19,-20] to rlimit style value [1,40] */
 	int nice_rlim = 20 - nice;
@@ -3932,7 +3941,7 @@ asmlinkage long sys_nice(int increment)
  * RT tasks are offset by -200. Normal tasks are centered
  * around 0, value goes from -16 to +15.
  */
-int task_prio(const task_t *p)
+int task_prio(const struct task_struct *p)
 {
 	return p->prio - MAX_RT_PRIO;
 }
@@ -3941,7 +3950,7 @@ int task_prio(const task_t *p)
  * task_nice - return the nice value of a given task.
  * @p: the task in question.
  */
-int task_nice(const task_t *p)
+int task_nice(const struct task_struct *p)
 {
 	return TASK_NICE(p);
 }
@@ -3960,7 +3969,7 @@ int idle_cpu(int cpu)
  * idle_task - return the idle task for a given cpu.
  * @cpu: the processor in question.
  */
-task_t *idle_task(int cpu)
+struct task_struct *idle_task(int cpu)
 {
 	return cpu_rq(cpu)->idle;
 }
@@ -3969,7 +3978,7 @@ task_t *idle_task(int cpu)
  * find_process_by_pid - find a process with a matching PID value.
  * @pid: the pid in question.
  */
-static inline task_t *find_process_by_pid(pid_t pid)
+static inline struct task_struct *find_process_by_pid(pid_t pid)
 {
 	return pid ? find_task_by_pid(pid) : current;
 }
@@ -4103,9 +4112,9 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
 static int
 do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 {
-	int retval;
 	struct sched_param lparam;
 	struct task_struct *p;
+	int retval;
 
 	if (!param || pid < 0)
 		return -EINVAL;
@@ -4121,6 +4130,7 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 	read_unlock_irq(&tasklist_lock);
 	retval = sched_setscheduler(p, policy, &lparam);
 	put_task_struct(p);
+
 	return retval;
 }
 
@@ -4156,8 +4166,8 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
  */
 asmlinkage long sys_sched_getscheduler(pid_t pid)
 {
+	struct task_struct *p;
 	int retval = -EINVAL;
-	task_t *p;
 
 	if (pid < 0)
 		goto out_nounlock;
@@ -4184,8 +4194,8 @@ out_nounlock:
 asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
 {
 	struct sched_param lp;
+	struct task_struct *p;
 	int retval = -EINVAL;
-	task_t *p;
 
 	if (!param || pid < 0)
 		goto out_nounlock;
@@ -4218,9 +4228,9 @@ out_unlock:
 
 long sched_setaffinity(pid_t pid, cpumask_t new_mask)
 {
-	task_t *p;
-	int retval;
 	cpumask_t cpus_allowed;
+	struct task_struct *p;
+	int retval;
 
 	lock_cpu_hotplug();
 	read_lock(&tasklist_lock);
@@ -4306,8 +4316,8 @@ cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
 
 long sched_getaffinity(pid_t pid, cpumask_t *mask)
 {
+	struct task_struct *p;
 	int retval;
-	task_t *p;
 
 	lock_cpu_hotplug();
 	read_lock(&tasklist_lock);
@@ -4592,9 +4602,9 @@ asmlinkage long sys_sched_get_priority_min(int policy)
 asmlinkage
 long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
 {
+	struct task_struct *p;
 	int retval = -EINVAL;
 	struct timespec t;
-	task_t *p;
 
 	if (pid < 0)
 		goto out_nounlock;
@@ -4641,12 +4651,13 @@ static inline struct task_struct *younger_sibling(struct task_struct *p)
 	return list_entry(p->sibling.next,struct task_struct,sibling);
 }
 
-static void show_task(task_t *p)
+static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" };
+
+static void show_task(struct task_struct *p)
 {
-	task_t *relative;
-	unsigned state;
+	struct task_struct *relative;
 	unsigned long free = 0;
-	static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" };
+	unsigned state;
 
 	printk("%-13.13s ", p->comm);
 	state = p->state ? __ffs(p->state) + 1 : 0;
@@ -4697,7 +4708,7 @@ static void show_task(task_t *p)
 
 void show_state(void)
 {
-	task_t *g, *p;
+	struct task_struct *g, *p;
 
 #if (BITS_PER_LONG == 32)
 	printk("\n"
@@ -4730,7 +4741,7 @@ void show_state(void)
  * NOTE: this function does not set the idle thread's NEED_RESCHED
  * flag, to make booting more robust.
  */
-void __devinit init_idle(task_t *idle, int cpu)
+void __devinit init_idle(struct task_struct *idle, int cpu)
 {
 	runqueue_t *rq = cpu_rq(cpu);
 	unsigned long flags;
@@ -4793,7 +4804,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
  * task must not exit() & deallocate itself prematurely.  The
  * call is not atomic; no spinlocks may be held.
  */
-int set_cpus_allowed(task_t *p, cpumask_t new_mask)
+int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 {
 	unsigned long flags;
 	migration_req_t req;
@@ -5061,7 +5072,7 @@ void idle_task_exit(void)
 	mmdrop(mm);
 }
 
-static void migrate_dead(unsigned int dead_cpu, task_t *p)
+static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
 {
 	struct runqueue *rq = cpu_rq(dead_cpu);
 
@@ -5096,9 +5107,8 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 			struct list_head *list = &rq->arrays[arr].queue[i];
 
 			while (!list_empty(list))
-				migrate_dead(dead_cpu,
-					     list_entry(list->next, task_t,
-							run_list));
+				migrate_dead(dead_cpu, list_entry(list->next,
+					     struct task_struct, run_list));
 		}
 	}
 }
@@ -6801,7 +6811,7 @@ void normalize_rt_tasks(void)
  *
  * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
  */
-task_t *curr_task(int cpu)
+struct task_struct *curr_task(int cpu)
 {
 	return cpu_curr(cpu);
 }
@@ -6821,7 +6831,7 @@ task_t *curr_task(int cpu)
  *
  * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
  */
-void set_curr_task(int cpu, task_t *p)
+void set_curr_task(int cpu, struct task_struct *p)
 {
 	cpu_curr(cpu) = p;
 }
diff --git a/kernel/timer.c b/kernel/timer.c
index b761898..396a3c0 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1368,7 +1368,7 @@ asmlinkage long sys_getegid(void)
 
 static void process_timeout(unsigned long __data)
 {
-	wake_up_process((task_t *)__data);
+	wake_up_process((struct task_struct *)__data);
 }
 
 /**
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 59f0b42..90d2c60 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -51,7 +51,7 @@ struct cpu_workqueue_struct {
 	wait_queue_head_t work_done;
 
 	struct workqueue_struct *wq;
-	task_t *thread;
+	struct task_struct *thread;
 
 	int run_depth;		/* Detect run_workqueue() recursion depth */
 } ____cacheline_aligned;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index d46ed0f..b9af136 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -225,7 +225,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
  * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that
  * we select a process with CAP_SYS_RAW_IO set).
  */
-static void __oom_kill_task(task_t *p, const char *message)
+static void __oom_kill_task(struct task_struct *p, const char *message)
 {
 	if (p->pid == 1) {
 		WARN_ON(1);
@@ -255,10 +255,10 @@ static void __oom_kill_task(task_t *p, const char *message)
 	force_sig(SIGKILL, p);
 }
 
-static int oom_kill_task(task_t *p, const char *message)
+static int oom_kill_task(struct task_struct *p, const char *message)
 {
 	struct mm_struct *mm;
-	task_t * g, * q;
+	struct task_struct *g, *q;
 
 	mm = p->mm;
 
@@ -316,7 +316,7 @@ static int oom_kill_process(struct task_struct *p, unsigned long points,
  */
 void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 {
-	task_t *p;
+	struct task_struct *p;
 	unsigned long points = 0;
 
 	if (printk_ratelimit()) {
-- 
cgit v0.10.2


From 70b97a7f0b19cf1f2619deb5cc41e8b78c591aa7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 3 Jul 2006 00:25:42 -0700
Subject: [PATCH] sched: cleanup, convert sched.c-internal typedefs to struct

convert:

 - runqueue_t to 'struct rq'
 - prio_array_t to 'struct prio_array'
 - migration_req_t to 'struct migration_req'

I was the one who added these but they are both against the kernel coding
style and also were used inconsistently at places.  So just get rid of them at
once, now that we are flushing the scheduler patch-queue anyway.

Conversion was mostly scripted, the result was reviewed and all secondary
whitespace and style impact (if any) was fixed up by hand.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c2797f0..1c876e2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -534,7 +534,6 @@ extern struct user_struct *find_user(uid_t);
 extern struct user_struct root_user;
 #define INIT_USER (&root_user)
 
-typedef struct prio_array prio_array_t;
 struct backing_dev_info;
 struct reclaim_state;
 
@@ -715,6 +714,8 @@ enum sleep_type {
 	SLEEP_INTERRUPTED,
 };
 
+struct prio_array;
+
 struct task_struct {
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
 	struct thread_info *thread_info;
@@ -732,7 +733,7 @@ struct task_struct {
 	int load_weight;	/* for niceness load balancing purposes */
 	int prio, static_prio, normal_prio;
 	struct list_head run_list;
-	prio_array_t *array;
+	struct prio_array *array;
 
 	unsigned short ioprio;
 	unsigned int btrace_seq;
diff --git a/kernel/sched.c b/kernel/sched.c
index 021b3121..4ee400f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -188,8 +188,6 @@ static inline unsigned int task_timeslice(struct task_struct *p)
  * These are the runqueue data structures:
  */
 
-typedef struct runqueue runqueue_t;
-
 struct prio_array {
 	unsigned int nr_active;
 	DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */
@@ -203,7 +201,7 @@ struct prio_array {
  * (such as the load balancing or the thread migration code), lock
  * acquire operations must be ordered by ascending &runqueue.
  */
-struct runqueue {
+struct rq {
 	spinlock_t lock;
 
 	/*
@@ -229,7 +227,7 @@ struct runqueue {
 	unsigned long long timestamp_last_tick;
 	struct task_struct *curr, *idle;
 	struct mm_struct *prev_mm;
-	prio_array_t *active, *expired, arrays[2];
+	struct prio_array *active, *expired, arrays[2];
 	int best_expired_prio;
 	atomic_t nr_iowait;
 
@@ -266,7 +264,7 @@ struct runqueue {
 	struct lock_class_key rq_lock_key;
 };
 
-static DEFINE_PER_CPU(struct runqueue, runqueues);
+static DEFINE_PER_CPU(struct rq, runqueues);
 
 /*
  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
@@ -291,16 +289,16 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);
 #endif
 
 #ifndef __ARCH_WANT_UNLOCKED_CTXSW
-static inline int task_running(runqueue_t *rq, struct task_struct *p)
+static inline int task_running(struct rq *rq, struct task_struct *p)
 {
 	return rq->curr == p;
 }
 
-static inline void prepare_lock_switch(runqueue_t *rq, struct task_struct *next)
+static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
 {
 }
 
-static inline void finish_lock_switch(runqueue_t *rq, struct task_struct *prev)
+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 {
 #ifdef CONFIG_DEBUG_SPINLOCK
 	/* this is a valid case when another task releases the spinlock */
@@ -317,7 +315,7 @@ static inline void finish_lock_switch(runqueue_t *rq, struct task_struct *prev)
 }
 
 #else /* __ARCH_WANT_UNLOCKED_CTXSW */
-static inline int task_running(runqueue_t *rq, struct task_struct *p)
+static inline int task_running(struct rq *rq, struct task_struct *p)
 {
 #ifdef CONFIG_SMP
 	return p->oncpu;
@@ -326,7 +324,7 @@ static inline int task_running(runqueue_t *rq, struct task_struct *p)
 #endif
 }
 
-static inline void prepare_lock_switch(runqueue_t *rq, struct task_struct *next)
+static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
 {
 #ifdef CONFIG_SMP
 	/*
@@ -343,7 +341,7 @@ static inline void prepare_lock_switch(runqueue_t *rq, struct task_struct *next)
 #endif
 }
 
-static inline void finish_lock_switch(runqueue_t *rq, struct task_struct *prev)
+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 {
 #ifdef CONFIG_SMP
 	/*
@@ -364,10 +362,10 @@ static inline void finish_lock_switch(runqueue_t *rq, struct task_struct *prev)
  * __task_rq_lock - lock the runqueue a given task resides on.
  * Must be called interrupts disabled.
  */
-static inline runqueue_t *__task_rq_lock(struct task_struct *p)
+static inline struct rq *__task_rq_lock(struct task_struct *p)
 	__acquires(rq->lock)
 {
-	struct runqueue *rq;
+	struct rq *rq;
 
 repeat_lock_task:
 	rq = task_rq(p);
@@ -384,10 +382,10 @@ repeat_lock_task:
  * interrupts.  Note the ordering: we can safely lookup the task_rq without
  * explicitly disabling preemption.
  */
-static runqueue_t *task_rq_lock(struct task_struct *p, unsigned long *flags)
+static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 	__acquires(rq->lock)
 {
-	struct runqueue *rq;
+	struct rq *rq;
 
 repeat_lock_task:
 	local_irq_save(*flags);
@@ -400,13 +398,13 @@ repeat_lock_task:
 	return rq;
 }
 
-static inline void __task_rq_unlock(runqueue_t *rq)
+static inline void __task_rq_unlock(struct rq *rq)
 	__releases(rq->lock)
 {
 	spin_unlock(&rq->lock);
 }
 
-static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags)
+static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
 	__releases(rq->lock)
 {
 	spin_unlock_irqrestore(&rq->lock, *flags);
@@ -426,7 +424,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
 	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
 	seq_printf(seq, "timestamp %lu\n", jiffies);
 	for_each_online_cpu(cpu) {
-		runqueue_t *rq = cpu_rq(cpu);
+		struct rq *rq = cpu_rq(cpu);
 #ifdef CONFIG_SMP
 		struct sched_domain *sd;
 		int dcnt = 0;
@@ -513,10 +511,10 @@ struct file_operations proc_schedstat_operations = {
 /*
  * rq_lock - lock a given runqueue and disable interrupts.
  */
-static inline runqueue_t *this_rq_lock(void)
+static inline struct rq *this_rq_lock(void)
 	__acquires(rq->lock)
 {
-	runqueue_t *rq;
+	struct rq *rq;
 
 	local_irq_disable();
 	rq = this_rq();
@@ -554,7 +552,7 @@ static inline void sched_info_dequeued(struct task_struct *t)
 static void sched_info_arrive(struct task_struct *t)
 {
 	unsigned long now = jiffies, diff = 0;
-	struct runqueue *rq = task_rq(t);
+	struct rq *rq = task_rq(t);
 
 	if (t->sched_info.last_queued)
 		diff = now - t->sched_info.last_queued;
@@ -597,7 +595,7 @@ static inline void sched_info_queued(struct task_struct *t)
  */
 static inline void sched_info_depart(struct task_struct *t)
 {
-	struct runqueue *rq = task_rq(t);
+	struct rq *rq = task_rq(t);
 	unsigned long diff = jiffies - t->sched_info.last_arrival;
 
 	t->sched_info.cpu_time += diff;
@@ -614,7 +612,7 @@ static inline void sched_info_depart(struct task_struct *t)
 static inline void
 sched_info_switch(struct task_struct *prev, struct task_struct *next)
 {
-	struct runqueue *rq = task_rq(prev);
+	struct rq *rq = task_rq(prev);
 
 	/*
 	 * prev now departs the cpu.  It's not interesting to record
@@ -635,7 +633,7 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
 /*
  * Adding/removing a task to/from a priority array:
  */
-static void dequeue_task(struct task_struct *p, prio_array_t *array)
+static void dequeue_task(struct task_struct *p, struct prio_array *array)
 {
 	array->nr_active--;
 	list_del(&p->run_list);
@@ -643,7 +641,7 @@ static void dequeue_task(struct task_struct *p, prio_array_t *array)
 		__clear_bit(p->prio, array->bitmap);
 }
 
-static void enqueue_task(struct task_struct *p, prio_array_t *array)
+static void enqueue_task(struct task_struct *p, struct prio_array *array)
 {
 	sched_info_queued(p);
 	list_add_tail(&p->run_list, array->queue + p->prio);
@@ -656,12 +654,13 @@ static void enqueue_task(struct task_struct *p, prio_array_t *array)
  * Put task to the end of the run list without the overhead of dequeue
  * followed by enqueue.
  */
-static void requeue_task(struct task_struct *p, prio_array_t *array)
+static void requeue_task(struct task_struct *p, struct prio_array *array)
 {
 	list_move_tail(&p->run_list, array->queue + p->prio);
 }
 
-static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)
+static inline void
+enqueue_task_head(struct task_struct *p, struct prio_array *array)
 {
 	list_add(&p->run_list, array->queue + p->prio);
 	__set_bit(p->prio, array->bitmap);
@@ -739,24 +738,24 @@ static void set_load_weight(struct task_struct *p)
 }
 
 static inline void
-inc_raw_weighted_load(runqueue_t *rq, const struct task_struct *p)
+inc_raw_weighted_load(struct rq *rq, const struct task_struct *p)
 {
 	rq->raw_weighted_load += p->load_weight;
 }
 
 static inline void
-dec_raw_weighted_load(runqueue_t *rq, const struct task_struct *p)
+dec_raw_weighted_load(struct rq *rq, const struct task_struct *p)
 {
 	rq->raw_weighted_load -= p->load_weight;
 }
 
-static inline void inc_nr_running(struct task_struct *p, runqueue_t *rq)
+static inline void inc_nr_running(struct task_struct *p, struct rq *rq)
 {
 	rq->nr_running++;
 	inc_raw_weighted_load(rq, p);
 }
 
-static inline void dec_nr_running(struct task_struct *p, runqueue_t *rq)
+static inline void dec_nr_running(struct task_struct *p, struct rq *rq)
 {
 	rq->nr_running--;
 	dec_raw_weighted_load(rq, p);
@@ -803,9 +802,9 @@ static int effective_prio(struct task_struct *p)
 /*
  * __activate_task - move a task to the runqueue.
  */
-static void __activate_task(struct task_struct *p, runqueue_t *rq)
+static void __activate_task(struct task_struct *p, struct rq *rq)
 {
-	prio_array_t *target = rq->active;
+	struct prio_array *target = rq->active;
 
 	if (batch_task(p))
 		target = rq->expired;
@@ -816,7 +815,7 @@ static void __activate_task(struct task_struct *p, runqueue_t *rq)
 /*
  * __activate_idle_task - move idle task to the _front_ of runqueue.
  */
-static inline void __activate_idle_task(struct task_struct *p, runqueue_t *rq)
+static inline void __activate_idle_task(struct task_struct *p, struct rq *rq)
 {
 	enqueue_task_head(p, rq->active);
 	inc_nr_running(p, rq);
@@ -898,7 +897,7 @@ static int recalc_task_prio(struct task_struct *p, unsigned long long now)
  * Update all the scheduling statistics stuff. (sleep average
  * calculation, priority modifiers, etc.)
  */
-static void activate_task(struct task_struct *p, runqueue_t *rq, int local)
+static void activate_task(struct task_struct *p, struct rq *rq, int local)
 {
 	unsigned long long now;
 
@@ -906,7 +905,7 @@ static void activate_task(struct task_struct *p, runqueue_t *rq, int local)
 #ifdef CONFIG_SMP
 	if (!local) {
 		/* Compensate for drifting sched_clock */
-		runqueue_t *this_rq = this_rq();
+		struct rq *this_rq = this_rq();
 		now = (now - this_rq->timestamp_last_tick)
 			+ rq->timestamp_last_tick;
 	}
@@ -945,7 +944,7 @@ static void activate_task(struct task_struct *p, runqueue_t *rq, int local)
 /*
  * deactivate_task - remove a task from the runqueue.
  */
-static void deactivate_task(struct task_struct *p, runqueue_t *rq)
+static void deactivate_task(struct task_struct *p, struct rq *rq)
 {
 	dec_nr_running(p, rq);
 	dequeue_task(p, p->array);
@@ -1009,23 +1008,23 @@ unsigned long weighted_cpuload(const int cpu)
 }
 
 #ifdef CONFIG_SMP
-typedef struct {
+struct migration_req {
 	struct list_head list;
 
 	struct task_struct *task;
 	int dest_cpu;
 
 	struct completion done;
-} migration_req_t;
+};
 
 /*
  * The task's runqueue lock must be held.
  * Returns true if you have to wait for migration thread.
  */
 static int
-migrate_task(struct task_struct *p, int dest_cpu, migration_req_t *req)
+migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
 {
-	runqueue_t *rq = task_rq(p);
+	struct rq *rq = task_rq(p);
 
 	/*
 	 * If the task is not on a runqueue (and not running), then
@@ -1056,7 +1055,7 @@ migrate_task(struct task_struct *p, int dest_cpu, migration_req_t *req)
 void wait_task_inactive(struct task_struct *p)
 {
 	unsigned long flags;
-	runqueue_t *rq;
+	struct rq *rq;
 	int preempted;
 
 repeat:
@@ -1107,7 +1106,7 @@ void kick_process(struct task_struct *p)
  */
 static inline unsigned long source_load(int cpu, int type)
 {
-	runqueue_t *rq = cpu_rq(cpu);
+	struct rq *rq = cpu_rq(cpu);
 
 	if (type == 0)
 		return rq->raw_weighted_load;
@@ -1121,7 +1120,7 @@ static inline unsigned long source_load(int cpu, int type)
  */
 static inline unsigned long target_load(int cpu, int type)
 {
-	runqueue_t *rq = cpu_rq(cpu);
+	struct rq *rq = cpu_rq(cpu);
 
 	if (type == 0)
 		return rq->raw_weighted_load;
@@ -1134,7 +1133,7 @@ static inline unsigned long target_load(int cpu, int type)
  */
 static inline unsigned long cpu_avg_load_per_task(int cpu)
 {
-	runqueue_t *rq = cpu_rq(cpu);
+	struct rq *rq = cpu_rq(cpu);
 	unsigned long n = rq->nr_running;
 
 	return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE;
@@ -1338,10 +1337,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 	int cpu, this_cpu, success = 0;
 	unsigned long flags;
 	long old_state;
-	runqueue_t *rq;
+	struct rq *rq;
 #ifdef CONFIG_SMP
-	unsigned long load, this_load;
 	struct sched_domain *sd, *this_sd = NULL;
+	unsigned long load, this_load;
 	int new_cpu;
 #endif
 
@@ -1577,9 +1576,9 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags)
  */
 void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 {
+	struct rq *rq, *this_rq;
 	unsigned long flags;
 	int this_cpu, cpu;
-	runqueue_t *rq, *this_rq;
 
 	rq = task_rq_lock(p, &flags);
 	BUG_ON(p->state != TASK_RUNNING);
@@ -1662,7 +1661,7 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 void fastcall sched_exit(struct task_struct *p)
 {
 	unsigned long flags;
-	runqueue_t *rq;
+	struct rq *rq;
 
 	/*
 	 * If the child was a (relative-) CPU hog then decrease
@@ -1693,7 +1692,7 @@ void fastcall sched_exit(struct task_struct *p)
  * prepare_task_switch sets up locking and calls architecture specific
  * hooks.
  */
-static inline void prepare_task_switch(runqueue_t *rq, struct task_struct *next)
+static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
 {
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
@@ -1714,7 +1713,7 @@ static inline void prepare_task_switch(runqueue_t *rq, struct task_struct *next)
  * with the lock held can cause deadlocks; see schedule() for
  * details.)
  */
-static inline void finish_task_switch(runqueue_t *rq, struct task_struct *prev)
+static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	__releases(rq->lock)
 {
 	struct mm_struct *mm = rq->prev_mm;
@@ -1755,7 +1754,8 @@ static inline void finish_task_switch(runqueue_t *rq, struct task_struct *prev)
 asmlinkage void schedule_tail(struct task_struct *prev)
 	__releases(rq->lock)
 {
-	runqueue_t *rq = this_rq();
+	struct rq *rq = this_rq();
+
 	finish_task_switch(rq, prev);
 #ifdef __ARCH_WANT_UNLOCKED_CTXSW
 	/* In this case, finish_task_switch does not reenable preemption */
@@ -1770,7 +1770,7 @@ asmlinkage void schedule_tail(struct task_struct *prev)
  * thread's register state.
  */
 static inline struct task_struct *
-context_switch(runqueue_t *rq, struct task_struct *prev,
+context_switch(struct rq *rq, struct task_struct *prev,
 	       struct task_struct *next)
 {
 	struct mm_struct *mm = next->mm;
@@ -1883,7 +1883,7 @@ task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd)
  * Note this does not disable interrupts like task_rq_lock,
  * you need to do so manually before calling.
  */
-static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
+static void double_rq_lock(struct rq *rq1, struct rq *rq2)
 	__acquires(rq1->lock)
 	__acquires(rq2->lock)
 {
@@ -1907,7 +1907,7 @@ static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
  * Note this does not restore interrupts like task_rq_unlock,
  * you need to do so manually after calling.
  */
-static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
+static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 	__releases(rq1->lock)
 	__releases(rq2->lock)
 {
@@ -1921,7 +1921,7 @@ static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
 /*
  * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
  */
-static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
+static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(this_rq->lock)
 	__acquires(busiest->lock)
 	__acquires(this_rq->lock)
@@ -1944,9 +1944,9 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
  */
 static void sched_migrate_task(struct task_struct *p, int dest_cpu)
 {
-	migration_req_t req;
-	runqueue_t *rq;
+	struct migration_req req;
 	unsigned long flags;
+	struct rq *rq;
 
 	rq = task_rq_lock(p, &flags);
 	if (!cpu_isset(dest_cpu, p->cpus_allowed)
@@ -1987,9 +1987,9 @@ void sched_exec(void)
  * pull_task - move a task from a remote runqueue to the local runqueue.
  * Both runqueues must be locked.
  */
-static void pull_task(runqueue_t *src_rq, prio_array_t *src_array,
-		      struct task_struct *p, runqueue_t *this_rq,
-		      prio_array_t *this_array, int this_cpu)
+static void pull_task(struct rq *src_rq, struct prio_array *src_array,
+		      struct task_struct *p, struct rq *this_rq,
+		      struct prio_array *this_array, int this_cpu)
 {
 	dequeue_task(p, src_array);
 	dec_nr_running(p, src_rq);
@@ -2010,7 +2010,7 @@ static void pull_task(runqueue_t *src_rq, prio_array_t *src_array,
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
 static
-int can_migrate_task(struct task_struct *p, runqueue_t *rq, int this_cpu,
+int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 		     struct sched_domain *sd, enum idle_type idle,
 		     int *all_pinned)
 {
@@ -2050,14 +2050,14 @@ int can_migrate_task(struct task_struct *p, runqueue_t *rq, int this_cpu,
  *
  * Called with both runqueues locked.
  */
-static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
+static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		      unsigned long max_nr_move, unsigned long max_load_move,
 		      struct sched_domain *sd, enum idle_type idle,
 		      int *all_pinned)
 {
 	int idx, pulled = 0, pinned = 0, this_best_prio, best_prio,
 	    best_prio_seen, skip_for_load;
-	prio_array_t *array, *dst_array;
+	struct prio_array *array, *dst_array;
 	struct list_head *head, *curr;
 	struct task_struct *tmp;
 	long rem_load_move;
@@ -2212,7 +2212,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		sum_weighted_load = sum_nr_running = avg_load = 0;
 
 		for_each_cpu_mask(i, group->cpumask) {
-			runqueue_t *rq = cpu_rq(i);
+			struct rq *rq = cpu_rq(i);
 
 			if (*sd_idle && !idle_cpu(i))
 				*sd_idle = 0;
@@ -2428,11 +2428,11 @@ ret:
 /*
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
-static runqueue_t *
+static struct rq *
 find_busiest_queue(struct sched_group *group, enum idle_type idle,
 		   unsigned long imbalance)
 {
-	runqueue_t *busiest = NULL, *rq;
+	struct rq *busiest = NULL, *rq;
 	unsigned long max_load = 0;
 	int i;
 
@@ -2468,13 +2468,13 @@ static inline unsigned long minus_1_or_zero(unsigned long n)
  *
  * Called with this_rq unlocked.
  */
-static int load_balance(int this_cpu, runqueue_t *this_rq,
+static int load_balance(int this_cpu, struct rq *this_rq,
 			struct sched_domain *sd, enum idle_type idle)
 {
 	int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
 	struct sched_group *group;
 	unsigned long imbalance;
-	runqueue_t *busiest;
+	struct rq *busiest;
 
 	if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
 	    !sched_smt_power_savings)
@@ -2596,10 +2596,10 @@ out_one_pinned:
  * this_rq is locked.
  */
 static int
-load_balance_newidle(int this_cpu, runqueue_t *this_rq, struct sched_domain *sd)
+load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 {
 	struct sched_group *group;
-	runqueue_t *busiest = NULL;
+	struct rq *busiest = NULL;
 	unsigned long imbalance;
 	int nr_moved = 0;
 	int sd_idle = 0;
@@ -2657,7 +2657,7 @@ out_balanced:
  * idle_balance is called by schedule() if this_cpu is about to become
  * idle. Attempts to pull tasks from other CPUs.
  */
-static void idle_balance(int this_cpu, runqueue_t *this_rq)
+static void idle_balance(int this_cpu, struct rq *this_rq)
 {
 	struct sched_domain *sd;
 
@@ -2678,11 +2678,11 @@ static void idle_balance(int this_cpu, runqueue_t *this_rq)
  *
  * Called with busiest_rq locked.
  */
-static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
+static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 {
-	struct sched_domain *sd;
-	runqueue_t *target_rq;
 	int target_cpu = busiest_rq->push_cpu;
+	struct sched_domain *sd;
+	struct rq *target_rq;
 
 	/* Is there any task to move? */
 	if (busiest_rq->nr_running <= 1)
@@ -2736,7 +2736,7 @@ static inline unsigned long cpu_offset(int cpu)
 }
 
 static void
-rebalance_tick(int this_cpu, runqueue_t *this_rq, enum idle_type idle)
+rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
 {
 	unsigned long this_load, interval, j = cpu_offset(this_cpu);
 	struct sched_domain *sd;
@@ -2790,15 +2790,15 @@ rebalance_tick(int this_cpu, runqueue_t *this_rq, enum idle_type idle)
 /*
  * on UP we do not need to balance between CPUs:
  */
-static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle)
+static inline void rebalance_tick(int cpu, struct rq *rq, enum idle_type idle)
 {
 }
-static inline void idle_balance(int cpu, runqueue_t *rq)
+static inline void idle_balance(int cpu, struct rq *rq)
 {
 }
 #endif
 
-static inline int wake_priority_sleeper(runqueue_t *rq)
+static inline int wake_priority_sleeper(struct rq *rq)
 {
 	int ret = 0;
 
@@ -2826,7 +2826,7 @@ EXPORT_PER_CPU_SYMBOL(kstat);
  * Bank in p->sched_time the ns elapsed since the last tick or switch.
  */
 static inline void
-update_cpu_clock(struct task_struct *p, runqueue_t *rq, unsigned long long now)
+update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now)
 {
 	p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick);
 }
@@ -2858,7 +2858,7 @@ unsigned long long current_sched_time(const struct task_struct *p)
  * increasing number of running tasks. We also ignore the interactivity
  * if a better static_prio task has expired:
  */
-static inline int expired_starving(runqueue_t *rq)
+static inline int expired_starving(struct rq *rq)
 {
 	if (rq->curr->static_prio > rq->best_expired_prio)
 		return 1;
@@ -2900,7 +2900,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 			 cputime_t cputime)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-	runqueue_t *rq = this_rq();
+	struct rq *rq = this_rq();
 	cputime64_t tmp;
 
 	p->stime = cputime_add(p->stime, cputime);
@@ -2930,7 +2930,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 	cputime64_t tmp = cputime_to_cputime64(steal);
-	runqueue_t *rq = this_rq();
+	struct rq *rq = this_rq();
 
 	if (p == rq->idle) {
 		p->stime = cputime_add(p->stime, steal);
@@ -2954,7 +2954,7 @@ void scheduler_tick(void)
 	unsigned long long now = sched_clock();
 	struct task_struct *p = current;
 	int cpu = smp_processor_id();
-	runqueue_t *rq = this_rq();
+	struct rq *rq = cpu_rq(cpu);
 
 	update_cpu_clock(p, rq, now);
 
@@ -3043,7 +3043,7 @@ out:
 }
 
 #ifdef CONFIG_SCHED_SMT
-static inline void wakeup_busy_runqueue(runqueue_t *rq)
+static inline void wakeup_busy_runqueue(struct rq *rq)
 {
 	/* If an SMT runqueue is sleeping due to priority reasons wake it up */
 	if (rq->curr == rq->idle && rq->nr_running)
@@ -3069,7 +3069,7 @@ static void wake_sleeping_dependent(int this_cpu)
 		return;
 
 	for_each_cpu_mask(i, sd->span) {
-		runqueue_t *smt_rq = cpu_rq(i);
+		struct rq *smt_rq = cpu_rq(i);
 
 		if (i == this_cpu)
 			continue;
@@ -3099,7 +3099,7 @@ smt_slice(struct task_struct *p, struct sched_domain *sd)
  * need to be obeyed.
  */
 static int
-dependent_sleeper(int this_cpu, runqueue_t *this_rq, struct task_struct *p)
+dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
 {
 	struct sched_domain *tmp, *sd = NULL;
 	int ret = 0, i;
@@ -3120,7 +3120,7 @@ dependent_sleeper(int this_cpu, runqueue_t *this_rq, struct task_struct *p)
 
 	for_each_cpu_mask(i, sd->span) {
 		struct task_struct *smt_curr;
-		runqueue_t *smt_rq;
+		struct rq *smt_rq;
 
 		if (i == this_cpu)
 			continue;
@@ -3166,7 +3166,7 @@ static inline void wake_sleeping_dependent(int this_cpu)
 {
 }
 static inline int
-dependent_sleeper(int this_cpu, runqueue_t *this_rq, struct task_struct *p)
+dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
 {
 	return 0;
 }
@@ -3221,13 +3221,13 @@ static inline int interactive_sleep(enum sleep_type sleep_type)
 asmlinkage void __sched schedule(void)
 {
 	struct task_struct *prev, *next;
+	struct prio_array *array;
 	struct list_head *queue;
 	unsigned long long now;
 	unsigned long run_time;
 	int cpu, idx, new_prio;
-	prio_array_t *array;
 	long *switch_count;
-	runqueue_t *rq;
+	struct rq *rq;
 
 	/*
 	 * Test if we are atomic.  Since do_exit() needs to call into
@@ -3787,9 +3787,9 @@ EXPORT_SYMBOL(sleep_on_timeout);
  */
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
+	struct prio_array *array;
 	unsigned long flags;
-	prio_array_t *array;
-	runqueue_t *rq;
+	struct rq *rq;
 	int oldprio;
 
 	BUG_ON(prio < 0 || prio > MAX_PRIO);
@@ -3828,10 +3828,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 
 void set_user_nice(struct task_struct *p, long nice)
 {
+	struct prio_array *array;
 	int old_prio, delta;
 	unsigned long flags;
-	prio_array_t *array;
-	runqueue_t *rq;
+	struct rq *rq;
 
 	if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
 		return;
@@ -4012,9 +4012,9 @@ int sched_setscheduler(struct task_struct *p, int policy,
 		       struct sched_param *param)
 {
 	int retval, oldprio, oldpolicy = -1;
-	prio_array_t *array;
+	struct prio_array *array;
 	unsigned long flags;
-	runqueue_t *rq;
+	struct rq *rq;
 
 	/* may grab non-irq protected spin_locks */
 	BUG_ON(in_interrupt());
@@ -4376,9 +4376,8 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
  */
 asmlinkage long sys_sched_yield(void)
 {
-	runqueue_t *rq = this_rq_lock();
-	prio_array_t *array = current->array;
-	prio_array_t *target = rq->expired;
+	struct rq *rq = this_rq_lock();
+	struct prio_array *array = current->array, *target = rq->expired;
 
 	schedstat_inc(rq, yld_cnt);
 	/*
@@ -4525,7 +4524,7 @@ EXPORT_SYMBOL(yield);
  */
 void __sched io_schedule(void)
 {
-	struct runqueue *rq = &__raw_get_cpu_var(runqueues);
+	struct rq *rq = &__raw_get_cpu_var(runqueues);
 
 	atomic_inc(&rq->nr_iowait);
 	schedule();
@@ -4535,7 +4534,7 @@ EXPORT_SYMBOL(io_schedule);
 
 long __sched io_schedule_timeout(long timeout)
 {
-	struct runqueue *rq = &__raw_get_cpu_var(runqueues);
+	struct rq *rq = &__raw_get_cpu_var(runqueues);
 	long ret;
 
 	atomic_inc(&rq->nr_iowait);
@@ -4743,7 +4742,7 @@ void show_state(void)
  */
 void __devinit init_idle(struct task_struct *idle, int cpu)
 {
-	runqueue_t *rq = cpu_rq(cpu);
+	struct rq *rq = cpu_rq(cpu);
 	unsigned long flags;
 
 	idle->timestamp = sched_clock();
@@ -4782,7 +4781,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 /*
  * This is how migration works:
  *
- * 1) we queue a migration_req_t structure in the source CPU's
+ * 1) we queue a struct migration_req structure in the source CPU's
  *    runqueue and wake up that CPU's migration thread.
  * 2) we down() the locked semaphore => thread blocks.
  * 3) migration thread wakes up (implicitly it forces the migrated
@@ -4806,9 +4805,9 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
  */
 int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 {
+	struct migration_req req;
 	unsigned long flags;
-	migration_req_t req;
-	runqueue_t *rq;
+	struct rq *rq;
 	int ret = 0;
 
 	rq = task_rq_lock(p, &flags);
@@ -4850,7 +4849,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed);
  */
 static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 {
-	runqueue_t *rq_dest, *rq_src;
+	struct rq *rq_dest, *rq_src;
 	int ret = 0;
 
 	if (unlikely(cpu_is_offline(dest_cpu)))
@@ -4896,15 +4895,15 @@ out:
 static int migration_thread(void *data)
 {
 	int cpu = (long)data;
-	runqueue_t *rq;
+	struct rq *rq;
 
 	rq = cpu_rq(cpu);
 	BUG_ON(rq->migration_thread != current);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	while (!kthread_should_stop()) {
+		struct migration_req *req;
 		struct list_head *head;
-		migration_req_t *req;
 
 		try_to_freeze();
 
@@ -4928,7 +4927,7 @@ static int migration_thread(void *data)
 			set_current_state(TASK_INTERRUPTIBLE);
 			continue;
 		}
-		req = list_entry(head->next, migration_req_t, list);
+		req = list_entry(head->next, struct migration_req, list);
 		list_del_init(head->next);
 
 		spin_unlock(&rq->lock);
@@ -4955,10 +4954,10 @@ wait_to_die:
 /* Figure out where task on dead CPU should go, use force if neccessary. */
 static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
-	runqueue_t *rq;
 	unsigned long flags;
-	int dest_cpu;
 	cpumask_t mask;
+	struct rq *rq;
+	int dest_cpu;
 
 restart:
 	/* On same node? */
@@ -4998,9 +4997,9 @@ restart:
  * their home CPUs. So we just add the counter to another CPU's counter,
  * to keep the global sum constant after CPU-down:
  */
-static void migrate_nr_uninterruptible(runqueue_t *rq_src)
+static void migrate_nr_uninterruptible(struct rq *rq_src)
 {
-	runqueue_t *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL));
+	struct rq *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL));
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -5036,7 +5035,7 @@ static void migrate_live_tasks(int src_cpu)
 void sched_idle_next(void)
 {
 	int this_cpu = smp_processor_id();
-	runqueue_t *rq = cpu_rq(this_cpu);
+	struct rq *rq = cpu_rq(this_cpu);
 	struct task_struct *p = rq->idle;
 	unsigned long flags;
 
@@ -5074,7 +5073,7 @@ void idle_task_exit(void)
 
 static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
 {
-	struct runqueue *rq = cpu_rq(dead_cpu);
+	struct rq *rq = cpu_rq(dead_cpu);
 
 	/* Must be exiting, otherwise would be on tasklist. */
 	BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
@@ -5099,7 +5098,7 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
 /* release_task() removes task from tasklist, so we won't find dead tasks. */
 static void migrate_dead_tasks(unsigned int dead_cpu)
 {
-	struct runqueue *rq = cpu_rq(dead_cpu);
+	struct rq *rq = cpu_rq(dead_cpu);
 	unsigned int arr, i;
 
 	for (arr = 0; arr < 2; arr++) {
@@ -5123,8 +5122,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
 	struct task_struct *p;
 	int cpu = (long)hcpu;
-	struct runqueue *rq;
 	unsigned long flags;
+	struct rq *rq;
 
 	switch (action) {
 	case CPU_UP_PREPARE:
@@ -5176,9 +5175,10 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		 * the requestors. */
 		spin_lock_irq(&rq->lock);
 		while (!list_empty(&rq->migration_queue)) {
-			migration_req_t *req;
+			struct migration_req *req;
+
 			req = list_entry(rq->migration_queue.next,
-					 migration_req_t, list);
+					 struct migration_req, list);
 			list_del_init(&req->list);
 			complete(&req->done);
 		}
@@ -5361,7 +5361,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
  */
 static void cpu_attach_domain(struct sched_domain *sd, int cpu)
 {
-	runqueue_t *rq = cpu_rq(cpu);
+	struct rq *rq = cpu_rq(cpu);
 	struct sched_domain *tmp;
 
 	/* Remove the sched domains which do not contribute to scheduling. */
@@ -6690,8 +6690,8 @@ void __init sched_init(void)
 	int i, j, k;
 
 	for_each_possible_cpu(i) {
-		prio_array_t *array;
-		runqueue_t *rq;
+		struct prio_array *array;
+		struct rq *rq;
 
 		rq = cpu_rq(i);
 		spin_lock_init(&rq->lock);
@@ -6764,10 +6764,10 @@ EXPORT_SYMBOL(__might_sleep);
 #ifdef CONFIG_MAGIC_SYSRQ
 void normalize_rt_tasks(void)
 {
+	struct prio_array *array;
 	struct task_struct *p;
-	prio_array_t *array;
 	unsigned long flags;
-	runqueue_t *rq;
+	struct rq *rq;
 
 	read_lock_irq(&tasklist_lock);
 	for_each_process(p) {
-- 
cgit v0.10.2


From c1b4df5d2a5b1a9c037fe3b2e42804cf1267c750 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Mon, 3 Jul 2006 19:24:19 -0700
Subject: [IOAT]: fix sparse ulong warning

Fix sparse warning:
drivers/dma/ioatdma.c:444:32: warning: constant 0xFFFFFFFFFFFFFFC0 is so big it is unsigned long

Also needs a MAINTAINERS entry.

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioatdma_registers.h
index 41a21ab..a30c734 100644
--- a/drivers/dma/ioatdma_registers.h
+++ b/drivers/dma/ioatdma_registers.h
@@ -76,7 +76,7 @@
 #define IOAT_CHANSTS_OFFSET			0x04	/* 64-bit Channel Status Register */
 #define IOAT_CHANSTS_OFFSET_LOW			0x04
 #define IOAT_CHANSTS_OFFSET_HIGH		0x08
-#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR	0xFFFFFFFFFFFFFFC0
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR	0xFFFFFFFFFFFFFFC0UL
 #define IOAT_CHANSTS_SOFT_ERR			0x0000000000000010
 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS	0x0000000000000007
 #define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE	0x0
-- 
cgit v0.10.2


From 882d02d6fb040a246b005305ffeb790bb5ce80ad Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 3 Jul 2006 19:26:15 -0700
Subject: [AF_UNIX]: datagram getpeersec fix

The unix_get_peersec_dgram() stub should have been inlined so that it
disappears.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e9a287b..f70475b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -144,7 +144,7 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 	scm->seclen = *UNIXSECLEN(skb);
 }
 #else
-static void unix_get_peersec_dgram(struct sk_buff *skb)
+static inline void unix_get_peersec_dgram(struct sk_buff *skb)
 { }
 
 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
-- 
cgit v0.10.2


From 56e0873b7b146564a0c36e225624f5a9b2d63791 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 3 Jul 2006 19:27:20 -0700
Subject: [IOAT]: drivers/dma/iovlock.c: make num_pages_spanned() static

This patch makes the needlessly global num_pages_spanned() static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c
index 5ed327e..d637555 100644
--- a/drivers/dma/iovlock.c
+++ b/drivers/dma/iovlock.c
@@ -31,7 +31,7 @@
 #include <asm/io.h>
 #include <asm/uaccess.h>
 
-int num_pages_spanned(struct iovec *iov)
+static int num_pages_spanned(struct iovec *iov)
 {
 	return
 	((PAGE_ALIGN((unsigned long)iov->iov_base + iov->iov_len) -
-- 
cgit v0.10.2


From 518d1c9679f644811adaa22d853f43a83fbdae84 Mon Sep 17 00:00:00 2001
From: Benoit Boissinot <benoit.boissinot@ens-lyon.org>
Date: Mon, 3 Jul 2006 19:28:13 -0700
Subject: [IOAT]: Fix a warning in ioatdma

drivers/dma/ioatdma.c: In function 'ioat_init_module':
drivers/dma/ioatdma.c:830: warning: control reaches end of non-void function

Signed-off-by: Benoit Boissinot <benoit.boissinot@ens-lyon.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c
index ecad8f6..d481069 100644
--- a/drivers/dma/ioatdma.c
+++ b/drivers/dma/ioatdma.c
@@ -826,7 +826,7 @@ static int __init ioat_init_module(void)
 	/* if forced, worst case is that rmmod hangs */
 	__unsafe(THIS_MODULE);
 
-	pci_module_init(&ioat_pci_drv);
+	return pci_module_init(&ioat_pci_drv);
 }
 
 module_init(ioat_init_module);
-- 
cgit v0.10.2


From 8dc22d2b642f8a6f14ef8878777a05311e5d1d7e Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Mon, 3 Jul 2006 19:29:15 -0700
Subject: [ROSE]: Fix dereference of skb pointer after free.

If rose_route_frame return success we'll dereference a stale pointer.
Likely this is only going to result in bad statistics for the ROSE
interface.

This fixes coverity 946.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c
index 9d0bf2a..7c279e2 100644
--- a/net/rose/rose_dev.c
+++ b/net/rose/rose_dev.c
@@ -59,6 +59,7 @@ static int rose_rebuild_header(struct sk_buff *skb)
 	struct net_device_stats *stats = netdev_priv(dev);
 	unsigned char *bp = (unsigned char *)skb->data;
 	struct sk_buff *skbn;
+	unsigned int len;
 
 #ifdef CONFIG_INET
 	if (arp_find(bp + 7, skb)) {
@@ -75,6 +76,8 @@ static int rose_rebuild_header(struct sk_buff *skb)
 
 	kfree_skb(skb);
 
+	len = skbn->len;
+
 	if (!rose_route_frame(skbn, NULL)) {
 		kfree_skb(skbn);
 		stats->tx_errors++;
@@ -82,7 +85,7 @@ static int rose_rebuild_header(struct sk_buff *skb)
 	}
 
 	stats->tx_packets++;
-	stats->tx_bytes += skbn->len;
+	stats->tx_bytes += len;
 #endif
 	return 1;
 }
-- 
cgit v0.10.2


From 006f68b84fe19fc5015a8cf838a10d75f91f0218 Mon Sep 17 00:00:00 2001
From: Ralf Baechle DL5RB <ralf@linux-mips.org>
Date: Mon, 3 Jul 2006 19:30:18 -0700
Subject: [AX.25]: Reference counting for AX.25 routes.

In the past routes could be freed even though the were possibly in use ...

Signed-off-by: Ralf Baechle DL5RB <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/ax25.h b/include/net/ax25.h
index 7cd528e..69374cd 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -182,14 +182,26 @@ typedef struct {
 
 typedef struct ax25_route {
 	struct ax25_route	*next;
-	atomic_t		ref;
+	atomic_t		refcount;
 	ax25_address		callsign;
 	struct net_device	*dev;
 	ax25_digi		*digipeat;
 	char			ip_mode;
-	struct timer_list	timer;
 } ax25_route;
 
+static inline void ax25_hold_route(ax25_route *ax25_rt)
+{
+	atomic_inc(&ax25_rt->refcount);
+}
+
+extern void __ax25_put_route(ax25_route *ax25_rt);
+
+static inline void ax25_put_route(ax25_route *ax25_rt)
+{
+	if (atomic_dec_and_test(&ax25_rt->refcount))
+		__ax25_put_route(ax25_rt);
+}
+
 typedef struct {
 	char			slave;			/* slave_mode?   */
 	struct timer_list	slave_timer;		/* timeout timer */
@@ -348,17 +360,11 @@ extern int  ax25_check_iframes_acked(ax25_cb *, unsigned short);
 extern void ax25_rt_device_down(struct net_device *);
 extern int  ax25_rt_ioctl(unsigned int, void __user *);
 extern struct file_operations ax25_route_fops;
+extern ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev);
 extern int  ax25_rt_autobind(ax25_cb *, ax25_address *);
-extern ax25_route *ax25_rt_find_route(ax25_route *, ax25_address *,
-	struct net_device *);
 extern struct sk_buff *ax25_rt_build_path(struct sk_buff *, ax25_address *, ax25_address *, ax25_digi *);
 extern void ax25_rt_free(void);
 
-static inline void ax25_put_route(ax25_route *ax25_rt)
-{
-	atomic_dec(&ax25_rt->ref);
-}
-
 /* ax25_std_in.c */
 extern int  ax25_std_frame_in(ax25_cb *, struct sk_buff *, int);
 
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 9be5c15..136c3ae 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -103,11 +103,13 @@ int ax25_rebuild_header(struct sk_buff *skb)
 {
 	struct sk_buff *ourskb;
 	unsigned char *bp  = skb->data;
-	struct net_device *dev;
+	ax25_route *route;
+	struct net_device *dev = NULL;
 	ax25_address *src, *dst;
+	ax25_digi *digipeat = NULL;
 	ax25_dev *ax25_dev;
-	ax25_route _route, *route = &_route;
 	ax25_cb *ax25;
+	char ip_mode = ' ';
 
 	dst = (ax25_address *)(bp + 1);
 	src = (ax25_address *)(bp + 8);
@@ -115,8 +117,12 @@ int ax25_rebuild_header(struct sk_buff *skb)
   	if (arp_find(bp + 1, skb))
   		return 1;
 
-	route = ax25_rt_find_route(route, dst, NULL);
-	dev      = route->dev;
+	route = ax25_get_route(dst, NULL);
+	if (route) {
+		digipeat = route->digipeat;
+		dev = route->dev;
+		ip_mode = route->ip_mode;
+	};
 
 	if (dev == NULL)
 		dev = skb->dev;
@@ -126,7 +132,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
 	}
 
 	if (bp[16] == AX25_P_IP) {
-		if (route->ip_mode == 'V' || (route->ip_mode == ' ' && ax25_dev->values[AX25_VALUES_IPDEFMODE])) {
+		if (ip_mode == 'V' || (ip_mode == ' ' && ax25_dev->values[AX25_VALUES_IPDEFMODE])) {
 			/*
 			 *	We copy the buffer and release the original thereby
 			 *	keeping it straight
@@ -172,7 +178,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
 			    ourskb, 
 			    ax25_dev->values[AX25_VALUES_PACLEN], 
 			    &src_c,
-			    &dst_c, route->digipeat, dev);
+			    &dst_c, digipeat, dev);
 			if (ax25) {
 				ax25_cb_put(ax25);
 			}
@@ -190,7 +196,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
 
 	skb_pull(skb, AX25_KISS_HEADER_LEN);
 
-	if (route->digipeat != NULL) {
+	if (digipeat != NULL) {
 		if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) {
 			kfree_skb(skb);
 			goto put;
@@ -202,7 +208,8 @@ int ax25_rebuild_header(struct sk_buff *skb)
 	ax25_queue_xmit(skb, dev);
 
 put:
-	ax25_put_route(route);
+	if (route)
+		ax25_put_route(route);
 
   	return 1;
 }
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 5ac9825..51b7bda 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -41,8 +41,6 @@
 static ax25_route *ax25_route_list;
 static DEFINE_RWLOCK(ax25_route_lock);
 
-static ax25_route *ax25_get_route(ax25_address *, struct net_device *);
-
 void ax25_rt_device_down(struct net_device *dev)
 {
 	ax25_route *s, *t, *ax25_rt;
@@ -115,7 +113,7 @@ static int ax25_rt_add(struct ax25_routes_struct *route)
 		return -ENOMEM;
 	}
 
-	atomic_set(&ax25_rt->ref, 0);
+	atomic_set(&ax25_rt->refcount, 1);
 	ax25_rt->callsign     = route->dest_addr;
 	ax25_rt->dev          = ax25_dev->dev;
 	ax25_rt->digipeat     = NULL;
@@ -140,23 +138,10 @@ static int ax25_rt_add(struct ax25_routes_struct *route)
 	return 0;
 }
 
-static void ax25_rt_destroy(ax25_route *ax25_rt)
+void __ax25_put_route(ax25_route *ax25_rt)
 {
-	if (atomic_read(&ax25_rt->ref) == 0) {
-		kfree(ax25_rt->digipeat);
-		kfree(ax25_rt);
-		return;
-	}
-
-	/*
-	 * Uh...  Route is still in use; we can't yet destroy it.  Retry later.
-	 */
-	init_timer(&ax25_rt->timer);
-	ax25_rt->timer.data	= (unsigned long) ax25_rt;
-	ax25_rt->timer.function	= (void *) ax25_rt_destroy;
-	ax25_rt->timer.expires	= jiffies + 5 * HZ;
-
-	add_timer(&ax25_rt->timer);
+	kfree(ax25_rt->digipeat);
+	kfree(ax25_rt);
 }
 
 static int ax25_rt_del(struct ax25_routes_struct *route)
@@ -177,12 +162,12 @@ static int ax25_rt_del(struct ax25_routes_struct *route)
 		    ax25cmp(&route->dest_addr, &s->callsign) == 0) {
 			if (ax25_route_list == s) {
 				ax25_route_list = s->next;
-				ax25_rt_destroy(s);
+				ax25_put_route(s);
 			} else {
 				for (t = ax25_route_list; t != NULL; t = t->next) {
 					if (t->next == s) {
 						t->next = s->next;
-						ax25_rt_destroy(s);
+						ax25_put_route(s);
 						break;
 					}
 				}
@@ -362,7 +347,7 @@ struct file_operations ax25_route_fops = {
  *
  *	Only routes with a reference count of zero can be destroyed.
  */
-static ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev)
+ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev)
 {
 	ax25_route *ax25_spe_rt = NULL;
 	ax25_route *ax25_def_rt = NULL;
@@ -392,7 +377,7 @@ static ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev)
 		ax25_rt = ax25_spe_rt;
 
 	if (ax25_rt != NULL)
-		atomic_inc(&ax25_rt->ref);
+		ax25_hold_route(ax25_rt);
 
 	read_unlock(&ax25_route_lock);
 
@@ -467,24 +452,6 @@ put:
 	return 0;
 }
 
-ax25_route *ax25_rt_find_route(ax25_route * route, ax25_address *addr,
-	struct net_device *dev)
-{
-	ax25_route *ax25_rt;
-
-	if ((ax25_rt = ax25_get_route(addr, dev)))
-		return ax25_rt;
-
-	route->next     = NULL;
-	atomic_set(&route->ref, 1);
-	route->callsign = *addr;
-	route->dev      = dev;
-	route->digipeat = NULL;
-	route->ip_mode  = ' ';
-
-	return route;
-}
-
 struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
 	ax25_address *dest, ax25_digi *digi)
 {
-- 
cgit v0.10.2


From 18601a7d30c8340af4d786793ee52828471d2630 Mon Sep 17 00:00:00 2001
From: Ralf Baechle DL5RB <ralf@linux-mips.org>
Date: Mon, 3 Jul 2006 19:31:14 -0700
Subject: [NETROM]: Use socket helpers instead of direct fiddling with struct
 sock

Signed-off-by: Ralf Baechle DL5RB <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index eba6df0..389a411 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -800,7 +800,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
 
 	/* Now attach up the new socket */
 	kfree_skb(skb);
-	sk->sk_ack_backlog--;
+	sk_acceptq_removed(sk);
 	newsock->sk = newsk;
 
 out:
@@ -985,7 +985,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
 	nr_make->vr        = 0;
 	nr_make->vl        = 0;
 	nr_make->state     = NR_STATE_3;
-	sk->sk_ack_backlog++;
+	sk_acceptq_added(sk);
 
 	nr_insert_socket(make);
 
-- 
cgit v0.10.2


From d85838c55d836c33077344fab424f200f2827d84 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Mon, 3 Jul 2006 19:32:23 -0700
Subject: [ROSE]: Try all routes when establishing a ROSE connections.

From Jean-Paul F6FBB

ROSE will only try to establish a route using the first route in its
routing table.  Fix to iterate through all additional routes if a
connection attempt has failed.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 7799fe8..d0a67bb 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -752,7 +752,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
 
 		rose_insert_socket(sk);		/* Finish the bind */
 	}
-
+rose_try_next_neigh:
 	rose->dest_addr   = addr->srose_addr;
 	rose->dest_call   = addr->srose_call;
 	rose->rand        = ((long)rose & 0xFFFF) + rose->lci;
@@ -810,6 +810,11 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
 	}
 
 	if (sk->sk_state != TCP_ESTABLISHED) {
+	/* Try next neighbour */
+		rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic);
+		if (rose->neighbour)
+			goto rose_try_next_neigh;
+	/* No more neighbour */
 		sock->state = SS_UNCONNECTED;
 		return sock_error(sk);	/* Always set at this point */
 	}
-- 
cgit v0.10.2


From 6ce1669fdb6b0a0faf9b2e2ba08048b520c57841 Mon Sep 17 00:00:00 2001
From: Horms <horms@verge.net.au>
Date: Mon, 3 Jul 2006 19:35:40 -0700
Subject: [IPVS]: Add sysctl documentation

* Derived from http://www.linuxvirtualserver.org/docs/sysctl.html, v1.4
  maintained by Wensong Zhang

* Adjusted preample to match ip-sysctl.txt

* Sorted options into alphabetical order

* Added expire_quiescent_template

* Removed timeout_* which are no longer present

* Incoporated doc/debug-levels.txt from IPVS source tree into
  description of ipvs_debug

* Minor spelling fixes

* Further editing more than welcome

Signed-Off-By: Horms <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt
new file mode 100644
index 0000000..4ccdbca
--- /dev/null
+++ b/Documentation/networking/ipvs-sysctl.txt
@@ -0,0 +1,143 @@
+/proc/sys/net/ipv4/vs/* Variables:
+
+am_droprate - INTEGER
+        default 10
+
+        It sets the always mode drop rate, which is used in the mode 3
+        of the drop_rate defense.
+
+amemthresh - INTEGER
+        default 1024
+
+        It sets the available memory threshold (in pages), which is
+        used in the automatic modes of defense. When there is no
+        enough available memory, the respective strategy will be
+        enabled and the variable is automatically set to 2, otherwise
+        the strategy is disabled and the variable is  set  to 1.
+
+cache_bypass - BOOLEAN
+        0 - disabled (default)
+        not 0 - enabled
+
+        If it is enabled, forward packets to the original destination
+        directly when no cache server is available and destination
+        address is not local (iph->daddr is RTN_UNICAST). It is mostly
+        used in transparent web cache cluster.
+
+debug_level - INTEGER
+	0          - transmission error messages (default)
+	1          - non-fatal error messages
+	2          - configuration
+	3          - destination trash
+	4          - drop entry
+	5          - service lookup
+	6          - scheduling
+	7          - connection new/expire, lookup and synchronization
+	8          - state transition
+	9          - binding destination, template checks and applications
+	10         - IPVS packet transmission
+	11         - IPVS packet handling (ip_vs_in/ip_vs_out)
+	12 or more - packet traversal
+
+	Only available when IPVS is compiled with the CONFIG_IPVS_DEBUG
+
+	Higher debugging levels include the messages for lower debugging
+	levels, so setting debug level 2, includes level 0, 1 and 2
+	messages. Thus, logging becomes more and more verbose the higher
+	the level.
+
+drop_entry - INTEGER
+        0  - disabled (default)
+
+        The drop_entry defense is to randomly drop entries in the
+        connection hash table, just in order to collect back some
+        memory for new connections. In the current code, the
+        drop_entry procedure can be activated every second, then it
+        randomly scans 1/32 of the whole and drops entries that are in
+        the SYN-RECV/SYNACK state, which should be effective against
+        syn-flooding attack.
+
+        The valid values of drop_entry are from 0 to 3, where 0 means
+        that this strategy is always disabled, 1 and 2 mean automatic
+        modes (when there is no enough available memory, the strategy
+        is enabled and the variable is automatically set to 2,
+        otherwise the strategy is disabled and the variable is set to
+        1), and 3 means that that the strategy is always enabled.
+
+drop_packet - INTEGER
+        0  - disabled (default)
+
+        The drop_packet defense is designed to drop 1/rate packets
+        before forwarding them to real servers. If the rate is 1, then
+        drop all the incoming packets.
+
+        The value definition is the same as that of the drop_entry. In
+        the automatic mode, the rate is determined by the follow
+        formula: rate = amemthresh / (amemthresh - available_memory)
+        when available memory is less than the available memory
+        threshold. When the mode 3 is set, the always mode drop rate
+        is controlled by the /proc/sys/net/ipv4/vs/am_droprate.
+
+expire_nodest_conn - BOOLEAN
+        0 - disabled (default)
+        not 0 - enabled
+
+        The default value is 0, the load balancer will silently drop
+        packets when its destination server is not available. It may
+        be useful, when user-space monitoring program deletes the
+        destination server (because of server overload or wrong
+        detection) and add back the server later, and the connections
+        to the server can continue.
+
+        If this feature is enabled, the load balancer will expire the
+        connection immediately when a packet arrives and its
+        destination server is not available, then the client program
+        will be notified that the connection is closed. This is
+        equivalent to the feature some people requires to flush
+        connections when its destination is not available.
+
+expire_quiescent_template - BOOLEAN
+	0 - disabled (default)
+	not 0 - enabled
+
+	When set to a non-zero value, the load balancer will expire
+	persistent templates when the destination server is quiescent.
+	This may be useful, when a user makes a destination server
+	quiescent by setting its weight to 0 and it is desired that
+	subsequent otherwise persistent connections are sent to a
+	different destination server.  By default new persistent
+	connections are allowed to quiescent destination servers.
+
+	If this feature is enabled, the load balancer will expire the
+	persistence template if it is to be used to schedule a new
+	connection and the destination server is quiescent.
+
+nat_icmp_send - BOOLEAN
+        0 - disabled (default)
+        not 0 - enabled
+
+        It controls sending icmp error messages (ICMP_DEST_UNREACH)
+        for VS/NAT when the load balancer receives packets from real
+        servers but the connection entries don't exist.
+
+secure_tcp - INTEGER
+        0  - disabled (default)
+
+        The secure_tcp defense is to use a more complicated state
+        transition table and some possible short timeouts of each
+        state. In the VS/NAT, it delays the entering the ESTABLISHED
+        until the real server starts to send data and ACK packet
+        (after 3-way handshake).
+
+        The value definition is the same as that of drop_entry or
+        drop_packet.
+
+sync_threshold - INTEGER
+        default 3
+
+        It sets synchronization threshold, which is the minimum number
+        of incoming packets that a connection needs to receive before
+        the connection will be synchronized. A connection will be
+        synchronized, every time the number of its incoming packets
+        modulus 50 equals the threshold. The range of the threshold is
+        from 0 to 49.
-- 
cgit v0.10.2


From bbcf467dab42ea3c85f368df346c82af2fbba665 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 3 Jul 2006 19:38:35 -0700
Subject: [NET]: Verify gso_type too in gso_segment

We don't want nasty Xen guests to pass a TCPv6 packet in with gso_type set
to TCPv4 or even UDP (or a packet that's both TCP and UDP).

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8d15715..318d467 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1106,7 +1106,15 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 	int ihl;
 	int id;
 
-	if (!pskb_may_pull(skb, sizeof(*iph)))
+	if (unlikely(skb_shinfo(skb)->gso_type &
+		     ~(SKB_GSO_TCPV4 |
+		       SKB_GSO_UDP |
+		       SKB_GSO_DODGY |
+		       SKB_GSO_TCP_ECN |
+		       0)))
+		goto out;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
 		goto out;
 
 	iph = skb->nh.iph;
@@ -1114,7 +1122,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 	if (ihl < sizeof(*iph))
 		goto out;
 
-	if (!pskb_may_pull(skb, ihl))
+	if (unlikely(!pskb_may_pull(skb, ihl)))
 		goto out;
 
 	skb->h.raw = __skb_pull(skb, ihl);
@@ -1125,7 +1133,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 
 	rcu_read_lock();
 	ops = rcu_dereference(inet_protos[proto]);
-	if (ops && ops->gso_segment)
+	if (likely(ops && ops->gso_segment))
 		segs = ops->gso_segment(skb, features);
 	rcu_read_unlock();
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8044587..f6a2d92 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2170,8 +2170,19 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 
 	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
 		/* Packet is from an untrusted source, reset gso_segs. */
-		int mss = skb_shinfo(skb)->gso_size;
+		int type = skb_shinfo(skb)->gso_type;
+		int mss;
+
+		if (unlikely(type &
+			     ~(SKB_GSO_TCPV4 |
+			       SKB_GSO_DODGY |
+			       SKB_GSO_TCP_ECN |
+			       SKB_GSO_TCPV6 |
+			       0) ||
+			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
+			goto out;
 
+		mss = skb_shinfo(skb)->gso_size;
 		skb_shinfo(skb)->gso_segs = (skb->len + mss - 1) / mss;
 
 		segs = NULL;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index c28e5c2..ec59344 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -64,6 +64,14 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
 	struct inet6_protocol *ops;
 	int proto;
 
+	if (unlikely(skb_shinfo(skb)->gso_type &
+		     ~(SKB_GSO_UDP |
+		       SKB_GSO_DODGY |
+		       SKB_GSO_TCP_ECN |
+		       SKB_GSO_TCPV6 |
+		       0)))
+		goto out;
+
 	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
 		goto out;
 
-- 
cgit v0.10.2


From 863fae666acb87b150f4634e6e79476ebe274f43 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Mon, 3 Jul 2006 19:39:36 -0700
Subject: [TIPC] Fixed sk_buff panic caused by tipc_link_bundle_buf (REVISED)

The recent change to direct inspection of bundle buffer tailroom did not
account for the possiblity of unrequested tailroom added by skb_alloc(),
thereby allowing a bundle to be created that exceeds the current link MTU.
An additional check now ensures that bundling works correctly no matter
if the bundle buffer is smaller, larger, or equal to the link MTU.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Per Liden <per.liden@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/tipc/core.h b/net/tipc/core.h
index 86f54f3..762aac2 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -297,7 +297,10 @@ static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
  * buf_acquire - creates a TIPC message buffer
  * @size: message size (including TIPC header)
  *
- * Returns a new buffer.  Space is reserved for a data link header.
+ * Returns a new buffer with data pointers set to the specified size.
+ * 
+ * NOTE: Headroom is reserved to allow prepending of a data link header.
+ *       There may also be unrequested tailroom present at the buffer's end.
  */
 
 static inline struct sk_buff *buf_acquire(u32 size)
diff --git a/net/tipc/link.c b/net/tipc/link.c
index c6831c7..c10e18a 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -998,6 +998,8 @@ static int link_bundle_buf(struct link *l_ptr,
 		return 0;
 	if (skb_tailroom(bundler) < (pad + size))
 		return 0;
+	if (link_max_pkt(l_ptr) < (to_pos + size))
+		return 0;
 
 	skb_put(bundler, pad + size);
 	memcpy(bundler->data + to_pos, buf->data, size);
-- 
cgit v0.10.2


From 6703931c546e6dec0431776fa616d5accd3e7162 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Mon, 3 Jul 2006 19:41:11 -0700
Subject: [IPV6]: Fix ipv6 GSO payload length

Fix ipv6 GSO payload length calculation.

The ipv6 payload length excludes the ipv6 base header length and so
must be subtracted.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index ec59344..0c17dec 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -119,7 +119,8 @@ unlock:
 
 	for (skb = segs; skb; skb = skb->next) {
 		ipv6h = skb->nh.ipv6h;
-		ipv6h->payload_len = htons(skb->len - skb->mac_len);
+		ipv6h->payload_len = htons(skb->len - skb->mac_len -
+					   sizeof(*ipv6h));
 	}
 
 out:
-- 
cgit v0.10.2


From b0026624f1aa3e38a887cb483de61f104d600b97 Mon Sep 17 00:00:00 2001
From: Michael Chan <mchan@broadcom.com>
Date: Mon, 3 Jul 2006 19:42:14 -0700
Subject: [TG3]: Add ipv6 TSO feature

Enable ipv6 TSO feature on chips that support it.

Update version to 3.61.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index e5e1b29..f645921 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -68,8 +68,8 @@
 
 #define DRV_MODULE_NAME		"tg3"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"3.61"
-#define DRV_MODULE_RELDATE	"June 29, 2006"
+#define DRV_MODULE_VERSION	"3.62"
+#define DRV_MODULE_RELDATE	"June 30, 2006"
 
 #define TG3_DEF_MAC_MODE	0
 #define TG3_DEF_RX_MODE		0
@@ -3798,18 +3798,24 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			goto out_unlock;
 		}
 
-		tcp_opt_len = ((skb->h.th->doff - 5) * 4);
-		ip_tcp_len = (skb->nh.iph->ihl * 4) + sizeof(struct tcphdr);
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+			mss |= (skb_headlen(skb) - ETH_HLEN) << 9;
+		else {
+			tcp_opt_len = ((skb->h.th->doff - 5) * 4);
+			ip_tcp_len = (skb->nh.iph->ihl * 4) +
+				     sizeof(struct tcphdr);
+
+			skb->nh.iph->check = 0;
+			skb->nh.iph->tot_len = htons(mss + ip_tcp_len +
+						     tcp_opt_len);
+			mss |= (ip_tcp_len + tcp_opt_len) << 9;
+		}
 
 		base_flags |= (TXD_FLAG_CPU_PRE_DMA |
 			       TXD_FLAG_CPU_POST_DMA);
 
-		skb->nh.iph->check = 0;
-		skb->nh.iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len);
-
 		skb->h.th->check = 0;
 
-		mss |= (ip_tcp_len + tcp_opt_len) << 9;
 	}
 	else if (skb->ip_summed == CHECKSUM_HW)
 		base_flags |= TXD_FLAG_TCPUDP_CSUM;
@@ -7887,6 +7893,12 @@ static int tg3_set_tso(struct net_device *dev, u32 value)
 			return -EINVAL;
 		return 0;
 	}
+	if (tp->tg3_flags2 & TG3_FLG2_HW_TSO_2) {
+		if (value)
+			dev->features |= NETIF_F_TSO6;
+		else
+			dev->features &= ~NETIF_F_TSO6;
+	}
 	return ethtool_op_set_tso(dev, value);
 }
 #endif
@@ -11507,8 +11519,11 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 	 * Firmware TSO on older chips gives lower performance, so it
 	 * is off by default, but can be enabled using ethtool.
 	 */
-	if (tp->tg3_flags2 & TG3_FLG2_HW_TSO)
+	if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) {
 		dev->features |= NETIF_F_TSO;
+		if (tp->tg3_flags2 & TG3_FLG2_HW_TSO_2)
+			dev->features |= NETIF_F_TSO6;
+	}
 
 #endif
 
-- 
cgit v0.10.2


From fe4ada2d6f0b746246e9b5bf0f4f2e4d3a07d26e Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Mon, 3 Jul 2006 19:44:51 -0700
Subject: [IOAT]: fix header file kernel-doc

Fix kernel-doc problems in include/linux/dmaengine.h:
- add some fields/parameters
- expand some descriptions
- fix typos

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 272010a6..c94d8f1 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -44,7 +44,7 @@ enum dma_event {
 };
 
 /**
- * typedef dma_cookie_t
+ * typedef dma_cookie_t - an opaque DMA cookie
  *
  * if dma_cookie_t is >0 it's a DMA request cookie, <0 it's an error code
  */
@@ -80,14 +80,14 @@ struct dma_chan_percpu {
 
 /**
  * struct dma_chan - devices supply DMA channels, clients use them
- * @client: ptr to the client user of this chan, will be NULL when unused
- * @device: ptr to the dma device who supplies this channel, always !NULL
+ * @client: ptr to the client user of this chan, will be %NULL when unused
+ * @device: ptr to the dma device who supplies this channel, always !%NULL
  * @cookie: last cookie value returned to client
- * @chan_id:
- * @class_dev:
+ * @chan_id: channel ID for sysfs
+ * @class_dev: class device for sysfs
  * @refcount: kref, used in "bigref" slow-mode
- * @slow_ref:
- * @rcu:
+ * @slow_ref: indicates that the DMA channel is free
+ * @rcu: the DMA channel's RCU head
  * @client_node: used to add this to the client chan list
  * @device_node: used to add this to the device chan list
  * @local: per-cpu pointer to a struct dma_chan_percpu
@@ -162,10 +162,17 @@ struct dma_client {
  * @chancnt: how many DMA channels are supported
  * @channels: the list of struct dma_chan
  * @global_node: list_head for global dma_device_list
- * @refcount:
- * @done:
- * @dev_id:
- * Other func ptrs: used to make use of this device's capabilities
+ * @refcount: reference count
+ * @done: IO completion struct
+ * @dev_id: unique device ID
+ * @device_alloc_chan_resources: allocate resources and return the
+ *	number of allocated descriptors
+ * @device_free_chan_resources: release DMA channel's resources
+ * @device_memcpy_buf_to_buf: memcpy buf pointer to buf pointer
+ * @device_memcpy_buf_to_pg: memcpy buf pointer to struct page
+ * @device_memcpy_pg_to_pg: memcpy struct page/offset to struct page/offset
+ * @device_memcpy_complete: poll the status of an IOAT DMA transaction
+ * @device_memcpy_issue_pending: push appended descriptors to hardware
  */
 struct dma_device {
 
@@ -211,7 +218,7 @@ void dma_async_client_chan_request(struct dma_client *client,
  * Both @dest and @src must be mappable to a bus address according to the
  * DMA mapping API rules for streaming mappings.
  * Both @dest and @src must stay memory resident (kernel memory or locked
- * user space pages)
+ * user space pages).
  */
 static inline dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
 	void *dest, void *src, size_t len)
@@ -225,7 +232,7 @@ static inline dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
 }
 
 /**
- * dma_async_memcpy_buf_to_pg - offloaded copy
+ * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
  * @chan: DMA channel to offload copy to
  * @page: destination page
  * @offset: offset in page to copy to
@@ -250,18 +257,18 @@ static inline dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan,
 }
 
 /**
- * dma_async_memcpy_buf_to_pg - offloaded copy
+ * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
  * @chan: DMA channel to offload copy to
- * @dest_page: destination page
+ * @dest_pg: destination page
  * @dest_off: offset in page to copy to
- * @src_page: source page
+ * @src_pg: source page
  * @src_off: offset in page to copy from
  * @len: length
  *
  * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus
  * address according to the DMA mapping API rules for streaming mappings.
  * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident
- * (kernel memory or locked user space pages)
+ * (kernel memory or locked user space pages).
  */
 static inline dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan,
 	struct page *dest_pg, unsigned int dest_off, struct page *src_pg,
@@ -278,7 +285,7 @@ static inline dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan,
 
 /**
  * dma_async_memcpy_issue_pending - flush pending copies to HW
- * @chan:
+ * @chan: target DMA channel
  *
  * This allows drivers to push copies to HW in batches,
  * reducing MMIO writes where possible.
-- 
cgit v0.10.2


From 6508871eddbbd3e62799f3b6182a6a4fd3ef31d5 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Mon, 3 Jul 2006 19:45:31 -0700
Subject: [IOAT]: fix kernel-doc in source files

Fix kernel-doc warnings in drivers/dma/:
- use correct function & parameter names
- add descriptions where omitted

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 5829143..1527804 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -166,8 +166,8 @@ static struct dma_chan *dma_client_chan_alloc(struct dma_client *client)
 }
 
 /**
- * dma_client_chan_free - release a DMA channel
- * @chan: &dma_chan
+ * dma_chan_cleanup - release a DMA channel's resources
+ * @kref: kernel reference structure that contains the DMA channel device
  */
 void dma_chan_cleanup(struct kref *kref)
 {
@@ -199,7 +199,7 @@ static void dma_client_chan_free(struct dma_chan *chan)
  * dma_chans_rebalance - reallocate channels to clients
  *
  * When the number of DMA channel in the system changes,
- * channels need to be rebalanced among clients
+ * channels need to be rebalanced among clients.
  */
 static void dma_chans_rebalance(void)
 {
@@ -264,7 +264,7 @@ struct dma_client *dma_async_client_register(dma_event_callback event_callback)
 
 /**
  * dma_async_client_unregister - unregister a client and free the &dma_client
- * @client:
+ * @client: &dma_client to free
  *
  * Force frees any allocated DMA channels, frees the &dma_client memory
  */
@@ -306,7 +306,7 @@ void dma_async_client_chan_request(struct dma_client *client,
 }
 
 /**
- * dma_async_device_register -
+ * dma_async_device_register - registers DMA devices found
  * @device: &dma_device
  */
 int dma_async_device_register(struct dma_device *device)
@@ -348,8 +348,8 @@ int dma_async_device_register(struct dma_device *device)
 }
 
 /**
- * dma_async_device_unregister -
- * @device: &dma_device
+ * dma_async_device_cleanup - function called when all references are released
+ * @kref: kernel reference object
  */
 static void dma_async_device_cleanup(struct kref *kref)
 {
@@ -359,7 +359,11 @@ static void dma_async_device_cleanup(struct kref *kref)
 	complete(&device->done);
 }
 
-void dma_async_device_unregister(struct dma_device* device)
+/**
+ * dma_async_device_unregister - unregisters DMA devices
+ * @device: &dma_device
+ */
+void dma_async_device_unregister(struct dma_device *device)
 {
 	struct dma_chan *chan;
 	unsigned long flags;
diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c
index d481069..78bf46d 100644
--- a/drivers/dma/ioatdma.c
+++ b/drivers/dma/ioatdma.c
@@ -217,7 +217,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
 
 /**
  * do_ioat_dma_memcpy - actual function that initiates a IOAT DMA transaction
- * @chan: IOAT DMA channel handle
+ * @ioat_chan: IOAT DMA channel handle
  * @dest: DMA destination address
  * @src: DMA source address
  * @len: transaction length in bytes
@@ -383,7 +383,7 @@ static dma_cookie_t ioat_dma_memcpy_buf_to_pg(struct dma_chan *chan,
  * @dest_off: offset into that page
  * @src_pg: pointer to the page to copy from
  * @src_off: offset into that page
- * @len: transaction length in bytes. This is guaranteed to not make a copy
+ * @len: transaction length in bytes. This is guaranteed not to make a copy
  *	 across a page boundary.
  */
 
@@ -407,7 +407,7 @@ static dma_cookie_t ioat_dma_memcpy_pg_to_pg(struct dma_chan *chan,
 }
 
 /**
- * ioat_dma_memcpy_issue_pending - push potentially unrecognoized appended descriptors to hw
+ * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended descriptors to hw
  * @chan: DMA channel handle
  */
 
@@ -510,6 +510,8 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
  * ioat_dma_is_complete - poll the status of a IOAT DMA transaction
  * @chan: IOAT DMA channel handle
  * @cookie: DMA transaction identifier
+ * @done: if not %NULL, updated with last completed transaction
+ * @used: if not %NULL, updated with last used transaction
  */
 
 static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
-- 
cgit v0.10.2


From 4bdbf6c033ba05bff65f69989baccd7103c5a530 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Mon, 3 Jul 2006 19:47:27 -0700
Subject: [NET]: add+use poison defines

Add and use poison defines in net/.

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/poison.h b/include/linux/poison.h
index a5347c0..05d7d0f 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -45,6 +45,10 @@
 /********** drivers/atm/ **********/
 #define ATM_POISON_FREE		0x12
 
+/********** net/ **********/
+#define NEIGHBOR_DEAD		0xdeadbeef
+#define NETFILTER_LINK_POISON	0xdead57ac
+
 /********** kernel/mutexes **********/
 #define MUTEX_DEBUG_INIT	0x11
 #define MUTEX_DEBUG_FREE	0x22
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 87a454f..121bf6f 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -23,6 +23,7 @@
 #include <linux/if.h> /* for IFF_UP */
 #include <linux/inetdevice.h>
 #include <linux/bitops.h>
+#include <linux/poison.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/rcupdate.h>
@@ -266,7 +267,7 @@ static void clip_neigh_destroy(struct neighbour *neigh)
 	DPRINTK("clip_neigh_destroy (neigh %p)\n", neigh);
 	if (NEIGH2ENTRY(neigh)->vccs)
 		printk(KERN_CRIT "clip_neigh_destroy: vccs != NULL !!!\n");
-	NEIGH2ENTRY(neigh)->vccs = (void *) 0xdeadbeef;
+	NEIGH2ENTRY(neigh)->vccs = (void *) NEIGHBOR_DEAD;
 }
 
 static void clip_neigh_solicit(struct neighbour *neigh, struct sk_buff *skb)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7ef143c..f26898b 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -25,6 +25,7 @@
 #include <linux/vmalloc.h>
 #include <linux/netdevice.h>
 #include <linux/module.h>
+#include <linux/poison.h>
 #include <linux/icmpv6.h>
 #include <net/ipv6.h>
 #include <asm/uaccess.h>
@@ -376,7 +377,7 @@ ip6t_do_table(struct sk_buff **pskb,
 	} while (!hotdrop);
 
 #ifdef CONFIG_NETFILTER_DEBUG
-	((struct ip6t_entry *)table_base)->comefrom = 0xdead57ac;
+	((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
 #endif
 	read_unlock_bh(&table->lock);
 
-- 
cgit v0.10.2


From 3c6b377321678c649f9b3c66da0149975c614102 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Mon, 3 Jul 2006 19:48:25 -0700
Subject: [ATM]: add+use poison defines

ATM: add and use POISON define values.

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c
index d3b4263..4521a24 100644
--- a/drivers/atm/ambassador.c
+++ b/drivers/atm/ambassador.c
@@ -31,6 +31,7 @@
 #include <linux/atmdev.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/poison.h>
 
 #include <asm/atomic.h>
 #include <asm/io.h>
@@ -1995,7 +1996,7 @@ static int __devinit ucode_init (loader_block * lb, amb_dev * dev) {
     }
     i += 1;
   }
-  if (*pointer == 0xdeadbeef) {
+  if (*pointer == ATM_POISON) {
     return loader_start (lb, dev, ucode_start);
   } else {
     // cast needed as there is no %? for pointer differnces
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 5d1c6c9..b0369bb 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -35,6 +35,7 @@ static char const rcsid[] =
 
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/poison.h>
 #include <linux/skbuff.h>
 #include <linux/kernel.h>
 #include <linux/vmalloc.h>
@@ -3657,7 +3658,7 @@ probe_sram(struct idt77252_dev *card)
 	writel(SAR_CMD_WRITE_SRAM | (0 << 2), SAR_REG_CMD);
 
 	for (addr = 0x4000; addr < 0x80000; addr += 0x4000) {
-		writel(0xdeadbeef, SAR_REG_DR0);
+		writel(ATM_POISON, SAR_REG_DR0);
 		writel(SAR_CMD_WRITE_SRAM | (addr << 2), SAR_REG_CMD);
 
 		writel(SAR_CMD_READ_SRAM | (0 << 2), SAR_REG_CMD);
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 05d7d0f..3e628f9 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -44,6 +44,7 @@
 
 /********** drivers/atm/ **********/
 #define ATM_POISON_FREE		0x12
+#define ATM_POISON		0xdeadbeef
 
 /********** net/ **********/
 #define NEIGHBOR_DEAD		0xdeadbeef
-- 
cgit v0.10.2


From 69ee20a58fa0cad6520c2a9538100a87ef0abd7d Mon Sep 17 00:00:00 2001
From: Andrey Savochkin <saw@swsoft.com>
Date: Mon, 3 Jul 2006 19:50:14 -0700
Subject: [BRIDGE]: br_dump_ifinfo index fix

Fix for inability of br_dump_ifinfo to handle non-zero start index:
loop index never increases when entered with non-zero start.
Spotted by Kirill Korotaev.

Signed-off-by: Andrey Savochkin <saw@swsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 881d7d1..06abb66 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -117,12 +117,13 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 			continue;
 
 		if (idx < s_idx)
-			continue;
+			goto cont;
 
 		err = br_fill_ifinfo(skb, p, NETLINK_CB(cb->skb).pid,
 				     cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI);
 		if (err <= 0)
 			break;
+cont:
 		++idx;
 	}
 	read_unlock(&dev_base_lock);
-- 
cgit v0.10.2


From 2b86ad21deec4c47a1f0089298f12e4038c2aa68 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 3 Jul 2006 10:02:18 +0200
Subject: [Bluetooth] Use raw mode for the Frontline sniffer device

The Frontline sniffer device looks like a normal H:2 Bluetooth device,
but it is not and so mark it as raw mode device.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>

diff --git a/drivers/bluetooth/hci_usb.c b/drivers/bluetooth/hci_usb.c
index a7d9d7e..95afce1 100644
--- a/drivers/bluetooth/hci_usb.c
+++ b/drivers/bluetooth/hci_usb.c
@@ -129,6 +129,9 @@ static struct usb_device_id blacklist_ids[] = {
 	/* CSR BlueCore Bluetooth Sniffer */
 	{ USB_DEVICE(0x0a12, 0x0002), .driver_info = HCI_SNIFFER },
 
+	/* Frontline ComProbe Bluetooth Sniffer */
+	{ USB_DEVICE(0x16d3, 0x0002), .driver_info = HCI_SNIFFER },
+
 	{ }	/* Terminating entry */
 };
 
-- 
cgit v0.10.2


From dcdcf63ef12dc3fbaa17a6d04f16ada8e63bb4d0 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 3 Jul 2006 10:02:24 +0200
Subject: [Bluetooth] Add suspend/resume support to the HCI USB driver

This patch implements the suspend/resume methods for the HCI USB
driver by killing all outstanding URBs on suspend, and re-issuing
them on resume.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>

diff --git a/drivers/bluetooth/hci_usb.c b/drivers/bluetooth/hci_usb.c
index 95afce1..987a3df 100644
--- a/drivers/bluetooth/hci_usb.c
+++ b/drivers/bluetooth/hci_usb.c
@@ -1045,10 +1045,81 @@ static void hci_usb_disconnect(struct usb_interface *intf)
 	hci_free_dev(hdev);
 }
 
+static int hci_usb_suspend(struct usb_interface *intf, pm_message_t message)
+{
+	struct hci_usb *husb = usb_get_intfdata(intf);
+	struct list_head killed;
+	unsigned long flags;
+	int i;
+
+	if (!husb || intf == husb->isoc_iface)
+		return 0;
+
+	hci_suspend_dev(husb->hdev);
+
+	INIT_LIST_HEAD(&killed);
+
+	for (i = 0; i < 4; i++) {
+		struct _urb_queue *q = &husb->pending_q[i];
+		struct _urb *_urb, *_tmp;
+
+		while ((_urb = _urb_dequeue(q))) {
+			/* reset queue since _urb_dequeue sets it to NULL */
+			_urb->queue = q;
+			usb_kill_urb(&_urb->urb);
+			list_add(&_urb->list, &killed);
+		}
+
+		spin_lock_irqsave(&q->lock, flags);
+
+		list_for_each_entry_safe(_urb, _tmp, &killed, list) {
+			list_move_tail(&_urb->list, &q->head);
+		}
+
+		spin_unlock_irqrestore(&q->lock, flags);
+	}
+
+	return 0;
+}
+
+static int hci_usb_resume(struct usb_interface *intf)
+{
+	struct hci_usb *husb = usb_get_intfdata(intf);
+	unsigned long flags;
+	int i, err = 0;
+
+	if (!husb || intf == husb->isoc_iface)
+		return 0;
+	
+	for (i = 0; i < 4; i++) {
+		struct _urb_queue *q = &husb->pending_q[i];
+		struct _urb *_urb;
+
+		spin_lock_irqsave(&q->lock, flags);
+
+		list_for_each_entry(_urb, &q->head, list) {
+			err = usb_submit_urb(&_urb->urb, GFP_ATOMIC);
+			if (err)
+				break;
+		}
+
+		spin_unlock_irqrestore(&q->lock, flags);
+
+		if (err)
+			return -EIO;
+	}
+
+	hci_resume_dev(husb->hdev);
+
+	return 0;
+}
+
 static struct usb_driver hci_usb_driver = {
 	.name		= "hci_usb",
 	.probe		= hci_usb_probe,
 	.disconnect	= hci_usb_disconnect,
+	.suspend	= hci_usb_suspend,
+	.resume		= hci_usb_resume,
 	.id_table	= bluetooth_ids,
 };
 
-- 
cgit v0.10.2


From da1f519851d1c66331363253f364bdb5d924ea96 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 3 Jul 2006 10:02:29 +0200
Subject: [Bluetooth] Correct SCO buffer size on request

This patch introduces a quirk that allows the drivers to tell the host
to correct the SCO buffer size values.

Signed-off-by: Olivier Galibert <galibert@pobox.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>

diff --git a/drivers/bluetooth/hci_usb.c b/drivers/bluetooth/hci_usb.c
index 987a3df..6a0c223 100644
--- a/drivers/bluetooth/hci_usb.c
+++ b/drivers/bluetooth/hci_usb.c
@@ -122,6 +122,9 @@ static struct usb_device_id blacklist_ids[] = {
 	/* RTX Telecom based adapter with buggy SCO support */
 	{ USB_DEVICE(0x0400, 0x0807), .driver_info = HCI_BROKEN_ISOC },
 
+	/* Belkin F8T012 */
+	{ USB_DEVICE(0x050d, 0x0012), .driver_info = HCI_WRONG_SCO_MTU },
+
 	/* Digianswer devices */
 	{ USB_DEVICE(0x08fd, 0x0001), .driver_info = HCI_DIGIANSWER },
 	{ USB_DEVICE(0x08fd, 0x0002), .driver_info = HCI_IGNORE },
@@ -987,6 +990,9 @@ static int hci_usb_probe(struct usb_interface *intf, const struct usb_device_id
 	if (reset || id->driver_info & HCI_RESET)
 		set_bit(HCI_QUIRK_RESET_ON_INIT, &hdev->quirks);
 
+	if (id->driver_info & HCI_WRONG_SCO_MTU)
+		set_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks);
+
 	if (id->driver_info & HCI_SNIFFER) {
 		if (le16_to_cpu(udev->descriptor.bcdDevice) > 0x997)
 			set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks);
diff --git a/drivers/bluetooth/hci_usb.h b/drivers/bluetooth/hci_usb.h
index 37100a6..963fc55 100644
--- a/drivers/bluetooth/hci_usb.h
+++ b/drivers/bluetooth/hci_usb.h
@@ -35,6 +35,7 @@
 #define HCI_SNIFFER		0x10
 #define HCI_BCM92035		0x20
 #define HCI_BROKEN_ISOC		0x40
+#define HCI_WRONG_SCO_MTU	0x80
 
 #define HCI_MAX_IFACE_NUM	3
 
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index b06a2d2..99c53f6 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -54,7 +54,8 @@
 /* HCI device quirks */
 enum {
 	HCI_QUIRK_RESET_ON_INIT,
-	HCI_QUIRK_RAW_DEVICE
+	HCI_QUIRK_RAW_DEVICE,
+	HCI_QUIRK_FIXUP_BUFFER_SIZE
 };
 
 /* HCI device flags */
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 618bace..f41cf1a 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -319,9 +319,17 @@ static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *s
 		}
 
 		hdev->acl_mtu  = __le16_to_cpu(bs->acl_mtu);
-		hdev->sco_mtu  = bs->sco_mtu ? bs->sco_mtu : 64;
-		hdev->acl_pkts = hdev->acl_cnt = __le16_to_cpu(bs->acl_max_pkt);
-		hdev->sco_pkts = hdev->sco_cnt = __le16_to_cpu(bs->sco_max_pkt);
+		hdev->sco_mtu  = bs->sco_mtu;
+		hdev->acl_pkts = __le16_to_cpu(bs->acl_max_pkt);
+		hdev->sco_pkts = __le16_to_cpu(bs->sco_max_pkt);
+
+		if (test_bit(HCI_QUIRK_FIXUP_BUFFER_SIZE, &hdev->quirks)) {
+			hdev->sco_mtu  = 64;
+			hdev->sco_pkts = 8;
+		}
+
+		hdev->acl_cnt = hdev->acl_pkts;
+		hdev->sco_cnt = hdev->sco_pkts;
 
 		BT_DBG("%s mtu: acl %d, sco %d max_pkt: acl %d, sco %d", hdev->name,
 			hdev->acl_mtu, hdev->sco_mtu, hdev->acl_pkts, hdev->sco_pkts);
-- 
cgit v0.10.2


From 04837f6447c7f3ef114cda1ad761822dedbff8cf Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 3 Jul 2006 10:02:33 +0200
Subject: [Bluetooth] Add automatic sniff mode support

This patch introduces the automatic sniff mode feature. This allows
the host to switch idle connections into sniff mode to safe power.

Signed-off-by: Ulisses Furquim <ulissesf@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 99c53f6..b2bdb1a 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -101,9 +101,10 @@ enum {
 #define HCIINQUIRY	_IOR('H', 240, int)
 
 /* HCI timeouts */
-#define HCI_CONN_TIMEOUT	(HZ * 40)
-#define HCI_DISCONN_TIMEOUT	(HZ * 2)
-#define HCI_CONN_IDLE_TIMEOUT	(HZ * 60)
+#define HCI_CONNECT_TIMEOUT	(40000)	/* 40 seconds */
+#define HCI_DISCONN_TIMEOUT	(2000)	/* 2 seconds */
+#define HCI_IDLE_TIMEOUT	(6000)	/* 6 seconds */
+#define HCI_INIT_TIMEOUT	(10000)	/* 10 seconds */
 
 /* HCI Packet types */
 #define HCI_COMMAND_PKT		0x01
@@ -145,7 +146,7 @@ enum {
 #define LMP_TACCURACY	0x10
 #define LMP_RSWITCH	0x20
 #define LMP_HOLD	0x40
-#define LMP_SNIF	0x80
+#define LMP_SNIFF	0x80
 
 #define LMP_PARK	0x01
 #define LMP_RSSI	0x02
@@ -160,13 +161,21 @@ enum {
 #define LMP_PSCHEME	0x02
 #define LMP_PCONTROL	0x04
 
+#define LMP_SNIFF_SUBR	0x02
+
+/* Connection modes */
+#define HCI_CM_ACTIVE	0x0000
+#define HCI_CM_HOLD	0x0001
+#define HCI_CM_SNIFF	0x0002
+#define HCI_CM_PARK	0x0003
+
 /* Link policies */
 #define HCI_LP_RSWITCH	0x0001
 #define HCI_LP_HOLD	0x0002
 #define HCI_LP_SNIFF	0x0004
 #define HCI_LP_PARK	0x0008
 
-/* Link mode */
+/* Link modes */
 #define HCI_LM_ACCEPT	0x8000
 #define HCI_LM_MASTER	0x0001
 #define HCI_LM_AUTH	0x0002
@@ -192,7 +201,7 @@ struct hci_rp_read_loc_version {
 } __attribute__ ((packed));
 
 #define OCF_READ_LOCAL_FEATURES	0x0003
-struct hci_rp_read_loc_features {
+struct hci_rp_read_local_features {
 	__u8 status;
 	__u8 features[8];
 } __attribute__ ((packed));
@@ -376,17 +385,32 @@ struct hci_cp_change_conn_link_key {
 } __attribute__ ((packed));
 
 #define OCF_READ_REMOTE_FEATURES 0x001B
-struct hci_cp_read_rmt_features {
+struct hci_cp_read_remote_features {
 	__le16   handle;
 } __attribute__ ((packed));
 
 #define OCF_READ_REMOTE_VERSION 0x001D
-struct hci_cp_read_rmt_version {
+struct hci_cp_read_remote_version {
 	__le16   handle;
 } __attribute__ ((packed));
 
 /* Link Policy */
-#define OGF_LINK_POLICY	 0x02   
+#define OGF_LINK_POLICY	0x02   
+
+#define OCF_SNIFF_MODE		0x0003
+struct hci_cp_sniff_mode {
+	__le16   handle;
+	__le16   max_interval;
+	__le16   min_interval;
+	__le16   attempt;
+	__le16   timeout;
+} __attribute__ ((packed));
+
+#define OCF_EXIT_SNIFF_MODE	0x0004
+struct hci_cp_exit_sniff_mode {
+	__le16   handle;
+} __attribute__ ((packed));
+
 #define OCF_ROLE_DISCOVERY	0x0009
 struct hci_cp_role_discovery {
 	__le16   handle;
@@ -407,7 +431,7 @@ struct hci_rp_read_link_policy {
 	__le16   policy;
 } __attribute__ ((packed));
 
-#define OCF_SWITCH_ROLE	0x000B
+#define OCF_SWITCH_ROLE		0x000B
 struct hci_cp_switch_role {
 	bdaddr_t bdaddr;
 	__u8     role;
@@ -423,6 +447,14 @@ struct hci_rp_write_link_policy {
 	__le16   handle;
 } __attribute__ ((packed));
 
+#define OCF_SNIFF_SUBRATE	0x0011
+struct hci_cp_sniff_subrate {
+	__le16   handle;
+	__le16   max_latency;
+	__le16   min_remote_timeout;
+	__le16   min_local_timeout;
+} __attribute__ ((packed));
+
 /* Status params */
 #define OGF_STATUS_PARAM	0x05
 
@@ -582,15 +614,15 @@ struct hci_ev_link_key_notify {
 	__u8	 key_type;
 } __attribute__ ((packed));
 
-#define HCI_EV_RMT_FEATURES	0x0B
-struct hci_ev_rmt_features {
+#define HCI_EV_REMOTE_FEATURES	0x0B
+struct hci_ev_remote_features {
 	__u8     status;
 	__le16   handle;
 	__u8     features[8];
 } __attribute__ ((packed));
 
-#define HCI_EV_RMT_VERSION	0x0C
-struct hci_ev_rmt_version {
+#define HCI_EV_REMOTE_VERSION	0x0C
+struct hci_ev_remote_version {
 	__u8     status;
 	__le16   handle;
 	__u8     lmp_ver;
@@ -611,6 +643,16 @@ struct hci_ev_pscan_rep_mode {
 	__u8     pscan_rep_mode;
 } __attribute__ ((packed));
 
+#define HCI_EV_SNIFF_SUBRATE	0x2E
+struct hci_ev_sniff_subrate {
+	__u8     status;
+	__le16   handle;
+	__le16   max_tx_latency;
+	__le16   max_rx_latency;
+	__le16   max_remote_timeout;
+	__le16   max_local_timeout;
+} __attribute__ ((packed));
+
 /* Internal events generated by Bluetooth stack */
 #define HCI_EV_STACK_INTERNAL	0xFD
 struct hci_ev_stack_internal {
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index bb9f81d..f685270 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -31,10 +31,7 @@
 #define HCI_PROTO_L2CAP	0
 #define HCI_PROTO_SCO	1
 
-#define HCI_INIT_TIMEOUT (HZ * 10)
-
 /* HCI Core structures */
-
 struct inquiry_data {
 	bdaddr_t	bdaddr;
 	__u8		pscan_rep_mode;
@@ -81,6 +78,10 @@ struct hci_dev {
 	__u16		link_policy;
 	__u16		link_mode;
 
+	__u32		idle_timeout;
+	__u16		sniff_min_interval;
+	__u16		sniff_max_interval;
+
 	unsigned long	quirks;
 
 	atomic_t	cmd_cnt;
@@ -145,18 +146,24 @@ struct hci_conn {
 	bdaddr_t	 dst;
 	__u16		 handle;
 	__u16		 state;
+	__u8             mode;
 	__u8		 type;
 	__u8		 out;
 	__u8		 dev_class[3];
+	__u8             features[8];
+	__u16            interval;
+	__u16            link_policy;
 	__u32		 link_mode;
+	__u8             power_save;
 	unsigned long	 pend;
-	
+
 	unsigned int	 sent;
-	
+
 	struct sk_buff_head data_q;
 
-	struct timer_list timer;
-	
+	struct timer_list disc_timer;
+	struct timer_list idle_timer;
+
 	struct hci_dev	*hdev;
 	void		*l2cap_data;
 	void		*sco_data;
@@ -211,7 +218,8 @@ void hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data);
 enum {
 	HCI_CONN_AUTH_PEND,
 	HCI_CONN_ENCRYPT_PEND,
-	HCI_CONN_RSWITCH_PEND
+	HCI_CONN_RSWITCH_PEND,
+	HCI_CONN_MODE_CHANGE_PEND,
 };
 
 static inline void hci_conn_hash_init(struct hci_dev *hdev)
@@ -286,31 +294,27 @@ int hci_conn_encrypt(struct hci_conn *conn);
 int hci_conn_change_link_key(struct hci_conn *conn);
 int hci_conn_switch_role(struct hci_conn *conn, uint8_t role);
 
-static inline void hci_conn_set_timer(struct hci_conn *conn, unsigned long timeout)
-{
-	mod_timer(&conn->timer, jiffies + timeout);
-}
-
-static inline void hci_conn_del_timer(struct hci_conn *conn)
-{
-	del_timer(&conn->timer);
-}
+void hci_conn_enter_active_mode(struct hci_conn *conn);
+void hci_conn_enter_sniff_mode(struct hci_conn *conn);
 
 static inline void hci_conn_hold(struct hci_conn *conn)
 {
 	atomic_inc(&conn->refcnt);
-	hci_conn_del_timer(conn);
+	del_timer(&conn->disc_timer);
 }
 
 static inline void hci_conn_put(struct hci_conn *conn)
 {
 	if (atomic_dec_and_test(&conn->refcnt)) {
+		unsigned long timeo;
 		if (conn->type == ACL_LINK) {
-			unsigned long timeo = (conn->out) ?
-				HCI_DISCONN_TIMEOUT : HCI_DISCONN_TIMEOUT * 2;
-			hci_conn_set_timer(conn, timeo);
+			timeo = msecs_to_jiffies(HCI_DISCONN_TIMEOUT);
+			if (!conn->out)
+				timeo *= 2;
+			del_timer(&conn->idle_timer);
 		} else
-			hci_conn_set_timer(conn, HZ / 100);
+			timeo = msecs_to_jiffies(10);
+		mod_timer(&conn->disc_timer, jiffies + timeo);
 	}
 }
 
@@ -411,8 +415,10 @@ void hci_unregister_sysfs(struct hci_dev *hdev);
 #define SET_HCIDEV_DEV(hdev, pdev) ((hdev)->class_dev.dev = (pdev))
 
 /* ----- LMP capabilities ----- */
-#define lmp_rswitch_capable(dev) (dev->features[0] & LMP_RSWITCH)
-#define lmp_encrypt_capable(dev) (dev->features[0] & LMP_ENCRYPT)
+#define lmp_rswitch_capable(dev)   ((dev)->features[0] & LMP_RSWITCH)
+#define lmp_encrypt_capable(dev)   ((dev)->features[0] & LMP_ENCRYPT)
+#define lmp_sniff_capable(dev)     ((dev)->features[0] & LMP_SNIFF)
+#define lmp_sniffsubr_capable(dev) ((dev)->features[5] & LMP_SNIFF_SUBR)
 
 /* ----- HCI protocols ----- */
 struct hci_proto {
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 51f8670..729461f 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -48,7 +48,7 @@
 #define BT_DBG(D...)
 #endif
 
-#define VERSION "2.8"
+#define VERSION "2.9"
 
 /* Bluetooth sockets */
 #define BT_MAX_PROTO	8
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 5c0c2b1..420ed4d 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -115,8 +115,8 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle)
 
 static void hci_conn_timeout(unsigned long arg)
 {
-	struct hci_conn *conn = (void *)arg;
-	struct hci_dev  *hdev = conn->hdev;
+	struct hci_conn *conn = (void *) arg;
+	struct hci_dev *hdev = conn->hdev;
 
 	BT_DBG("conn %p state %d", conn, conn->state);
 
@@ -132,11 +132,13 @@ static void hci_conn_timeout(unsigned long arg)
 	return;
 }
 
-static void hci_conn_init_timer(struct hci_conn *conn)
+static void hci_conn_idle(unsigned long arg)
 {
-	init_timer(&conn->timer);
-	conn->timer.function = hci_conn_timeout;
-	conn->timer.data = (unsigned long)conn;
+	struct hci_conn *conn = (void *) arg;
+
+	BT_DBG("conn %p mode %d", conn, conn->mode);
+
+	hci_conn_enter_sniff_mode(conn);
 }
 
 struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
@@ -145,17 +147,27 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
 
 	BT_DBG("%s dst %s", hdev->name, batostr(dst));
 
-	if (!(conn = kmalloc(sizeof(struct hci_conn), GFP_ATOMIC)))
+	conn = kzalloc(sizeof(struct hci_conn), GFP_ATOMIC);
+	if (!conn)
 		return NULL;
-	memset(conn, 0, sizeof(struct hci_conn));
 
 	bacpy(&conn->dst, dst);
-	conn->type   = type;
 	conn->hdev   = hdev;
+	conn->type   = type;
+	conn->mode   = HCI_CM_ACTIVE;
 	conn->state  = BT_OPEN;
 
+	conn->power_save = 1;
+
 	skb_queue_head_init(&conn->data_q);
-	hci_conn_init_timer(conn);
+
+	init_timer(&conn->disc_timer);
+	conn->disc_timer.function = hci_conn_timeout;
+	conn->disc_timer.data = (unsigned long) conn;
+
+	init_timer(&conn->idle_timer);
+	conn->idle_timer.function = hci_conn_idle;
+	conn->idle_timer.data = (unsigned long) conn;
 
 	atomic_set(&conn->refcnt, 0);
 
@@ -178,7 +190,9 @@ int hci_conn_del(struct hci_conn *conn)
 
 	BT_DBG("%s conn %p handle %d", hdev->name, conn, conn->handle);
 
-	hci_conn_del_timer(conn);
+	del_timer(&conn->idle_timer);
+
+	del_timer(&conn->disc_timer);
 
 	if (conn->type == SCO_LINK) {
 		struct hci_conn *acl = conn->link;
@@ -364,6 +378,70 @@ int hci_conn_switch_role(struct hci_conn *conn, uint8_t role)
 }
 EXPORT_SYMBOL(hci_conn_switch_role);
 
+/* Enter active mode */
+void hci_conn_enter_active_mode(struct hci_conn *conn)
+{
+	struct hci_dev *hdev = conn->hdev;
+
+	BT_DBG("conn %p mode %d", conn, conn->mode);
+
+	if (test_bit(HCI_RAW, &hdev->flags))
+		return;
+
+	if (conn->mode != HCI_CM_SNIFF || !conn->power_save)
+		goto timer;
+
+	if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) {
+		struct hci_cp_exit_sniff_mode cp;
+		cp.handle = __cpu_to_le16(conn->handle);
+		hci_send_cmd(hdev, OGF_LINK_POLICY,
+				OCF_EXIT_SNIFF_MODE, sizeof(cp), &cp);
+	}
+
+timer:
+	if (hdev->idle_timeout > 0)
+		mod_timer(&conn->idle_timer,
+			jiffies + msecs_to_jiffies(hdev->idle_timeout));
+}
+
+/* Enter sniff mode */
+void hci_conn_enter_sniff_mode(struct hci_conn *conn)
+{
+	struct hci_dev *hdev = conn->hdev;
+
+	BT_DBG("conn %p mode %d", conn, conn->mode);
+
+	if (test_bit(HCI_RAW, &hdev->flags))
+		return;
+
+	if (!lmp_sniff_capable(hdev) || !lmp_sniff_capable(conn))
+		return;
+
+	if (conn->mode != HCI_CM_ACTIVE || !(conn->link_policy & HCI_LP_SNIFF))
+		return;
+
+	if (lmp_sniffsubr_capable(hdev) && lmp_sniffsubr_capable(conn)) {
+		struct hci_cp_sniff_subrate cp;
+		cp.handle             = __cpu_to_le16(conn->handle);
+		cp.max_latency        = __constant_cpu_to_le16(0);
+		cp.min_remote_timeout = __constant_cpu_to_le16(0);
+		cp.min_local_timeout  = __constant_cpu_to_le16(0);
+		hci_send_cmd(hdev, OGF_LINK_POLICY,
+				OCF_SNIFF_SUBRATE, sizeof(cp), &cp);
+	}
+
+	if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) {
+		struct hci_cp_sniff_mode cp;
+		cp.handle       = __cpu_to_le16(conn->handle);
+		cp.max_interval = __cpu_to_le16(hdev->sniff_max_interval);
+		cp.min_interval = __cpu_to_le16(hdev->sniff_min_interval);
+		cp.attempt      = __constant_cpu_to_le16(4);
+		cp.timeout      = __constant_cpu_to_le16(1);
+		hci_send_cmd(hdev, OGF_LINK_POLICY,
+				OCF_SNIFF_MODE, sizeof(cp), &cp);
+	}
+}
+
 /* Drop all connection on the device */
 void hci_conn_hash_flush(struct hci_dev *hdev)
 {
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index f67240b..3f9f156 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -411,7 +411,7 @@ int hci_inquiry(void __user *arg)
 	}
 	hci_dev_unlock_bh(hdev);
 
-	timeo = ir.length * 2 * HZ;
+	timeo = ir.length * msecs_to_jiffies(2000);
 	if (do_inquiry && (err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo)) < 0)
 		goto done;
 
@@ -479,7 +479,8 @@ int hci_dev_open(__u16 dev)
 		set_bit(HCI_INIT, &hdev->flags);
 
 		//__hci_request(hdev, hci_reset_req, 0, HZ);
-		ret = __hci_request(hdev, hci_init_req, 0, HCI_INIT_TIMEOUT);
+		ret = __hci_request(hdev, hci_init_req, 0,
+					msecs_to_jiffies(HCI_INIT_TIMEOUT));
 
 		clear_bit(HCI_INIT, &hdev->flags);
 	}
@@ -546,7 +547,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	atomic_set(&hdev->cmd_cnt, 1);
 	if (!test_bit(HCI_RAW, &hdev->flags)) {
 		set_bit(HCI_INIT, &hdev->flags);
-		__hci_request(hdev, hci_reset_req, 0, HZ/4);
+		__hci_request(hdev, hci_reset_req, 0,
+					msecs_to_jiffies(250));
 		clear_bit(HCI_INIT, &hdev->flags);
 	}
 
@@ -619,7 +621,8 @@ int hci_dev_reset(__u16 dev)
 	hdev->acl_cnt = 0; hdev->sco_cnt = 0;
 
 	if (!test_bit(HCI_RAW, &hdev->flags))
-		ret = __hci_request(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT);
+		ret = __hci_request(hdev, hci_reset_req, 0,
+					msecs_to_jiffies(HCI_INIT_TIMEOUT));
 
 done:
 	tasklet_enable(&hdev->tx_task);
@@ -657,7 +660,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
 
 	switch (cmd) {
 	case HCISETAUTH:
-		err = hci_request(hdev, hci_auth_req, dr.dev_opt, HCI_INIT_TIMEOUT);
+		err = hci_request(hdev, hci_auth_req, dr.dev_opt,
+					msecs_to_jiffies(HCI_INIT_TIMEOUT));
 		break;
 
 	case HCISETENCRYPT:
@@ -668,18 +672,19 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
 
 		if (!test_bit(HCI_AUTH, &hdev->flags)) {
 			/* Auth must be enabled first */
-			err = hci_request(hdev, hci_auth_req,
-					dr.dev_opt, HCI_INIT_TIMEOUT);
+			err = hci_request(hdev, hci_auth_req, dr.dev_opt,
+					msecs_to_jiffies(HCI_INIT_TIMEOUT));
 			if (err)
 				break;
 		}
 
-		err = hci_request(hdev, hci_encrypt_req,
-					dr.dev_opt, HCI_INIT_TIMEOUT);
+		err = hci_request(hdev, hci_encrypt_req, dr.dev_opt,
+					msecs_to_jiffies(HCI_INIT_TIMEOUT));
 		break;
 
 	case HCISETSCAN:
-		err = hci_request(hdev, hci_scan_req, dr.dev_opt, HCI_INIT_TIMEOUT);
+		err = hci_request(hdev, hci_scan_req, dr.dev_opt,
+					msecs_to_jiffies(HCI_INIT_TIMEOUT));
 		break;
 
 	case HCISETPTYPE:
@@ -848,6 +853,10 @@ int hci_register_dev(struct hci_dev *hdev)
 	hdev->pkt_type  = (HCI_DM1 | HCI_DH1 | HCI_HV1);
 	hdev->link_mode = (HCI_LM_ACCEPT);
 
+	hdev->idle_timeout = 0;
+	hdev->sniff_max_interval = 800;
+	hdev->sniff_min_interval = 80;
+
 	tasklet_init(&hdev->cmd_task, hci_cmd_task,(unsigned long) hdev);
 	tasklet_init(&hdev->rx_task, hci_rx_task, (unsigned long) hdev);
 	tasklet_init(&hdev->tx_task, hci_tx_task, (unsigned long) hdev);
@@ -1220,6 +1229,9 @@ static inline void hci_sched_acl(struct hci_dev *hdev)
 	while (hdev->acl_cnt && (conn = hci_low_sent(hdev, ACL_LINK, &quote))) {
 		while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
 			BT_DBG("skb %p len %d", skb, skb->len);
+
+			hci_conn_enter_active_mode(conn);
+
 			hci_send_frame(skb);
 			hdev->acl_last_tx = jiffies;
 
@@ -1298,6 +1310,8 @@ static inline void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
 	if (conn) {
 		register struct hci_proto *hp;
 
+		hci_conn_enter_active_mode(conn);
+
 		/* Send to upper protocol */
 		if ((hp = hci_proto[HCI_PROTO_L2CAP]) && hp->recv_acldata) {
 			hp->recv_acldata(conn, skb, flags);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index f41cf1a..3896dab 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -83,6 +83,8 @@ static void hci_cc_link_policy(struct hci_dev *hdev, __u16 ocf, struct sk_buff *
 {
 	struct hci_conn *conn;
 	struct hci_rp_role_discovery *rd;
+	struct hci_rp_write_link_policy *lp;
+	void *sent;
 
 	BT_DBG("%s ocf 0x%x", hdev->name, ocf);
 
@@ -106,6 +108,27 @@ static void hci_cc_link_policy(struct hci_dev *hdev, __u16 ocf, struct sk_buff *
 		hci_dev_unlock(hdev);
 		break;
 
+	case OCF_WRITE_LINK_POLICY:
+		sent = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_WRITE_LINK_POLICY);
+		if (!sent)
+			break;
+
+		lp = (struct hci_rp_write_link_policy *) skb->data;
+
+		if (lp->status)
+			break;
+
+		hci_dev_lock(hdev);
+
+		conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(lp->handle));
+		if (conn) {
+			__le16 policy = get_unaligned((__le16 *) (sent + 2));
+			conn->link_policy = __le16_to_cpu(policy);
+		}
+
+		hci_dev_unlock(hdev);
+		break;
+
 	default:
 		BT_DBG("%s: Command complete: ogf LINK_POLICY ocf %x", 
 				hdev->name, ocf);
@@ -274,7 +297,7 @@ static void hci_cc_host_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb
 /* Command Complete OGF INFO_PARAM  */
 static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb)
 {
-	struct hci_rp_read_loc_features *lf;
+	struct hci_rp_read_local_features *lf;
 	struct hci_rp_read_buffer_size *bs;
 	struct hci_rp_read_bd_addr *ba;
 
@@ -282,7 +305,7 @@ static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *s
 
 	switch (ocf) {
 	case OCF_READ_LOCAL_FEATURES:
-		lf = (struct hci_rp_read_loc_features *) skb->data;
+		lf = (struct hci_rp_read_local_features *) skb->data;
 
 		if (lf->status) {
 			BT_DBG("%s READ_LOCAL_FEATURES failed %d", hdev->name, lf->status);
@@ -447,8 +470,46 @@ static void hci_cs_link_policy(struct hci_dev *hdev, __u16 ocf, __u8 status)
 	BT_DBG("%s ocf 0x%x", hdev->name, ocf);
 
 	switch (ocf) {
+	case OCF_SNIFF_MODE:
+		if (status) {
+			struct hci_conn *conn;
+			struct hci_cp_sniff_mode *cp = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_SNIFF_MODE);
+
+			if (!cp)
+				break;
+
+			hci_dev_lock(hdev);
+
+			conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
+			if (conn) {
+				clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend);
+			}
+
+			hci_dev_unlock(hdev);
+		}
+		break;
+
+	case OCF_EXIT_SNIFF_MODE:
+		if (status) {
+			struct hci_conn *conn;
+			struct hci_cp_exit_sniff_mode *cp = hci_sent_cmd_data(hdev, OGF_LINK_POLICY, OCF_EXIT_SNIFF_MODE);
+
+			if (!cp)
+				break;
+
+			hci_dev_lock(hdev);
+
+			conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
+			if (conn) {
+				clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend);
+			}
+
+			hci_dev_unlock(hdev);
+		}
+		break;
+
 	default:
-		BT_DBG("%s Command status: ogf HOST_POLICY ocf %x", hdev->name, ocf);
+		BT_DBG("%s Command status: ogf LINK_POLICY ocf %x", hdev->name, ocf);
 		break;
 	}
 }
@@ -630,14 +691,16 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
 		else
 			cp.role = 0x01; /* Remain slave */
 
-		hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ACCEPT_CONN_REQ, sizeof(cp), &cp);
+		hci_send_cmd(hdev, OGF_LINK_CTL,
+				OCF_ACCEPT_CONN_REQ, sizeof(cp), &cp);
 	} else {
 		/* Connection rejected */
 		struct hci_cp_reject_conn_req cp;
 
 		bacpy(&cp.bdaddr, &ev->bdaddr);
 		cp.reason = 0x0f;
-		hci_send_cmd(hdev, OGF_LINK_CTL, OCF_REJECT_CONN_REQ, sizeof(cp), &cp);
+		hci_send_cmd(hdev, OGF_LINK_CTL,
+				OCF_REJECT_CONN_REQ, sizeof(cp), &cp);
 	}
 }
 
@@ -645,7 +708,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
 static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_ev_conn_complete *ev = (struct hci_ev_conn_complete *) skb->data;
-	struct hci_conn *conn = NULL;
+	struct hci_conn *conn;
 
 	BT_DBG("%s", hdev->name);
 
@@ -667,12 +730,21 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 		if (test_bit(HCI_ENCRYPT, &hdev->flags))
 			conn->link_mode |= HCI_LM_ENCRYPT;
 
+		/* Get remote features */
+		if (conn->type == ACL_LINK) {
+			struct hci_cp_read_remote_features cp;
+			cp.handle = ev->handle;
+			hci_send_cmd(hdev, OGF_LINK_CTL,
+				OCF_READ_REMOTE_FEATURES, sizeof(cp), &cp);
+		}
+
 		/* Set link policy */
 		if (conn->type == ACL_LINK && hdev->link_policy) {
 			struct hci_cp_write_link_policy cp;
 			cp.handle = ev->handle;
 			cp.policy = __cpu_to_le16(hdev->link_policy);
-			hci_send_cmd(hdev, OGF_LINK_POLICY, OCF_WRITE_LINK_POLICY, sizeof(cp), &cp);
+			hci_send_cmd(hdev, OGF_LINK_POLICY,
+				OCF_WRITE_LINK_POLICY, sizeof(cp), &cp);
 		}
 
 		/* Set packet type for incoming connection */
@@ -683,7 +755,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 				__cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK):
 				__cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
 
-			hci_send_cmd(hdev, OGF_LINK_CTL, OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp);
+			hci_send_cmd(hdev, OGF_LINK_CTL,
+				OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp);
 		}
 	} else
 		conn->state = BT_CLOSED;
@@ -711,8 +784,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_ev_disconn_complete *ev = (struct hci_ev_disconn_complete *) skb->data;
-	struct hci_conn *conn = NULL;
-	__u16 handle = __le16_to_cpu(ev->handle);
+	struct hci_conn *conn;
 
 	BT_DBG("%s status %d", hdev->name, ev->status);
 
@@ -721,7 +793,7 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff
 
 	hci_dev_lock(hdev);
 
-	conn = hci_conn_hash_lookup_handle(hdev, handle);
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
 	if (conn) {
 		conn->state = BT_CLOSED;
 		hci_proto_disconn_ind(conn, ev->reason);
@@ -778,7 +850,7 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s
 static inline void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_ev_role_change *ev = (struct hci_ev_role_change *) skb->data;
-	struct hci_conn *conn = NULL;
+	struct hci_conn *conn;
 
 	BT_DBG("%s status %d", hdev->name, ev->status);
 
@@ -801,18 +873,43 @@ static inline void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb
 	hci_dev_unlock(hdev);
 }
 
+/* Mode Change */
+static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_mode_change *ev = (struct hci_ev_mode_change *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status %d", hdev->name, ev->status);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
+	if (conn) {
+		conn->mode = ev->mode;
+		conn->interval = __le16_to_cpu(ev->interval);
+
+		if (!test_and_clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) {
+			if (conn->mode == HCI_CM_ACTIVE)
+				conn->power_save = 1;
+			else
+				conn->power_save = 0;
+		}
+	}
+
+	hci_dev_unlock(hdev);
+}
+
 /* Authentication Complete */
 static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_ev_auth_complete *ev = (struct hci_ev_auth_complete *) skb->data;
-	struct hci_conn *conn = NULL;
-	__u16 handle = __le16_to_cpu(ev->handle);
+	struct hci_conn *conn;
 
 	BT_DBG("%s status %d", hdev->name, ev->status);
 
 	hci_dev_lock(hdev);
 
-	conn = hci_conn_hash_lookup_handle(hdev, handle);
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
 	if (conn) {
 		if (!ev->status)
 			conn->link_mode |= HCI_LM_AUTH;
@@ -827,8 +924,7 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 				cp.handle  = __cpu_to_le16(conn->handle);
 				cp.encrypt = 1;
 				hci_send_cmd(conn->hdev, OGF_LINK_CTL,
-						OCF_SET_CONN_ENCRYPT,
-						sizeof(cp), &cp);
+					OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp);
 			} else {
 				clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend);
 				hci_encrypt_cfm(conn, ev->status, 0x00);
@@ -843,14 +939,13 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_ev_encrypt_change *ev = (struct hci_ev_encrypt_change *) skb->data;
-	struct hci_conn *conn = NULL;
-	__u16 handle = __le16_to_cpu(ev->handle);
+	struct hci_conn *conn;
 
 	BT_DBG("%s status %d", hdev->name, ev->status);
 
 	hci_dev_lock(hdev);
 
-	conn = hci_conn_hash_lookup_handle(hdev, handle);
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
 	if (conn) {
 		if (!ev->status) {
 			if (ev->encrypt)
@@ -871,14 +966,13 @@ static inline void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *
 static inline void hci_change_conn_link_key_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_ev_change_conn_link_key_complete *ev = (struct hci_ev_change_conn_link_key_complete *) skb->data;
-	struct hci_conn *conn = NULL;
-	__u16 handle = __le16_to_cpu(ev->handle);
+	struct hci_conn *conn;
 
 	BT_DBG("%s status %d", hdev->name, ev->status);
 
 	hci_dev_lock(hdev);
 
-	conn = hci_conn_hash_lookup_handle(hdev, handle);
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
 	if (conn) {
 		if (!ev->status)
 			conn->link_mode |= HCI_LM_SECURE;
@@ -906,18 +1000,35 @@ static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff
 {
 }
 
+/* Remote Features */
+static inline void hci_remote_features_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_remote_features *ev = (struct hci_ev_remote_features *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status %d", hdev->name, ev->status);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
+	if (conn && !ev->status) {
+		memcpy(conn->features, ev->features, sizeof(conn->features));
+	}
+
+	hci_dev_unlock(hdev);
+}
+
 /* Clock Offset */
 static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_ev_clock_offset *ev = (struct hci_ev_clock_offset *) skb->data;
-	struct hci_conn *conn = NULL;
-	__u16 handle = __le16_to_cpu(ev->handle);
+	struct hci_conn *conn;
 
 	BT_DBG("%s status %d", hdev->name, ev->status);
 
 	hci_dev_lock(hdev);
 
-	conn = hci_conn_hash_lookup_handle(hdev, handle);
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
 	if (conn && !ev->status) {
 		struct inquiry_entry *ie;
 
@@ -948,6 +1059,23 @@ static inline void hci_pscan_rep_mode_evt(struct hci_dev *hdev, struct sk_buff *
 	hci_dev_unlock(hdev);
 }
 
+/* Sniff Subrate */
+static inline void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_sniff_subrate *ev = (struct hci_ev_sniff_subrate *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status %d", hdev->name, ev->status);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle));
+	if (conn) {
+	}
+
+	hci_dev_unlock(hdev);
+}
+
 void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_event_hdr *hdr = (struct hci_event_hdr *) skb->data;
@@ -996,6 +1124,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_role_change_evt(hdev, skb);
 		break;
 
+	case HCI_EV_MODE_CHANGE:
+		hci_mode_change_evt(hdev, skb);
+		break;
+
 	case HCI_EV_AUTH_COMPLETE:
 		hci_auth_complete_evt(hdev, skb);
 		break;
@@ -1020,6 +1152,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_link_key_notify_evt(hdev, skb);
 		break;
 
+	case HCI_EV_REMOTE_FEATURES:
+		hci_remote_features_evt(hdev, skb);
+		break;
+
 	case HCI_EV_CLOCK_OFFSET:
 		hci_clock_offset_evt(hdev, skb);
 		break;
@@ -1028,6 +1164,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_pscan_rep_mode_evt(hdev, skb);
 		break;
 
+	case HCI_EV_SNIFF_SUBRATE:
+		hci_sniff_subrate_evt(hdev, skb);
+		break;
+
 	case HCI_EV_CMD_STATUS:
 		cs = (struct hci_ev_cmd_status *) skb->data;
 		skb_pull(skb, sizeof(cs));
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 19b234c..89918d2 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -61,18 +61,106 @@ static ssize_t show_inquiry_cache(struct class_device *cdev, char *buf)
 	return n;
 }
 
+static ssize_t show_idle_timeout(struct class_device *cdev, char *buf)
+{
+	struct hci_dev *hdev = class_get_devdata(cdev);
+	return sprintf(buf, "%d\n", hdev->idle_timeout);
+}
+
+static ssize_t store_idle_timeout(struct class_device *cdev, const char *buf, size_t count)
+{
+	struct hci_dev *hdev = class_get_devdata(cdev);
+	char *ptr;
+	__u32 val;
+
+	val = simple_strtoul(buf, &ptr, 10);
+	if (ptr == buf)
+		return -EINVAL;
+
+	if (val != 0 && (val < 500 || val > 3600000))
+		return -EINVAL;
+
+	hdev->idle_timeout = val;
+
+	return count;
+}
+
+static ssize_t show_sniff_max_interval(struct class_device *cdev, char *buf)
+{
+	struct hci_dev *hdev = class_get_devdata(cdev);
+	return sprintf(buf, "%d\n", hdev->sniff_max_interval);
+}
+
+static ssize_t store_sniff_max_interval(struct class_device *cdev, const char *buf, size_t count)
+{
+	struct hci_dev *hdev = class_get_devdata(cdev);
+	char *ptr;
+	__u16 val;
+
+	val = simple_strtoul(buf, &ptr, 10);
+	if (ptr == buf)
+		return -EINVAL;
+
+	if (val < 0x0002 || val > 0xFFFE || val % 2)
+		return -EINVAL;
+
+	if (val < hdev->sniff_min_interval)
+		return -EINVAL;
+
+	hdev->sniff_max_interval = val;
+
+	return count;
+}
+
+static ssize_t show_sniff_min_interval(struct class_device *cdev, char *buf)
+{
+	struct hci_dev *hdev = class_get_devdata(cdev);
+	return sprintf(buf, "%d\n", hdev->sniff_min_interval);
+}
+
+static ssize_t store_sniff_min_interval(struct class_device *cdev, const char *buf, size_t count)
+{
+	struct hci_dev *hdev = class_get_devdata(cdev);
+	char *ptr;
+	__u16 val;
+
+	val = simple_strtoul(buf, &ptr, 10);
+	if (ptr == buf)
+		return -EINVAL;
+
+	if (val < 0x0002 || val > 0xFFFE || val % 2)
+		return -EINVAL;
+
+	if (val > hdev->sniff_max_interval)
+		return -EINVAL;
+
+	hdev->sniff_min_interval = val;
+
+	return count;
+}
+
 static CLASS_DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
 static CLASS_DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
 static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
 static CLASS_DEVICE_ATTR(flags, S_IRUGO, show_flags, NULL);
 static CLASS_DEVICE_ATTR(inquiry_cache, S_IRUGO, show_inquiry_cache, NULL);
 
+static CLASS_DEVICE_ATTR(idle_timeout, S_IRUGO | S_IWUSR,
+				show_idle_timeout, store_idle_timeout);
+static CLASS_DEVICE_ATTR(sniff_max_interval, S_IRUGO | S_IWUSR,
+				show_sniff_max_interval, store_sniff_max_interval);
+static CLASS_DEVICE_ATTR(sniff_min_interval, S_IRUGO | S_IWUSR,
+				show_sniff_min_interval, store_sniff_min_interval);
+
 static struct class_device_attribute *bt_attrs[] = {
 	&class_device_attr_name,
 	&class_device_attr_type,
 	&class_device_attr_address,
 	&class_device_attr_flags,
 	&class_device_attr_inquiry_cache,
+	&class_device_attr_idle_timeout,
+	&class_device_attr_sniff_max_interval,
+	&class_device_attr_sniff_min_interval,
 	NULL
 };
 
-- 
cgit v0.10.2


From 27d35284258c596900e0e41c46932ec4abe6a7b1 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 3 Jul 2006 10:02:37 +0200
Subject: [Bluetooth] Add platform device for virtual and serial devices

This patch adds a generic Bluetooth platform device that can be used
as parent device by virtual and serial devices.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>

diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c
index 2830f58..8eebf9c 100644
--- a/drivers/bluetooth/bluecard_cs.c
+++ b/drivers/bluetooth/bluecard_cs.c
@@ -739,6 +739,7 @@ static int bluecard_open(bluecard_info_t *info)
 
 	hdev->type = HCI_PCCARD;
 	hdev->driver_data = info;
+	SET_HCIDEV_DEV(hdev, &info->p_dev->dev);
 
 	hdev->open     = bluecard_hci_open;
 	hdev->close    = bluecard_hci_close;
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c
index c9dba55..df7bb01 100644
--- a/drivers/bluetooth/bt3c_cs.c
+++ b/drivers/bluetooth/bt3c_cs.c
@@ -582,6 +582,7 @@ static int bt3c_open(bt3c_info_t *info)
 
 	hdev->type = HCI_PCCARD;
 	hdev->driver_data = info;
+	SET_HCIDEV_DEV(hdev, &info->p_dev->dev);
 
 	hdev->open     = bt3c_hci_open;
 	hdev->close    = bt3c_hci_close;
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c
index c889bf8..746ccca 100644
--- a/drivers/bluetooth/btuart_cs.c
+++ b/drivers/bluetooth/btuart_cs.c
@@ -502,6 +502,7 @@ static int btuart_open(btuart_info_t *info)
 
 	hdev->type = HCI_PCCARD;
 	hdev->driver_data = info;
+	SET_HCIDEV_DEV(hdev, &info->p_dev->dev);
 
 	hdev->open     = btuart_hci_open;
 	hdev->close    = btuart_hci_close;
diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c
index be6eed1..0e99def 100644
--- a/drivers/bluetooth/dtl1_cs.c
+++ b/drivers/bluetooth/dtl1_cs.c
@@ -484,6 +484,7 @@ static int dtl1_open(dtl1_info_t *info)
 
 	hdev->type = HCI_PCCARD;
 	hdev->driver_data = info;
+	SET_HCIDEV_DEV(hdev, &info->p_dev->dev);
 
 	hdev->open     = dtl1_hci_open;
 	hdev->close    = dtl1_hci_close;
diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
index ea58900..aac67a3a 100644
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -277,7 +277,6 @@ static int vhci_open(struct inode *inode, struct file *file)
 
 	hdev->type = HCI_VHCI;
 	hdev->driver_data = vhci;
-	SET_HCIDEV_DEV(hdev, vhci_miscdev.dev);
 
 	hdev->open     = vhci_open_dev;
 	hdev->close    = vhci_close_dev;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index f685270..ae67b36 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -124,6 +124,7 @@ struct hci_dev {
 
 	atomic_t 		promisc;
 
+	struct device		*dev;
 	struct class_device	class_dev;
 
 	struct module 		*owner;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 729461f..788ea7a 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -48,7 +48,7 @@
 #define BT_DBG(D...)
 #endif
 
-#define VERSION "2.9"
+#define VERSION "2.10"
 
 /* Bluetooth sockets */
 #define BT_MAX_PROTO	8
@@ -307,13 +307,21 @@ static struct net_proto_family bt_sock_family_ops = {
 
 static int __init bt_init(void)
 {
+	int err;
+
 	BT_INFO("Core ver %s", VERSION);
 
-	sock_register(&bt_sock_family_ops);
+	err = bt_sysfs_init();
+	if (err < 0)
+		return err;
 
-	BT_INFO("HCI device and connection manager initialized");
+	err = sock_register(&bt_sock_family_ops);
+	if (err < 0) {
+		bt_sysfs_cleanup();
+		return err;
+	}
 
-	bt_sysfs_init();
+	BT_INFO("HCI device and connection manager initialized");
 
 	hci_sock_init();
 
@@ -324,9 +332,9 @@ static void __exit bt_exit(void)
 {
 	hci_sock_cleanup();
 
-	bt_sysfs_cleanup();
-
 	sock_unregister(PF_BLUETOOTH);
+
+	bt_sysfs_cleanup();
 }
 
 subsys_initcall(bt_init);
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 89918d2..7789e26 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -3,6 +3,8 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 
+#include <linux/platform_device.h>
+
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
@@ -197,9 +199,14 @@ struct class bt_class = {
 	.uevent		= bt_uevent,
 #endif
 };
-
 EXPORT_SYMBOL_GPL(bt_class);
 
+static struct bus_type bt_bus = {
+	.name	= "bluetooth",
+};
+
+static struct platform_device *bt_platform;
+
 int hci_register_sysfs(struct hci_dev *hdev)
 {
 	struct class_device *cdev = &hdev->class_dev;
@@ -211,6 +218,11 @@ int hci_register_sysfs(struct hci_dev *hdev)
 	cdev->class = &bt_class;
 	class_set_devdata(cdev, hdev);
 
+	if (!cdev->dev)
+		cdev->dev = &bt_platform->dev;
+
+	hdev->dev = cdev->dev;
+
 	strlcpy(cdev->class_id, hdev->name, BUS_ID_SIZE);
 	err = class_device_register(cdev);
 	if (err < 0)
@@ -233,10 +245,33 @@ void hci_unregister_sysfs(struct hci_dev *hdev)
 
 int __init bt_sysfs_init(void)
 {
-	return class_register(&bt_class);
+	int err;
+
+	bt_platform = platform_device_register_simple("bluetooth", -1, NULL, 0);
+	if (IS_ERR(bt_platform))
+		return PTR_ERR(bt_platform);
+
+	err = bus_register(&bt_bus);
+	if (err < 0) {
+		platform_device_unregister(bt_platform);
+		return err;
+	}
+
+	err = class_register(&bt_class);
+	if (err < 0) {
+		bus_unregister(&bt_bus);
+		platform_device_unregister(bt_platform);
+		return err;
+	}
+
+	return 0;
 }
 
 void __exit bt_sysfs_cleanup(void)
 {
 	class_unregister(&bt_class);
+
+	bus_unregister(&bt_bus);
+
+	platform_device_unregister(bt_platform);
 }
-- 
cgit v0.10.2


From a91f2e396f5b32b21d842b4757bc8de5e88eac66 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 3 Jul 2006 10:02:41 +0200
Subject: [Bluetooth] Use real devices for host controllers

This patch converts the Bluetooth class devices into real devices. The
Bluetooth class is kept and the driver core provides the appropriate
symlinks for backward compatibility.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 911ceb5..771d177 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -175,6 +175,6 @@ extern int hci_sock_cleanup(void);
 extern int bt_sysfs_init(void);
 extern void bt_sysfs_cleanup(void);
 
-extern struct class bt_class;
+extern struct class *bt_class;
 
 #endif /* __BLUETOOTH_H */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ae67b36..d84855f 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -124,8 +124,8 @@ struct hci_dev {
 
 	atomic_t 		promisc;
 
-	struct device		*dev;
-	struct class_device	class_dev;
+	struct device		*parent;
+	struct device		dev;
 
 	struct module 		*owner;
 
@@ -413,7 +413,7 @@ static inline int hci_recv_frame(struct sk_buff *skb)
 int hci_register_sysfs(struct hci_dev *hdev);
 void hci_unregister_sysfs(struct hci_dev *hdev);
 
-#define SET_HCIDEV_DEV(hdev, pdev) ((hdev)->class_dev.dev = (pdev))
+#define SET_HCIDEV_DEV(hdev, pdev) ((hdev)->parent = (pdev))
 
 /* ----- LMP capabilities ----- */
 #define lmp_rswitch_capable(dev)   ((dev)->features[0] & LMP_RSWITCH)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 3f9f156..54e8e5e 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -817,8 +817,8 @@ void hci_free_dev(struct hci_dev *hdev)
 {
 	skb_queue_purge(&hdev->driver_init);
 
-	/* will free via class release */
-	class_device_put(&hdev->class_dev);
+	/* will free via device release */
+	put_device(&hdev->dev);
 }
 EXPORT_SYMBOL(hci_free_dev);
 
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 7789e26..3987d16 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -13,35 +13,35 @@
 #define BT_DBG(D...)
 #endif
 
-static ssize_t show_name(struct class_device *cdev, char *buf)
+static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct hci_dev *hdev = class_get_devdata(cdev);
+	struct hci_dev *hdev = dev_get_drvdata(dev);
 	return sprintf(buf, "%s\n", hdev->name);
 }
 
-static ssize_t show_type(struct class_device *cdev, char *buf)
+static ssize_t show_type(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct hci_dev *hdev = class_get_devdata(cdev);
+	struct hci_dev *hdev = dev_get_drvdata(dev);
 	return sprintf(buf, "%d\n", hdev->type);
 }
 
-static ssize_t show_address(struct class_device *cdev, char *buf)
+static ssize_t show_address(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct hci_dev *hdev = class_get_devdata(cdev);
+	struct hci_dev *hdev = dev_get_drvdata(dev);
 	bdaddr_t bdaddr;
 	baswap(&bdaddr, &hdev->bdaddr);
 	return sprintf(buf, "%s\n", batostr(&bdaddr));
 }
 
-static ssize_t show_flags(struct class_device *cdev, char *buf)
+static ssize_t show_flags(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct hci_dev *hdev = class_get_devdata(cdev);
+	struct hci_dev *hdev = dev_get_drvdata(dev);
 	return sprintf(buf, "0x%lx\n", hdev->flags);
 }
 
-static ssize_t show_inquiry_cache(struct class_device *cdev, char *buf)
+static ssize_t show_inquiry_cache(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct hci_dev *hdev = class_get_devdata(cdev);
+	struct hci_dev *hdev = dev_get_drvdata(dev);
 	struct inquiry_cache *cache = &hdev->inq_cache;
 	struct inquiry_entry *e;
 	int n = 0;
@@ -63,15 +63,15 @@ static ssize_t show_inquiry_cache(struct class_device *cdev, char *buf)
 	return n;
 }
 
-static ssize_t show_idle_timeout(struct class_device *cdev, char *buf)
+static ssize_t show_idle_timeout(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct hci_dev *hdev = class_get_devdata(cdev);
+	struct hci_dev *hdev = dev_get_drvdata(dev);
 	return sprintf(buf, "%d\n", hdev->idle_timeout);
 }
 
-static ssize_t store_idle_timeout(struct class_device *cdev, const char *buf, size_t count)
+static ssize_t store_idle_timeout(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
 {
-	struct hci_dev *hdev = class_get_devdata(cdev);
+	struct hci_dev *hdev = dev_get_drvdata(dev);
 	char *ptr;
 	__u32 val;
 
@@ -87,15 +87,15 @@ static ssize_t store_idle_timeout(struct class_device *cdev, const char *buf, si
 	return count;
 }
 
-static ssize_t show_sniff_max_interval(struct class_device *cdev, char *buf)
+static ssize_t show_sniff_max_interval(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct hci_dev *hdev = class_get_devdata(cdev);
+	struct hci_dev *hdev = dev_get_drvdata(dev);
 	return sprintf(buf, "%d\n", hdev->sniff_max_interval);
 }
 
-static ssize_t store_sniff_max_interval(struct class_device *cdev, const char *buf, size_t count)
+static ssize_t store_sniff_max_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
 {
-	struct hci_dev *hdev = class_get_devdata(cdev);
+	struct hci_dev *hdev = dev_get_drvdata(dev);
 	char *ptr;
 	__u16 val;
 
@@ -114,15 +114,15 @@ static ssize_t store_sniff_max_interval(struct class_device *cdev, const char *b
 	return count;
 }
 
-static ssize_t show_sniff_min_interval(struct class_device *cdev, char *buf)
+static ssize_t show_sniff_min_interval(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct hci_dev *hdev = class_get_devdata(cdev);
+	struct hci_dev *hdev = dev_get_drvdata(dev);
 	return sprintf(buf, "%d\n", hdev->sniff_min_interval);
 }
 
-static ssize_t store_sniff_min_interval(struct class_device *cdev, const char *buf, size_t count)
+static ssize_t store_sniff_min_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
 {
-	struct hci_dev *hdev = class_get_devdata(cdev);
+	struct hci_dev *hdev = dev_get_drvdata(dev);
 	char *ptr;
 	__u16 val;
 
@@ -141,64 +141,32 @@ static ssize_t store_sniff_min_interval(struct class_device *cdev, const char *b
 	return count;
 }
 
-static CLASS_DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
-static CLASS_DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
-static CLASS_DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
-static CLASS_DEVICE_ATTR(flags, S_IRUGO, show_flags, NULL);
-static CLASS_DEVICE_ATTR(inquiry_cache, S_IRUGO, show_inquiry_cache, NULL);
+static DEVICE_ATTR(name, S_IRUGO, show_name, NULL);
+static DEVICE_ATTR(type, S_IRUGO, show_type, NULL);
+static DEVICE_ATTR(address, S_IRUGO, show_address, NULL);
+static DEVICE_ATTR(flags, S_IRUGO, show_flags, NULL);
+static DEVICE_ATTR(inquiry_cache, S_IRUGO, show_inquiry_cache, NULL);
 
-static CLASS_DEVICE_ATTR(idle_timeout, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(idle_timeout, S_IRUGO | S_IWUSR,
 				show_idle_timeout, store_idle_timeout);
-static CLASS_DEVICE_ATTR(sniff_max_interval, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(sniff_max_interval, S_IRUGO | S_IWUSR,
 				show_sniff_max_interval, store_sniff_max_interval);
-static CLASS_DEVICE_ATTR(sniff_min_interval, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(sniff_min_interval, S_IRUGO | S_IWUSR,
 				show_sniff_min_interval, store_sniff_min_interval);
 
-static struct class_device_attribute *bt_attrs[] = {
-	&class_device_attr_name,
-	&class_device_attr_type,
-	&class_device_attr_address,
-	&class_device_attr_flags,
-	&class_device_attr_inquiry_cache,
-	&class_device_attr_idle_timeout,
-	&class_device_attr_sniff_max_interval,
-	&class_device_attr_sniff_min_interval,
+static struct device_attribute *bt_attrs[] = {
+	&dev_attr_name,
+	&dev_attr_type,
+	&dev_attr_address,
+	&dev_attr_flags,
+	&dev_attr_inquiry_cache,
+	&dev_attr_idle_timeout,
+	&dev_attr_sniff_max_interval,
+	&dev_attr_sniff_min_interval,
 	NULL
 };
 
-#ifdef CONFIG_HOTPLUG
-static int bt_uevent(struct class_device *cdev, char **envp, int num_envp, char *buf, int size)
-{
-	struct hci_dev *hdev = class_get_devdata(cdev);
-	int n, i = 0;
-
-	envp[i++] = buf;
-	n = snprintf(buf, size, "INTERFACE=%s", hdev->name) + 1;
-	buf += n;
-	size -= n;
-
-	if ((size <= 0) || (i >= num_envp))
-		return -ENOMEM;
-
-	envp[i] = NULL;
-	return 0;
-}
-#endif
-
-static void bt_release(struct class_device *cdev)
-{
-	struct hci_dev *hdev = class_get_devdata(cdev);
-
-	kfree(hdev);
-}
-
-struct class bt_class = {
-	.name		= "bluetooth",
-	.release	= bt_release,
-#ifdef CONFIG_HOTPLUG
-	.uevent		= bt_uevent,
-#endif
-};
+struct class *bt_class = NULL;
 EXPORT_SYMBOL_GPL(bt_class);
 
 static struct bus_type bt_bus = {
@@ -207,40 +175,50 @@ static struct bus_type bt_bus = {
 
 static struct platform_device *bt_platform;
 
+static void bt_release(struct device *dev)
+{
+	struct hci_dev *hdev = dev_get_drvdata(dev);
+	kfree(hdev);
+}
+
 int hci_register_sysfs(struct hci_dev *hdev)
 {
-	struct class_device *cdev = &hdev->class_dev;
+	struct device *dev = &hdev->dev;
 	unsigned int i;
 	int err;
 
 	BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type);
 
-	cdev->class = &bt_class;
-	class_set_devdata(cdev, hdev);
+	dev->class = bt_class;
+
+	if (hdev->parent)
+		dev->parent = hdev->parent;
+	else
+		dev->parent = &bt_platform->dev;
+
+	strlcpy(dev->bus_id, hdev->name, BUS_ID_SIZE);
 
-	if (!cdev->dev)
-		cdev->dev = &bt_platform->dev;
+	dev->release = bt_release;
 
-	hdev->dev = cdev->dev;
+	dev_set_drvdata(dev, hdev);
 
-	strlcpy(cdev->class_id, hdev->name, BUS_ID_SIZE);
-	err = class_device_register(cdev);
+	err = device_register(dev);
 	if (err < 0)
 		return err;
 
 	for (i = 0; bt_attrs[i]; i++)
-		class_device_create_file(cdev, bt_attrs[i]);
+		device_create_file(dev, bt_attrs[i]);
 
 	return 0;
 }
 
 void hci_unregister_sysfs(struct hci_dev *hdev)
 {
-	struct class_device * cdev = &hdev->class_dev;
+	struct device *dev = &hdev->dev;
 
 	BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type);
 
-	class_device_del(cdev);
+	device_del(dev);
 }
 
 int __init bt_sysfs_init(void)
@@ -257,11 +235,11 @@ int __init bt_sysfs_init(void)
 		return err;
 	}
 
-	err = class_register(&bt_class);
-	if (err < 0) {
+	bt_class = class_create(THIS_MODULE, "bluetooth");
+	if (IS_ERR(bt_class)) {
 		bus_unregister(&bt_bus);
 		platform_device_unregister(bt_platform);
-		return err;
+		return PTR_ERR(bt_class);
 	}
 
 	return 0;
@@ -269,7 +247,7 @@ int __init bt_sysfs_init(void)
 
 void __exit bt_sysfs_cleanup(void)
 {
-	class_unregister(&bt_class);
+	class_destroy(bt_class);
 
 	bus_unregister(&bt_bus);
 
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 7701011..f6501fa 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2219,7 +2219,7 @@ static int __init l2cap_init(void)
 		goto error;
 	}
 
-	class_create_file(&bt_class, &class_attr_l2cap);
+	class_create_file(bt_class, &class_attr_l2cap);
 
 	BT_INFO("L2CAP ver %s", VERSION);
 	BT_INFO("L2CAP socket layer initialized");
@@ -2233,7 +2233,7 @@ error:
 
 static void __exit l2cap_exit(void)
 {
-	class_remove_file(&bt_class, &class_attr_l2cap);
+	class_remove_file(bt_class, &class_attr_l2cap);
 
 	if (bt_sock_unregister(BTPROTO_L2CAP) < 0)
 		BT_ERR("L2CAP socket unregistration failed");
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index bd46e89..6de33fe 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -2035,7 +2035,7 @@ static int __init rfcomm_init(void)
 
 	kernel_thread(rfcomm_run, NULL, CLONE_KERNEL);
 
-	class_create_file(&bt_class, &class_attr_rfcomm_dlc);
+	class_create_file(bt_class, &class_attr_rfcomm_dlc);
 
 	rfcomm_init_sockets();
 
@@ -2050,7 +2050,7 @@ static int __init rfcomm_init(void)
 
 static void __exit rfcomm_exit(void)
 {
-	class_remove_file(&bt_class, &class_attr_rfcomm_dlc);
+	class_remove_file(bt_class, &class_attr_rfcomm_dlc);
 
 	hci_unregister_cb(&rfcomm_cb);
 
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 4e9962c..220fee0 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -944,7 +944,7 @@ int __init rfcomm_init_sockets(void)
 	if (err < 0)
 		goto error;
 
-	class_create_file(&bt_class, &class_attr_rfcomm);
+	class_create_file(bt_class, &class_attr_rfcomm);
 
 	BT_INFO("RFCOMM socket layer initialized");
 
@@ -958,7 +958,7 @@ error:
 
 void __exit rfcomm_cleanup_sockets(void)
 {
-	class_remove_file(&bt_class, &class_attr_rfcomm);
+	class_remove_file(bt_class, &class_attr_rfcomm);
 
 	if (bt_sock_unregister(BTPROTO_RFCOMM) < 0)
 		BT_ERR("RFCOMM socket layer unregistration failed");
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index a5f1e44..85defcc 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -969,7 +969,7 @@ static int __init sco_init(void)
 		goto error;
 	}
 
-	class_create_file(&bt_class, &class_attr_sco);
+	class_create_file(bt_class, &class_attr_sco);
 
 	BT_INFO("SCO (Voice Link) ver %s", VERSION);
 	BT_INFO("SCO socket layer initialized");
@@ -983,7 +983,7 @@ error:
 
 static void __exit sco_exit(void)
 {
-	class_remove_file(&bt_class, &class_attr_sco);
+	class_remove_file(bt_class, &class_attr_sco);
 
 	if (bt_sock_unregister(BTPROTO_SCO) < 0)
 		BT_ERR("SCO socket unregistration failed");
-- 
cgit v0.10.2


From 0139418c943c3389cf75afc4f4d2b2fa52bbf7c9 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 3 Jul 2006 10:02:46 +0200
Subject: [Bluetooth] Small cleanup of the L2CAP source code

This patch is a small cleanup of the L2CAP source code. It makes some
coding style changes and moves some functions around to avoid forward
declarations.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index f6501fa..eaaad65 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -63,11 +63,6 @@ static struct bt_sock_list l2cap_sk_list = {
 	.lock = RW_LOCK_UNLOCKED
 };
 
-static int l2cap_conn_del(struct hci_conn *conn, int err);
-
-static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent);
-static void l2cap_chan_del(struct sock *sk, int err);
-
 static void __l2cap_sock_close(struct sock *sk, int reason);
 static void l2cap_sock_close(struct sock *sk);
 static void l2cap_sock_kill(struct sock *sk);
@@ -109,24 +104,177 @@ static void l2cap_sock_init_timer(struct sock *sk)
 	sk->sk_timer.data = (unsigned long)sk;
 }
 
+/* ---- L2CAP channels ---- */
+static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid)
+{
+	struct sock *s;
+	for (s = l->head; s; s = l2cap_pi(s)->next_c) {
+		if (l2cap_pi(s)->dcid == cid)
+			break;
+	}
+	return s;
+}
+
+static struct sock *__l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid)
+{
+	struct sock *s;
+	for (s = l->head; s; s = l2cap_pi(s)->next_c) {
+		if (l2cap_pi(s)->scid == cid)
+			break;
+	}
+	return s;
+}
+
+/* Find channel with given SCID.
+ * Returns locked socket */
+static inline struct sock *l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid)
+{
+	struct sock *s;
+	read_lock(&l->lock);
+	s = __l2cap_get_chan_by_scid(l, cid);
+	if (s) bh_lock_sock(s);
+	read_unlock(&l->lock);
+	return s;
+}
+
+static struct sock *__l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident)
+{
+	struct sock *s;
+	for (s = l->head; s; s = l2cap_pi(s)->next_c) {
+		if (l2cap_pi(s)->ident == ident)
+			break;
+	}
+	return s;
+}
+
+static inline struct sock *l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident)
+{
+	struct sock *s;
+	read_lock(&l->lock);
+	s = __l2cap_get_chan_by_ident(l, ident);
+	if (s) bh_lock_sock(s);
+	read_unlock(&l->lock);
+	return s;
+}
+
+static u16 l2cap_alloc_cid(struct l2cap_chan_list *l)
+{
+	u16 cid = 0x0040;
+
+	for (; cid < 0xffff; cid++) {
+		if(!__l2cap_get_chan_by_scid(l, cid))
+			return cid;
+	}
+
+	return 0;
+}
+
+static inline void __l2cap_chan_link(struct l2cap_chan_list *l, struct sock *sk)
+{
+	sock_hold(sk);
+
+	if (l->head)
+		l2cap_pi(l->head)->prev_c = sk;
+
+	l2cap_pi(sk)->next_c = l->head;
+	l2cap_pi(sk)->prev_c = NULL;
+	l->head = sk;
+}
+
+static inline void l2cap_chan_unlink(struct l2cap_chan_list *l, struct sock *sk)
+{
+	struct sock *next = l2cap_pi(sk)->next_c, *prev = l2cap_pi(sk)->prev_c;
+
+	write_lock(&l->lock);
+	if (sk == l->head)
+		l->head = next;
+
+	if (next)
+		l2cap_pi(next)->prev_c = prev;
+	if (prev)
+		l2cap_pi(prev)->next_c = next;
+	write_unlock(&l->lock);
+
+	__sock_put(sk);
+}
+
+static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent)
+{
+	struct l2cap_chan_list *l = &conn->chan_list;
+
+	BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid);
+
+	l2cap_pi(sk)->conn = conn;
+
+	if (sk->sk_type == SOCK_SEQPACKET) {
+		/* Alloc CID for connection-oriented socket */
+		l2cap_pi(sk)->scid = l2cap_alloc_cid(l);
+	} else if (sk->sk_type == SOCK_DGRAM) {
+		/* Connectionless socket */
+		l2cap_pi(sk)->scid = 0x0002;
+		l2cap_pi(sk)->dcid = 0x0002;
+		l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU;
+	} else {
+		/* Raw socket can send/recv signalling messages only */
+		l2cap_pi(sk)->scid = 0x0001;
+		l2cap_pi(sk)->dcid = 0x0001;
+		l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU;
+	}
+
+	__l2cap_chan_link(l, sk);
+
+	if (parent)
+		bt_accept_enqueue(parent, sk);
+}
+
+/* Delete channel. 
+ * Must be called on the locked socket. */
+static void l2cap_chan_del(struct sock *sk, int err)
+{
+	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+	struct sock *parent = bt_sk(sk)->parent;
+
+	l2cap_sock_clear_timer(sk);
+
+	BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
+
+	if (conn) { 
+		/* Unlink from channel list */
+		l2cap_chan_unlink(&conn->chan_list, sk);
+		l2cap_pi(sk)->conn = NULL;
+		hci_conn_put(conn->hcon);
+	}
+
+	sk->sk_state  = BT_CLOSED;
+	sock_set_flag(sk, SOCK_ZAPPED);
+
+	if (err)
+		sk->sk_err = err;
+
+	if (parent) {
+		bt_accept_unlink(sk);
+		parent->sk_data_ready(parent, 0);
+	} else
+		sk->sk_state_change(sk);
+}
+
 /* ---- L2CAP connections ---- */
 static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
 {
-	struct l2cap_conn *conn;
-
-	if ((conn = hcon->l2cap_data))
-		return conn;
+	struct l2cap_conn *conn = hcon->l2cap_data;
 
-	if (status)
+	if (conn || status)
 		return conn;
 
-	if (!(conn = kmalloc(sizeof(struct l2cap_conn), GFP_ATOMIC)))
+	conn = kzalloc(sizeof(struct l2cap_conn), GFP_ATOMIC);
+	if (!conn)
 		return NULL;
-	memset(conn, 0, sizeof(struct l2cap_conn));
 
 	hcon->l2cap_data = conn;
 	conn->hcon = hcon;
 
+	BT_DBG("hcon %p conn %p", hcon, conn);
+
 	conn->mtu = hcon->hdev->acl_mtu;
 	conn->src = &hcon->hdev->bdaddr;
 	conn->dst = &hcon->dst;
@@ -134,17 +282,16 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
 	spin_lock_init(&conn->lock);
 	rwlock_init(&conn->chan_list.lock);
 
-	BT_DBG("hcon %p conn %p", hcon, conn);
 	return conn;
 }
 
-static int l2cap_conn_del(struct hci_conn *hcon, int err)
+static void l2cap_conn_del(struct hci_conn *hcon, int err)
 {
-	struct l2cap_conn *conn;
+	struct l2cap_conn *conn = hcon->l2cap_data;
 	struct sock *sk;
 
-	if (!(conn = hcon->l2cap_data)) 
-		return 0;
+	if (!conn)
+		return;
 
 	BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
 
@@ -161,7 +308,6 @@ static int l2cap_conn_del(struct hci_conn *hcon, int err)
 
 	hcon->l2cap_data = NULL;
 	kfree(conn);
-	return 0;
 }
 
 static inline void l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent)
@@ -925,160 +1071,6 @@ static int l2cap_sock_release(struct socket *sock)
 	return err;
 }
 
-/* ---- L2CAP channels ---- */
-static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid)
-{
-	struct sock *s;
-	for (s = l->head; s; s = l2cap_pi(s)->next_c) {
-		if (l2cap_pi(s)->dcid == cid)
-			break;
-	}
-	return s;
-}
-
-static struct sock *__l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid)
-{
-	struct sock *s;
-	for (s = l->head; s; s = l2cap_pi(s)->next_c) {
-		if (l2cap_pi(s)->scid == cid)
-			break;
-	}
-	return s;
-}
-
-/* Find channel with given SCID.
- * Returns locked socket */
-static inline struct sock *l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid)
-{
-	struct sock *s;
-	read_lock(&l->lock);
-	s = __l2cap_get_chan_by_scid(l, cid);
-	if (s) bh_lock_sock(s);
-	read_unlock(&l->lock);
-	return s;
-}
-
-static struct sock *__l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident)
-{
-	struct sock *s;
-	for (s = l->head; s; s = l2cap_pi(s)->next_c) {
-		if (l2cap_pi(s)->ident == ident)
-			break;
-	}
-	return s;
-}
-
-static inline struct sock *l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident)
-{
-	struct sock *s;
-	read_lock(&l->lock);
-	s = __l2cap_get_chan_by_ident(l, ident);
-	if (s) bh_lock_sock(s);
-	read_unlock(&l->lock);
-	return s;
-}
-
-static u16 l2cap_alloc_cid(struct l2cap_chan_list *l)
-{
-	u16 cid = 0x0040;
-
-	for (; cid < 0xffff; cid++) {
-		if(!__l2cap_get_chan_by_scid(l, cid))
-			return cid;
-	}
-
-	return 0;
-}
-
-static inline void __l2cap_chan_link(struct l2cap_chan_list *l, struct sock *sk)
-{
-	sock_hold(sk);
-
-	if (l->head)
-		l2cap_pi(l->head)->prev_c = sk;
-
-	l2cap_pi(sk)->next_c = l->head;
-	l2cap_pi(sk)->prev_c = NULL;
-	l->head = sk;
-}
-
-static inline void l2cap_chan_unlink(struct l2cap_chan_list *l, struct sock *sk)
-{
-	struct sock *next = l2cap_pi(sk)->next_c, *prev = l2cap_pi(sk)->prev_c;
-
-	write_lock(&l->lock);
-	if (sk == l->head)
-		l->head = next;
-
-	if (next)
-		l2cap_pi(next)->prev_c = prev;
-	if (prev)
-		l2cap_pi(prev)->next_c = next;
-	write_unlock(&l->lock);
-
-	__sock_put(sk);
-}
-
-static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent)
-{
-	struct l2cap_chan_list *l = &conn->chan_list;
-
-	BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid);
-
-	l2cap_pi(sk)->conn = conn;
-
-	if (sk->sk_type == SOCK_SEQPACKET) {
-		/* Alloc CID for connection-oriented socket */
-		l2cap_pi(sk)->scid = l2cap_alloc_cid(l);
-	} else if (sk->sk_type == SOCK_DGRAM) {
-		/* Connectionless socket */
-		l2cap_pi(sk)->scid = 0x0002;
-		l2cap_pi(sk)->dcid = 0x0002;
-		l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU;
-	} else {
-		/* Raw socket can send/recv signalling messages only */
-		l2cap_pi(sk)->scid = 0x0001;
-		l2cap_pi(sk)->dcid = 0x0001;
-		l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU;
-	}
-
-	__l2cap_chan_link(l, sk);
-
-	if (parent)
-		bt_accept_enqueue(parent, sk);
-}
-
-/* Delete channel. 
- * Must be called on the locked socket. */
-static void l2cap_chan_del(struct sock *sk, int err)
-{
-	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
-	struct sock *parent = bt_sk(sk)->parent;
-
-	l2cap_sock_clear_timer(sk);
-
-	BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
-
-	if (conn) { 
-		/* Unlink from channel list */
-		l2cap_chan_unlink(&conn->chan_list, sk);
-		l2cap_pi(sk)->conn = NULL;
-		hci_conn_put(conn->hcon);
-	}
-
-	sk->sk_state  = BT_CLOSED;
-	sock_set_flag(sk, SOCK_ZAPPED);
-
-	if (err)
-		sk->sk_err = err;
-
-	if (parent) {
-		bt_accept_unlink(sk);
-		parent->sk_data_ready(parent, 0);
-	} else
-		sk->sk_state_change(sk);
-}
-
 static void l2cap_conn_ready(struct l2cap_conn *conn)
 {
 	struct l2cap_chan_list *l = &conn->chan_list;
@@ -1834,7 +1826,9 @@ drop:
 	kfree_skb(skb);
 
 done:
-	if (sk) bh_unlock_sock(sk);
+	if (sk)
+		bh_unlock_sock(sk);
+
 	return 0;
 }
 
@@ -1925,18 +1919,18 @@ static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
 
 static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
 {
+	struct l2cap_conn *conn;
+
 	BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status);
 
 	if (hcon->type != ACL_LINK)
 		return 0;
 
 	if (!status) {
-		struct l2cap_conn *conn;
-
 		conn = l2cap_conn_add(hcon, status);
 		if (conn)
 			l2cap_conn_ready(conn);
-	} else 
+	} else
 		l2cap_conn_del(hcon, bt_err(status));
 
 	return 0;
@@ -1950,19 +1944,21 @@ static int l2cap_disconn_ind(struct hci_conn *hcon, u8 reason)
 		return 0;
 
 	l2cap_conn_del(hcon, bt_err(reason));
+
 	return 0;
 }
 
 static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status)
 {
 	struct l2cap_chan_list *l;
-	struct l2cap_conn *conn;
+	struct l2cap_conn *conn = conn = hcon->l2cap_data;
 	struct l2cap_conn_rsp rsp;
 	struct sock *sk;
 	int result;
 
-	if (!(conn = hcon->l2cap_data))
+	if (!conn)
 		return 0;
+
 	l = &conn->chan_list;
 
 	BT_DBG("conn %p", conn);
@@ -2005,13 +2001,14 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status)
 static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status)
 {
 	struct l2cap_chan_list *l;
-	struct l2cap_conn *conn;
+	struct l2cap_conn *conn = hcon->l2cap_data;
 	struct l2cap_conn_rsp rsp;
 	struct sock *sk;
 	int result;
 
-	if (!(conn = hcon->l2cap_data))
+	if (!conn)
 		return 0;
+
 	l = &conn->chan_list;
 
 	BT_DBG("conn %p", conn);
-- 
cgit v0.10.2


From 7c2660b00fae0575dd4ce5c7b6bf30762b632045 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 3 Jul 2006 10:02:51 +0200
Subject: [Bluetooth] Allow disabling of credit based flow control

This patch adds the module parameter disable_cfc which can be used to
disable the credit based flow control. The credit based flow control
was introduced with the Bluetooth 1.1 specification and devices can
negotiate its support, but for testing purpose it is helpful to allow
disabling of it.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>

diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 6de33fe..fe18dc2 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -52,8 +52,9 @@
 #define BT_DBG(D...)
 #endif
 
-#define VERSION "1.7"
+#define VERSION "1.8"
 
+static int disable_cfc = 0;
 static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU;
 
 static struct task_struct *rfcomm_thread;
@@ -533,7 +534,7 @@ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state)
 	s->sock  = sock;
 
 	s->mtu = RFCOMM_DEFAULT_MTU;
-	s->cfc = RFCOMM_CFC_UNKNOWN;
+	s->cfc = disable_cfc ? RFCOMM_CFC_DISABLED : RFCOMM_CFC_UNKNOWN;
 
 	/* Do not increment module usage count for listening sessions.
 	 * Otherwise we won't be able to unload the module. */
@@ -1222,14 +1223,18 @@ static int rfcomm_apply_pn(struct rfcomm_dlc *d, int cr, struct rfcomm_pn *pn)
 	BT_DBG("dlc %p state %ld dlci %d mtu %d fc 0x%x credits %d", 
 			d, d->state, d->dlci, pn->mtu, pn->flow_ctrl, pn->credits);
 
-	if (pn->flow_ctrl == 0xf0 || pn->flow_ctrl == 0xe0) {
-		d->cfc = s->cfc = RFCOMM_CFC_ENABLED;
+	if ((pn->flow_ctrl == 0xf0 && s->cfc != RFCOMM_CFC_DISABLED) ||
+						pn->flow_ctrl == 0xe0) {
+		d->cfc = RFCOMM_CFC_ENABLED;
 		d->tx_credits = pn->credits;
 	} else {
-		d->cfc = s->cfc = RFCOMM_CFC_DISABLED;
+		d->cfc = RFCOMM_CFC_DISABLED;
 		set_bit(RFCOMM_TX_THROTTLED, &d->flags);
 	}
 
+	if (s->cfc == RFCOMM_CFC_UNKNOWN)
+		s->cfc = d->cfc;
+
 	d->priority = pn->priority;
 
 	d->mtu = s->mtu = btohs(pn->mtu);
@@ -2073,6 +2078,9 @@ static void __exit rfcomm_exit(void)
 module_init(rfcomm_init);
 module_exit(rfcomm_exit);
 
+module_param(disable_cfc, bool, 0644);
+MODULE_PARM_DESC(disable_cfc, "Disable credit based flow control");
+
 module_param(l2cap_mtu, uint, 0644);
 MODULE_PARM_DESC(l2cap_mtu, "Default MTU for the L2CAP connection");
 
-- 
cgit v0.10.2


From 300b93974ff64f1bef1ac8294547c573954f0300 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 3 Jul 2006 10:37:55 +0200
Subject: [Bluetooth] Add RFCOMM role switch support

This patch adds the support for RFCOMM role switching before the
connection is fully established.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>

diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index fe18dc2..155a2b9 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1150,6 +1150,8 @@ static inline int rfcomm_check_link_mode(struct rfcomm_dlc *d)
 
 static void rfcomm_dlc_accept(struct rfcomm_dlc *d)
 {
+	struct sock *sk = d->session->sock->sk;
+
 	BT_DBG("dlc %p", d);
 
 	rfcomm_send_ua(d->session, d->dlci);
@@ -1159,6 +1161,9 @@ static void rfcomm_dlc_accept(struct rfcomm_dlc *d)
 	d->state_change(d, 0);
 	rfcomm_dlc_unlock(d);
 
+	if (d->link_mode & RFCOMM_LM_MASTER)
+		hci_conn_switch_role(l2cap_pi(sk)->conn->hcon, 0x00);
+
 	rfcomm_send_msc(d->session, 1, d->dlci, d->v24_sig);
 }
 
-- 
cgit v0.10.2


From b6a7c79a52939513ef043d6eb9fcf12a056c010e Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Date: Mon, 3 Jul 2006 17:32:20 -0700
Subject: [PATCH] Fix modular cpuid.ko

With recent change, if CONFIG_HOTPLUG_CPU is disabled,
register_cpu_notifier() is not exported.  And it breaked moduler msr/cpuid
(msr.c was already fixed).

We need to use register_hotcpu_notifier() now in module, instead of
register_cpu_notifier().

Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c
index a8d3ecd..fde8bea 100644
--- a/arch/i386/kernel/cpuid.c
+++ b/arch/i386/kernel/cpuid.c
@@ -167,6 +167,7 @@ static int cpuid_class_device_create(int i)
 	return err;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
@@ -186,6 +187,7 @@ static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier =
 {
 	.notifier_call = cpuid_class_cpu_callback,
 };
+#endif /* !CONFIG_HOTPLUG_CPU */
 
 static int __init cpuid_init(void)
 {
@@ -208,7 +210,7 @@ static int __init cpuid_init(void)
 		if (err != 0) 
 			goto out_class;
 	}
-	register_cpu_notifier(&cpuid_class_cpu_notifier);
+	register_hotcpu_notifier(&cpuid_class_cpu_notifier);
 
 	err = 0;
 	goto out;
@@ -233,7 +235,7 @@ static void __exit cpuid_exit(void)
 		class_device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
 	class_destroy(cpuid_class);
 	unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
-	unregister_cpu_notifier(&cpuid_class_cpu_notifier);
+	unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
 }
 
 module_init(cpuid_init);
-- 
cgit v0.10.2


From 9f09c548e185b63a3567498b96783f897b5f0399 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 3 Jul 2006 17:32:20 -0700
Subject: [PATCH] zfcp: fix incorrect usage of erp_lock

  =================================
  [ INFO: inconsistent lock state ]
  ---------------------------------
  inconsistent {hardirq-on-W} -> {in-hardirq-W} usage.
  swapper/0 [HC1[1]:SC0[0]:HE0:SE1] takes:
   (&adapter->erp_lock){+-..}, at: [<000000000026c7f8>] zfcp_erp_async_handler+0x3c/0x70
  {hardirq-on-W} state was registered at:
    [<000000000005f33e>] __lock_acquire+0x30a/0xed0
    [<00000000000604ae>] lock_acquire+0x9a/0xc8
    [<000000000035a7ae>] _write_lock+0x4e/0x68
    [<000000000026d822>] zfcp_erp_adapter_strategy_generic+0x286/0xd94
    [<000000000026fd72>] zfcp_erp_strategy_do_action+0x91e/0x1a94
    [<0000000000271a3a>] zfcp_erp_thread+0x21a/0x1568
    [<0000000000019096>] kernel_thread_starter+0x6/0xc
    [<0000000000019090>] kernel_thread_starter+0x0/0xc
  irq event stamp: 12078
  hardirqs last  enabled at (12077): [<0000000000019416>] cpu_idle+0x206/0x250
  hardirqs last disabled at (12078): [<0000000000020458>] io_no_vtime+0xc/0x1c
  softirqs last  enabled at (12072): [<0000000000040b62>] __do_softirq+0x13a/0x180
  softirqs last disabled at (12059): [<000000000001fd58>] do_softirq+0xec/0xf0

  other info that might help us debug this:
  no locks held by swapper/0.

  stack backtrace:
  00000000012bb648 0000000000000002 0000000000000000 00000000012bb758
         00000000012bb6c0 0000000000399122 0000000000399122 0000000000016b0a
         0000000000000000 0000000000000001 0000000000000000 00000000004660e8
         0000000000000000 000000000000000d 00000000012bb6b8 00000000012bb730
         0000000000368b90 0000000000016b0a 00000000012bb6b8 00000000012bb708
  Call Trace:
  ([<0000000000016a26>] show_trace+0x76/0xdc)
   [<0000000000016b2c>] show_stack+0xa0/0xd0
   [<0000000000016b8a>] dump_stack+0x2e/0x3c
   [<000000000005e3da>] print_usage_bug+0x27e/0x290
   [<000000000005e934>] mark_lock+0x548/0x6c0
   [<000000000005fb0c>] __lock_acquire+0xad8/0xed0
   [<00000000000604ae>] lock_acquire+0x9a/0xc8
   [<000000000035a662>] _write_lock_irqsave+0x62/0x80
   [<000000000026c7f8>] zfcp_erp_async_handler+0x3c/0x70
   [<0000000000279178>] zfcp_fsf_req_dispatch+0xd8/0x1fa8
   [<000000000027e538>] zfcp_fsf_req_complete+0x104/0xe4c
   [<0000000000274534>] zfcp_qdio_reqid_check+0xf4/0x178
   [<000000000027469e>] zfcp_qdio_response_handler+0xe6/0x430
   [<0000000000219dd4>] tiqdio_thinint_handler+0xd20/0x213c
   [<000000000020229a>] do_adapter_IO+0xb2/0xc0
   [<0000000000206f32>] do_IRQ+0x136/0x16c
   [<0000000000020462>] io_no_vtime+0x16/0x1c
   [<0000000000019432>] cpu_idle+0x222/0x250
  ([<0000000000019416>] cpu_idle+0x206/0x250)
   [<000000000001405a>] rest_init+0x5a/0x68
   [<0000000000536998>] start_kernel+0x39c/0x3dc
   [<0000000000013046>] _stext+0x46/0x1000

Fix incorrect usage of erp_lock. Using the write_lock() variant is wrong,
since this might lead to deadlocks.

Acked-by: Andreas Herrmann <aherrman@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index 909731b..8ec8da0 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -2168,9 +2168,9 @@ zfcp_erp_adapter_strategy_open_fsf_xconfig(struct zfcp_erp_action *erp_action)
 		atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
 				  &adapter->status);
 		ZFCP_LOG_DEBUG("Doing exchange config data\n");
-		write_lock(&adapter->erp_lock);
+		write_lock_irq(&adapter->erp_lock);
 		zfcp_erp_action_to_running(erp_action);
-		write_unlock(&adapter->erp_lock);
+		write_unlock_irq(&adapter->erp_lock);
 		zfcp_erp_timeout_init(erp_action);
 		if (zfcp_fsf_exchange_config_data(erp_action)) {
 			retval = ZFCP_ERP_FAILED;
@@ -2236,9 +2236,9 @@ zfcp_erp_adapter_strategy_open_fsf_xport(struct zfcp_erp_action *erp_action)
 	adapter = erp_action->adapter;
 	atomic_clear_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status);
 
-	write_lock(&adapter->erp_lock);
+	write_lock_irq(&adapter->erp_lock);
 	zfcp_erp_action_to_running(erp_action);
-	write_unlock(&adapter->erp_lock);
+	write_unlock_irq(&adapter->erp_lock);
 
 	zfcp_erp_timeout_init(erp_action);
 	ret = zfcp_fsf_exchange_port_data(erp_action, adapter, NULL);
-- 
cgit v0.10.2


From 38c54ee8d5338f49aca986081ea41a987c15cf9d Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 3 Jul 2006 17:32:21 -0700
Subject: [PATCH] zfcp: fix incorrect usage of fsf_req_list_lock

  =================================
  [ INFO: inconsistent lock state ]
  ---------------------------------
  inconsistent {in-hardirq-W} -> {hardirq-on-W} usage.
  swapper/0 [HC0[0]:SC1[1]:HE1:SE0] takes:
   (&adapter->fsf_req_list_lock){++..}, at: [<0000000000274486>] zfcp_qdio_reqid_check+0x46/0x178
  {in-hardirq-W} state was registered at:
    [<000000000005fb0c>] __lock_acquire+0xad8/0xed0
    [<00000000000604ae>] lock_acquire+0x9a/0xc8
    [<000000000035a326>] _spin_lock+0x4e/0x68
    [<0000000000274486>] zfcp_qdio_reqid_check+0x46/0x178
    [<000000000027469e>] zfcp_qdio_response_handler+0xe6/0x430
    [<0000000000219dd4>] tiqdio_thinint_handler+0xd20/0x213c
    [<000000000020229a>] do_adapter_IO+0xb2/0xc0
    [<0000000000206f32>] do_IRQ+0x136/0x16c
    [<0000000000020462>] io_no_vtime+0x16/0x1c
    [<0000000000019432>] cpu_idle+0x222/0x250
  irq event stamp: 129220
  hardirqs last  enabled at (129220): [<00000000000411e6>] tasklet_hi_action+0x5a/0x19c
  hardirqs last disabled at (129219): [<00000000000411c0>] tasklet_hi_action+0x34/0x19c
  softirqs last  enabled at (129212): [<0000000000040b62>] __do_softirq+0x13a/0x180
  softirqs last disabled at (129217): [<000000000001fd58>] do_softirq+0xec/0xf0

  other info that might help us debug this:
  no locks held by swapper/0.

  stack backtrace:
  00000000012bb670 0000000000000002 0000000000000000 00000000012bb780
         00000000012bb6e8 0000000000399122 0000000000399122 0000000000016b0a
         0000000000000000 0000000000000000 0000000000000000 00000000004660e8
         0000000000000000 000000000000000d 00000000012bb6e0 00000000012bb758
         0000000000368b90 0000000000016b0a 00000000012bb6e0 00000000012bb730
  Call Trace:
  ([<0000000000016a26>] show_trace+0x76/0xdc)
   [<0000000000016b2c>] show_stack+0xa0/0xd0
   [<0000000000016b8a>] dump_stack+0x2e/0x3c
   [<000000000005e3da>] print_usage_bug+0x27e/0x290
   [<000000000005ea9c>] mark_lock+0x6b0/0x6c0
   [<000000000005f33e>] __lock_acquire+0x30a/0xed0
   [<00000000000604ae>] lock_acquire+0x9a/0xc8
   [<000000000035a326>] _spin_lock+0x4e/0x68
   [<0000000000274486>] zfcp_qdio_reqid_check+0x46/0x178
   [<000000000027469e>] zfcp_qdio_response_handler+0xe6/0x430
   [<0000000000217bd2>] tiqdio_tl+0xd02/0x2120
   [<000000000004123a>] tasklet_hi_action+0xae/0x19c
   [<0000000000040ae4>] __do_softirq+0xbc/0x180
   [<000000000001fd58>] do_softirq+0xec/0xf0
   [<0000000000040c38>] irq_exit+0x90/0xa8
   [<0000000000206f40>] do_IRQ+0x144/0x16c
   [<0000000000020462>] io_no_vtime+0x16/0x1c
   [<0000000000019432>] cpu_idle+0x222/0x250
  ([<0000000000019416>] cpu_idle+0x206/0x250)
   [<000000000001405a>] rest_init+0x5a/0x68
   [<0000000000536998>] start_kernel+0x39c/0x3dc
   [<0000000000013046>] _stext+0x46/0x1000

Fix incorrect usage of fsf_req_list_lock. It's used in tasklet context
(irqs on) as well as in irq context. Therefore use the spin_lock_irqsave
variant to avoid deadlocks.

Acked-by: Andreas Herrmann <aherrman@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 345a191..49ea5ad 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -427,6 +427,7 @@ int
 zfcp_qdio_reqid_check(struct zfcp_adapter *adapter, void *sbale_addr)
 {
 	struct zfcp_fsf_req *fsf_req;
+	unsigned long flags;
 
 	/* invalid (per convention used in this driver) */
 	if (unlikely(!sbale_addr)) {
@@ -438,15 +439,15 @@ zfcp_qdio_reqid_check(struct zfcp_adapter *adapter, void *sbale_addr)
 	fsf_req = (struct zfcp_fsf_req *) sbale_addr;
 
 	/* serialize with zfcp_fsf_req_dismiss_all */
-	spin_lock(&adapter->fsf_req_list_lock);
+	spin_lock_irqsave(&adapter->fsf_req_list_lock, flags);
 	if (list_empty(&adapter->fsf_req_list_head)) {
-		spin_unlock(&adapter->fsf_req_list_lock);
+		spin_unlock_irqrestore(&adapter->fsf_req_list_lock, flags);
 		return 0;
 	}
 	list_del(&fsf_req->list);
 	atomic_dec(&adapter->fsf_reqs_active);
-	spin_unlock(&adapter->fsf_req_list_lock);
-	
+	spin_unlock_irqrestore(&adapter->fsf_req_list_lock, flags);
+
 	if (unlikely(adapter != fsf_req->adapter)) {
 		ZFCP_LOG_NORMAL("bug: invalid reqid (fsf_req=%p, "
 				"fsf_req->adapter=%p, adapter=%p)\n",
-- 
cgit v0.10.2


From d8cb7c1ded6e5a80a7335716dde60784a0d51c1d Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 3 Jul 2006 17:32:22 -0700
Subject: [PATCH] revert "kthread: convert stop_machine into a kthread"

Jiri reports that the stop_machin kthread conversion caused his machine to
hang when suspending.  Hyperthreading is apparently involved.

I don't see why that would be and I can't reproduce it.  Revert to the 2.6.17
code.

Cc: "Serge E. Hallyn" <serue@us.ibm.com>
Cc: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 2c0aacc..dcfb5d7 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -4,7 +4,6 @@
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/syscalls.h>
-#include <linux/kthread.h>
 #include <asm/atomic.h>
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
@@ -26,11 +25,13 @@ static unsigned int stopmachine_num_threads;
 static atomic_t stopmachine_thread_ack;
 static DECLARE_MUTEX(stopmachine_mutex);
 
-static int stopmachine(void *unused)
+static int stopmachine(void *cpu)
 {
 	int irqs_disabled = 0;
 	int prepared = 0;
 
+	set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
+
 	/* Ack: we are alive */
 	smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
 	atomic_inc(&stopmachine_thread_ack);
@@ -84,8 +85,7 @@ static void stopmachine_set_state(enum stopmachine_state state)
 
 static int stop_machine(void)
 {
-	int ret = 0;
-	unsigned int i;
+	int i, ret = 0;
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 
 	/* One high-prio thread per cpu.  We'll do this one. */
@@ -96,16 +96,11 @@ static int stop_machine(void)
 	stopmachine_state = STOPMACHINE_WAIT;
 
 	for_each_online_cpu(i) {
-		struct task_struct *tsk;
 		if (i == raw_smp_processor_id())
 			continue;
-		tsk = kthread_create(stopmachine, NULL, "stopmachine");
-		if (IS_ERR(tsk)) {
-			ret = PTR_ERR(tsk);
+		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
+		if (ret < 0)
 			break;
-		}
-		kthread_bind(tsk, i);
-		wake_up_process(tsk);
 		stopmachine_num_threads++;
 	}
 
-- 
cgit v0.10.2


From 31304c909e6945b005af62cd55a582e9c010a0b4 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@snapgear.com>
Date: Tue, 4 Jul 2006 15:04:39 +1000
Subject: [PATCH] uclinux: fix proc_task()/get_proc-task() naming

Fix changed name of proc_task() to get_proc_task().

Signed-off-by: Greg Ungerer <gerg@uclinux.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index af69f28..4616ed5 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -107,7 +107,7 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount *
 {
 	struct vm_list_struct *vml;
 	struct vm_area_struct *vma;
-	struct task_struct *task = proc_task(inode);
+	struct task_struct *task = get_proc_task(inode);
 	struct mm_struct *mm = get_task_mm(task);
 	int result = -ENOENT;
 
-- 
cgit v0.10.2


From dd8041f16b117f63f40fb844d6cdebe8b03514d2 Mon Sep 17 00:00:00 2001
From: Yasunori Goto <y-goto@jp.fujitsu.com>
Date: Tue, 4 Jul 2006 02:57:51 -0700
Subject: [PATCH] Fix copying of pgdat array on each node for ia64 memory
 hotplug

I found a bug in memory hot-add code for ia64.

IA64's code has copies of pgdat's array on each node to reduce memory
access over crossing node.  This array is used by NODE_DATA() macro.  When
new node is hot-added, this pgdat's array should be updated and copied on
new node too.

However, I used for_each_online_node() in scatter_node_data() to copy
it. This meant its array is not copied on new node.
Because initialization of structures for new node was halfway,
so online_node_map couldn't be set at this time.

To copy arrays on new node, I changed it to check value of pgdat_list[]
which is source array of copies.  I tested this patch with my Memory Hotadd
emulation on Tiger4.  This patch is for 2.6.17-git20.

Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 525b082..99bd9e3 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -313,9 +313,19 @@ static void __meminit scatter_node_data(void)
 	pg_data_t **dst;
 	int node;
 
-	for_each_online_node(node) {
-		dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs;
-		memcpy(dst, pgdat_list, sizeof(pgdat_list));
+	/*
+	 * for_each_online_node() can't be used at here.
+	 * node_online_map is not set for hot-added nodes at this time,
+	 * because we are halfway through initialization of the new node's
+	 * structures.  If for_each_online_node() is used, a new node's
+	 * pg_data_ptrs will be not initialized. Insted of using it,
+	 * pgdat_list[] is checked.
+	 */
+	for_each_node(node) {
+		if (pgdat_list[node]) {
+			dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs;
+			memcpy(dst, pgdat_list, sizeof(pgdat_list));
+		}
 	}
 }
 
-- 
cgit v0.10.2


From a46f9484f8926aacb2e79a0e1676de3a6a6fbae8 Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@oracle.com>
Date: Tue, 4 Jul 2006 02:57:52 -0700
Subject: [PATCH] mthca: initialize send and receive queue locks separately

mthca: initialize send and receive queue locks separately

lockdep identifies a lock by the call site of its initialization.  By
initializing the send and receive queue locks in mthca_wq_init() we confuse
lockdep.  It warns that that the ordered acquiry of both locks in
mthca_modify_qp() is recursive acquiry of one lock:

  =============================================
  [ INFO: possible recursive locking detected ]
  ---------------------------------------------
  modprobe/1192 is trying to acquire lock:
   (&wq->lock){....}, at: [<f892b4db>] mthca_modify_qp+0x60/0xa7b [ib_mthca]
  but task is already holding lock:
   (&wq->lock){....}, at: [<f892b4ce>] mthca_modify_qp+0x53/0xa7b [ib_mthca]

Initializing the locks separately in mthca_alloc_qp_common() stops the
warning and will let lockdep enforce proper ordering on paths that acquire
both locks.

Signed-off-by: Zach Brown <zach.brown@oracle.com>
Cc: Roland Dreier <rolandd@cisco.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 16c387d..490fc78 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -224,7 +224,7 @@ static void *get_send_wqe(struct mthca_qp *qp, int n)
 
 static void mthca_wq_init(struct mthca_wq *wq)
 {
-	spin_lock_init(&wq->lock);
+	/* mthca_alloc_qp_common() initializes the locks */
 	wq->next_ind  = 0;
 	wq->last_comp = wq->max - 1;
 	wq->head      = 0;
@@ -1114,6 +1114,9 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
 	qp->sq_policy    = send_policy;
 	mthca_wq_init(&qp->sq);
 	mthca_wq_init(&qp->rq);
+	/* these are initialized separately so lockdep can tell them apart */
+	spin_lock_init(&qp->sq.lock);
+	spin_lock_init(&qp->rq.lock);
 
 	ret = mthca_map_memfree(dev, qp);
 	if (ret)
-- 
cgit v0.10.2


From c6482dde1c2811afba289b2344268f850595f350 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@infradead.org>
Date: Tue, 4 Jul 2006 03:07:22 -0700
Subject: [PATCH] fix AB-BA deadlock inversion at cs46xx_dsp_remove_scb

There is a code sequence where the locking is substream->self_group.lock
-> ins->scbs[index].lock

substream->self_group.lock is interrupt safe, and taken from irq context
as well (trace is snipped for brevity)

so what can happen is

   cpu 0                   	cpu 1
   user context			user context

				take ins->scbs[index].lock without disabling interrupts

   get substream->self_group.lock (irqsafe)
   try to get ins->scbs[index].lock (spins)

				interrupt happens
				try to get substream->self_group.lock (spins)

which is an obvious AB-BA deadlock

fix is to just take the lock with _irqsafe

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jaroslav Kysela <perex@suse.cz>
Acked-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/sound/pci/cs46xx/dsp_spos_scb_lib.c b/sound/pci/cs46xx/dsp_spos_scb_lib.c
index 3844d18..232b337 100644
--- a/sound/pci/cs46xx/dsp_spos_scb_lib.c
+++ b/sound/pci/cs46xx/dsp_spos_scb_lib.c
@@ -180,6 +180,7 @@ static void _dsp_clear_sample_buffer (struct snd_cs46xx *chip, u32 sample_buffer
 void cs46xx_dsp_remove_scb (struct snd_cs46xx *chip, struct dsp_scb_descriptor * scb)
 {
 	struct dsp_spos_instance * ins = chip->dsp_spos_instance;
+	unsigned long flags;
 
 	/* check integrety */
 	snd_assert ( (scb->index >= 0 && 
@@ -194,9 +195,9 @@ void cs46xx_dsp_remove_scb (struct snd_cs46xx *chip, struct dsp_scb_descriptor *
 		     goto _end);
 #endif
 
-	spin_lock(&scb->lock);
+	spin_lock_irqsave(&scb->lock, flags);
 	_dsp_unlink_scb (chip,scb);
-	spin_unlock(&scb->lock);
+	spin_unlock_irqrestore(&scb->lock, flags);
 
 	cs46xx_dsp_proc_free_scb_desc(scb);
 	snd_assert (scb->scb_symbol != NULL, return );
-- 
cgit v0.10.2


From 7ad7153b051d9628ecd6a336b543ea6ef099bd2c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Tue, 4 Jul 2006 14:00:06 -0700
Subject: Fix up headers_install wrt devfs removal

No devfs_fs.h header any more..

Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index f7252be..2b8a7d6 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -8,7 +8,7 @@ header-y += affs_fs.h affs_hardblocks.h aio_abi.h a.out.h arcfb.h	\
 	atmppp.h atmsap.h atmsvc.h atm_zatm.h auto_fs4.h auxvec.h	\
 	awe_voice.h ax25.h b1lli.h baycom.h bfs_fs.h blkpg.h		\
 	bpqether.h cdk.h chio.h coda_psdev.h coff.h comstats.h		\
-	consolemap.h cycx_cfm.h devfs_fs.h dm-ioctl.h dn.h dqblk_v1.h	\
+	consolemap.h cycx_cfm.h dm-ioctl.h dn.h dqblk_v1.h		\
 	dqblk_v2.h dqblk_xfs.h efs_fs_sb.h elf-fdpic.h elf.h elf-em.h	\
 	fadvise.h fd.h fdreg.h ftape-header-segment.h ftape-vendors.h	\
 	fuse.h futex.h genetlink.h gen_stats.h gigaset_dev.h hdsmart.h	\
-- 
cgit v0.10.2