28 files changed, 125 insertions, 2005 deletions
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index a3fc437..389628f 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -4,7 +4,7 @@
 asflags-y := -ansi -DST_DIV0=0x02
 ccflags-y := -Werror
 
-lib-$(CONFIG_SPARC32) += mul.o rem.o sdiv.o udiv.o umul.o urem.o ashrdi3.o
+lib-$(CONFIG_SPARC32) += ashrdi3.o
 lib-$(CONFIG_SPARC32) += memcpy.o memset.o
 lib-y                 += strlen.o
 lib-y                 += checksum_$(BITS).o
@@ -13,7 +13,7 @@ lib-y                 += memscan_$(BITS).o memcmp.o strncmp_$(BITS).o
 lib-y                 += strncpy_from_user_$(BITS).o strlen_user_$(BITS).o
 lib-$(CONFIG_SPARC32) += divdi3.o udivdi3.o
 lib-$(CONFIG_SPARC32) += copy_user.o locks.o
-lib-y                 += atomic_$(BITS).o
+lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 
@@ -40,7 +40,7 @@ lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o
 lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
 
 obj-y                 += iomap.o
-obj-$(CONFIG_SPARC32) += atomic32.o
+obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
 obj-y                 += ksyms.o
 obj-$(CONFIG_SPARC64) += PeeCeeI.o
 obj-y                 += usercopy.o
diff --git a/arch/sparc/lib/ashldi3.S b/arch/sparc/lib/ashldi3.S
index 17912e6..86f60de 100644
--- a/arch/sparc/lib/ashldi3.S
+++ b/arch/sparc/lib/ashldi3.S
@@ -5,10 +5,10 @@
  * Copyright (C) 1999 David S. Miller (davem@redhat.com)
  */
 
+#include <linux/linkage.h>
+
 	.text
-	.align	4
-	.globl	__ashldi3
-__ashldi3:
+ENTRY(__ashldi3)
 	cmp	%o2, 0
 	be	9f
 	 mov	0x20, %g2
@@ -32,3 +32,4 @@ __ashldi3:
 9:
 	retl
 	 nop
+ENDPROC(__ashldi3)
diff --git a/arch/sparc/lib/ashrdi3.S b/arch/sparc/lib/ashrdi3.S
index 85398fd6..6eb8ba2 100644
--- a/arch/sparc/lib/ashrdi3.S
+++ b/arch/sparc/lib/ashrdi3.S
@@ -5,10 +5,10 @@
  * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
  */
 
+#include <linux/linkage.h>
+
 	.text
-	.align	4
-	.globl __ashrdi3
-__ashrdi3:
+ENTRY(__ashrdi3)
 	tst	%o2
 	be	3f
 	 or	%g0, 32, %g2
@@ -34,3 +34,4 @@ __ashrdi3:
 3:
 	jmpl	%o7 + 8, %g0
 	 nop
+ENDPROC(__ashrdi3)
diff --git a/arch/sparc/lib/atomic_32.S b/arch/sparc/lib/atomic_32.S
deleted file mode 100644
index eb6c735..0000000
--- a/arch/sparc/lib/atomic_32.S
+++ /dev/null
@@ -1,44 +0,0 @@
-/* atomic.S: Move this stuff here for better ICACHE hit rates.
- *
- * Copyright (C) 1996 David S. Miller (davem@caipfs.rutgers.edu)
- */
-
-#include <asm/ptrace.h>
-#include <asm/psr.h>
-
-	.text
-	.align	4
-
-	.globl  __atomic_begin
-__atomic_begin:
-
-#ifndef CONFIG_SMP
-	.globl	___xchg32_sun4c
-___xchg32_sun4c:
-	rd	%psr, %g3
-	andcc	%g3, PSR_PIL, %g0
-	bne	1f
-	 nop
-	wr	%g3, PSR_PIL, %psr
-	nop; nop; nop
-1:
-	andcc	%g3, PSR_PIL, %g0
-	ld	[%g1], %g7
-	bne	1f
-	 st	%g2, [%g1]
-	wr	%g3, 0x0, %psr
-	nop; nop; nop
-1:
-	mov	%g7, %g2
-	jmpl	%o7 + 8, %g0
-	 mov	%g4, %o7
-
-	.globl	___xchg32_sun4md
-___xchg32_sun4md:
-	swap	[%g1], %g2
-	jmpl	%o7 + 8, %g0
-	 mov	%g4, %o7
-#endif
-
-	.globl  __atomic_end
-__atomic_end:
diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S
index 59186e0..4d502da 100644
--- a/arch/sparc/lib/atomic_64.S
+++ b/arch/sparc/lib/atomic_64.S
@@ -3,6 +3,7 @@
  * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net)
  */
 
+#include <linux/linkage.h>
 #include <asm/asi.h>
 #include <asm/backoff.h>
 
@@ -13,9 +14,7 @@
 	 * memory barriers, and a second which returns
 	 * a value and does the barriers.
 	 */
-	.globl	atomic_add
-	.type	atomic_add,#function
-atomic_add: /* %o0 = increment, %o1 = atomic_ptr */
+ENTRY(atomic_add) /* %o0 = increment, %o1 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
 1:	lduw	[%o1], %g1
 	add	%g1, %o0, %g7
@@ -26,11 +25,9 @@ atomic_add: /* %o0 = increment, %o1 = atomic_ptr */
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
-	.size	atomic_add, .-atomic_add
+ENDPROC(atomic_add)
 
-	.globl	atomic_sub
-	.type	atomic_sub,#function
-atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */
+ENTRY(atomic_sub) /* %o0 = decrement, %o1 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
 1:	lduw	[%o1], %g1
 	sub	%g1, %o0, %g7
@@ -41,11 +38,9 @@ atomic_sub: /* %o0 = decrement, %o1 = atomic_ptr */
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
-	.size	atomic_sub, .-atomic_sub
+ENDPROC(atomic_sub)
 
-	.globl	atomic_add_ret
-	.type	atomic_add_ret,#function
-atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
+ENTRY(atomic_add_ret) /* %o0 = increment, %o1 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
 1:	lduw	[%o1], %g1
 	add	%g1, %o0, %g7
@@ -56,11 +51,9 @@ atomic_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
 	retl
 	 sra	%g1, 0, %o0
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
-	.size	atomic_add_ret, .-atomic_add_ret
+ENDPROC(atomic_add_ret)
 
-	.globl	atomic_sub_ret
-	.type	atomic_sub_ret,#function
-atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
+ENTRY(atomic_sub_ret) /* %o0 = decrement, %o1 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
 1:	lduw	[%o1], %g1
 	sub	%g1, %o0, %g7
@@ -71,11 +64,9 @@ atomic_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
 	retl
 	 sra	%g1, 0, %o0
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
-	.size	atomic_sub_ret, .-atomic_sub_ret
+ENDPROC(atomic_sub_ret)
 
-	.globl	atomic64_add
-	.type	atomic64_add,#function
-atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */
+ENTRY(atomic64_add) /* %o0 = increment, %o1 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
 1:	ldx	[%o1], %g1
 	add	%g1, %o0, %g7
@@ -86,11 +77,9 @@ atomic64_add: /* %o0 = increment, %o1 = atomic_ptr */
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
-	.size	atomic64_add, .-atomic64_add
+ENDPROC(atomic64_add)
 
-	.globl	atomic64_sub
-	.type	atomic64_sub,#function
-atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */
+ENTRY(atomic64_sub) /* %o0 = decrement, %o1 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
 1:	ldx	[%o1], %g1
 	sub	%g1, %o0, %g7
@@ -101,11 +90,9 @@ atomic64_sub: /* %o0 = decrement, %o1 = atomic_ptr */
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
-	.size	atomic64_sub, .-atomic64_sub
+ENDPROC(atomic64_sub)
 
-	.globl	atomic64_add_ret
-	.type	atomic64_add_ret,#function
-atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
+ENTRY(atomic64_add_ret) /* %o0 = increment, %o1 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
 1:	ldx	[%o1], %g1
 	add	%g1, %o0, %g7
@@ -116,11 +103,9 @@ atomic64_add_ret: /* %o0 = increment, %o1 = atomic_ptr */
 	retl
 	 add	%g1, %o0, %o0
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
-	.size	atomic64_add_ret, .-atomic64_add_ret
+ENDPROC(atomic64_add_ret)
 
-	.globl	atomic64_sub_ret
-	.type	atomic64_sub_ret,#function
-atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
+ENTRY(atomic64_sub_ret) /* %o0 = decrement, %o1 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
 1:	ldx	[%o1], %g1
 	sub	%g1, %o0, %g7
@@ -131,4 +116,4 @@ atomic64_sub_ret: /* %o0 = decrement, %o1 = atomic_ptr */
 	retl
 	 sub	%g1, %o0, %o0
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
-	.size	atomic64_sub_ret, .-atomic64_sub_ret
+ENDPROC(atomic64_sub_ret)
diff --git a/arch/sparc/lib/bitops.S b/arch/sparc/lib/bitops.S
index 3dc61d5..36f72cc 100644
--- a/arch/sparc/lib/bitops.S
+++ b/arch/sparc/lib/bitops.S
@@ -3,14 +3,13 @@
  * Copyright (C) 2000, 2007 David S. Miller (davem@davemloft.net)
  */
 
+#include <linux/linkage.h>
 #include <asm/asi.h>
 #include <asm/backoff.h>
 
 	.text
 
-	.globl	test_and_set_bit
-	.type	test_and_set_bit,#function
-test_and_set_bit:	/* %o0=nr, %o1=addr */
+ENTRY(test_and_set_bit)	/* %o0=nr, %o1=addr */
 	BACKOFF_SETUP(%o3)
 	srlx	%o0, 6, %g1
 	mov	1, %o2
@@ -29,11 +28,9 @@ test_and_set_bit:	/* %o0=nr, %o1=addr */
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o3, %o4, 1b)
-	.size	test_and_set_bit, .-test_and_set_bit
+ENDPROC(test_and_set_bit)
 
-	.globl	test_and_clear_bit
-	.type	test_and_clear_bit,#function
-test_and_clear_bit:	/* %o0=nr, %o1=addr */
+ENTRY(test_and_clear_bit) /* %o0=nr, %o1=addr */
 	BACKOFF_SETUP(%o3)
 	srlx	%o0, 6, %g1
 	mov	1, %o2
@@ -52,11 +49,9 @@ test_and_clear_bit:	/* %o0=nr, %o1=addr */
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o3, %o4, 1b)
-	.size	test_and_clear_bit, .-test_and_clear_bit
+ENDPROC(test_and_clear_bit)
 
-	.globl	test_and_change_bit
-	.type	test_and_change_bit,#function
-test_and_change_bit:	/* %o0=nr, %o1=addr */
+ENTRY(test_and_change_bit) /* %o0=nr, %o1=addr */
 	BACKOFF_SETUP(%o3)
 	srlx	%o0, 6, %g1
 	mov	1, %o2
@@ -75,11 +70,9 @@ test_and_change_bit:	/* %o0=nr, %o1=addr */
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o3, %o4, 1b)
-	.size	test_and_change_bit, .-test_and_change_bit
+ENDPROC(test_and_change_bit)
 
-	.globl	set_bit
-	.type	set_bit,#function
-set_bit:		/* %o0=nr, %o1=addr */
+ENTRY(set_bit) /* %o0=nr, %o1=addr */
 	BACKOFF_SETUP(%o3)
 	srlx	%o0, 6, %g1
 	mov	1, %o2
@@ -96,11 +89,9 @@ set_bit:		/* %o0=nr, %o1=addr */
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o3, %o4, 1b)
-	.size	set_bit, .-set_bit
+ENDPROC(set_bit)
 
-	.globl	clear_bit
-	.type	clear_bit,#function
-clear_bit:		/* %o0=nr, %o1=addr */
+ENTRY(clear_bit) /* %o0=nr, %o1=addr */
 	BACKOFF_SETUP(%o3)
 	srlx	%o0, 6, %g1
 	mov	1, %o2
@@ -117,11 +108,9 @@ clear_bit:		/* %o0=nr, %o1=addr */
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o3, %o4, 1b)
-	.size	clear_bit, .-clear_bit
+ENDPROC(clear_bit)
 
-	.globl	change_bit
-	.type	change_bit,#function
-change_bit:		/* %o0=nr, %o1=addr */
+ENTRY(change_bit) /* %o0=nr, %o1=addr */
 	BACKOFF_SETUP(%o3)
 	srlx	%o0, 6, %g1
 	mov	1, %o2
@@ -138,4 +127,4 @@ change_bit:		/* %o0=nr, %o1=addr */
 	retl
 	 nop
 2:	BACKOFF_SPIN(%o3, %o4, 1b)
-	.size	change_bit, .-change_bit
+ENDPROC(change_bit)
diff --git a/arch/sparc/lib/blockops.S b/arch/sparc/lib/blockops.S
index 804be87..3c77101 100644
--- a/arch/sparc/lib/blockops.S
+++ b/arch/sparc/lib/blockops.S
@@ -4,6 +4,7 @@
  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  */
 
+#include <linux/linkage.h>
 #include <asm/page.h>
 
 	/* Zero out 64 bytes of memory at (buf + offset).
@@ -44,10 +45,7 @@
 	 */
 
 	.text
-	.align	4
-	.globl	bzero_1page, __copy_1page
-
-bzero_1page:
+ENTRY(bzero_1page)
 /* NOTE: If you change the number of insns of this routine, please check
  * arch/sparc/mm/hypersparc.S */
 	/* %o0 = buf */
@@ -65,8 +63,9 @@ bzero_1page:
 
 	retl
 	 nop
+ENDPROC(bzero_1page)
 
-__copy_1page:
+ENTRY(__copy_1page)
 /* NOTE: If you change the number of insns of this routine, please check
  * arch/sparc/mm/hypersparc.S */
 	/* %o0 = dst, %o1 = src */
@@ -87,3 +86,4 @@ __copy_1page:
 
 	retl
 	 nop
+ENDPROC(__copy_1page)
diff --git a/arch/sparc/lib/bzero.S b/arch/sparc/lib/bzero.S
index 615f401..8c05811 100644
--- a/arch/sparc/lib/bzero.S
+++ b/arch/sparc/lib/bzero.S
@@ -4,11 +4,11 @@
  * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
  */
 
+#include <linux/linkage.h>
+
 	.text
 
-	.globl	memset
-	.type	memset, #function
-memset:			/* %o0=buf, %o1=pat, %o2=len */
+ENTRY(memset) /* %o0=buf, %o1=pat, %o2=len */
 	and		%o1, 0xff, %o3
 	mov		%o2, %o1
 	sllx		%o3, 8, %g1
@@ -19,9 +19,7 @@ memset:			/* %o0=buf, %o1=pat, %o2=len */
 	ba,pt		%xcc, 1f
 	 or		%g1, %o2, %o2
 
-	.globl	__bzero
-	.type	__bzero, #function
-__bzero:		/* %o0=buf, %o1=len */
+ENTRY(__bzero) /* %o0=buf, %o1=len */
 	clr		%o2
 1:	mov		%o0, %o3
 	brz,pn		%o1, __bzero_done
@@ -78,8 +76,8 @@ __bzero_tiny:
 __bzero_done:
 	retl
 	 mov		%o3, %o0
-	.size		__bzero, .-__bzero
-	.size		memset, .-memset
+ENDPROC(__bzero)
+ENDPROC(memset)
 
 #define EX_ST(x,y)		\
 98:	x,y;			\
@@ -89,9 +87,7 @@ __bzero_done:
 	.text;			\
 	.align 4;
 
-	.globl	__clear_user
-	.type	__clear_user, #function
-__clear_user:		/* %o0=buf, %o1=len */
+ENTRY(__clear_user) /* %o0=buf, %o1=len */
 	brz,pn		%o1, __clear_user_done
 	 cmp		%o1, 16
 	bl,pn		%icc, __clear_user_tiny
@@ -146,4 +142,4 @@ __clear_user_tiny:
 __clear_user_done:
 	retl
 	 clr		%o0
-	.size		__clear_user, .-__clear_user
+ENDPROC(__clear_user)
diff --git a/arch/sparc/lib/divdi3.S b/arch/sparc/lib/divdi3.S
index d74bc09..9614b48 100644
--- a/arch/sparc/lib/divdi3.S
+++ b/arch/sparc/lib/divdi3.S
@@ -19,7 +19,6 @@ Boston, MA 02111-1307, USA.  */
 
 	.text
 	.align 4
-	.global .udiv
 	.globl __divdi3
 __divdi3:
 	save %sp,-104,%sp
@@ -83,8 +82,9 @@ __divdi3:
 	bne .LL85
 	mov %i0,%o2
 	mov 1,%o0
-	call .udiv,0
 	mov 0,%o1
+	wr %g0, 0, %y
+	udiv %o0, %o1, %o0
 	mov %o0,%o4
 	mov %i0,%o2
 .LL85:
diff --git a/arch/sparc/lib/ipcsum.S b/arch/sparc/lib/ipcsum.S
index 58ca5b9..4742d59 100644
--- a/arch/sparc/lib/ipcsum.S
+++ b/arch/sparc/lib/ipcsum.S
@@ -1,8 +1,7 @@
+#include <linux/linkage.h>
+
 	.text
-	.align	32
-	.globl	ip_fast_csum
-	.type	ip_fast_csum,#function
-ip_fast_csum:	/* %o0 = iph, %o1 = ihl */
+ENTRY(ip_fast_csum) /* %o0 = iph, %o1 = ihl */
 	sub	%o1, 4, %g7
 	lduw	[%o0 + 0x00], %o2
 	lduw	[%o0 + 0x04], %g2
@@ -31,4 +30,4 @@ ip_fast_csum:	/* %o0 = iph, %o1 = ihl */
 	set	0xffff, %o1
 	retl
 	 and	%o2, %o1, %o0
-	.size	ip_fast_csum, .-ip_fast_csum
+ENDPROC(ip_fast_csum)
diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c
index f73c224..2dc3087 100644
--- a/arch/sparc/lib/ksyms.c
+++ b/arch/sparc/lib/ksyms.c
@@ -56,23 +56,11 @@ extern int __divdi3(int, int);
 extern void (*__copy_1page)(void *, const void *);
 extern void (*bzero_1page)(void *);
 
-extern int __strncmp(const char *, const char *, __kernel_size_t);
-
 extern void ___rw_read_enter(void);
 extern void ___rw_read_try(void);
 extern void ___rw_read_exit(void);
 extern void ___rw_write_enter(void);
 
-/* Alias functions whose names begin with "." and export the aliases.
- * The module references will be fixed up by module_frob_arch_sections.
- */
-extern int _Div(int, int);
-extern int _Mul(int, int);
-extern int _Rem(int, int);
-extern unsigned _Udiv(unsigned, unsigned);
-extern unsigned _Umul(unsigned, unsigned);
-extern unsigned _Urem(unsigned, unsigned);
-
 /* Networking helper routines. */
 EXPORT_SYMBOL(__csum_partial_copy_sparc_generic);
 
@@ -81,9 +69,6 @@ EXPORT_SYMBOL(__copy_1page);
 EXPORT_SYMBOL(__memmove);
 EXPORT_SYMBOL(bzero_1page);
 
-/* string functions */
-EXPORT_SYMBOL(__strncmp);
-
 /* Moving data to/from/in userspace. */
 EXPORT_SYMBOL(__copy_user);
 
@@ -100,13 +85,6 @@ EXPORT_SYMBOL(__ashldi3);
 EXPORT_SYMBOL(__lshrdi3);
 EXPORT_SYMBOL(__muldi3);
 EXPORT_SYMBOL(__divdi3);
-
-EXPORT_SYMBOL(_Rem);
-EXPORT_SYMBOL(_Urem);
-EXPORT_SYMBOL(_Mul);
-EXPORT_SYMBOL(_Umul);
-EXPORT_SYMBOL(_Div);
-EXPORT_SYMBOL(_Udiv);
 #endif
 
 /*
diff --git a/arch/sparc/lib/lshrdi3.S b/arch/sparc/lib/lshrdi3.S
index 47a1354..60ebc7c 100644
--- a/arch/sparc/lib/lshrdi3.S
+++ b/arch/sparc/lib/lshrdi3.S
@@ -1,6 +1,6 @@
+#include <linux/linkage.h>
 
-	.globl	__lshrdi3
-__lshrdi3:
+ENTRY(__lshrdi3)
 	cmp	%o2, 0
 	be	3f
 	 mov	0x20, %g2
@@ -24,3 +24,4 @@ __lshrdi3:
 3:
 	retl 
 	 nop 
+ENDPROC(__lshrdi3)
diff --git a/arch/sparc/lib/memmove.S b/arch/sparc/lib/memmove.S
index 9739580..b7f6334 100644
--- a/arch/sparc/lib/memmove.S
+++ b/arch/sparc/lib/memmove.S
@@ -4,11 +4,10 @@
  * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
  */
 
+#include <linux/linkage.h>
+
 	.text
-	.align		32
-	.globl		memmove
-	.type		memmove,#function
-memmove:		/* o0=dst o1=src o2=len */
+ENTRY(memmove) /* o0=dst o1=src o2=len */
 	mov		%o0, %g1
 	cmp		%o0, %o1
 	bleu,pt		%xcc, memcpy
@@ -28,4 +27,4 @@ memmove:		/* o0=dst o1=src o2=len */
 
 	retl
 	 mov		%g1, %o0
-	.size		memmove, .-memmove
+ENDPROC(memmove)
diff --git a/arch/sparc/lib/mul.S b/arch/sparc/lib/mul.S
deleted file mode 100644
index c45470d..0000000
--- a/arch/sparc/lib/mul.S
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * mul.S:       This routine was taken from glibc-1.09 and is covered
- *              by the GNU Library General Public License Version 2.
- */
-
-/*
- * Signed multiply, from Appendix E of the Sparc Version 8
- * Architecture Manual.
- */
-
-/*
- * Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the upper 32 bits of
- * the 64-bit product).
- *
- * This code optimizes short (less than 13-bit) multiplies.
- */
-
-	.globl .mul
-	.globl _Mul
-.mul:
-_Mul:	/* needed for export */
-	mov	%o0, %y		! multiplier -> Y
-	andncc	%o0, 0xfff, %g0	! test bits 12..31
-	be	Lmul_shortway	! if zero, can do it the short way
-	 andcc	%g0, %g0, %o4	! zero the partial product and clear N and V
-
-	/*
-	 * Long multiply.  32 steps, followed by a final shift step.
-	 */
-	mulscc	%o4, %o1, %o4	! 1
-	mulscc	%o4, %o1, %o4	! 2
-	mulscc	%o4, %o1, %o4	! 3
-	mulscc	%o4, %o1, %o4	! 4
-	mulscc	%o4, %o1, %o4	! 5
-	mulscc	%o4, %o1, %o4	! 6
-	mulscc	%o4, %o1, %o4	! 7
-	mulscc	%o4, %o1, %o4	! 8
-	mulscc	%o4, %o1, %o4	! 9
-	mulscc	%o4, %o1, %o4	! 10
-	mulscc	%o4, %o1, %o4	! 11
-	mulscc	%o4, %o1, %o4	! 12
-	mulscc	%o4, %o1, %o4	! 13
-	mulscc	%o4, %o1, %o4	! 14
-	mulscc	%o4, %o1, %o4	! 15
-	mulscc	%o4, %o1, %o4	! 16
-	mulscc	%o4, %o1, %o4	! 17
-	mulscc	%o4, %o1, %o4	! 18
-	mulscc	%o4, %o1, %o4	! 19
-	mulscc	%o4, %o1, %o4	! 20
-	mulscc	%o4, %o1, %o4	! 21
-	mulscc	%o4, %o1, %o4	! 22
-	mulscc	%o4, %o1, %o4	! 23
-	mulscc	%o4, %o1, %o4	! 24
-	mulscc	%o4, %o1, %o4	! 25
-	mulscc	%o4, %o1, %o4	! 26
-	mulscc	%o4, %o1, %o4	! 27
-	mulscc	%o4, %o1, %o4	! 28
-	mulscc	%o4, %o1, %o4	! 29
-	mulscc	%o4, %o1, %o4	! 30
-	mulscc	%o4, %o1, %o4	! 31
-	mulscc	%o4, %o1, %o4	! 32
-	mulscc	%o4, %g0, %o4	! final shift
-
-	! If %o0 was negative, the result is
-	!	(%o0 * %o1) + (%o1 << 32))
-	! We fix that here.
-
-#if 0
-	tst	%o0
-	bge	1f
-	 rd	%y, %o0
-
-	! %o0 was indeed negative; fix upper 32 bits of result by subtracting 
-	! %o1 (i.e., return %o4 - %o1 in %o1).
-	retl
-	 sub	%o4, %o1, %o1
-
-1:
-	retl
-	 mov	%o4, %o1
-#else
-	/* Faster code adapted from tege@sics.se's code for umul.S.  */
-	sra	%o0, 31, %o2	! make mask from sign bit
-	and	%o1, %o2, %o2	! %o2 = 0 or %o1, depending on sign of %o0
-	rd	%y, %o0		! get lower half of product
-	retl
-	 sub	%o4, %o2, %o1	! subtract compensation 
-				!  and put upper half in place
-#endif
-
-Lmul_shortway:
-	/*
-	 * Short multiply.  12 steps, followed by a final shift step.
-	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
-	 * but there is no problem with %o0 being negative (unlike above).
-	 */
-	mulscc	%o4, %o1, %o4	! 1
-	mulscc	%o4, %o1, %o4	! 2
-	mulscc	%o4, %o1, %o4	! 3
-	mulscc	%o4, %o1, %o4	! 4
-	mulscc	%o4, %o1, %o4	! 5
-	mulscc	%o4, %o1, %o4	! 6
-	mulscc	%o4, %o1, %o4	! 7
-	mulscc	%o4, %o1, %o4	! 8
-	mulscc	%o4, %o1, %o4	! 9
-	mulscc	%o4, %o1, %o4	! 10
-	mulscc	%o4, %o1, %o4	! 11
-	mulscc	%o4, %o1, %o4	! 12
-	mulscc	%o4, %g0, %o4	! final shift
-
-	/*
-	 *  %o4 has 20 of the bits that should be in the low part of the
-	 * result; %y has the bottom 12 (as %y's top 12).  That is:
-	 *
-	 *	  %o4		    %y
-	 * +----------------+----------------+
-	 * | -12- |   -20-  | -12- |   -20-  |
-	 * +------(---------+------)---------+
-	 *  --hi-- ----low-part----
-	 *
-	 * The upper 12 bits of %o4 should be sign-extended to form the
-	 * high part of the product (i.e., highpart = %o4 >> 20).
-	 */
-
-	rd	%y, %o5
-	sll	%o4, 12, %o0	! shift middle bits left 12
-	srl	%o5, 20, %o5	! shift low bits right 20, zero fill at left
-	or	%o5, %o0, %o0	! construct low part of result
-	retl
-	 sra	%o4, 20, %o1	! ... and extract high part of result
-
-	.globl	.mul_patch
-.mul_patch:
-	smul	%o0, %o1, %o0
-	retl
-	 rd	%y, %o1
-	nop
diff --git a/arch/sparc/lib/muldi3.S b/arch/sparc/lib/muldi3.S
index 7f17872..9794939 100644
--- a/arch/sparc/lib/muldi3.S
+++ b/arch/sparc/lib/muldi3.S
@@ -63,12 +63,12 @@ __muldi3:
 	rd  %y, %o1
 	mov  %o1, %l3
 	mov  %i1, %o0
-	call  .umul
 	mov  %i2, %o1
+	umul %o0, %o1, %o0
 	mov  %o0, %l0
 	mov  %i0, %o0
-	call  .umul
 	mov  %i3, %o1
+	umul %o0, %o1, %o0
 	add  %l0, %o0, %l0
 	mov  %l2, %i0
 	add  %l2, %l0, %i0
diff --git a/arch/sparc/lib/rem.S b/arch/sparc/lib/rem.S
deleted file mode 100644
index 42fb862..0000000
--- a/arch/sparc/lib/rem.S
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * rem.S:       This routine was taken from glibc-1.09 and is covered
- *              by the GNU Library General Public License Version 2.
- */
-
-
-/* This file is generated from divrem.m4; DO NOT EDIT! */
-/*
- * Division and remainder, from Appendix E of the Sparc Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
- */
-
-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- *  .rem	name of function to generate
- *  rem		rem=div => %o0 / %o1; rem=rem => %o0 % %o1
- *  true		true=true => signed; true=false => unsigned
- *
- * Algorithm parameters:
- *  N		how many bits per iteration we try to get (4)
- *  WORDSIZE	total number of bits (32)
- *
- * Derived constants:
- *  TOPBITS	number of bits in the top decade of a number
- *
- * Important variables:
- *  Q		the partial quotient under development (initially 0)
- *  R		the remainder so far, initially the dividend
- *  ITER	number of main division loop iterations required;
- *		equal to ceil(log2(quotient) / N).  Note that this
- *		is the log base (2^N) of the quotient.
- *  V		the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- *  Current estimate for non-large dividend is
- *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
- *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
- *  different path, as the upper bits of the quotient must be developed
- *  one bit at a time.
- */
-
-
-	.globl .rem
-	.globl _Rem
-.rem:
-_Rem:	/* needed for export */
-	! compute sign of result; if neither is negative, no problem
-	orcc	%o1, %o0, %g0	! either negative?
-	bge	2f			! no, go do the divide
-	 mov	%o0, %g2	! compute sign in any case
-
-	tst	%o1
-	bge	1f
-	 tst	%o0
-	! %o1 is definitely negative; %o0 might also be negative
-	bge	2f			! if %o0 not negative...
-	 sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
-1:	! %o0 is negative, %o1 is nonnegative
-	sub	%g0, %o0, %o0	! make %o0 nonnegative
-2:
-
-	! Ready to divide.  Compute size of quotient; scale comparand.
-	orcc	%o1, %g0, %o5
-	bne	1f
-	 mov	%o0, %o3
-
-		! Divide by zero trap.  If it returns, return 0 (about as
-		! wrong as possible, but that is what SunOS does...).
-		ta	ST_DIV0
-		retl
-		 clr	%o0
-
-1:
-	cmp	%o3, %o5			! if %o1 exceeds %o0, done
-	blu	Lgot_result		! (and algorithm fails otherwise)
-	 clr	%o2
-
-	sethi	%hi(1 << (32 - 4 - 1)), %g1
-
-	cmp	%o3, %g1
-	blu	Lnot_really_big
-	 clr	%o4
-
-	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
-	! as our usual N-at-a-shot divide step will cause overflow and havoc.
-	! The number of bits in the result here is N*ITER+SC, where SC <= N.
-	! Compute ITER in an unorthodox manner: know we need to shift V into
-	! the top decade: so do not even bother to compare to R.
-	1:
-		cmp	%o5, %g1
-		bgeu	3f
-		 mov	1, %g7
-
-		sll	%o5, 4, %o5
-
-		b	1b
-		 add	%o4, 1, %o4
-
-	! Now compute %g7.
-	2:
-		addcc	%o5, %o5, %o5
-
-		bcc	Lnot_too_big
-		 add	%g7, 1, %g7
-
-		! We get here if the %o1 overflowed while shifting.
-		! This means that %o3 has the high-order bit set.
-		! Restore %o5 and subtract from %o3.
-		sll	%g1, 4, %g1	! high order bit
-		srl	%o5, 1, %o5		! rest of %o5
-		add	%o5, %g1, %o5
-
-		b	Ldo_single_div
-		 sub	%g7, 1, %g7
-
-	Lnot_too_big:
-	3:
-		cmp	%o5, %o3
-		blu	2b
-		 nop
-
-		be	Ldo_single_div
-		 nop
-	/* NB: these are commented out in the V8-Sparc manual as well */
-	/* (I do not understand this) */
-	! %o5 > %o3: went too far: back up 1 step
-	!	srl	%o5, 1, %o5
-	!	dec	%g7
-	! do single-bit divide steps
-	!
-	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
-	! first divide step without thinking.  BUT, the others are conditional,
-	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
-	! order bit set in the first step, just falling into the regular
-	! division loop will mess up the first time around.
-	! So we unroll slightly...
-	Ldo_single_div:
-		subcc	%g7, 1, %g7
-		bl	Lend_regular_divide
-		 nop
-
-		sub	%o3, %o5, %o3
-		mov	1, %o2
-
-		b	Lend_single_divloop
-		 nop
-	Lsingle_divloop:
-		sll	%o2, 1, %o2
-
-		bl	1f
-		 srl	%o5, 1, %o5
-		! %o3 >= 0
-		sub	%o3, %o5, %o3
-
-		b	2f
-		 add	%o2, 1, %o2
-	1:	! %o3 < 0
-		add	%o3, %o5, %o3
-		sub	%o2, 1, %o2
-	2:
-	Lend_single_divloop:
-		subcc	%g7, 1, %g7
-		bge	Lsingle_divloop
-		 tst	%o3
-
-		b,a	Lend_regular_divide
-
-Lnot_really_big:
-1:
-	sll	%o5, 4, %o5
-	cmp	%o5, %o3
-	bleu	1b
-	 addcc	%o4, 1, %o4
-	be	Lgot_result
-	 sub	%o4, 1, %o4
-
-	tst	%o3	! set up for initial iteration
-Ldivloop:
-	sll	%o2, 4, %o2
-		! depth 1, accumulated bits 0
-	bl	L.1.16
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 2, accumulated bits 1
-	bl	L.2.17
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 3, accumulated bits 3
-	bl	L.3.19
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 7
-	bl	L.4.23
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-
-	b	9f
-	 add	%o2, (7*2+1), %o2
-	
-L.4.23:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (7*2-1), %o2
-	
-L.3.19:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 5
-	bl	L.4.21
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (5*2+1), %o2
-	
-L.4.21:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (5*2-1), %o2
-	
-L.2.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 3, accumulated bits 1
-	bl	L.3.17
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 3
-	bl	L.4.19
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (3*2+1), %o2
-
-L.4.19:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (3*2-1), %o2
-
-L.3.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 1
-	bl	L.4.17
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (1*2+1), %o2
-
-L.4.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (1*2-1), %o2
-
-L.1.16:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 2, accumulated bits -1
-	bl	L.2.15
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 3, accumulated bits -1
-	bl	L.3.15
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -1
-	bl	L.4.15
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-1*2+1), %o2
-
-L.4.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-1*2-1), %o2
-
-L.3.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -3
-	bl	L.4.13
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-3*2+1), %o2
-
-L.4.13:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-3*2-1), %o2
-
-L.2.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 3, accumulated bits -3
-	bl	L.3.13
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -5
-	bl	L.4.11
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-5*2+1), %o2
-
-L.4.11:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-5*2-1), %o2
-
-
-L.3.13:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -7
-	bl	L.4.9
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-7*2+1), %o2
-
-L.4.9:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-7*2-1), %o2
-
-	9:
-Lend_regular_divide:
-	subcc	%o4, 1, %o4
-	bge	Ldivloop
-	 tst	%o3
-
-	bl,a	Lgot_result
-	! non-restoring fixup here (one instruction only!)
-	add	%o3, %o1, %o3
-
-Lgot_result:
-	! check to see if answer should be < 0
-	tst	%g2
-	bl,a	1f
-	 sub %g0, %o3, %o3
-1:
-	retl
-	 mov %o3, %o0
-
-	.globl	.rem_patch
-.rem_patch:
-	sra	%o0, 0x1f, %o4
-	wr	%o4, 0x0, %y
-	nop
-	nop
-	nop
-	sdivcc	%o0, %o1, %o2
-	bvs,a	1f
-	 xnor	%o2, %g0, %o2
-1:	smul	%o2, %o1, %o2
-	retl
-	 sub	%o0, %o2, %o0
-	nop
diff --git a/arch/sparc/lib/sdiv.S b/arch/sparc/lib/sdiv.S
deleted file mode 100644
index f0a0d4e..0000000
--- a/arch/sparc/lib/sdiv.S
+++ /dev/null
@@ -1,381 +0,0 @@
-/*
- * sdiv.S:      This routine was taken from glibc-1.09 and is covered
- *              by the GNU Library General Public License Version 2.
- */
-
-
-/* This file is generated from divrem.m4; DO NOT EDIT! */
-/*
- * Division and remainder, from Appendix E of the Sparc Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
- */
-
-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- *  .div	name of function to generate
- *  div		div=div => %o0 / %o1; div=rem => %o0 % %o1
- *  true		true=true => signed; true=false => unsigned
- *
- * Algorithm parameters:
- *  N		how many bits per iteration we try to get (4)
- *  WORDSIZE	total number of bits (32)
- *
- * Derived constants:
- *  TOPBITS	number of bits in the top decade of a number
- *
- * Important variables:
- *  Q		the partial quotient under development (initially 0)
- *  R		the remainder so far, initially the dividend
- *  ITER	number of main division loop iterations required;
- *		equal to ceil(log2(quotient) / N).  Note that this
- *		is the log base (2^N) of the quotient.
- *  V		the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- *  Current estimate for non-large dividend is
- *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
- *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
- *  different path, as the upper bits of the quotient must be developed
- *  one bit at a time.
- */
-
-
-	.globl .div
-	.globl _Div
-.div:
-_Div:	/* needed for export */
-	! compute sign of result; if neither is negative, no problem
-	orcc	%o1, %o0, %g0	! either negative?
-	bge	2f			! no, go do the divide
-	 xor	%o1, %o0, %g2	! compute sign in any case
-
-	tst	%o1
-	bge	1f
-	 tst	%o0
-	! %o1 is definitely negative; %o0 might also be negative
-	bge	2f			! if %o0 not negative...
-	 sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
-1:	! %o0 is negative, %o1 is nonnegative
-	sub	%g0, %o0, %o0	! make %o0 nonnegative
-2:
-
-	! Ready to divide.  Compute size of quotient; scale comparand.
-	orcc	%o1, %g0, %o5
-	bne	1f
-	 mov	%o0, %o3
-
-		! Divide by zero trap.  If it returns, return 0 (about as
-		! wrong as possible, but that is what SunOS does...).
-		ta	ST_DIV0
-		retl
-		 clr	%o0
-
-1:
-	cmp	%o3, %o5			! if %o1 exceeds %o0, done
-	blu	Lgot_result		! (and algorithm fails otherwise)
-	 clr	%o2
-
-	sethi	%hi(1 << (32 - 4 - 1)), %g1
-
-	cmp	%o3, %g1
-	blu	Lnot_really_big
-	 clr	%o4
-
-	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
-	! as our usual N-at-a-shot divide step will cause overflow and havoc.
-	! The number of bits in the result here is N*ITER+SC, where SC <= N.
-	! Compute ITER in an unorthodox manner: know we need to shift V into
-	! the top decade: so do not even bother to compare to R.
-	1:
-		cmp	%o5, %g1
-		bgeu	3f
-		 mov	1, %g7
-
-		sll	%o5, 4, %o5
-
-		b	1b
-		 add	%o4, 1, %o4
-
-	! Now compute %g7.
-	2:
-		addcc	%o5, %o5, %o5
-		bcc	Lnot_too_big
-		 add	%g7, 1, %g7
-
-		! We get here if the %o1 overflowed while shifting.
-		! This means that %o3 has the high-order bit set.
-		! Restore %o5 and subtract from %o3.
-		sll	%g1, 4, %g1	! high order bit
-		srl	%o5, 1, %o5		! rest of %o5
-		add	%o5, %g1, %o5
-
-		b	Ldo_single_div
-		 sub	%g7, 1, %g7
-
-	Lnot_too_big:
-	3:
-		cmp	%o5, %o3
-		blu	2b
-		 nop
-
-		be	Ldo_single_div
-		 nop
-	/* NB: these are commented out in the V8-Sparc manual as well */
-	/* (I do not understand this) */
-	! %o5 > %o3: went too far: back up 1 step
-	!	srl	%o5, 1, %o5
-	!	dec	%g7
-	! do single-bit divide steps
-	!
-	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
-	! first divide step without thinking.  BUT, the others are conditional,
-	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
-	! order bit set in the first step, just falling into the regular
-	! division loop will mess up the first time around.
-	! So we unroll slightly...
-	Ldo_single_div:
-		subcc	%g7, 1, %g7
-		bl	Lend_regular_divide
-		 nop
-
-		sub	%o3, %o5, %o3
-		mov	1, %o2
-
-		b	Lend_single_divloop
-		 nop
-	Lsingle_divloop:
-		sll	%o2, 1, %o2
-
-		bl	1f
-		 srl	%o5, 1, %o5
-		! %o3 >= 0
-		sub	%o3, %o5, %o3
-
-		b	2f
-		 add	%o2, 1, %o2
-	1:	! %o3 < 0
-		add	%o3, %o5, %o3
-		sub	%o2, 1, %o2
-	2:
-	Lend_single_divloop:
-		subcc	%g7, 1, %g7
-		bge	Lsingle_divloop
-		 tst	%o3
-
-		b,a	Lend_regular_divide
-
-Lnot_really_big:
-1:
-	sll	%o5, 4, %o5
-	cmp	%o5, %o3
-	bleu	1b
-	 addcc	%o4, 1, %o4
-
-	be	Lgot_result
-	 sub	%o4, 1, %o4
-
-	tst	%o3	! set up for initial iteration
-Ldivloop:
-	sll	%o2, 4, %o2
-		! depth 1, accumulated bits 0
-	bl	L.1.16
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 2, accumulated bits 1
-	bl	L.2.17
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 3, accumulated bits 3
-	bl	L.3.19
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 7
-	bl	L.4.23
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (7*2+1), %o2
-
-L.4.23:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (7*2-1), %o2
-
-L.3.19:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 5
-	bl	L.4.21
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (5*2+1), %o2
-
-L.4.21:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (5*2-1), %o2
-
-L.2.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 3, accumulated bits 1
-	bl	L.3.17
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 3
-	bl	L.4.19
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (3*2+1), %o2
-
-L.4.19:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (3*2-1), %o2
-	
-	
-L.3.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 1
-	bl	L.4.17
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (1*2+1), %o2
-
-L.4.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (1*2-1), %o2
-
-L.1.16:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 2, accumulated bits -1
-	bl	L.2.15
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 3, accumulated bits -1
-	bl	L.3.15
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -1
-	bl	L.4.15
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-1*2+1), %o2
-
-L.4.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-1*2-1), %o2
-
-L.3.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -3
-	bl	L.4.13
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-3*2+1), %o2
-
-L.4.13:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-3*2-1), %o2
-
-L.2.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 3, accumulated bits -3
-	bl	L.3.13
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -5
-	bl	L.4.11
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-5*2+1), %o2
-
-L.4.11:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-5*2-1), %o2
-
-L.3.13:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -7
-	bl	L.4.9
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-7*2+1), %o2
-
-L.4.9:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-7*2-1), %o2
-
-	9:
-Lend_regular_divide:
-	subcc	%o4, 1, %o4
-	bge	Ldivloop
-	 tst	%o3
-
-	bl,a	Lgot_result
-	! non-restoring fixup here (one instruction only!)
-	sub	%o2, 1, %o2
-
-Lgot_result:
-	! check to see if answer should be < 0
-	tst	%g2
-	bl,a	1f
-	 sub %g0, %o2, %o2
-1:
-	retl
-	 mov %o2, %o0
-
-	.globl	.div_patch
-.div_patch:
-	sra	%o0, 0x1f, %o2
-	wr	%o2, 0x0, %y
-	nop
-	nop
-	nop
-	sdivcc	%o0, %o1, %o0
-	bvs,a	1f
-	 xnor	%o0, %g0, %o0
-1:	retl
-	 nop
diff --git a/arch/sparc/lib/strlen_user_64.S b/arch/sparc/lib/strlen_user_64.S
index 114ed11..c3df71f 100644
--- a/arch/sparc/lib/strlen_user_64.S
+++ b/arch/sparc/lib/strlen_user_64.S
@@ -8,16 +8,16 @@
  * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
+#include <linux/linkage.h>
 #include <asm/asi.h>
 
 #define LO_MAGIC 0x01010101
 #define HI_MAGIC 0x80808080
 
 	.align 4
-	.global __strlen_user, __strnlen_user
-__strlen_user:
+ENTRY(__strlen_user)
 	sethi	%hi(32768), %o1
-__strnlen_user:	
+ENTRY(__strnlen_user)
 	mov	%o1, %g1
 	mov	%o0, %o1
 	andcc	%o0, 3, %g0
@@ -78,6 +78,8 @@ __strnlen_user:
 	 mov	2, %o0
 23:	retl
 	 mov	3, %o0
+ENDPROC(__strlen_user)
+ENDPROC(__strnlen_user)
 
         .section .fixup,#alloc,#execinstr
         .align  4
diff --git a/arch/sparc/lib/strncmp_32.S b/arch/sparc/lib/strncmp_32.S
index 494ec66..c0d1b56 100644
--- a/arch/sparc/lib/strncmp_32.S
+++ b/arch/sparc/lib/strncmp_32.S
@@ -3,11 +3,10 @@
  *            generic strncmp routine.
  */
 
+#include <linux/linkage.h>
+
 	.text
-	.align 4
-	.global __strncmp, strncmp
-__strncmp:
-strncmp:
+ENTRY(strncmp)
 	mov	%o0, %g3
 	mov	0, %o3
 
@@ -116,3 +115,4 @@ strncmp:
 	and	%g2, 0xff, %o0
 	retl
 	 sub	%o3, %o0, %o0
+ENDPROC(strncmp)
diff --git a/arch/sparc/lib/strncmp_64.S b/arch/sparc/lib/strncmp_64.S
index 980e837..0656627 100644
--- a/arch/sparc/lib/strncmp_64.S
+++ b/arch/sparc/lib/strncmp_64.S
@@ -4,13 +4,11 @@
  * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
+#include <linux/linkage.h>
 #include <asm/asi.h>
 
 	.text
-	.align	32
-	.globl	strncmp
-	.type	strncmp,#function
-strncmp:
+ENTRY(strncmp)
 	brlez,pn %o2, 3f
 	 lduba	[%o0] (ASI_PNF), %o3
 1:
@@ -29,4 +27,4 @@ strncmp:
 3:
 	retl
 	 clr	%o0
-	.size	strncmp, .-strncmp
+ENDPROC(strncmp)
diff --git a/arch/sparc/lib/strncpy_from_user_32.S b/arch/sparc/lib/strncpy_from_user_32.S
index d771989..db0ed29 100644
--- a/arch/sparc/lib/strncpy_from_user_32.S
+++ b/arch/sparc/lib/strncpy_from_user_32.S
@@ -3,11 +3,11 @@
  *  Copyright(C) 1996 David S. Miller
  */
 
+#include <linux/linkage.h>
 #include <asm/ptrace.h>
 #include <asm/errno.h>
 
 	.text
-	.align	4
 
 	/* Must return:
 	 *
@@ -16,8 +16,7 @@
 	 * bytes copied		if we hit a null byte
 	 */
 
-	.globl	__strncpy_from_user
-__strncpy_from_user:
+ENTRY(__strncpy_from_user)
 	/* %o0=dest, %o1=src, %o2=count */
 	mov	%o2, %o3
 1:
@@ -35,6 +34,7 @@ __strncpy_from_user:
 	add	%o2, 1, %o0
 	retl
 	 sub	%o3, %o0, %o0
+ENDPROC(__strncpy_from_user)
 
 	.section .fixup,#alloc,#execinstr
 	.align	4
diff --git a/arch/sparc/lib/strncpy_from_user_64.S b/arch/sparc/lib/strncpy_from_user_64.S
index 511c8f1..d1246b7 100644
--- a/arch/sparc/lib/strncpy_from_user_64.S
+++ b/arch/sparc/lib/strncpy_from_user_64.S
@@ -4,6 +4,7 @@
  *  Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
  */
 
+#include <linux/linkage.h>
 #include <asm/asi.h>
 #include <asm/errno.h>
 
@@ -12,7 +13,6 @@
 0:	.xword	0x0101010101010101
 
 	.text
-	.align	32
 
 	/* Must return:
 	 *
@@ -30,9 +30,7 @@
 	 * and average length is 18 or so.
 	 */
 
-	.globl	__strncpy_from_user
-	.type	__strncpy_from_user,#function
-__strncpy_from_user:
+ENTRY(__strncpy_from_user)
 	/* %o0=dest, %o1=src, %o2=count */
 	andcc	%o1, 7, %g0		! IEU1	Group
 	bne,pn	%icc, 30f		! CTI
@@ -123,7 +121,7 @@ __strncpy_from_user:
 	 mov	%o2, %o0
 2:	retl
 	 add	%o2, %o3, %o0
-	.size	__strncpy_from_user, .-__strncpy_from_user
+ENDPROC(__strncpy_from_user)
 
 	.section __ex_table,"a"
 	.align	4
diff --git a/arch/sparc/lib/ucmpdi2.c b/arch/sparc/lib/ucmpdi2.c
new file mode 100644
index 0000000..1e06ed5
--- /dev/null
+++ b/arch/sparc/lib/ucmpdi2.c
@@ -0,0 +1,19 @@
+#include <linux/module.h>
+#include "libgcc.h"
+
+word_type __ucmpdi2(unsigned long long a, unsigned long long b)
+{
+	const DWunion au = {.ll = a};
+	const DWunion bu = {.ll = b};
+
+	if ((unsigned int) au.s.high < (unsigned int) bu.s.high)
+		return 0;
+	else if ((unsigned int) au.s.high > (unsigned int) bu.s.high)
+		return 2;
+	if ((unsigned int) au.s.low < (unsigned int) bu.s.low)
+		return 0;
+	else if ((unsigned int) au.s.low > (unsigned int) bu.s.low)
+		return 2;
+	return 1;
+}
+EXPORT_SYMBOL(__ucmpdi2);
diff --git a/arch/sparc/lib/udiv.S b/arch/sparc/lib/udiv.S
deleted file mode 100644
index 2101405..0000000
--- a/arch/sparc/lib/udiv.S
+++ /dev/null
@@ -1,357 +0,0 @@
-/*
- * udiv.S:      This routine was taken from glibc-1.09 and is covered
- *              by the GNU Library General Public License Version 2.
- */
-
-
-/* This file is generated from divrem.m4; DO NOT EDIT! */
-/*
- * Division and remainder, from Appendix E of the Sparc Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
- */
-
-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- *  .udiv	name of function to generate
- *  div		div=div => %o0 / %o1; div=rem => %o0 % %o1
- *  false		false=true => signed; false=false => unsigned
- *
- * Algorithm parameters:
- *  N		how many bits per iteration we try to get (4)
- *  WORDSIZE	total number of bits (32)
- *
- * Derived constants:
- *  TOPBITS	number of bits in the top decade of a number
- *
- * Important variables:
- *  Q		the partial quotient under development (initially 0)
- *  R		the remainder so far, initially the dividend
- *  ITER	number of main division loop iterations required;
- *		equal to ceil(log2(quotient) / N).  Note that this
- *		is the log base (2^N) of the quotient.
- *  V		the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- *  Current estimate for non-large dividend is
- *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
- *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
- *  different path, as the upper bits of the quotient must be developed
- *  one bit at a time.
- */
-
-
-	.globl .udiv
-	.globl _Udiv
-.udiv:
-_Udiv:	/* needed for export */
-
-	! Ready to divide.  Compute size of quotient; scale comparand.
-	orcc	%o1, %g0, %o5
-	bne	1f
-	 mov	%o0, %o3
-
-		! Divide by zero trap.  If it returns, return 0 (about as
-		! wrong as possible, but that is what SunOS does...).
-		ta	ST_DIV0
-		retl
-		 clr	%o0
-
-1:
-	cmp	%o3, %o5			! if %o1 exceeds %o0, done
-	blu	Lgot_result		! (and algorithm fails otherwise)
-	 clr	%o2
-
-	sethi	%hi(1 << (32 - 4 - 1)), %g1
-
-	cmp	%o3, %g1
-	blu	Lnot_really_big
-	 clr	%o4
-
-	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
-	! as our usual N-at-a-shot divide step will cause overflow and havoc.
-	! The number of bits in the result here is N*ITER+SC, where SC <= N.
-	! Compute ITER in an unorthodox manner: know we need to shift V into
-	! the top decade: so do not even bother to compare to R.
-	1:
-		cmp	%o5, %g1
-		bgeu	3f
-		 mov	1, %g7
-
-		sll	%o5, 4, %o5
-
-		b	1b
-		 add	%o4, 1, %o4
-
-	! Now compute %g7.
-	2:
-		addcc	%o5, %o5, %o5
-		bcc	Lnot_too_big
-		 add	%g7, 1, %g7
-
-		! We get here if the %o1 overflowed while shifting.
-		! This means that %o3 has the high-order bit set.
-		! Restore %o5 and subtract from %o3.
-		sll	%g1, 4, %g1	! high order bit
-		srl	%o5, 1, %o5		! rest of %o5
-		add	%o5, %g1, %o5
-
-		b	Ldo_single_div
-		 sub	%g7, 1, %g7
-
-	Lnot_too_big:
-	3:
-		cmp	%o5, %o3
-		blu	2b
-		 nop
-
-		be	Ldo_single_div
-		 nop
-	/* NB: these are commented out in the V8-Sparc manual as well */
-	/* (I do not understand this) */
-	! %o5 > %o3: went too far: back up 1 step
-	!	srl	%o5, 1, %o5
-	!	dec	%g7
-	! do single-bit divide steps
-	!
-	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
-	! first divide step without thinking.  BUT, the others are conditional,
-	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
-	! order bit set in the first step, just falling into the regular
-	! division loop will mess up the first time around.
-	! So we unroll slightly...
-	Ldo_single_div:
-		subcc	%g7, 1, %g7
-		bl	Lend_regular_divide
-		 nop
-
-		sub	%o3, %o5, %o3
-		mov	1, %o2
-
-		b	Lend_single_divloop
-		 nop
-	Lsingle_divloop:
-		sll	%o2, 1, %o2
-		bl	1f
-		 srl	%o5, 1, %o5
-		! %o3 >= 0
-		sub	%o3, %o5, %o3
-		b	2f
-		 add	%o2, 1, %o2
-	1:	! %o3 < 0
-		add	%o3, %o5, %o3
-		sub	%o2, 1, %o2
-	2:
-	Lend_single_divloop:
-		subcc	%g7, 1, %g7
-		bge	Lsingle_divloop
-		 tst	%o3
-
-		b,a	Lend_regular_divide
-
-Lnot_really_big:
-1:
-	sll	%o5, 4, %o5
-
-	cmp	%o5, %o3
-	bleu	1b
-	 addcc	%o4, 1, %o4
-
-	be	Lgot_result
-	 sub	%o4, 1, %o4
-
-	tst	%o3	! set up for initial iteration
-Ldivloop:
-	sll	%o2, 4, %o2
-		! depth 1, accumulated bits 0
-	bl	L.1.16
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 2, accumulated bits 1
-	bl	L.2.17
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 3, accumulated bits 3
-	bl	L.3.19
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 7
-	bl	L.4.23
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (7*2+1), %o2
-
-L.4.23:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (7*2-1), %o2
-
-L.3.19:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 5
-	bl	L.4.21
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (5*2+1), %o2
-
-L.4.21:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (5*2-1), %o2
-
-L.2.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 3, accumulated bits 1
-	bl	L.3.17
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 3
-	bl	L.4.19
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (3*2+1), %o2
-
-L.4.19:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (3*2-1), %o2
-
-L.3.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 1
-	bl	L.4.17
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (1*2+1), %o2
-
-L.4.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (1*2-1), %o2
-
-L.1.16:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 2, accumulated bits -1
-	bl	L.2.15
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 3, accumulated bits -1
-	bl	L.3.15
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -1
-	bl	L.4.15
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-1*2+1), %o2
-
-L.4.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-1*2-1), %o2
-
-L.3.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -3
-	bl	L.4.13
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-3*2+1), %o2
-
-L.4.13:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-3*2-1), %o2
-
-L.2.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 3, accumulated bits -3
-	bl	L.3.13
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -5
-	bl	L.4.11
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-5*2+1), %o2
-
-L.4.11:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-5*2-1), %o2
-
-L.3.13:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -7
-	bl	L.4.9
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-7*2+1), %o2
-
-L.4.9:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-7*2-1), %o2
-
-	9:
-Lend_regular_divide:
-	subcc	%o4, 1, %o4
-	bge	Ldivloop
-	 tst	%o3
-
-	bl,a	Lgot_result
-	! non-restoring fixup here (one instruction only!)
-	sub	%o2, 1, %o2
-
-Lgot_result:
-
-	retl
-	 mov %o2, %o0
-
-	.globl	.udiv_patch
-.udiv_patch:
-	wr	%g0, 0x0, %y
-	nop
-	nop
-	retl
-	 udiv	%o0, %o1, %o0
-	nop
diff --git a/arch/sparc/lib/udivdi3.S b/arch/sparc/lib/udivdi3.S
index b430f1f..24e0a35 100644
--- a/arch/sparc/lib/udivdi3.S
+++ b/arch/sparc/lib/udivdi3.S
@@ -60,8 +60,9 @@ __udivdi3:
 	bne .LL77
 	mov %i0,%o2
 	mov 1,%o0
-	call .udiv,0
 	mov 0,%o1
+	wr %g0, 0, %y
+	udiv %o0, %o1, %o0
 	mov %o0,%o3
 	mov %i0,%o2
 .LL77:
diff --git a/arch/sparc/lib/umul.S b/arch/sparc/lib/umul.S
deleted file mode 100644
index 1f36ae6..0000000
--- a/arch/sparc/lib/umul.S
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * umul.S:      This routine was taken from glibc-1.09 and is covered
- *              by the GNU Library General Public License Version 2.
- */
-
-
-/*
- * Unsigned multiply.  Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the
- * upper 32 bits of the 64-bit product).
- *
- * This code optimizes short (less than 13-bit) multiplies.  Short
- * multiplies require 25 instruction cycles, and long ones require
- * 45 instruction cycles.
- *
- * On return, overflow has occurred (%o1 is not zero) if and only if
- * the Z condition code is clear, allowing, e.g., the following:
- *
- *	call	.umul
- *	nop
- *	bnz	overflow	(or tnz)
- */
-
-	.globl .umul
-	.globl _Umul
-.umul:
-_Umul:	/* needed for export */
-	or	%o0, %o1, %o4
-	mov	%o0, %y		! multiplier -> Y
-
-	andncc	%o4, 0xfff, %g0	! test bits 12..31 of *both* args
-	be	Lmul_shortway	! if zero, can do it the short way
-	 andcc	%g0, %g0, %o4	! zero the partial product and clear N and V
-
-	/*
-	 * Long multiply.  32 steps, followed by a final shift step.
-	 */
-	mulscc	%o4, %o1, %o4	! 1
-	mulscc	%o4, %o1, %o4	! 2
-	mulscc	%o4, %o1, %o4	! 3
-	mulscc	%o4, %o1, %o4	! 4
-	mulscc	%o4, %o1, %o4	! 5
-	mulscc	%o4, %o1, %o4	! 6
-	mulscc	%o4, %o1, %o4	! 7
-	mulscc	%o4, %o1, %o4	! 8
-	mulscc	%o4, %o1, %o4	! 9
-	mulscc	%o4, %o1, %o4	! 10
-	mulscc	%o4, %o1, %o4	! 11
-	mulscc	%o4, %o1, %o4	! 12
-	mulscc	%o4, %o1, %o4	! 13
-	mulscc	%o4, %o1, %o4	! 14
-	mulscc	%o4, %o1, %o4	! 15
-	mulscc	%o4, %o1, %o4	! 16
-	mulscc	%o4, %o1, %o4	! 17
-	mulscc	%o4, %o1, %o4	! 18
-	mulscc	%o4, %o1, %o4	! 19
-	mulscc	%o4, %o1, %o4	! 20
-	mulscc	%o4, %o1, %o4	! 21
-	mulscc	%o4, %o1, %o4	! 22
-	mulscc	%o4, %o1, %o4	! 23
-	mulscc	%o4, %o1, %o4	! 24
-	mulscc	%o4, %o1, %o4	! 25
-	mulscc	%o4, %o1, %o4	! 26
-	mulscc	%o4, %o1, %o4	! 27
-	mulscc	%o4, %o1, %o4	! 28
-	mulscc	%o4, %o1, %o4	! 29
-	mulscc	%o4, %o1, %o4	! 30
-	mulscc	%o4, %o1, %o4	! 31
-	mulscc	%o4, %o1, %o4	! 32
-	mulscc	%o4, %g0, %o4	! final shift
-
-
-	/*
-	 * Normally, with the shift-and-add approach, if both numbers are
-	 * positive you get the correct result.  With 32-bit two's-complement
-	 * numbers, -x is represented as
-	 *
-	 *		  x		    32
-	 *	( 2  -  ------ ) mod 2  *  2
-	 *		   32
-	 *		  2
-	 *
-	 * (the `mod 2' subtracts 1 from 1.bbbb).  To avoid lots of 2^32s,
-	 * we can treat this as if the radix point were just to the left
-	 * of the sign bit (multiply by 2^32), and get
-	 *
-	 *	-x  =  (2 - x) mod 2
-	 *
-	 * Then, ignoring the `mod 2's for convenience:
-	 *
-	 *   x *  y	= xy
-	 *  -x *  y	= 2y - xy
-	 *   x * -y	= 2x - xy
-	 *  -x * -y	= 4 - 2x - 2y + xy
-	 *
-	 * For signed multiplies, we subtract (x << 32) from the partial
-	 * product to fix this problem for negative multipliers (see mul.s).
-	 * Because of the way the shift into the partial product is calculated
-	 * (N xor V), this term is automatically removed for the multiplicand,
-	 * so we don't have to adjust.
-	 *
-	 * But for unsigned multiplies, the high order bit wasn't a sign bit,
-	 * and the correction is wrong.  So for unsigned multiplies where the
-	 * high order bit is one, we end up with xy - (y << 32).  To fix it
-	 * we add y << 32.
-	 */
-#if 0
-	tst	%o1
-	bl,a	1f		! if %o1 < 0 (high order bit = 1),
-	 add	%o4, %o0, %o4	! %o4 += %o0 (add y to upper half)
-
-1:
-	rd	%y, %o0		! get lower half of product
-	retl
-	 addcc	%o4, %g0, %o1	! put upper half in place and set Z for %o1==0
-#else
-	/* Faster code from tege@sics.se.  */
-	sra	%o1, 31, %o2	! make mask from sign bit
-	and	%o0, %o2, %o2	! %o2 = 0 or %o0, depending on sign of %o1
-	rd	%y, %o0		! get lower half of product
-	retl
-	 addcc	%o4, %o2, %o1	! add compensation and put upper half in place
-#endif
-
-Lmul_shortway:
-	/*
-	 * Short multiply.  12 steps, followed by a final shift step.
-	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
-	 * but there is no problem with %o0 being negative (unlike above),
-	 * and overflow is impossible (the answer is at most 24 bits long).
-	 */
-	mulscc	%o4, %o1, %o4	! 1
-	mulscc	%o4, %o1, %o4	! 2
-	mulscc	%o4, %o1, %o4	! 3
-	mulscc	%o4, %o1, %o4	! 4
-	mulscc	%o4, %o1, %o4	! 5
-	mulscc	%o4, %o1, %o4	! 6
-	mulscc	%o4, %o1, %o4	! 7
-	mulscc	%o4, %o1, %o4	! 8
-	mulscc	%o4, %o1, %o4	! 9
-	mulscc	%o4, %o1, %o4	! 10
-	mulscc	%o4, %o1, %o4	! 11
-	mulscc	%o4, %o1, %o4	! 12
-	mulscc	%o4, %g0, %o4	! final shift
-
-	/*
-	 * %o4 has 20 of the bits that should be in the result; %y has
-	 * the bottom 12 (as %y's top 12).  That is:
-	 *
-	 *	  %o4		    %y
-	 * +----------------+----------------+
-	 * | -12- |   -20-  | -12- |   -20-  |
-	 * +------(---------+------)---------+
-	 *	   -----result-----
-	 *
-	 * The 12 bits of %o4 left of the `result' area are all zero;
-	 * in fact, all top 20 bits of %o4 are zero.
-	 */
-
-	rd	%y, %o5
-	sll	%o4, 12, %o0	! shift middle bits left 12
-	srl	%o5, 20, %o5	! shift low bits right 20
-	or	%o5, %o0, %o0
-	retl
-	 addcc	%g0, %g0, %o1	! %o1 = zero, and set Z
-
-	.globl	.umul_patch
-.umul_patch:
-	umul	%o0, %o1, %o0
-	retl
-	 rd	%y, %o1
-	nop
diff --git a/arch/sparc/lib/urem.S b/arch/sparc/lib/urem.S
deleted file mode 100644
index 77123eb..0000000
--- a/arch/sparc/lib/urem.S
+++ /dev/null
@@ -1,357 +0,0 @@
-/*
- * urem.S:      This routine was taken from glibc-1.09 and is covered
- *              by the GNU Library General Public License Version 2.
- */
-
-/* This file is generated from divrem.m4; DO NOT EDIT! */
-/*
- * Division and remainder, from Appendix E of the Sparc Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
- */
-
-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- *  .urem	name of function to generate
- *  rem		rem=div => %o0 / %o1; rem=rem => %o0 % %o1
- *  false		false=true => signed; false=false => unsigned
- *
- * Algorithm parameters:
- *  N		how many bits per iteration we try to get (4)
- *  WORDSIZE	total number of bits (32)
- *
- * Derived constants:
- *  TOPBITS	number of bits in the top decade of a number
- *
- * Important variables:
- *  Q		the partial quotient under development (initially 0)
- *  R		the remainder so far, initially the dividend
- *  ITER	number of main division loop iterations required;
- *		equal to ceil(log2(quotient) / N).  Note that this
- *		is the log base (2^N) of the quotient.
- *  V		the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- *  Current estimate for non-large dividend is
- *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
- *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
- *  different path, as the upper bits of the quotient must be developed
- *  one bit at a time.
- */
-
-	.globl .urem
-	.globl _Urem
-.urem:
-_Urem:	/* needed for export */
-
-	! Ready to divide.  Compute size of quotient; scale comparand.
-	orcc	%o1, %g0, %o5
-	bne	1f
-	 mov	%o0, %o3
-
-		! Divide by zero trap.  If it returns, return 0 (about as
-		! wrong as possible, but that is what SunOS does...).
-		ta	ST_DIV0
-		retl
-		 clr	%o0
-
-1:
-	cmp	%o3, %o5			! if %o1 exceeds %o0, done
-	blu	Lgot_result		! (and algorithm fails otherwise)
-	 clr	%o2
-
-	sethi	%hi(1 << (32 - 4 - 1)), %g1
-
-	cmp	%o3, %g1
-	blu	Lnot_really_big
-	 clr	%o4
-
-	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
-	! as our usual N-at-a-shot divide step will cause overflow and havoc.
-	! The number of bits in the result here is N*ITER+SC, where SC <= N.
-	! Compute ITER in an unorthodox manner: know we need to shift V into
-	! the top decade: so do not even bother to compare to R.
-	1:
-		cmp	%o5, %g1
-		bgeu	3f
-		 mov	1, %g7
-
-		sll	%o5, 4, %o5
-
-		b	1b
-		 add	%o4, 1, %o4
-
-	! Now compute %g7.
-	2:
-		addcc	%o5, %o5, %o5
-		bcc	Lnot_too_big
-		 add	%g7, 1, %g7
-
-		! We get here if the %o1 overflowed while shifting.
-		! This means that %o3 has the high-order bit set.
-		! Restore %o5 and subtract from %o3.
-		sll	%g1, 4, %g1	! high order bit
-		srl	%o5, 1, %o5		! rest of %o5
-		add	%o5, %g1, %o5
-
-		b	Ldo_single_div
-		 sub	%g7, 1, %g7
-
-	Lnot_too_big:
-	3:
-		cmp	%o5, %o3
-		blu	2b
-		 nop
-
-		be	Ldo_single_div
-		 nop
-	/* NB: these are commented out in the V8-Sparc manual as well */
-	/* (I do not understand this) */
-	! %o5 > %o3: went too far: back up 1 step
-	!	srl	%o5, 1, %o5
-	!	dec	%g7
-	! do single-bit divide steps
-	!
-	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
-	! first divide step without thinking.  BUT, the others are conditional,
-	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
-	! order bit set in the first step, just falling into the regular
-	! division loop will mess up the first time around.
-	! So we unroll slightly...
-	Ldo_single_div:
-		subcc	%g7, 1, %g7
-		bl	Lend_regular_divide
-		 nop
-
-		sub	%o3, %o5, %o3
-		mov	1, %o2
-
-		b	Lend_single_divloop
-		 nop
-	Lsingle_divloop:
-		sll	%o2, 1, %o2
-		bl	1f
-		 srl	%o5, 1, %o5
-		! %o3 >= 0
-		sub	%o3, %o5, %o3
-		b	2f
-		 add	%o2, 1, %o2
-	1:	! %o3 < 0
-		add	%o3, %o5, %o3
-		sub	%o2, 1, %o2
-	2:
-	Lend_single_divloop:
-		subcc	%g7, 1, %g7
-		bge	Lsingle_divloop
-		 tst	%o3
-
-		b,a	Lend_regular_divide
-
-Lnot_really_big:
-1:
-	sll	%o5, 4, %o5
-
-	cmp	%o5, %o3
-	bleu	1b
-	 addcc	%o4, 1, %o4
-
-	be	Lgot_result
-	 sub	%o4, 1, %o4
-
-	tst	%o3	! set up for initial iteration
-Ldivloop:
-	sll	%o2, 4, %o2
-		! depth 1, accumulated bits 0
-	bl	L.1.16
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 2, accumulated bits 1
-	bl	L.2.17
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 3, accumulated bits 3
-	bl	L.3.19
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 7
-	bl	L.4.23
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (7*2+1), %o2
-
-L.4.23:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (7*2-1), %o2
-
-L.3.19:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 5
-	bl	L.4.21
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (5*2+1), %o2
-
-L.4.21:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (5*2-1), %o2
-
-L.2.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 3, accumulated bits 1
-	bl	L.3.17
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 3
-	bl	L.4.19
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (3*2+1), %o2
-
-L.4.19:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (3*2-1), %o2
-
-L.3.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits 1
-	bl	L.4.17
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (1*2+1), %o2
-	
-L.4.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (1*2-1), %o2
-
-L.1.16:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 2, accumulated bits -1
-	bl	L.2.15
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 3, accumulated bits -1
-	bl	L.3.15
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -1
-	bl	L.4.15
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-1*2+1), %o2
-
-L.4.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-1*2-1), %o2
-
-L.3.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -3
-	bl	L.4.13
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-3*2+1), %o2
-
-L.4.13:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-3*2-1), %o2
-
-L.2.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 3, accumulated bits -3
-	bl	L.3.13
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -5
-	bl	L.4.11
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-5*2+1), %o2
-	
-L.4.11:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-5*2-1), %o2
-
-L.3.13:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-			! depth 4, accumulated bits -7
-	bl	L.4.9
-	 srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-7*2+1), %o2
-
-L.4.9:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	 add	%o2, (-7*2-1), %o2
-
-	9:
-Lend_regular_divide:
-	subcc	%o4, 1, %o4
-	bge	Ldivloop
-	 tst	%o3
-
-	bl,a	Lgot_result
-	! non-restoring fixup here (one instruction only!)
-	add	%o3, %o1, %o3
-
-Lgot_result:
-
-	retl
-	 mov %o3, %o0
-
-	.globl	.urem_patch
-.urem_patch:
-	wr	%g0, 0x0, %y
-	nop
-	nop
-	nop
-	udiv	%o0, %o1, %o2
-	umul	%o2, %o1, %o2
-	retl
-	 sub	%o0, %o2, %o0
diff --git a/arch/sparc/lib/xor.S b/arch/sparc/lib/xor.S
index f44f58f..2c05641 100644
--- a/arch/sparc/lib/xor.S
+++ b/arch/sparc/lib/xor.S
@@ -8,6 +8,7 @@
  * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
  */
 
+#include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
 #include <asm/dcu.h>
@@ -19,12 +20,9 @@
  *	!(len & 127) && len >= 256
  */
 	.text
-	.align	32
 
 	/* VIS versions. */
-	.globl	xor_vis_2
-	.type	xor_vis_2,#function
-xor_vis_2:
+ENTRY(xor_vis_2)
 	rd	%fprs, %o5
 	andcc	%o5, FPRS_FEF|FPRS_DU, %g0
 	be,pt	%icc, 0f
@@ -91,11 +89,9 @@ xor_vis_2:
 	wr	%g1, %g0, %asi
 	retl
 	  wr	%g0, 0, %fprs
-	.size	xor_vis_2, .-xor_vis_2
+ENDPROC(xor_vis_2)
 
-	.globl	xor_vis_3
-	.type	xor_vis_3,#function
-xor_vis_3:
+ENTRY(xor_vis_3)
 	rd	%fprs, %o5
 	andcc	%o5, FPRS_FEF|FPRS_DU, %g0
 	be,pt	%icc, 0f
@@ -159,11 +155,9 @@ xor_vis_3:
 	wr	%g1, %g0, %asi
 	retl
 	 wr	%g0, 0, %fprs
-	.size	xor_vis_3, .-xor_vis_3
+ENDPROC(xor_vis_3)
 
-	.globl	xor_vis_4
-	.type	xor_vis_4,#function
-xor_vis_4:
+ENTRY(xor_vis_4)
 	rd	%fprs, %o5
 	andcc	%o5, FPRS_FEF|FPRS_DU, %g0
 	be,pt	%icc, 0f
@@ -246,11 +240,9 @@ xor_vis_4:
 	wr	%g1, %g0, %asi
 	retl
 	 wr	%g0, 0, %fprs
-	.size	xor_vis_4, .-xor_vis_4
+ENDPROC(xor_vis_4)
 
-	.globl	xor_vis_5
-	.type	xor_vis_5,#function
-xor_vis_5:
+ENTRY(xor_vis_5)
 	save	%sp, -192, %sp
 	rd	%fprs, %o5
 	andcc	%o5, FPRS_FEF|FPRS_DU, %g0
@@ -354,12 +346,10 @@ xor_vis_5:
 	wr	%g0, 0, %fprs
 	ret
 	 restore
-	.size	xor_vis_5, .-xor_vis_5
+ENDPROC(xor_vis_5)
 
 	/* Niagara versions. */
-	.globl		xor_niagara_2
-	.type		xor_niagara_2,#function
-xor_niagara_2:		/* %o0=bytes, %o1=dest, %o2=src */
+ENTRY(xor_niagara_2) /* %o0=bytes, %o1=dest, %o2=src */
 	save		%sp, -192, %sp
 	prefetch	[%i1], #n_writes
 	prefetch	[%i2], #one_read
@@ -402,11 +392,9 @@ xor_niagara_2:		/* %o0=bytes, %o1=dest, %o2=src */
 	wr		%g7, 0x0, %asi
 	ret
 	 restore
-	.size		xor_niagara_2, .-xor_niagara_2
+ENDPROC(xor_niagara_2)
 
-	.globl		xor_niagara_3
-	.type		xor_niagara_3,#function
-xor_niagara_3:		/* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */
+ENTRY(xor_niagara_3) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */
 	save		%sp, -192, %sp
 	prefetch	[%i1], #n_writes
 	prefetch	[%i2], #one_read
@@ -465,11 +453,9 @@ xor_niagara_3:		/* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */
 	wr		%g7, 0x0, %asi
 	ret
 	 restore
-	.size		xor_niagara_3, .-xor_niagara_3
+ENDPROC(xor_niagara_3)
 
-	.globl		xor_niagara_4
-	.type		xor_niagara_4,#function
-xor_niagara_4:		/* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */
+ENTRY(xor_niagara_4) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */
 	save		%sp, -192, %sp
 	prefetch	[%i1], #n_writes
 	prefetch	[%i2], #one_read
@@ -549,11 +535,9 @@ xor_niagara_4:		/* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */
 	wr		%g7, 0x0, %asi
 	ret
 	 restore
-	.size		xor_niagara_4, .-xor_niagara_4
+ENDPROC(xor_niagara_4)
 
-	.globl		xor_niagara_5
-	.type		xor_niagara_5,#function
-xor_niagara_5:		/* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 */
+ENTRY(xor_niagara_5) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 */
 	save		%sp, -192, %sp
 	prefetch	[%i1], #n_writes
 	prefetch	[%i2], #one_read
@@ -649,4 +633,4 @@ xor_niagara_5:		/* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 *
 	wr		%g7, 0x0, %asi
 	ret
 	 restore
-	.size		xor_niagara_5, .-xor_niagara_5
+ENDPROC(xor_niagara_5)