summaryrefslogtreecommitdiff
path: root/arch/hexagon/lib/memset.S
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-11-01 14:48:13 (GMT)
committerLinus Torvalds <torvalds@linux-foundation.org>2011-11-01 14:48:13 (GMT)
commitc87d5d594736dd8b56df67e31846c7d7b8c41a8f (patch)
treea4496b74b932e55b544d040af2668e68abcb1e56 /arch/hexagon/lib/memset.S
parent094803e0aab3fe75bbf8202a8f4b5280eaade375 (diff)
parent4e29198e1cd7728c30c96a8483a6068c71b34e4e (diff)
downloadlinux-fsl-qoriq-c87d5d594736dd8b56df67e31846c7d7b8c41a8f.tar.xz
Merge Qualcom Hexagon architecture
This is the fifth version of the patchset (with one tiny whitespace fix) to the Linux kernel to support the Qualcomm Hexagon architecture. Between now and the next pull requests, Richard Kuo should have his key signed, etc., and should be back on kernel.org. In the meantime, this got merged as a emailed patch-series. * Hexagon: (36 commits) Add extra arch overrides to asm-generic/checksum.h Hexagon: Add self to MAINTAINERS Hexagon: Add basic stacktrace functionality for Hexagon architecture. Hexagon: Add configuration and makefiles for the Hexagon architecture. Hexagon: Comet platform support Hexagon: kgdb support files Hexagon: Add page-fault support. Hexagon: Add page table header files & etc. Hexagon: Add ioremap support Hexagon: Provide DMA implementation Hexagon: Implement basic TLB management routines for Hexagon. Hexagon: Implement basic cache-flush support Hexagon: Provide basic implementation and/or stubs for I/O routines. Hexagon: Add user access functions Hexagon: Add locking types and functions Hexagon: Add SMP support Hexagon: Provide basic debugging and system trap support. Hexagon: Add ptrace support Hexagon: Add time and timer functions Hexagon: Add interrupts ...
Diffstat (limited to 'arch/hexagon/lib/memset.S')
-rw-r--r--arch/hexagon/lib/memset.S315
1 files changed, 315 insertions, 0 deletions
diff --git a/arch/hexagon/lib/memset.S b/arch/hexagon/lib/memset.S
new file mode 100644
index 0000000..26d9614
--- /dev/null
+++ b/arch/hexagon/lib/memset.S
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 2011 Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+
+/* HEXAGON assembly optimized memset */
+/* Replaces the standard library function memset */
+
+
+ .macro HEXAGON_OPT_FUNC_BEGIN name
+ .text
+ .p2align 4
+ .globl \name
+ .type \name, @function
+\name:
+ .endm
+
+ .macro HEXAGON_OPT_FUNC_FINISH name
+ .size \name, . - \name
+ .endm
+
+/* FUNCTION: memset (v2 version) */
+#if __HEXAGON_ARCH__ < 3
+HEXAGON_OPT_FUNC_BEGIN memset
+ {
+ r6 = #8
+ r7 = extractu(r0, #3 , #0)
+ p0 = cmp.eq(r2, #0)
+ p1 = cmp.gtu(r2, #7)
+ }
+ {
+ r4 = vsplatb(r1)
+ r8 = r0 /* leave r0 intact for return val */
+ r9 = sub(r6, r7) /* bytes until double alignment */
+ if p0 jumpr r31 /* count == 0, so return */
+ }
+ {
+ r3 = #0
+ r7 = #0
+ p0 = tstbit(r9, #0)
+ if p1 jump 2f /* skip byte loop */
+ }
+
+/* less than 8 bytes to set, so just set a byte at a time and return */
+
+ loop0(1f, r2) /* byte loop */
+ .falign
+1: /* byte loop */
+ {
+ memb(r8++#1) = r4
+ }:endloop0
+ jumpr r31
+ .falign
+2: /* skip byte loop */
+ {
+ r6 = #1
+ p0 = tstbit(r9, #1)
+ p1 = cmp.eq(r2, #1)
+ if !p0 jump 3f /* skip initial byte store */
+ }
+ {
+ memb(r8++#1) = r4
+ r3:2 = sub(r3:2, r7:6)
+ if p1 jumpr r31
+ }
+ .falign
+3: /* skip initial byte store */
+ {
+ r6 = #2
+ p0 = tstbit(r9, #2)
+ p1 = cmp.eq(r2, #2)
+ if !p0 jump 4f /* skip initial half store */
+ }
+ {
+ memh(r8++#2) = r4
+ r3:2 = sub(r3:2, r7:6)
+ if p1 jumpr r31
+ }
+ .falign
+4: /* skip initial half store */
+ {
+ r6 = #4
+ p0 = cmp.gtu(r2, #7)
+ p1 = cmp.eq(r2, #4)
+ if !p0 jump 5f /* skip initial word store */
+ }
+ {
+ memw(r8++#4) = r4
+ r3:2 = sub(r3:2, r7:6)
+ p0 = cmp.gtu(r2, #11)
+ if p1 jumpr r31
+ }
+ .falign
+5: /* skip initial word store */
+ {
+ r10 = lsr(r2, #3)
+ p1 = cmp.eq(r3, #1)
+ if !p0 jump 7f /* skip double loop */
+ }
+ {
+ r5 = r4
+ r6 = #8
+ loop0(6f, r10) /* double loop */
+ }
+
+/* set bytes a double word at a time */
+
+ .falign
+6: /* double loop */
+ {
+ memd(r8++#8) = r5:4
+ r3:2 = sub(r3:2, r7:6)
+ p1 = cmp.eq(r2, #8)
+ }:endloop0
+ .falign
+7: /* skip double loop */
+ {
+ p0 = tstbit(r2, #2)
+ if p1 jumpr r31
+ }
+ {
+ r6 = #4
+ p0 = tstbit(r2, #1)
+ p1 = cmp.eq(r2, #4)
+ if !p0 jump 8f /* skip final word store */
+ }
+ {
+ memw(r8++#4) = r4
+ r3:2 = sub(r3:2, r7:6)
+ if p1 jumpr r31
+ }
+ .falign
+8: /* skip final word store */
+ {
+ p1 = cmp.eq(r2, #2)
+ if !p0 jump 9f /* skip final half store */
+ }
+ {
+ memh(r8++#2) = r4
+ if p1 jumpr r31
+ }
+ .falign
+9: /* skip final half store */
+ {
+ memb(r8++#1) = r4
+ jumpr r31
+ }
+HEXAGON_OPT_FUNC_FINISH memset
+#endif
+
+
+/* FUNCTION: memset (v3 and higher version) */
+#if __HEXAGON_ARCH__ >= 3
+HEXAGON_OPT_FUNC_BEGIN memset
+ {
+ r7=vsplatb(r1)
+ r6 = r0
+ if (r2==#0) jump:nt .L1
+ }
+ {
+ r5:4=combine(r7,r7)
+ p0 = cmp.gtu(r2,#8)
+ if (p0.new) jump:nt .L3
+ }
+ {
+ r3 = r0
+ loop0(.L47,r2)
+ }
+ .falign
+.L47:
+ {
+ memb(r3++#1) = r1
+ }:endloop0 /* start=.L47 */
+ jumpr r31
+.L3:
+ {
+ p0 = tstbit(r0,#0)
+ if (!p0.new) jump:nt .L8
+ p1 = cmp.eq(r2, #1)
+ }
+ {
+ r6 = add(r0, #1)
+ r2 = add(r2,#-1)
+ memb(r0) = r1
+ if (p1) jump .L1
+ }
+.L8:
+ {
+ p0 = tstbit(r6,#1)
+ if (!p0.new) jump:nt .L10
+ }
+ {
+ r2 = add(r2,#-2)
+ memh(r6++#2) = r7
+ p0 = cmp.eq(r2, #2)
+ if (p0.new) jump:nt .L1
+ }
+.L10:
+ {
+ p0 = tstbit(r6,#2)
+ if (!p0.new) jump:nt .L12
+ }
+ {
+ r2 = add(r2,#-4)
+ memw(r6++#4) = r7
+ p0 = cmp.eq(r2, #4)
+ if (p0.new) jump:nt .L1
+ }
+.L12:
+ {
+ p0 = cmp.gtu(r2,#127)
+ if (!p0.new) jump:nt .L14
+ }
+ r3 = and(r6,#31)
+ if (r3==#0) jump:nt .L17
+ {
+ memd(r6++#8) = r5:4
+ r2 = add(r2,#-8)
+ }
+ r3 = and(r6,#31)
+ if (r3==#0) jump:nt .L17
+ {
+ memd(r6++#8) = r5:4
+ r2 = add(r2,#-8)
+ }
+ r3 = and(r6,#31)
+ if (r3==#0) jump:nt .L17
+ {
+ memd(r6++#8) = r5:4
+ r2 = add(r2,#-8)
+ }
+.L17:
+ {
+ r3 = lsr(r2,#5)
+ if (r1!=#0) jump:nt .L18
+ }
+ {
+ r8 = r3
+ r3 = r6
+ loop0(.L46,r3)
+ }
+ .falign
+.L46:
+ {
+ dczeroa(r6)
+ r6 = add(r6,#32)
+ r2 = add(r2,#-32)
+ }:endloop0 /* start=.L46 */
+.L14:
+ {
+ p0 = cmp.gtu(r2,#7)
+ if (!p0.new) jump:nt .L28
+ r8 = lsr(r2,#3)
+ }
+ loop0(.L44,r8)
+ .falign
+.L44:
+ {
+ memd(r6++#8) = r5:4
+ r2 = add(r2,#-8)
+ }:endloop0 /* start=.L44 */
+.L28:
+ {
+ p0 = tstbit(r2,#2)
+ if (!p0.new) jump:nt .L33
+ }
+ {
+ r2 = add(r2,#-4)
+ memw(r6++#4) = r7
+ }
+.L33:
+ {
+ p0 = tstbit(r2,#1)
+ if (!p0.new) jump:nt .L35
+ }
+ {
+ r2 = add(r2,#-2)
+ memh(r6++#2) = r7
+ }
+.L35:
+ p0 = cmp.eq(r2,#1)
+ if (p0) memb(r6) = r1
+.L1:
+ jumpr r31
+.L18:
+ loop0(.L45,r3)
+ .falign
+.L45:
+ dczeroa(r6)
+ {
+ memd(r6++#8) = r5:4
+ r2 = add(r2,#-32)
+ }
+ memd(r6++#8) = r5:4
+ memd(r6++#8) = r5:4
+ {
+ memd(r6++#8) = r5:4
+ }:endloop0 /* start=.L45 */
+ jump .L14
+HEXAGON_OPT_FUNC_FINISH memset
+#endif