summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMa, Ling <ling.ma@intel.com>2010-08-23 21:11:12 (GMT)
committerH. Peter Anvin <hpa@linux.intel.com>2010-08-23 21:14:27 (GMT)
commitfdf4289679fd41d76553ce224750e9737cd80eea (patch)
tree6191ed574019b3f22e6d283dde0108517fe62f8f
parent76be97c1fc945db08aae1f1b746012662d643e97 (diff)
downloadlinux-fdf4289679fd41d76553ce224750e9737cd80eea.tar.xz
x86, mem: Don't implement forward memmove() as memcpy()
memmove() allows the source and destination regions to overlap, but memcpy() makes no such guarantee. Therefore, explicitly implement memmove() in both the forward and backward directions rather than relying on memcpy() for the forward case, which gives us the freedom to optimize memcpy() independently. Signed-off-by: Ma Ling <ling.ma@intel.com> LKML-Reference: <C10D3FB0CD45994C8A51FEC1227CE22F0E483AD86A@shsmsx502.ccr.corp.intel.com> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
-rw-r--r--arch/x86/lib/memcpy_32.c38
-rw-r--r--arch/x86/lib/memmove_64.c46
2 files changed, 68 insertions, 16 deletions
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c
index 5415a9d..be424df 100644
--- a/arch/x86/lib/memcpy_32.c
+++ b/arch/x86/lib/memcpy_32.c
@@ -25,19 +25,35 @@ void *memmove(void *dest, const void *src, size_t n)
int d0, d1, d2;
if (dest < src) {
- memcpy(dest, src, n);
+ if ((dest + n) < src)
+ return memcpy(dest, src, n);
+ else
+ __asm__ __volatile__(
+ "rep\n\t"
+ "movsb\n\t"
+ : "=&c" (d0), "=&S" (d1), "=&D" (d2)
+ :"0" (n),
+ "1" (src),
+ "2" (dest)
+ :"memory");
+
} else {
- __asm__ __volatile__(
- "std\n\t"
- "rep\n\t"
- "movsb\n\t"
- "cld"
- : "=&c" (d0), "=&S" (d1), "=&D" (d2)
- :"0" (n),
- "1" (n-1+src),
- "2" (n-1+dest)
- :"memory");
+
+ if((src + count) < dest)
+ return memcpy(dest, src, count);
+ else
+ __asm__ __volatile__(
+ "std\n\t"
+ "rep\n\t"
+ "movsb\n\t"
+ "cld"
+ : "=&c" (d0), "=&S" (d1), "=&D" (d2)
+ :"0" (n),
+ "1" (n-1+src),
+ "2" (n-1+dest)
+ :"memory");
}
+
return dest;
}
EXPORT_SYMBOL(memmove);
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
index 0a33909..ecacc4b 100644
--- a/arch/x86/lib/memmove_64.c
+++ b/arch/x86/lib/memmove_64.c
@@ -8,13 +8,49 @@
#undef memmove
void *memmove(void *dest, const void *src, size_t count)
{
+ unsigned long d0, d1, d2, d3;
if (dest < src) {
- return memcpy(dest, src, count);
+ if ((dest + count) < src)
+ return memcpy(dest, src, count);
+ else
+ __asm__ __volatile__(
+ "movq %0, %3\n\t"
+ "shr $3, %0\n\t"
+ "andq $7, %3\n\t"
+ "rep\n\t"
+ "movsq\n\t"
+ "movq %3, %0\n\t"
+ "rep\n\t"
+ "movsb"
+ : "=&c" (d0), "=&S" (d1), "=&D" (d2), "=r" (d3)
+ :"0" (count),
+ "1" (src),
+ "2" (dest)
+ :"memory");
} else {
- char *p = dest + count;
- const char *s = src + count;
- while (count--)
- *--p = *--s;
+ if((src + count) < dest)
+ return memcpy(dest, src, count);
+ else
+ __asm__ __volatile__(
+ "movq %0, %3\n\t"
+ "lea -8(%1, %0), %1\n\t"
+ "lea -8(%2, %0), %2\n\t"
+ "shr $3, %0\n\t"
+ "andq $7, %3\n\t"
+ "std\n\t"
+ "rep\n\t"
+ "movsq\n\t"
+ "lea 7(%1), %1\n\t"
+ "lea 7(%2), %2\n\t"
+ "movq %3, %0\n\t"
+ "rep\n\t"
+ "movsb\n\t"
+ "cld"
+ : "=&c" (d0), "=&S" (d1), "=&D" (d2), "=r" (d3)
+ :"0" (count),
+ "1" (src),
+ "2" (dest)
+ :"memory");
}
return dest;
}