From 564ee7a5668e8b6d3b369fd807c75c77285c88d4 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 19 Sep 2005 17:33:25 +1000 Subject: [PATCH] powerpc: Move arch/ppc*/kernel/vecemu.c to arch/powerpc This file is the same in both architectures so create arch/powerpc/kernel and move it there. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c new file mode 100644 index 0000000..604d094 --- /dev/null +++ b/arch/powerpc/kernel/vecemu.c @@ -0,0 +1,345 @@ +/* + * Routines to emulate some Altivec/VMX instructions, specifically + * those that can trap when given denormalized operands in Java mode. + */ +#include +#include +#include +#include +#include +#include + +/* Functions in vector.S */ +extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); +extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b); +extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); +extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); +extern void vrefp(vector128 *dst, vector128 *src); +extern void vrsqrtefp(vector128 *dst, vector128 *src); +extern void vexptep(vector128 *dst, vector128 *src); + +static unsigned int exp2s[8] = { + 0x800000, + 0x8b95c2, + 0x9837f0, + 0xa5fed7, + 0xb504f3, + 0xc5672a, + 0xd744fd, + 0xeac0c7 +}; + +/* + * Computes an estimate of 2^x. The `s' argument is the 32-bit + * single-precision floating-point representation of x. + */ +static unsigned int eexp2(unsigned int s) +{ + int exp, pwr; + unsigned int mant, frac; + + /* extract exponent field from input */ + exp = ((s >> 23) & 0xff) - 127; + if (exp > 7) { + /* check for NaN input */ + if (exp == 128 && (s & 0x7fffff) != 0) + return s | 0x400000; /* return QNaN */ + /* 2^-big = 0, 2^+big = +Inf */ + return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */ + } + if (exp < -23) + return 0x3f800000; /* 1.0 */ + + /* convert to fixed point integer in 9.23 representation */ + pwr = (s & 0x7fffff) | 0x800000; + if (exp > 0) + pwr <<= exp; + else + pwr >>= -exp; + if (s & 0x80000000) + pwr = -pwr; + + /* extract integer part, which becomes exponent part of result */ + exp = (pwr >> 23) + 126; + if (exp >= 254) + return 0x7f800000; + if (exp < -23) + return 0; + + /* table lookup on top 3 bits of fraction to get mantissa */ + mant = exp2s[(pwr >> 20) & 7]; + + /* linear interpolation using remaining 20 bits of fraction */ + asm("mulhwu %0,%1,%2" : "=r" (frac) + : "r" (pwr << 12), "r" (0x172b83ff)); + asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant)); + mant += frac; + + if (exp >= 0) + return mant + (exp << 23); + + /* denormalized result */ + exp = -exp; + mant += 1 << (exp - 1); + return mant >> exp; +} + +/* + * Computes an estimate of log_2(x). The `s' argument is the 32-bit + * single-precision floating-point representation of x. + */ +static unsigned int elog2(unsigned int s) +{ + int exp, mant, lz, frac; + + exp = s & 0x7f800000; + mant = s & 0x7fffff; + if (exp == 0x7f800000) { /* Inf or NaN */ + if (mant != 0) + s |= 0x400000; /* turn NaN into QNaN */ + return s; + } + if ((exp | mant) == 0) /* +0 or -0 */ + return 0xff800000; /* return -Inf */ + + if (exp == 0) { + /* denormalized */ + asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant)); + mant <<= lz - 8; + exp = (-118 - lz) << 23; + } else { + mant |= 0x800000; + exp -= 127 << 23; + } + + if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */ + exp |= 0x400000; /* 0.5 * 2^23 */ + asm("mulhwu %0,%1,%2" : "=r" (mant) + : "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */ + } + if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */ + exp |= 0x200000; /* 0.25 * 2^23 */ + asm("mulhwu %0,%1,%2" : "=r" (mant) + : "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */ + } + if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */ + exp |= 0x100000; /* 0.125 * 2^23 */ + asm("mulhwu %0,%1,%2" : "=r" (mant) + : "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */ + } + if (mant > 0x800000) { /* 1.0 * 2^23 */ + /* calculate (mant - 1) * 1.381097463 */ + /* 1.381097463 == 0.125 / (2^0.125 - 1) */ + asm("mulhwu %0,%1,%2" : "=r" (frac) + : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a)); + exp += frac; + } + s = exp & 0x80000000; + if (exp != 0) { + if (s) + exp = -exp; + asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp)); + lz = 8 - lz; + if (lz > 0) + exp >>= lz; + else if (lz < 0) + exp <<= -lz; + s += ((lz + 126) << 23) + exp; + } + return s; +} + +#define VSCR_SAT 1 + +static int ctsxs(unsigned int x, int scale, unsigned int *vscrp) +{ + int exp, mant; + + exp = (x >> 23) & 0xff; + mant = x & 0x7fffff; + if (exp == 255 && mant != 0) + return 0; /* NaN -> 0 */ + exp = exp - 127 + scale; + if (exp < 0) + return 0; /* round towards zero */ + if (exp >= 31) { + /* saturate, unless the result would be -2^31 */ + if (x + (scale << 23) != 0xcf000000) + *vscrp |= VSCR_SAT; + return (x & 0x80000000)? 0x80000000: 0x7fffffff; + } + mant |= 0x800000; + mant = (mant << 7) >> (30 - exp); + return (x & 0x80000000)? -mant: mant; +} + +static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp) +{ + int exp; + unsigned int mant; + + exp = (x >> 23) & 0xff; + mant = x & 0x7fffff; + if (exp == 255 && mant != 0) + return 0; /* NaN -> 0 */ + exp = exp - 127 + scale; + if (exp < 0) + return 0; /* round towards zero */ + if (x & 0x80000000) { + /* negative => saturate to 0 */ + *vscrp |= VSCR_SAT; + return 0; + } + if (exp >= 32) { + /* saturate */ + *vscrp |= VSCR_SAT; + return 0xffffffff; + } + mant |= 0x800000; + mant = (mant << 8) >> (31 - exp); + return mant; +} + +/* Round to floating integer, towards 0 */ +static unsigned int rfiz(unsigned int x) +{ + int exp; + + exp = ((x >> 23) & 0xff) - 127; + if (exp == 128 && (x & 0x7fffff) != 0) + return x | 0x400000; /* NaN -> make it a QNaN */ + if (exp >= 23) + return x; /* it's an integer already (or Inf) */ + if (exp < 0) + return x & 0x80000000; /* |x| < 1.0 rounds to 0 */ + return x & ~(0x7fffff >> exp); +} + +/* Round to floating integer, towards +/- Inf */ +static unsigned int rfii(unsigned int x) +{ + int exp, mask; + + exp = ((x >> 23) & 0xff) - 127; + if (exp == 128 && (x & 0x7fffff) != 0) + return x | 0x400000; /* NaN -> make it a QNaN */ + if (exp >= 23) + return x; /* it's an integer already (or Inf) */ + if ((x & 0x7fffffff) == 0) + return x; /* +/-0 -> +/-0 */ + if (exp < 0) + /* 0 < |x| < 1.0 rounds to +/- 1.0 */ + return (x & 0x80000000) | 0x3f800000; + mask = 0x7fffff >> exp; + /* mantissa overflows into exponent - that's OK, + it can't overflow into the sign bit */ + return (x + mask) & ~mask; +} + +/* Round to floating integer, to nearest */ +static unsigned int rfin(unsigned int x) +{ + int exp, half; + + exp = ((x >> 23) & 0xff) - 127; + if (exp == 128 && (x & 0x7fffff) != 0) + return x | 0x400000; /* NaN -> make it a QNaN */ + if (exp >= 23) + return x; /* it's an integer already (or Inf) */ + if (exp < -1) + return x & 0x80000000; /* |x| < 0.5 -> +/-0 */ + if (exp == -1) + /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */ + return (x & 0x80000000) | 0x3f800000; + half = 0x400000 >> exp; + /* add 0.5 to the magnitude and chop off the fraction bits */ + return (x + half) & ~(0x7fffff >> exp); +} + +int emulate_altivec(struct pt_regs *regs) +{ + unsigned int instr, i; + unsigned int va, vb, vc, vd; + vector128 *vrs; + + if (get_user(instr, (unsigned int __user *) regs->nip)) + return -EFAULT; + if ((instr >> 26) != 4) + return -EINVAL; /* not an altivec instruction */ + vd = (instr >> 21) & 0x1f; + va = (instr >> 16) & 0x1f; + vb = (instr >> 11) & 0x1f; + vc = (instr >> 6) & 0x1f; + + vrs = current->thread.vr; + switch (instr & 0x3f) { + case 10: + switch (vc) { + case 0: /* vaddfp */ + vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); + break; + case 1: /* vsubfp */ + vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); + break; + case 4: /* vrefp */ + vrefp(&vrs[vd], &vrs[vb]); + break; + case 5: /* vrsqrtefp */ + vrsqrtefp(&vrs[vd], &vrs[vb]); + break; + case 6: /* vexptefp */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = eexp2(vrs[vb].u[i]); + break; + case 7: /* vlogefp */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = elog2(vrs[vb].u[i]); + break; + case 8: /* vrfin */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = rfin(vrs[vb].u[i]); + break; + case 9: /* vrfiz */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = rfiz(vrs[vb].u[i]); + break; + case 10: /* vrfip */ + for (i = 0; i < 4; ++i) { + u32 x = vrs[vb].u[i]; + x = (x & 0x80000000)? rfiz(x): rfii(x); + vrs[vd].u[i] = x; + } + break; + case 11: /* vrfim */ + for (i = 0; i < 4; ++i) { + u32 x = vrs[vb].u[i]; + x = (x & 0x80000000)? rfii(x): rfiz(x); + vrs[vd].u[i] = x; + } + break; + case 14: /* vctuxs */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, + ¤t->thread.vscr.u[3]); + break; + case 15: /* vctsxs */ + for (i = 0; i < 4; ++i) + vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, + ¤t->thread.vscr.u[3]); + break; + default: + return -EINVAL; + } + break; + case 46: /* vmaddfp */ + vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); + break; + case 47: /* vnmsubfp */ + vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); + break; + default: + return -EINVAL; + } + + return 0; +} diff --git a/arch/ppc/kernel/Makefile b/arch/ppc/kernel/Makefile index 1fb92f1..abf10dc 100644 --- a/arch/ppc/kernel/Makefile +++ b/arch/ppc/kernel/Makefile @@ -36,3 +36,5 @@ ifndef CONFIG_MATH_EMULATION obj-$(CONFIG_8xx) += softemu8xx.o endif +# These are here while we do the architecture merge +vecemu-y += ../../powerpc/kernel/vecemu.o diff --git a/arch/ppc/kernel/vecemu.c b/arch/ppc/kernel/vecemu.c deleted file mode 100644 index 604d094..0000000 --- a/arch/ppc/kernel/vecemu.c +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Routines to emulate some Altivec/VMX instructions, specifically - * those that can trap when given denormalized operands in Java mode. - */ -#include -#include -#include -#include -#include -#include - -/* Functions in vector.S */ -extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); -extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b); -extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); -extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); -extern void vrefp(vector128 *dst, vector128 *src); -extern void vrsqrtefp(vector128 *dst, vector128 *src); -extern void vexptep(vector128 *dst, vector128 *src); - -static unsigned int exp2s[8] = { - 0x800000, - 0x8b95c2, - 0x9837f0, - 0xa5fed7, - 0xb504f3, - 0xc5672a, - 0xd744fd, - 0xeac0c7 -}; - -/* - * Computes an estimate of 2^x. The `s' argument is the 32-bit - * single-precision floating-point representation of x. - */ -static unsigned int eexp2(unsigned int s) -{ - int exp, pwr; - unsigned int mant, frac; - - /* extract exponent field from input */ - exp = ((s >> 23) & 0xff) - 127; - if (exp > 7) { - /* check for NaN input */ - if (exp == 128 && (s & 0x7fffff) != 0) - return s | 0x400000; /* return QNaN */ - /* 2^-big = 0, 2^+big = +Inf */ - return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */ - } - if (exp < -23) - return 0x3f800000; /* 1.0 */ - - /* convert to fixed point integer in 9.23 representation */ - pwr = (s & 0x7fffff) | 0x800000; - if (exp > 0) - pwr <<= exp; - else - pwr >>= -exp; - if (s & 0x80000000) - pwr = -pwr; - - /* extract integer part, which becomes exponent part of result */ - exp = (pwr >> 23) + 126; - if (exp >= 254) - return 0x7f800000; - if (exp < -23) - return 0; - - /* table lookup on top 3 bits of fraction to get mantissa */ - mant = exp2s[(pwr >> 20) & 7]; - - /* linear interpolation using remaining 20 bits of fraction */ - asm("mulhwu %0,%1,%2" : "=r" (frac) - : "r" (pwr << 12), "r" (0x172b83ff)); - asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant)); - mant += frac; - - if (exp >= 0) - return mant + (exp << 23); - - /* denormalized result */ - exp = -exp; - mant += 1 << (exp - 1); - return mant >> exp; -} - -/* - * Computes an estimate of log_2(x). The `s' argument is the 32-bit - * single-precision floating-point representation of x. - */ -static unsigned int elog2(unsigned int s) -{ - int exp, mant, lz, frac; - - exp = s & 0x7f800000; - mant = s & 0x7fffff; - if (exp == 0x7f800000) { /* Inf or NaN */ - if (mant != 0) - s |= 0x400000; /* turn NaN into QNaN */ - return s; - } - if ((exp | mant) == 0) /* +0 or -0 */ - return 0xff800000; /* return -Inf */ - - if (exp == 0) { - /* denormalized */ - asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant)); - mant <<= lz - 8; - exp = (-118 - lz) << 23; - } else { - mant |= 0x800000; - exp -= 127 << 23; - } - - if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */ - exp |= 0x400000; /* 0.5 * 2^23 */ - asm("mulhwu %0,%1,%2" : "=r" (mant) - : "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */ - } - if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */ - exp |= 0x200000; /* 0.25 * 2^23 */ - asm("mulhwu %0,%1,%2" : "=r" (mant) - : "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */ - } - if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */ - exp |= 0x100000; /* 0.125 * 2^23 */ - asm("mulhwu %0,%1,%2" : "=r" (mant) - : "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */ - } - if (mant > 0x800000) { /* 1.0 * 2^23 */ - /* calculate (mant - 1) * 1.381097463 */ - /* 1.381097463 == 0.125 / (2^0.125 - 1) */ - asm("mulhwu %0,%1,%2" : "=r" (frac) - : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a)); - exp += frac; - } - s = exp & 0x80000000; - if (exp != 0) { - if (s) - exp = -exp; - asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp)); - lz = 8 - lz; - if (lz > 0) - exp >>= lz; - else if (lz < 0) - exp <<= -lz; - s += ((lz + 126) << 23) + exp; - } - return s; -} - -#define VSCR_SAT 1 - -static int ctsxs(unsigned int x, int scale, unsigned int *vscrp) -{ - int exp, mant; - - exp = (x >> 23) & 0xff; - mant = x & 0x7fffff; - if (exp == 255 && mant != 0) - return 0; /* NaN -> 0 */ - exp = exp - 127 + scale; - if (exp < 0) - return 0; /* round towards zero */ - if (exp >= 31) { - /* saturate, unless the result would be -2^31 */ - if (x + (scale << 23) != 0xcf000000) - *vscrp |= VSCR_SAT; - return (x & 0x80000000)? 0x80000000: 0x7fffffff; - } - mant |= 0x800000; - mant = (mant << 7) >> (30 - exp); - return (x & 0x80000000)? -mant: mant; -} - -static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp) -{ - int exp; - unsigned int mant; - - exp = (x >> 23) & 0xff; - mant = x & 0x7fffff; - if (exp == 255 && mant != 0) - return 0; /* NaN -> 0 */ - exp = exp - 127 + scale; - if (exp < 0) - return 0; /* round towards zero */ - if (x & 0x80000000) { - /* negative => saturate to 0 */ - *vscrp |= VSCR_SAT; - return 0; - } - if (exp >= 32) { - /* saturate */ - *vscrp |= VSCR_SAT; - return 0xffffffff; - } - mant |= 0x800000; - mant = (mant << 8) >> (31 - exp); - return mant; -} - -/* Round to floating integer, towards 0 */ -static unsigned int rfiz(unsigned int x) -{ - int exp; - - exp = ((x >> 23) & 0xff) - 127; - if (exp == 128 && (x & 0x7fffff) != 0) - return x | 0x400000; /* NaN -> make it a QNaN */ - if (exp >= 23) - return x; /* it's an integer already (or Inf) */ - if (exp < 0) - return x & 0x80000000; /* |x| < 1.0 rounds to 0 */ - return x & ~(0x7fffff >> exp); -} - -/* Round to floating integer, towards +/- Inf */ -static unsigned int rfii(unsigned int x) -{ - int exp, mask; - - exp = ((x >> 23) & 0xff) - 127; - if (exp == 128 && (x & 0x7fffff) != 0) - return x | 0x400000; /* NaN -> make it a QNaN */ - if (exp >= 23) - return x; /* it's an integer already (or Inf) */ - if ((x & 0x7fffffff) == 0) - return x; /* +/-0 -> +/-0 */ - if (exp < 0) - /* 0 < |x| < 1.0 rounds to +/- 1.0 */ - return (x & 0x80000000) | 0x3f800000; - mask = 0x7fffff >> exp; - /* mantissa overflows into exponent - that's OK, - it can't overflow into the sign bit */ - return (x + mask) & ~mask; -} - -/* Round to floating integer, to nearest */ -static unsigned int rfin(unsigned int x) -{ - int exp, half; - - exp = ((x >> 23) & 0xff) - 127; - if (exp == 128 && (x & 0x7fffff) != 0) - return x | 0x400000; /* NaN -> make it a QNaN */ - if (exp >= 23) - return x; /* it's an integer already (or Inf) */ - if (exp < -1) - return x & 0x80000000; /* |x| < 0.5 -> +/-0 */ - if (exp == -1) - /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */ - return (x & 0x80000000) | 0x3f800000; - half = 0x400000 >> exp; - /* add 0.5 to the magnitude and chop off the fraction bits */ - return (x + half) & ~(0x7fffff >> exp); -} - -int emulate_altivec(struct pt_regs *regs) -{ - unsigned int instr, i; - unsigned int va, vb, vc, vd; - vector128 *vrs; - - if (get_user(instr, (unsigned int __user *) regs->nip)) - return -EFAULT; - if ((instr >> 26) != 4) - return -EINVAL; /* not an altivec instruction */ - vd = (instr >> 21) & 0x1f; - va = (instr >> 16) & 0x1f; - vb = (instr >> 11) & 0x1f; - vc = (instr >> 6) & 0x1f; - - vrs = current->thread.vr; - switch (instr & 0x3f) { - case 10: - switch (vc) { - case 0: /* vaddfp */ - vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); - break; - case 1: /* vsubfp */ - vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); - break; - case 4: /* vrefp */ - vrefp(&vrs[vd], &vrs[vb]); - break; - case 5: /* vrsqrtefp */ - vrsqrtefp(&vrs[vd], &vrs[vb]); - break; - case 6: /* vexptefp */ - for (i = 0; i < 4; ++i) - vrs[vd].u[i] = eexp2(vrs[vb].u[i]); - break; - case 7: /* vlogefp */ - for (i = 0; i < 4; ++i) - vrs[vd].u[i] = elog2(vrs[vb].u[i]); - break; - case 8: /* vrfin */ - for (i = 0; i < 4; ++i) - vrs[vd].u[i] = rfin(vrs[vb].u[i]); - break; - case 9: /* vrfiz */ - for (i = 0; i < 4; ++i) - vrs[vd].u[i] = rfiz(vrs[vb].u[i]); - break; - case 10: /* vrfip */ - for (i = 0; i < 4; ++i) { - u32 x = vrs[vb].u[i]; - x = (x & 0x80000000)? rfiz(x): rfii(x); - vrs[vd].u[i] = x; - } - break; - case 11: /* vrfim */ - for (i = 0; i < 4; ++i) { - u32 x = vrs[vb].u[i]; - x = (x & 0x80000000)? rfii(x): rfiz(x); - vrs[vd].u[i] = x; - } - break; - case 14: /* vctuxs */ - for (i = 0; i < 4; ++i) - vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, - ¤t->thread.vscr.u[3]); - break; - case 15: /* vctsxs */ - for (i = 0; i < 4; ++i) - vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, - ¤t->thread.vscr.u[3]); - break; - default: - return -EINVAL; - } - break; - case 46: /* vmaddfp */ - vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); - break; - case 47: /* vnmsubfp */ - vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); - break; - default: - return -EINVAL; - } - - return 0; -} diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index ae60eb1..813718d 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -83,3 +83,6 @@ ifeq ($(CONFIG_PPC_ISERIES),y) arch/ppc64/kernel/head.o: arch/ppc64/kernel/lparmap.s AFLAGS_head.o += -Iarch/ppc64/kernel endif + +# These are here while we do the architecture merge +vecemu-y += ../../powerpc/kernel/vecemu.o diff --git a/arch/ppc64/kernel/vecemu.c b/arch/ppc64/kernel/vecemu.c deleted file mode 100644 index cb20762..0000000 --- a/arch/ppc64/kernel/vecemu.c +++ /dev/null @@ -1,346 +0,0 @@ -/* - * Routines to emulate some Altivec/VMX instructions, specifically - * those that can trap when given denormalized operands in Java mode. - */ -#include -#include -#include -#include -#include -#include - -/* Functions in vector.S */ -extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); -extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b); -extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); -extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); -extern void vrefp(vector128 *dst, vector128 *src); -extern void vrsqrtefp(vector128 *dst, vector128 *src); -extern void vexptep(vector128 *dst, vector128 *src); - -static unsigned int exp2s[8] = { - 0x800000, - 0x8b95c2, - 0x9837f0, - 0xa5fed7, - 0xb504f3, - 0xc5672a, - 0xd744fd, - 0xeac0c7 -}; - -/* - * Computes an estimate of 2^x. The `s' argument is the 32-bit - * single-precision floating-point representation of x. - */ -static unsigned int eexp2(unsigned int s) -{ - int exp, pwr; - unsigned int mant, frac; - - /* extract exponent field from input */ - exp = ((s >> 23) & 0xff) - 127; - if (exp > 7) { - /* check for NaN input */ - if (exp == 128 && (s & 0x7fffff) != 0) - return s | 0x400000; /* return QNaN */ - /* 2^-big = 0, 2^+big = +Inf */ - return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */ - } - if (exp < -23) - return 0x3f800000; /* 1.0 */ - - /* convert to fixed point integer in 9.23 representation */ - pwr = (s & 0x7fffff) | 0x800000; - if (exp > 0) - pwr <<= exp; - else - pwr >>= -exp; - if (s & 0x80000000) - pwr = -pwr; - - /* extract integer part, which becomes exponent part of result */ - exp = (pwr >> 23) + 126; - if (exp >= 254) - return 0x7f800000; - if (exp < -23) - return 0; - - /* table lookup on top 3 bits of fraction to get mantissa */ - mant = exp2s[(pwr >> 20) & 7]; - - /* linear interpolation using remaining 20 bits of fraction */ - asm("mulhwu %0,%1,%2" : "=r" (frac) - : "r" (pwr << 12), "r" (0x172b83ff)); - asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant)); - mant += frac; - - if (exp >= 0) - return mant + (exp << 23); - - /* denormalized result */ - exp = -exp; - mant += 1 << (exp - 1); - return mant >> exp; -} - -/* - * Computes an estimate of log_2(x). The `s' argument is the 32-bit - * single-precision floating-point representation of x. - */ -static unsigned int elog2(unsigned int s) -{ - int exp, mant, lz, frac; - - exp = s & 0x7f800000; - mant = s & 0x7fffff; - if (exp == 0x7f800000) { /* Inf or NaN */ - if (mant != 0) - s |= 0x400000; /* turn NaN into QNaN */ - return s; - } - if ((exp | mant) == 0) /* +0 or -0 */ - return 0xff800000; /* return -Inf */ - - if (exp == 0) { - /* denormalized */ - asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant)); - mant <<= lz - 8; - exp = (-118 - lz) << 23; - } else { - mant |= 0x800000; - exp -= 127 << 23; - } - - if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */ - exp |= 0x400000; /* 0.5 * 2^23 */ - asm("mulhwu %0,%1,%2" : "=r" (mant) - : "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */ - } - if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */ - exp |= 0x200000; /* 0.25 * 2^23 */ - asm("mulhwu %0,%1,%2" : "=r" (mant) - : "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */ - } - if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */ - exp |= 0x100000; /* 0.125 * 2^23 */ - asm("mulhwu %0,%1,%2" : "=r" (mant) - : "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */ - } - if (mant > 0x800000) { /* 1.0 * 2^23 */ - /* calculate (mant - 1) * 1.381097463 */ - /* 1.381097463 == 0.125 / (2^0.125 - 1) */ - asm("mulhwu %0,%1,%2" : "=r" (frac) - : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a)); - exp += frac; - } - s = exp & 0x80000000; - if (exp != 0) { - if (s) - exp = -exp; - asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp)); - lz = 8 - lz; - if (lz > 0) - exp >>= lz; - else if (lz < 0) - exp <<= -lz; - s += ((lz + 126) << 23) + exp; - } - return s; -} - -#define VSCR_SAT 1 - -static int ctsxs(unsigned int x, int scale, unsigned int *vscrp) -{ - int exp, mant; - - exp = (x >> 23) & 0xff; - mant = x & 0x7fffff; - if (exp == 255 && mant != 0) - return 0; /* NaN -> 0 */ - exp = exp - 127 + scale; - if (exp < 0) - return 0; /* round towards zero */ - if (exp >= 31) { - /* saturate, unless the result would be -2^31 */ - if (x + (scale << 23) != 0xcf000000) - *vscrp |= VSCR_SAT; - return (x & 0x80000000)? 0x80000000: 0x7fffffff; - } - mant |= 0x800000; - mant = (mant << 7) >> (30 - exp); - return (x & 0x80000000)? -mant: mant; -} - -static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp) -{ - int exp; - unsigned int mant; - - exp = (x >> 23) & 0xff; - mant = x & 0x7fffff; - if (exp == 255 && mant != 0) - return 0; /* NaN -> 0 */ - exp = exp - 127 + scale; - if (exp < 0) - return 0; /* round towards zero */ - if (x & 0x80000000) { - /* negative => saturate to 0 */ - *vscrp |= VSCR_SAT; - return 0; - } - if (exp >= 32) { - /* saturate */ - *vscrp |= VSCR_SAT; - return 0xffffffff; - } - mant |= 0x800000; - mant = (mant << 8) >> (31 - exp); - return mant; -} - -/* Round to floating integer, towards 0 */ -static unsigned int rfiz(unsigned int x) -{ - int exp; - - exp = ((x >> 23) & 0xff) - 127; - if (exp == 128 && (x & 0x7fffff) != 0) - return x | 0x400000; /* NaN -> make it a QNaN */ - if (exp >= 23) - return x; /* it's an integer already (or Inf) */ - if (exp < 0) - return x & 0x80000000; /* |x| < 1.0 rounds to 0 */ - return x & ~(0x7fffff >> exp); -} - -/* Round to floating integer, towards +/- Inf */ -static unsigned int rfii(unsigned int x) -{ - int exp, mask; - - exp = ((x >> 23) & 0xff) - 127; - if (exp == 128 && (x & 0x7fffff) != 0) - return x | 0x400000; /* NaN -> make it a QNaN */ - if (exp >= 23) - return x; /* it's an integer already (or Inf) */ - if ((x & 0x7fffffff) == 0) - return x; /* +/-0 -> +/-0 */ - if (exp < 0) - /* 0 < |x| < 1.0 rounds to +/- 1.0 */ - return (x & 0x80000000) | 0x3f800000; - mask = 0x7fffff >> exp; - /* mantissa overflows into exponent - that's OK, - it can't overflow into the sign bit */ - return (x + mask) & ~mask; -} - -/* Round to floating integer, to nearest */ -static unsigned int rfin(unsigned int x) -{ - int exp, half; - - exp = ((x >> 23) & 0xff) - 127; - if (exp == 128 && (x & 0x7fffff) != 0) - return x | 0x400000; /* NaN -> make it a QNaN */ - if (exp >= 23) - return x; /* it's an integer already (or Inf) */ - if (exp < -1) - return x & 0x80000000; /* |x| < 0.5 -> +/-0 */ - if (exp == -1) - /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */ - return (x & 0x80000000) | 0x3f800000; - half = 0x400000 >> exp; - /* add 0.5 to the magnitude and chop off the fraction bits */ - return (x + half) & ~(0x7fffff >> exp); -} - -int -emulate_altivec(struct pt_regs *regs) -{ - unsigned int instr, i; - unsigned int va, vb, vc, vd; - vector128 *vrs; - - if (get_user(instr, (unsigned int __user *) regs->nip)) - return -EFAULT; - if ((instr >> 26) != 4) - return -EINVAL; /* not an altivec instruction */ - vd = (instr >> 21) & 0x1f; - va = (instr >> 16) & 0x1f; - vb = (instr >> 11) & 0x1f; - vc = (instr >> 6) & 0x1f; - - vrs = current->thread.vr; - switch (instr & 0x3f) { - case 10: - switch (vc) { - case 0: /* vaddfp */ - vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); - break; - case 1: /* vsubfp */ - vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); - break; - case 4: /* vrefp */ - vrefp(&vrs[vd], &vrs[vb]); - break; - case 5: /* vrsqrtefp */ - vrsqrtefp(&vrs[vd], &vrs[vb]); - break; - case 6: /* vexptefp */ - for (i = 0; i < 4; ++i) - vrs[vd].u[i] = eexp2(vrs[vb].u[i]); - break; - case 7: /* vlogefp */ - for (i = 0; i < 4; ++i) - vrs[vd].u[i] = elog2(vrs[vb].u[i]); - break; - case 8: /* vrfin */ - for (i = 0; i < 4; ++i) - vrs[vd].u[i] = rfin(vrs[vb].u[i]); - break; - case 9: /* vrfiz */ - for (i = 0; i < 4; ++i) - vrs[vd].u[i] = rfiz(vrs[vb].u[i]); - break; - case 10: /* vrfip */ - for (i = 0; i < 4; ++i) { - u32 x = vrs[vb].u[i]; - x = (x & 0x80000000)? rfiz(x): rfii(x); - vrs[vd].u[i] = x; - } - break; - case 11: /* vrfim */ - for (i = 0; i < 4; ++i) { - u32 x = vrs[vb].u[i]; - x = (x & 0x80000000)? rfii(x): rfiz(x); - vrs[vd].u[i] = x; - } - break; - case 14: /* vctuxs */ - for (i = 0; i < 4; ++i) - vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, - ¤t->thread.vscr.u[3]); - break; - case 15: /* vctsxs */ - for (i = 0; i < 4; ++i) - vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, - ¤t->thread.vscr.u[3]); - break; - default: - return -EINVAL; - } - break; - case 46: /* vmaddfp */ - vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); - break; - case 47: /* vnmsubfp */ - vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); - break; - default: - return -EINVAL; - } - - return 0; -} -- cgit v0.10.2