From 3ac4c949e02f26be1e4378f9acfb07ec87db947b Mon Sep 17 00:00:00 2001 From: Robert Reif Date: Fri, 10 Aug 2007 15:52:06 -0700 Subject: [SPARC32]: Remove iommu from struct sbus_bus and use archdata like sparc64. Signed-off-by: Robert Reif Signed-off-by: David S. Miller diff --git a/arch/sparc/kernel/ebus.c b/arch/sparc/kernel/ebus.c index ac352eb..e2d02fd 100644 --- a/arch/sparc/kernel/ebus.c +++ b/arch/sparc/kernel/ebus.c @@ -238,6 +238,7 @@ void __init fill_ebus_device(struct device_node *dp, struct linux_ebus_device *d sd = &dev->ofdev.dev.archdata; sd->prom_node = dp; sd->op = &dev->ofdev; + sd->iommu = dev->bus->ofdev.dev.parent->archdata.iommu; dev->ofdev.node = dp; dev->ofdev.dev.parent = &dev->bus->ofdev.dev; diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index 4ccda77..7c89893 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -66,7 +66,7 @@ iounit_init(int sbi_node, int io_node, struct sbus_bus *sbus) } if(!xpt) panic("Cannot map External Page Table."); - sbus->iommu = (struct iommu_struct *)iounit; + sbus->ofdev.dev.archdata.iommu = iounit; iounit->page_table = xpt; spin_lock_init(&iounit->lock); @@ -127,7 +127,7 @@ nexti: scan = find_next_zero_bit(iounit->bmap, limit, scan); static __u32 iounit_get_scsi_one(char *vaddr, unsigned long len, struct sbus_bus *sbus) { unsigned long ret, flags; - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; spin_lock_irqsave(&iounit->lock, flags); ret = iounit_get_area(iounit, (unsigned long)vaddr, len); @@ -138,7 +138,7 @@ static __u32 iounit_get_scsi_one(char *vaddr, unsigned long len, struct sbus_bus static void iounit_get_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus *sbus) { unsigned long flags; - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; /* FIXME: Cache some resolved pages - often several sg entries are to the same page */ spin_lock_irqsave(&iounit->lock, flags); @@ -153,7 +153,7 @@ static void iounit_get_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus static void iounit_release_scsi_one(__u32 vaddr, unsigned long len, struct sbus_bus *sbus) { unsigned long flags; - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; spin_lock_irqsave(&iounit->lock, flags); len = ((vaddr & ~PAGE_MASK) + len + (PAGE_SIZE-1)) >> PAGE_SHIFT; @@ -168,7 +168,7 @@ static void iounit_release_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_ { unsigned long flags; unsigned long vaddr, len; - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; spin_lock_irqsave(&iounit->lock, flags); while (sz != 0) { @@ -211,7 +211,7 @@ static int iounit_map_dma_area(dma_addr_t *pba, unsigned long va, __u32 addr, in i = ((addr - IOUNIT_DMA_BASE) >> PAGE_SHIFT); for_each_sbus(sbus) { - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; iopte = (iopte_t *)(iounit->page_table + i); *iopte = MKIOPTE(__pa(page)); @@ -235,7 +235,7 @@ static void iounit_unmap_dma_area(unsigned long addr, int len) static struct page *iounit_translate_dvma(unsigned long addr) { struct sbus_bus *sbus = sbus_root; /* They are all the same */ - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; int i; iopte_t *iopte; @@ -279,7 +279,7 @@ __u32 iounit_map_dma_init(struct sbus_bus *sbus, int size) unsigned long rotor, scan, limit; unsigned long flags; __u32 ret; - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; npages = (size + (PAGE_SIZE-1)) >> PAGE_SHIFT; i = 0x0213; @@ -315,7 +315,7 @@ nexti: scan = find_next_zero_bit(iounit->bmap, limit, scan); __u32 iounit_map_dma_page(__u32 vaddr, void *addr, struct sbus_bus *sbus) { int scan = (vaddr - IOUNIT_DMA_BASE) >> PAGE_SHIFT; - struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu; + struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; iounit->page_table[scan] = MKIOPTE(__pa(((unsigned long)addr) & PAGE_MASK)); return vaddr + (((unsigned long)addr) & ~PAGE_MASK); diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index be042ef..52e907a 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -132,7 +132,7 @@ iommu_init(int iommund, struct sbus_bus *sbus) impl, vers, iommu->page_table, (int)(IOMMU_NPTES*sizeof(iopte_t)), (int)IOMMU_NPTES); - sbus->iommu = iommu; + sbus->ofdev.dev.archdata.iommu = iommu; } /* This begs to be btfixup-ed by srmmu. */ @@ -166,7 +166,7 @@ static void iommu_flush_iotlb(iopte_t *iopte, unsigned int niopte) static u32 iommu_get_one(struct page *page, int npages, struct sbus_bus *sbus) { - struct iommu_struct *iommu = sbus->iommu; + struct iommu_struct *iommu = sbus->ofdev.dev.archdata.iommu; int ioptex; iopte_t *iopte, *iopte0; unsigned int busa, busa0; @@ -291,7 +291,7 @@ static void iommu_get_scsi_sgl_pflush(struct scatterlist *sg, int sz, struct sbu static void iommu_release_one(u32 busa, int npages, struct sbus_bus *sbus) { - struct iommu_struct *iommu = sbus->iommu; + struct iommu_struct *iommu = sbus->ofdev.dev.archdata.iommu; int ioptex; int i; @@ -334,7 +334,7 @@ static int iommu_map_dma_area(dma_addr_t *pba, unsigned long va, unsigned long addr, int len) { unsigned long page, end; - struct iommu_struct *iommu = sbus_root->iommu; + struct iommu_struct *iommu = sbus_root->ofdev.dev.archdata.iommu; iopte_t *iopte = iommu->page_table; iopte_t *first; int ioptex; @@ -399,7 +399,7 @@ static int iommu_map_dma_area(dma_addr_t *pba, unsigned long va, static void iommu_unmap_dma_area(unsigned long busa, int len) { - struct iommu_struct *iommu = sbus_root->iommu; + struct iommu_struct *iommu = sbus_root->ofdev.dev.archdata.iommu; iopte_t *iopte = iommu->page_table; unsigned long end; int ioptex = (busa - iommu->start) >> PAGE_SHIFT; @@ -420,7 +420,7 @@ static void iommu_unmap_dma_area(unsigned long busa, int len) static struct page *iommu_translate_dvma(unsigned long busa) { - struct iommu_struct *iommu = sbus_root->iommu; + struct iommu_struct *iommu = sbus_root->ofdev.dev.archdata.iommu; iopte_t *iopte = iommu->page_table; iopte += ((busa - iommu->start) >> PAGE_SHIFT); diff --git a/include/asm-sparc/sbus.h b/include/asm-sparc/sbus.h index d036e44..27d076c 100644 --- a/include/asm-sparc/sbus.h +++ b/include/asm-sparc/sbus.h @@ -68,7 +68,6 @@ struct sbus_dev { /* This struct describes the SBus(s) found on this machine. */ struct sbus_bus { struct of_device ofdev; - void *iommu; /* Opaque IOMMU cookie */ struct sbus_dev *devices; /* Link to devices on this SBus */ struct sbus_bus *next; /* next SBus, if more than one SBus */ int prom_node; /* PROM device tree node for this SBus */ -- cgit v0.10.2 From f642b263800e6e57c377d630be6d2a999683b579 Mon Sep 17 00:00:00 2001 From: Mark Fortescue Date: Tue, 14 Aug 2007 18:22:03 -0700 Subject: [SPARC32]: Fix bogus ramdisk image location check. This mirrors sparc64 commit 715a0ecc29c850d2b2f76e1803d3f22cd5a0ac0d sparc_ramdisk_image should always be decremented by KERNBASE. Signed-off-by: Mark Fortescue Signed-off-by: David S. Miller diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c index a1bef07..c13e6cd 100644 --- a/arch/sparc/mm/init.c +++ b/arch/sparc/mm/init.c @@ -206,8 +206,7 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) #ifdef CONFIG_BLK_DEV_INITRD /* Now have to check initial ramdisk, so that bootmap does not overwrite it */ if (sparc_ramdisk_image) { - if (sparc_ramdisk_image >= (unsigned long)&_end - 2 * PAGE_SIZE) - sparc_ramdisk_image -= KERNBASE; + sparc_ramdisk_image -= KERNBASE; initrd_start = sparc_ramdisk_image + phys_base; initrd_end = initrd_start + sparc_ramdisk_size; if (initrd_end > end_of_phys_memory) { -- cgit v0.10.2 From 70b0e7a919b7961285c685a87928ed78c9fb07f0 Mon Sep 17 00:00:00 2001 From: Mark Fortescue Date: Tue, 14 Aug 2007 18:24:10 -0700 Subject: [SPARC32]: Remove superfluous 'kernel_end' alignment on sun4c. In sun4c_init_clean_mmu(), aligning 'kernel_end' using SUN4C_REAL_PGDIR_ALIGN() is unnecessary since the caller does this already. In sun4c_paging_init(), 4 page sizes of "fluff" were added to the address of &end. This was necessary a long time ago when sparc32 would allocate some early data structures by carving out memory chunks after &end but that no longer occurs. Signed-off-by: Mark Fortescue Signed-off-by: David S. Miller diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c index 79d60d8..005a3e7 100644 --- a/arch/sparc/mm/sun4c.c +++ b/arch/sparc/mm/sun4c.c @@ -268,7 +268,6 @@ static inline void sun4c_init_clean_mmu(unsigned long kernel_end) unsigned char savectx, ctx; savectx = sun4c_get_context(); - kernel_end = SUN4C_REAL_PGDIR_ALIGN(kernel_end); for (ctx = 0; ctx < num_contexts; ctx++) { sun4c_set_context(ctx); for (vaddr = 0; vaddr < 0x20000000; vaddr += SUN4C_REAL_PGDIR_SIZE) @@ -2064,7 +2063,6 @@ void __init sun4c_paging_init(void) unsigned long end_pfn, pages_avail; kernel_end = (unsigned long) &end; - kernel_end += (SUN4C_REAL_PGDIR_SIZE * 4); kernel_end = SUN4C_REAL_PGDIR_ALIGN(kernel_end); pages_avail = 0; -- cgit v0.10.2 From 0fdb7f96d8c27e37ed2ca1ae5a763baf14b8fe0c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 15 Aug 2007 21:02:23 -0700 Subject: [SPARC64]: Allow userspace to get at the machine description. Like the OF device tree, it's useful to let userland get at the machine description so it can pretty print the graph etc. The implementation is a simple MISC device with a read method. Signed-off-by: David S. Miller diff --git a/arch/sparc64/kernel/mdesc.c b/arch/sparc64/kernel/mdesc.c index 95059c2..9f22e4f 100644 --- a/arch/sparc64/kernel/mdesc.c +++ b/arch/sparc64/kernel/mdesc.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -836,6 +837,43 @@ void __devinit mdesc_fill_in_cpu_data(cpumask_t mask) mdesc_release(hp); } +static ssize_t mdesc_read(struct file *file, char __user *buf, + size_t len, loff_t *offp) +{ + struct mdesc_handle *hp = mdesc_grab(); + int err; + + if (!hp) + return -ENODEV; + + err = hp->handle_size; + if (len < hp->handle_size) + err = -EMSGSIZE; + else if (copy_to_user(buf, &hp->mdesc, hp->handle_size)) + err = -EFAULT; + mdesc_release(hp); + + return err; +} + +static const struct file_operations mdesc_fops = { + .read = mdesc_read, + .owner = THIS_MODULE, +}; + +static struct miscdevice mdesc_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "mdesc", + .fops = &mdesc_fops, +}; + +static int __init mdesc_misc_init(void) +{ + return misc_register(&mdesc_misc); +} + +__initcall(mdesc_misc_init); + void __init sun4v_mdesc_init(void) { struct mdesc_handle *hp; -- cgit v0.10.2 From cf5adce11743e98739fcb97e76d688f0b0bc2199 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 16 Aug 2007 01:47:25 -0700 Subject: [SPARC64]: Niagara-2 optimized copies. The bzero/memset implementation stays the same as Niagara-1. Signed-off-by: David S. Miller diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index ac18bd8..a2652bc 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -501,7 +501,7 @@ niagara_tlb_fixup: cmp %g1, SUN4V_CHIP_NIAGARA1 be,pt %xcc, niagara_patch cmp %g1, SUN4V_CHIP_NIAGARA2 - be,pt %xcc, niagara_patch + be,pt %xcc, niagara2_patch nop call generic_patch_copyops @@ -512,6 +512,15 @@ niagara_tlb_fixup: nop ba,a,pt %xcc, 80f +niagara2_patch: + call niagara2_patch_copyops + nop + call niagara_patch_bzero + nop + call niagara2_patch_pageops + nop + + ba,a,pt %xcc, 80f niagara_patch: call niagara_patch_copyops diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile index f95fbfa..f095e13 100644 --- a/arch/sparc64/lib/Makefile +++ b/arch/sparc64/lib/Makefile @@ -13,6 +13,8 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \ U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \ NGpage.o NGbzero.o \ + NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o NG2patch.o \ + NG2page.o \ GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o GENpatch.o \ GENpage.o GENbzero.o \ copy_in_user.o user_fixup.o memmove.o \ diff --git a/arch/sparc64/lib/NG2copy_from_user.S b/arch/sparc64/lib/NG2copy_from_user.S new file mode 100644 index 0000000..c77ef5f --- /dev/null +++ b/arch/sparc64/lib/NG2copy_from_user.S @@ -0,0 +1,40 @@ +/* NG2copy_from_user.S: Niagara-2 optimized copy from userspace. + * + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ + +#define EX_LD(x) \ +98: x; \ + .section .fixup; \ + .align 4; \ +99: wr %g0, ASI_AIUS, %asi;\ + retl; \ + mov 1, %o0; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#ifndef ASI_BLK_AIUS_4V +#define ASI_BLK_AIUS_4V 0x17 +#endif + +#define FUNC_NAME NG2copy_from_user +#define LOAD(type,addr,dest) type##a [addr] %asi, dest +#define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_AIUS_4V, dest +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, memcpy_user_stub; \ + nop +#endif + +#include "NG2memcpy.S" diff --git a/arch/sparc64/lib/NG2copy_to_user.S b/arch/sparc64/lib/NG2copy_to_user.S new file mode 100644 index 0000000..4bd4093 --- /dev/null +++ b/arch/sparc64/lib/NG2copy_to_user.S @@ -0,0 +1,49 @@ +/* NG2copy_to_user.S: Niagara-2 optimized copy to userspace. + * + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ + +#define EX_ST(x) \ +98: x; \ + .section .fixup; \ + .align 4; \ +99: wr %g0, ASI_AIUS, %asi;\ + retl; \ + mov 1, %o0; \ + .section __ex_table,"a";\ + .align 4; \ + .word 98b, 99b; \ + .text; \ + .align 4; + +#ifndef ASI_AIUS +#define ASI_AIUS 0x11 +#endif + +#ifndef ASI_BLK_AIUS_4V +#define ASI_BLK_AIUS_4V 0x17 +#endif + +#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS +#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 +#endif + +#define FUNC_NAME NG2copy_to_user +#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS +#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_AIUS_4V +#define EX_RETVAL(x) 0 + +#ifdef __KERNEL__ + /* Writing to %asi is _expensive_ so we hardcode it. + * Reading %asi to check for KERNEL_DS is comparatively + * cheap. + */ +#define PREAMBLE \ + rd %asi, %g1; \ + cmp %g1, ASI_AIUS; \ + bne,pn %icc, memcpy_user_stub; \ + nop +#endif + +#include "NG2memcpy.S" diff --git a/arch/sparc64/lib/NG2memcpy.S b/arch/sparc64/lib/NG2memcpy.S new file mode 100644 index 0000000..0aed756 --- /dev/null +++ b/arch/sparc64/lib/NG2memcpy.S @@ -0,0 +1,520 @@ +/* NG2memcpy.S: Niagara-2 optimized memcpy. + * + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ + +#ifdef __KERNEL__ +#include +#include +#define GLOBAL_SPARE %g7 +#else +#define ASI_PNF 0x82 +#define ASI_BLK_P 0xf0 +#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 +#define FPRS_FEF 0x04 +#ifdef MEMCPY_DEBUG +#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \ + clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0; +#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs +#else +#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs +#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs +#endif +#define GLOBAL_SPARE %g5 +#endif + +#ifndef STORE_ASI +#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P +#else +#define STORE_ASI 0x80 /* ASI_P */ +#endif +#endif + +#ifndef EX_LD +#define EX_LD(x) x +#endif + +#ifndef EX_ST +#define EX_ST(x) x +#endif + +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +#endif + +#ifndef LOAD +#define LOAD(type,addr,dest) type [addr], dest +#endif + +#ifndef LOAD_BLK +#define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_P, dest +#endif + +#ifndef STORE +#ifndef MEMCPY_DEBUG +#define STORE(type,src,addr) type src, [addr] +#else +#define STORE(type,src,addr) type##a src, [addr] 0x80 +#endif +#endif + +#ifndef STORE_BLK +#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_P +#endif + +#ifndef STORE_INIT +#define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI +#endif + +#ifndef FUNC_NAME +#define FUNC_NAME NG2memcpy +#endif + +#ifndef PREAMBLE +#define PREAMBLE +#endif + +#ifndef XCC +#define XCC xcc +#endif + +#define FREG_FROB(x0, x1, x2, x3, x4, x5, x6, x7, x8) \ + faligndata %x0, %x1, %f0; \ + faligndata %x1, %x2, %f2; \ + faligndata %x2, %x3, %f4; \ + faligndata %x3, %x4, %f6; \ + faligndata %x4, %x5, %f8; \ + faligndata %x5, %x6, %f10; \ + faligndata %x6, %x7, %f12; \ + faligndata %x7, %x8, %f14; + +#define FREG_MOVE_1(x0) \ + fmovd %x0, %f0; +#define FREG_MOVE_2(x0, x1) \ + fmovd %x0, %f0; \ + fmovd %x1, %f2; +#define FREG_MOVE_3(x0, x1, x2) \ + fmovd %x0, %f0; \ + fmovd %x1, %f2; \ + fmovd %x2, %f4; +#define FREG_MOVE_4(x0, x1, x2, x3) \ + fmovd %x0, %f0; \ + fmovd %x1, %f2; \ + fmovd %x2, %f4; \ + fmovd %x3, %f6; +#define FREG_MOVE_5(x0, x1, x2, x3, x4) \ + fmovd %x0, %f0; \ + fmovd %x1, %f2; \ + fmovd %x2, %f4; \ + fmovd %x3, %f6; \ + fmovd %x4, %f8; +#define FREG_MOVE_6(x0, x1, x2, x3, x4, x5) \ + fmovd %x0, %f0; \ + fmovd %x1, %f2; \ + fmovd %x2, %f4; \ + fmovd %x3, %f6; \ + fmovd %x4, %f8; \ + fmovd %x5, %f10; +#define FREG_MOVE_7(x0, x1, x2, x3, x4, x5, x6) \ + fmovd %x0, %f0; \ + fmovd %x1, %f2; \ + fmovd %x2, %f4; \ + fmovd %x3, %f6; \ + fmovd %x4, %f8; \ + fmovd %x5, %f10; \ + fmovd %x6, %f12; +#define FREG_MOVE_8(x0, x1, x2, x3, x4, x5, x6, x7) \ + fmovd %x0, %f0; \ + fmovd %x1, %f2; \ + fmovd %x2, %f4; \ + fmovd %x3, %f6; \ + fmovd %x4, %f8; \ + fmovd %x5, %f10; \ + fmovd %x6, %f12; \ + fmovd %x7, %f14; +#define FREG_LOAD_1(base, x0) \ + EX_LD(LOAD(ldd, base + 0x00, %x0)) +#define FREG_LOAD_2(base, x0, x1) \ + EX_LD(LOAD(ldd, base + 0x00, %x0)); \ + EX_LD(LOAD(ldd, base + 0x08, %x1)); +#define FREG_LOAD_3(base, x0, x1, x2) \ + EX_LD(LOAD(ldd, base + 0x00, %x0)); \ + EX_LD(LOAD(ldd, base + 0x08, %x1)); \ + EX_LD(LOAD(ldd, base + 0x10, %x2)); +#define FREG_LOAD_4(base, x0, x1, x2, x3) \ + EX_LD(LOAD(ldd, base + 0x00, %x0)); \ + EX_LD(LOAD(ldd, base + 0x08, %x1)); \ + EX_LD(LOAD(ldd, base + 0x10, %x2)); \ + EX_LD(LOAD(ldd, base + 0x18, %x3)); +#define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ + EX_LD(LOAD(ldd, base + 0x00, %x0)); \ + EX_LD(LOAD(ldd, base + 0x08, %x1)); \ + EX_LD(LOAD(ldd, base + 0x10, %x2)); \ + EX_LD(LOAD(ldd, base + 0x18, %x3)); \ + EX_LD(LOAD(ldd, base + 0x20, %x4)); +#define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ + EX_LD(LOAD(ldd, base + 0x00, %x0)); \ + EX_LD(LOAD(ldd, base + 0x08, %x1)); \ + EX_LD(LOAD(ldd, base + 0x10, %x2)); \ + EX_LD(LOAD(ldd, base + 0x18, %x3)); \ + EX_LD(LOAD(ldd, base + 0x20, %x4)); \ + EX_LD(LOAD(ldd, base + 0x28, %x5)); +#define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ + EX_LD(LOAD(ldd, base + 0x00, %x0)); \ + EX_LD(LOAD(ldd, base + 0x08, %x1)); \ + EX_LD(LOAD(ldd, base + 0x10, %x2)); \ + EX_LD(LOAD(ldd, base + 0x18, %x3)); \ + EX_LD(LOAD(ldd, base + 0x20, %x4)); \ + EX_LD(LOAD(ldd, base + 0x28, %x5)); \ + EX_LD(LOAD(ldd, base + 0x30, %x6)); + + .register %g2,#scratch + .register %g3,#scratch + + .text + .align 64 + + .globl FUNC_NAME + .type FUNC_NAME,#function +FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ + srlx %o2, 31, %g2 + cmp %g2, 0 + tne %xcc, 5 + PREAMBLE + mov %o0, GLOBAL_SPARE + cmp %o2, 0 + be,pn %XCC, 85f + or %o0, %o1, %o3 + cmp %o2, 16 + blu,a,pn %XCC, 80f + or %o3, %o2, %o3 + + /* 2 blocks (128 bytes) is the minimum we can do the block + * copy with. We need to ensure that we'll iterate at least + * once in the block copy loop. At worst we'll need to align + * the destination to a 64-byte boundary which can chew up + * to (64 - 1) bytes from the length before we perform the + * block copy loop. + * + * However, the cut-off point, performance wise, is around + * 4 64-byte blocks. + */ + cmp %o2, (4 * 64) + blu,pt %XCC, 75f + andcc %o3, 0x7, %g0 + + /* %o0: dst + * %o1: src + * %o2: len (known to be >= 128) + * + * The block copy loops can use %o4, %g2, %g3 as + * temporaries while copying the data. %o5 must + * be preserved between VISEntryHalf and VISExitHalf + */ + + LOAD(prefetch, %o1 + 0x000, #one_read) + LOAD(prefetch, %o1 + 0x040, #one_read) + LOAD(prefetch, %o1 + 0x080, #one_read) + + /* Align destination on 64-byte boundary. */ + andcc %o0, (64 - 1), %o4 + be,pt %XCC, 2f + sub %o4, 64, %o4 + sub %g0, %o4, %o4 ! bytes to align dst + sub %o2, %o4, %o2 +1: subcc %o4, 1, %o4 + EX_LD(LOAD(ldub, %o1, %g1)) + EX_ST(STORE(stb, %g1, %o0)) + add %o1, 1, %o1 + bne,pt %XCC, 1b + add %o0, 1, %o0 + +2: + /* Clobbers o5/g1/g2/g3/g7/icc/xcc. We must preserve + * o5 from here until we hit VISExitHalf. + */ + VISEntryHalf + + alignaddr %o1, %g0, %g0 + + add %o1, (64 - 1), %o4 + andn %o4, (64 - 1), %o4 + andn %o2, (64 - 1), %g1 + sub %o2, %g1, %o2 + + and %o1, (64 - 1), %g2 + add %o1, %g1, %o1 + sub %o0, %o4, %g3 + brz,pt %g2, 190f + cmp %g2, 32 + blu,a 5f + cmp %g2, 16 + cmp %g2, 48 + blu,a 4f + cmp %g2, 40 + cmp %g2, 56 + blu 170f + nop + ba,a,pt %xcc, 180f + +4: /* 32 <= low bits < 48 */ + blu 150f + nop + ba,a,pt %xcc, 160f +5: /* 0 < low bits < 32 */ + blu,a 6f + cmp %g2, 8 + cmp %g2, 24 + blu 130f + nop + ba,a,pt %xcc, 140f +6: /* 0 < low bits < 16 */ + bgeu 120f + nop + /* fall through for 0 < low bits < 8 */ +110: sub %o4, 64, %g2 + EX_LD(LOAD_BLK(%g2, %f0)) +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + EX_LD(LOAD_BLK(%o4, %f16)) + FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +120: sub %o4, 56, %g2 + FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + EX_LD(LOAD_BLK(%o4, %f16)) + FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +130: sub %o4, 48, %g2 + FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + EX_LD(LOAD_BLK(%o4, %f16)) + FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + FREG_MOVE_6(f20, f22, f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +140: sub %o4, 40, %g2 + FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + EX_LD(LOAD_BLK(%o4, %f16)) + FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + FREG_MOVE_5(f22, f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +150: sub %o4, 32, %g2 + FREG_LOAD_4(%g2, f0, f2, f4, f6) +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + EX_LD(LOAD_BLK(%o4, %f16)) + FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + FREG_MOVE_4(f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +160: sub %o4, 24, %g2 + FREG_LOAD_3(%g2, f0, f2, f4) +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + EX_LD(LOAD_BLK(%o4, %f16)) + FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + FREG_MOVE_3(f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +170: sub %o4, 16, %g2 + FREG_LOAD_2(%g2, f0, f2) +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + EX_LD(LOAD_BLK(%o4, %f16)) + FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + FREG_MOVE_2(f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +180: sub %o4, 8, %g2 + FREG_LOAD_1(%g2, f0) +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + EX_LD(LOAD_BLK(%o4, %f16)) + FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + FREG_MOVE_1(f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +190: +1: EX_ST(STORE_INIT(%g0, %o4 + %g3)) + subcc %g1, 64, %g1 + EX_LD(LOAD_BLK(%o4, %f0)) + EX_ST(STORE_BLK(%f0, %o4 + %g3)) + add %o4, 64, %o4 + bne,pt %xcc, 1b + LOAD(prefetch, %o4 + 64, #one_read) + +195: + add %o4, %g3, %o0 + membar #Sync + + VISExitHalf + + /* %o2 contains any final bytes still needed to be copied + * over. If anything is left, we copy it one byte at a time. + */ + brz,pt %o2, 85f + sub %o0, %o1, %o3 + ba,a,pt %XCC, 90f + + .align 64 +75: /* 16 < len <= 64 */ + bne,pn %XCC, 75f + sub %o0, %o1, %o3 + +72: + andn %o2, 0xf, %o4 + and %o2, 0xf, %o2 +1: subcc %o4, 0x10, %o4 + EX_LD(LOAD(ldx, %o1, %o5)) + add %o1, 0x08, %o1 + EX_LD(LOAD(ldx, %o1, %g1)) + sub %o1, 0x08, %o1 + EX_ST(STORE(stx, %o5, %o1 + %o3)) + add %o1, 0x8, %o1 + EX_ST(STORE(stx, %g1, %o1 + %o3)) + bgu,pt %XCC, 1b + add %o1, 0x8, %o1 +73: andcc %o2, 0x8, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x8, %o2 + EX_LD(LOAD(ldx, %o1, %o5)) + EX_ST(STORE(stx, %o5, %o1 + %o3)) + add %o1, 0x8, %o1 +1: andcc %o2, 0x4, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x4, %o2 + EX_LD(LOAD(lduw, %o1, %o5)) + EX_ST(STORE(stw, %o5, %o1 + %o3)) + add %o1, 0x4, %o1 +1: cmp %o2, 0 + be,pt %XCC, 85f + nop + ba,pt %xcc, 90f + nop + +75: + andcc %o0, 0x7, %g1 + sub %g1, 0x8, %g1 + be,pn %icc, 2f + sub %g0, %g1, %g1 + sub %o2, %g1, %o2 + +1: subcc %g1, 1, %g1 + EX_LD(LOAD(ldub, %o1, %o5)) + EX_ST(STORE(stb, %o5, %o1 + %o3)) + bgu,pt %icc, 1b + add %o1, 1, %o1 + +2: add %o1, %o3, %o0 + andcc %o1, 0x7, %g1 + bne,pt %icc, 8f + sll %g1, 3, %g1 + + cmp %o2, 16 + bgeu,pt %icc, 72b + nop + ba,a,pt %xcc, 73b + +8: mov 64, %o3 + andn %o1, 0x7, %o1 + EX_LD(LOAD(ldx, %o1, %g2)) + sub %o3, %g1, %o3 + andn %o2, 0x7, %o4 + sllx %g2, %g1, %g2 +1: add %o1, 0x8, %o1 + EX_LD(LOAD(ldx, %o1, %g3)) + subcc %o4, 0x8, %o4 + srlx %g3, %o3, %o5 + or %o5, %g2, %o5 + EX_ST(STORE(stx, %o5, %o0)) + add %o0, 0x8, %o0 + bgu,pt %icc, 1b + sllx %g3, %g1, %g2 + + srl %g1, 3, %g1 + andcc %o2, 0x7, %o2 + be,pn %icc, 85f + add %o1, %g1, %o1 + ba,pt %xcc, 90f + sub %o0, %o1, %o3 + + .align 64 +80: /* 0 < len <= 16 */ + andcc %o3, 0x3, %g0 + bne,pn %XCC, 90f + sub %o0, %o1, %o3 + +1: + subcc %o2, 4, %o2 + EX_LD(LOAD(lduw, %o1, %g1)) + EX_ST(STORE(stw, %g1, %o1 + %o3)) + bgu,pt %XCC, 1b + add %o1, 4, %o1 + +85: retl + mov EX_RETVAL(GLOBAL_SPARE), %o0 + + .align 32 +90: + subcc %o2, 1, %o2 + EX_LD(LOAD(ldub, %o1, %g1)) + EX_ST(STORE(stb, %g1, %o1 + %o3)) + bgu,pt %XCC, 90b + add %o1, 1, %o1 + retl + mov EX_RETVAL(GLOBAL_SPARE), %o0 + + .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc64/lib/NG2page.S b/arch/sparc64/lib/NG2page.S new file mode 100644 index 0000000..73b6b7c --- /dev/null +++ b/arch/sparc64/lib/NG2page.S @@ -0,0 +1,61 @@ +/* NG2page.S: Niagara-2 optimized clear and copy page. + * + * Copyright (C) 2007 (davem@davemloft.net) + */ + +#include +#include +#include + + .text + .align 32 + + /* This is heavily simplified from the sun4u variants + * because Niagara-2 does not have any D-cache aliasing issues. + */ +NG2copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ + prefetch [%o1 + 0x00], #one_read + prefetch [%o1 + 0x40], #one_read + VISEntryHalf + set PAGE_SIZE, %g7 + sub %o0, %o1, %g3 +1: stxa %g0, [%o1 + %g3] ASI_BLK_INIT_QUAD_LDD_P + subcc %g7, 64, %g7 + ldda [%o1] ASI_BLK_P, %f0 + stda %f0, [%o1 + %g3] ASI_BLK_P + add %o1, 64, %o1 + bne,pt %xcc, 1b + prefetch [%o1 + 0x40], #one_read + membar #Sync + VISExitHalf + retl + nop + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define NG_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl niagara2_patch_pageops + .type niagara2_patch_pageops,#function +niagara2_patch_pageops: + NG_DO_PATCH(copy_user_page, NG2copy_user_page) + NG_DO_PATCH(_clear_page, NGclear_page) + NG_DO_PATCH(clear_user_page, NGclear_user_page) + retl + nop + .size niagara2_patch_pageops,.-niagara2_patch_pageops diff --git a/arch/sparc64/lib/NG2patch.S b/arch/sparc64/lib/NG2patch.S new file mode 100644 index 0000000..28c36f0 --- /dev/null +++ b/arch/sparc64/lib/NG2patch.S @@ -0,0 +1,33 @@ +/* NG2patch.S: Patch Ultra-I routines with Niagara-2 variant. + * + * Copyright (C) 2007 David S. Miller + */ + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define NG_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + sll %g1, 11, %g1; \ + srl %g1, 11 + 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl niagara2_patch_copyops + .type niagara2_patch_copyops,#function +niagara2_patch_copyops: + NG_DO_PATCH(memcpy, NG2memcpy) + NG_DO_PATCH(___copy_from_user, NG2copy_from_user) + NG_DO_PATCH(___copy_to_user, NG2copy_to_user) + retl + nop + .size niagara2_patch_copyops,.-niagara2_patch_copyops diff --git a/arch/sparc64/lib/NGpage.S b/arch/sparc64/lib/NGpage.S index 8ce3a0c..428920d 100644 --- a/arch/sparc64/lib/NGpage.S +++ b/arch/sparc64/lib/NGpage.S @@ -45,6 +45,7 @@ NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ retl nop + .globl NGclear_page, NGclear_user_page NGclear_page: /* %o0=dest */ NGclear_user_page: /* %o0=dest, %o1=vaddr */ mov 8, %g1 -- cgit v0.10.2 From 53140b71c5e7b5370e4ac6ffc13fddbdfae71473 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 16 Aug 2007 01:52:44 -0700 Subject: [SPARC64]: Do not touch %tick_cmpr on sun4v cpus. This register is not a part of the sun4v architecture. Niagara 1 and 2 happened to leave it around. Signed-off-by: David S. Miller diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index a2652bc..63144ad 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -715,12 +715,13 @@ setup_trap_table: membar #Sync + BRANCH_IF_SUN4V(o2, 1f) + /* Kill PROM timer */ sethi %hi(0x80000000), %o2 sllx %o2, 32, %o2 wr %o2, 0, %tick_cmpr - BRANCH_IF_SUN4V(o2, 1f) BRANCH_IF_ANY_CHEETAH(o2, o3, 1f) ba,pt %xcc, 2f -- cgit v0.10.2 From 7dc408808ad40b9a5d031966035713ed5d5153b8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 16 Aug 2007 01:56:00 -0700 Subject: [SPARC64]: SMP trampoline needs to avoid %tick_cmpr on sun4v too. Signed-off-by: David S. Miller diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index 9448595..9533a25 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S @@ -95,14 +95,13 @@ spitfire_startup: membar #Sync startup_continue: + mov %o0, %l0 + BRANCH_IF_SUN4V(g1, niagara_lock_tlb) + sethi %hi(0x80000000), %g2 sllx %g2, 32, %g2 wr %g2, 0, %tick_cmpr - mov %o0, %l0 - - BRANCH_IF_SUN4V(g1, niagara_lock_tlb) - /* Call OBP by hand to lock KERNBASE into i/d tlbs. * We lock 2 consequetive entries if we are 'bigkernel'. */ -- cgit v0.10.2 From 8b224b813aad0231af62dc75d056aae83c9d4d12 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 16 Aug 2007 13:56:39 -0700 Subject: [SPARC64]: Create a HWCAP_SPARC_N2 and report it to userspace on Niagara-2. Signed-off-by: David S. Miller diff --git a/include/asm-sparc64/elf.h b/include/asm-sparc64/elf.h index 303d85e..8653e86 100644 --- a/include/asm-sparc64/elf.h +++ b/include/asm-sparc64/elf.h @@ -70,6 +70,7 @@ #define HWCAP_SPARC_V9 16 #define HWCAP_SPARC_ULTRA3 32 #define HWCAP_SPARC_BLKINIT 64 +#define HWCAP_SPARC_N2 128 /* * These are used to set parameters in the core dumps. @@ -155,8 +156,13 @@ static inline unsigned int sparc64_elf_hwcap(void) if (tlb_type == cheetah || tlb_type == cheetah_plus) cap |= HWCAP_SPARC_ULTRA3; - else if (tlb_type == hypervisor) - cap |= HWCAP_SPARC_BLKINIT; + else if (tlb_type == hypervisor) { + if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA2) + cap |= HWCAP_SPARC_BLKINIT; + if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2) + cap |= HWCAP_SPARC_N2; + } return cap; } -- cgit v0.10.2 From 405849610fd96b4f34cd1875c4c033228fea6c0f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 16 Aug 2007 22:59:49 -0700 Subject: [MATH-EMU]: Fix underflow exception reporting. The underflow exception cases were wrong. This is one weird area of ieee1754 handling in that the underflow behavior changes based upon whether underflow is enabled in the trap enable mask of the FPU control register. As a specific case the Sparc V9 manual gives us the following description: -------------------- If UFM = 0: Underflow occurs if a nonzero result is tiny and a loss of accuracy occurs. Tininess may be detected before or after rounding. Loss of accuracy may be either a denormalization loss or an inexact result. If UFM = 1: Underflow occurs if a nonzero result is tiny. Tininess may be detected before or after rounding. -------------------- What this amounts to in the packing case is if we go subnormal, we set underflow if any of the following are true: 1) rounding sets inexact 2) we ended up rounding back up to normal (this is the case where we set the exponent to 1 and set the fraction to zero), this should set inexact too 3) underflow is set in FPU control register trap-enable mask The initially discovered example was "DBL_MIN / 16.0" which incorrectly generated an underflow. It should not, unless underflow is set in the trap-enable mask of the FPU csr. Another example, "0x0.0000000000001p-1022 / 16.0", should signal both inexact and underflow. The cpu implementations and ieee1754 literature is very clear about this. This is case #2 above. However, if underflow is set in the trap enable mask, only underflow should be set and reported as a trap. That is handled properly by the prioritization logic in arch/sparc{,64}/math-emu/math.c:record_exception(). Based upon a report and test case from Jakub Jelinek. Signed-off-by: David S. Miller diff --git a/include/asm-sparc/sfp-machine.h b/include/asm-sparc/sfp-machine.h index ecfc86a..266a42b 100644 --- a/include/asm-sparc/sfp-machine.h +++ b/include/asm-sparc/sfp-machine.h @@ -203,4 +203,10 @@ extern struct task_struct *last_task_used_math; #define FP_INHIBIT_RESULTS ((last_task_used_math->thread.fsr >> 23) & _fex) #endif +#ifdef CONFIG_SMP +#define FP_TRAPPING_EXCEPTIONS ((current->thread.fsr >> 23) & 0x1f) +#else +#define FP_TRAPPING_EXCEPTIONS ((last_task_used_math->thread.fsr >> 23) & 0x1f) +#endif + #endif diff --git a/include/asm-sparc64/sfp-machine.h b/include/asm-sparc64/sfp-machine.h index 89d4243..c9331b0 100644 --- a/include/asm-sparc64/sfp-machine.h +++ b/include/asm-sparc64/sfp-machine.h @@ -88,4 +88,6 @@ #define FP_INHIBIT_RESULTS ((current_thread_info()->xfsr[0] >> 23) & _fex) +#define FP_TRAPPING_EXCEPTIONS ((current_thread_info()->xfsr[0] >> 23) & 0x1f) + #endif diff --git a/include/math-emu/op-common.h b/include/math-emu/op-common.h index 93780ab..bb46e76 100644 --- a/include/math-emu/op-common.h +++ b/include/math-emu/op-common.h @@ -145,13 +145,16 @@ do { \ { \ X##_e = 1; \ _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \ + FP_SET_EXCEPTION(FP_EX_INEXACT); \ } \ else \ { \ X##_e = 0; \ _FP_FRAC_SRL_##wc(X, _FP_WORKBITS); \ - FP_SET_EXCEPTION(FP_EX_UNDERFLOW); \ } \ + if ((FP_CUR_EXCEPTIONS & FP_EX_INEXACT) || \ + (FP_TRAPPING_EXCEPTIONS & FP_EX_UNDERFLOW)) \ + FP_SET_EXCEPTION(FP_EX_UNDERFLOW); \ } \ else \ { \ diff --git a/include/math-emu/soft-fp.h b/include/math-emu/soft-fp.h index d02eb64..a0721ef 100644 --- a/include/math-emu/soft-fp.h +++ b/include/math-emu/soft-fp.h @@ -97,12 +97,19 @@ #define FP_INHIBIT_RESULTS 0 #endif +#ifndef FP_TRAPPING_EXCEPTIONS +#define FP_TRAPPING_EXCPETIONS 0 +#endif + #define FP_SET_EXCEPTION(ex) \ _fex |= (ex) #define FP_UNSET_EXCEPTION(ex) \ _fex &= ~(ex) +#define FP_CUR_EXCEPTIONS \ + (_fex) + #define FP_CLEAR_EXCEPTIONS \ _fex = 0 -- cgit v0.10.2