From 59da1d4b010ef87aa153eaadc0fa33874d4d74a5 Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Fri, 9 Jan 2015 19:55:03 -0600
Subject: powerpc/e6500: Optimize hugepage TLB misses

Some workloads take a lot of TLB misses despite using traditional
hugepages.  Handle these TLB misses in the asm fastpath rather than
going through a bunch of C code.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Change-Id: I84a1f6fad189130c32a44e73ff60a26ffadfd59b
Reviewed-on: http://git.am.freescale.net:8181/32729
Tested-by: Review Code-CDREVIEW
Reviewed-by: Richard Schmitt
Reviewed-by: Honghua Yin

diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index ce37943..422f49f 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -464,18 +464,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
 	rldicl	r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
 	clrrdi	r15,r15,3
 	cmpdi	cr0,r14,0
-	bge	tlb_miss_fault_fsl_htw	/* Bad pgd entry or hugepage; bail */
+	bge	tlb_miss_huge_fsl_htw	/* Bad pgd entry or hugepage; bail */
 	ldx	r14,r14,r15		/* grab pud entry */

 	rldicl	r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
 	clrrdi	r15,r15,3
 	cmpdi	cr0,r14,0
-	bge	tlb_miss_fault_fsl_htw
+	bge	tlb_miss_huge_fsl_htw
 	ldx	r14,r14,r15		/* Grab pmd entry */

 	mfspr	r10,SPRN_MAS0
 	cmpdi	cr0,r14,0
-	bge	tlb_miss_fault_fsl_htw
+	bge	tlb_miss_huge_fsl_htw

 	/* Now we build the MAS for a 2M indirect page:
 	 *
@@ -490,12 +490,13 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
 	clrrdi	r15,r16,21
 	mtspr	SPRN_MAS2,r15

-	rldicr	r16,r11,0,62
-	lwz	r15,0(r16)
-
 	ori	r14,r14,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
 	mtspr	SPRN_MAS7_MAS3,r14

+tlb_miss_huge_done_fsl_htw:
+	rldicr	r16,r11,0,62
+	lwz	r15,0(r16)
+
 	/* Not MAS0_ESEL_MASK because source is smaller */
 	rlwimi	r10,r15,24,0x00ff0000	/* insert esel_next into MAS0 */
 	addis	r15,r15,0x0100		/* increment esel_next */
@@ -528,6 +529,50 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT)
 	tlb_epilog_bolted
 	rfi

+tlb_miss_huge_fsl_htw:
+	beq	tlb_miss_fault_fsl_htw
+	li	r10,1
+	andi.	r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */
+	rldimi	r14,r10,63,0		/* Set PD_HUGE */
+	xor	r14,r14,r15		/* Clear size bits */
+	ldx	r14,0,r14
+
+	/*
+	 * Now we build the MAS for a huge page.
+	 *
+	 * MAS 0   :	ESEL needs to be filled by software round-robin
+	 *		 - can be handled by indirect code
+	 * MAS 1   :	Need to clear IND and set TSIZE
+	 * MAS 2,3+7:	Needs to be redone similar to non-tablewalk handler
+	 */
+
+	subi	r15,r15,10		/* Convert psize to tsize */
+	mfspr	r10,SPRN_MAS1
+	rlwinm	r10,r10,0,~MAS1_IND
+	rlwimi	r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK
+	mtspr	SPRN_MAS1,r10
+
+	li	r10,-0x400
+	sld	r15,r10,r15		/* Generate mask based on size */
+	and	r10,r16,r15
+	rldicr	r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
+	rlwimi	r10,r14,32-19,27,31	/* Insert WIMGE */
+	clrldi	r15,r15,PAGE_SHIFT	/* Clear crap at the top */
+	rlwimi	r15,r14,32-8,22,25	/* Move in U bits */
+	mtspr	SPRN_MAS2,r10
+	andi.	r10,r14,_PAGE_DIRTY
+	rlwimi	r15,r14,32-2,26,31	/* Move in BAP bits */
+
+	/* Mask out SW and UW if !DIRTY (XXX optimize this !) */
+	bne	1f
+	li	r10,MAS3_SW|MAS3_UW
+	andc	r15,r15,r10
+1:
+	mtspr	SPRN_MAS7_MAS3,r15
+
+	mfspr	r10,SPRN_MAS0
+	b	tlb_miss_huge_done_fsl_htw
+
 tlb_miss_kernel_fsl_htw:
 	ld	r14,PACA_KERNELPGD(r13)
 	cmpldi	cr1,r15,8	/* Check for vmalloc region */
--
cgit v0.10.2
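
A note for readers tracing the new tlb_miss_huge_fsl_htw path: the
"subi r15,r15,10" plus the rlwinm/rlwimi pair rewrites MAS1 for a
direct (non-indirect) huge-page entry.  The C sketch below mirrors
that computation.  It assumes the Book3E convention that TSIZE
encodes log2(page size in KB) and that the MAS1 field constants are
as defined in arch/powerpc/include/asm/mmu-book3e.h; it is an
illustration, not code from the patch.

#include <stdint.h>

/* MAS1 field layout, assumed from arch/powerpc/include/asm/mmu-book3e.h */
#define MAS1_IND		0x00002000	/* indirect (page table) entry */
#define MAS1_TSIZE_SHIFT	7
#define MAS1_TSIZE_MASK		0x00000f80

/* Illustrative only -- not code from the patch. */
static uint32_t huge_mas1(uint32_t mas1, unsigned int page_shift)
{
	/* TSIZE is log2(size in KB), so "subi r15,r15,10" is shift - 10;
	 * e.g. a 2M page (shift 21) becomes tsize 11. */
	unsigned int tsize = page_shift - 10;

	mas1 &= ~MAS1_IND;			/* rlwinm r10,r10,0,~MAS1_IND */
	mas1 &= ~MAS1_TSIZE_MASK;		/* rlwimi inserts tsize into */
	mas1 |= tsize << MAS1_TSIZE_SHIFT;	/* the TSIZE field */
	return mas1;
}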
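
The EPN masking is similarly compact: "li r10,-0x400" loads -(1 KB),
and "sld r15,r10,r15" shifts it left by tsize, which yields
~(page_size - 1) because page_size = 1K << tsize.  The "and
r10,r16,r15" then aligns the faulting address to the huge-page
boundary for MAS2.  A sketch of that arithmetic, under the same
caveats as above:

#include <assert.h>
#include <stdint.h>

/* Illustrative only -- not code from the patch. */
static uint64_t huge_epn(uint64_t ea, unsigned int tsize)
{
	/* -0x400 is -(1 KB); shifted left by tsize it becomes
	 * ~(page_size - 1), matching "li r10,-0x400; sld r15,r10,r15" */
	uint64_t mask = (uint64_t)-0x400 << tsize;

	return ea & mask;		/* and r10,r16,r15 */
}

int main(void)
{
	/* 2M page: tsize 11, so the mask clears the low 21 bits */
	assert(huge_epn(0x10234567ULL, 11) == 0x10200000ULL);
	return 0;
}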
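
Finally, the _PAGE_DIRTY test near the end clears the supervisor and
user write permissions when the PTE is clean, so the first store to a
clean hugepage still faults into the C path that sets the dirty bit.
A minimal sketch, with MAS3 bit values assumed from mmu-book3e.h:

#define MAS3_SW	0x00000004	/* supervisor write (assumed value) */
#define MAS3_UW	0x00000008	/* user write (assumed value) */

/* Illustrative only -- mirrors the "bne 1f; li; andc" sequence. */
static unsigned int huge_mas3_perms(unsigned int mas3, int pte_dirty)
{
	if (!pte_dirty)
		mas3 &= ~(MAS3_SW | MAS3_UW);
	return mas3;
}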