diff options
author | Scott Wood <scottwood@freescale.com> | 2015-01-10 01:55:03 (GMT) |
---|---|---|
committer | Honghua Yin <Hong-Hua.Yin@freescale.com> | 2015-03-20 05:09:19 (GMT) |
commit | 59da1d4b010ef87aa153eaadc0fa33874d4d74a5 (patch) | |
tree | 5a5e110a9d7903d4b62de8ddf5e4805b6d8ee66a /arch | |
parent | 7bfbba1e607190bf542f56e87ff1ebc6d82388b5 (diff) | |
download | linux-fsl-qoriq-59da1d4b010ef87aa153eaadc0fa33874d4d74a5.tar.xz |
powerpc/e6500: Optimize hugepage TLB misses
Some workloads take a lot of TLB misses despite using traditional
hugepages. Handle these TLB misses in the asm fastpath rather than
going through a bunch of C code.
Signed-off-by: Scott Wood <scottwood@freescale.com>
Change-Id: I84a1f6fad189130c32a44e73ff60a26ffadfd59b
Reviewed-on: http://git.am.freescale.net:8181/32729
Tested-by: Review Code-CDREVIEW <CDREVIEW@freescale.com>
Reviewed-by: Richard Schmitt <richard.schmitt@freescale.com>
Reviewed-by: Honghua Yin <Hong-Hua.Yin@freescale.com>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/powerpc/mm/tlb_low_64e.S | 57 |
1 file changed, 51 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index ce37943..422f49f 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -464,18 +464,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 clrrdi r15,r15,3 cmpdi cr0,r14,0 - bge tlb_miss_fault_fsl_htw /* Bad pgd entry or hugepage; bail */ + bge tlb_miss_huge_fsl_htw /* Bad pgd entry or hugepage; bail */ ldx r14,r14,r15 /* grab pud entry */ rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 clrrdi r15,r15,3 cmpdi cr0,r14,0 - bge tlb_miss_fault_fsl_htw + bge tlb_miss_huge_fsl_htw ldx r14,r14,r15 /* Grab pmd entry */ mfspr r10,SPRN_MAS0 cmpdi cr0,r14,0 - bge tlb_miss_fault_fsl_htw + bge tlb_miss_huge_fsl_htw /* Now we build the MAS for a 2M indirect page: * @@ -490,12 +490,13 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) clrrdi r15,r16,21 mtspr SPRN_MAS2,r15 - rldicr r16,r11,0,62 - lwz r15,0(r16) - ori r14,r14,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) mtspr SPRN_MAS7_MAS3,r14 +tlb_miss_huge_done_fsl_htw: + rldicr r16,r11,0,62 + lwz r15,0(r16) + /* Not MAS0_ESEL_MASK because source is smaller */ rlwimi r10,r15,24,0x00ff0000 /* insert esel_next into MAS0 */ addis r15,r15,0x0100 /* increment esel_next */ @@ -528,6 +529,50 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT) tlb_epilog_bolted rfi +tlb_miss_huge_fsl_htw: + beq tlb_miss_fault_fsl_htw + li r10,1 + andi. r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */ + rldimi r14,r10,63,0 /* Set PD_HUGE */ + xor r14,r14,r15 /* Clear size bits */ + ldx r14,0,r14 + + /* + * Now we build the MAS for a huge page. 
+ * + * MAS 0 : ESEL needs to be filled by software round-robin + * - can be handled by indirect code + * MAS 1 : Need to clear IND and set TSIZE + * MAS 2,3+7: Needs to be redone similar to non-tablewalk handler + */ + + subi r15,r15,10 /* Convert psize to tsize */ + mfspr r10,SPRN_MAS1 + rlwinm r10,r10,0,~MAS1_IND + rlwimi r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK + mtspr SPRN_MAS1,r10 + + li r10,-0x400 + sld r15,r10,r15 /* Generate mask based on size */ + and r10,r16,r15 + rldicr r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT + rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */ + clrldi r15,r15,PAGE_SHIFT /* Clear crap at the top */ + rlwimi r15,r14,32-8,22,25 /* Move in U bits */ + mtspr SPRN_MAS2,r10 + andi. r10,r14,_PAGE_DIRTY + rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */ + + /* Mask out SW and UW if !DIRTY (XXX optimize this !) */ + bne 1f + li r10,MAS3_SW|MAS3_UW + andc r15,r15,r10 +1: + mtspr SPRN_MAS7_MAS3,r15 + + mfspr r10,SPRN_MAS0 + b tlb_miss_huge_done_fsl_htw + tlb_miss_kernel_fsl_htw: ld r14,PACA_KERNELPGD(r13) cmpldi cr1,r15,8 /* Check for vmalloc region */ |