author		Scott Wood <scottwood@freescale.com>		2015-01-10 01:55:03 (GMT)
committer	Honghua Yin <Hong-Hua.Yin@freescale.com>	2015-03-20 05:09:19 (GMT)
commit		59da1d4b010ef87aa153eaadc0fa33874d4d74a5 (patch)
tree		5a5e110a9d7903d4b62de8ddf5e4805b6d8ee66a
parent		7bfbba1e607190bf542f56e87ff1ebc6d82388b5 (diff)
download	linux-fsl-qoriq-59da1d4b010ef87aa153eaadc0fa33874d4d74a5.tar.xz
powerpc/e6500: Optimize hugepage TLB misses
Some workloads take a lot of TLB misses despite using traditional
hugepages. Handle these TLB misses in the asm fastpath rather than
going through a bunch of C code.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Change-Id: I84a1f6fad189130c32a44e73ff60a26ffadfd59b
Reviewed-on: http://git.am.freescale.net:8181/32729
Tested-by: Review Code-CDREVIEW <CDREVIEW@freescale.com>
Reviewed-by: Richard Schmitt <richard.schmitt@freescale.com>
Reviewed-by: Honghua Yin <Hong-Hua.Yin@freescale.com>
-rw-r--r--	arch/powerpc/mm/tlb_low_64e.S	57
1 file changed, 51 insertions(+), 6 deletions(-)
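For orientation before the diff: on 64-bit Book3E, a page-directory entry with its top bit set is a normal kernel pointer to the next level, while an entry with the top bit clear is either empty or a hugepage directory (hugepd) whose low bits encode the page shift. Below is a minimal C sketch of the decode that the new tlb_miss_huge_fsl_htw path performs, assuming the PD_HUGE and HUGEPD_SHIFT_MASK values from kernels of this era; the helper names are illustrative, not kernel API.

#include <stdbool.h>
#include <stdint.h>

#define PD_HUGE           0x8000000000000000UL /* from asm/page.h */
#define HUGEPD_SHIFT_MASK 0x3fUL

/* Top bit clear means empty (fault) or a hugepd; this is what the
 * "cmpdi cr0,r14,0; bge" pairs in the diff test. */
static inline bool entry_is_huge_or_bad(uint64_t pd)
{
	return (int64_t)pd >= 0;
}

/* Decode a hugepd: the low bits hold the hugepage shift, and the PTE
 * pointer is recovered by restoring PD_HUGE and clearing those bits.
 * Mirrors the "andi. / rldimi / xor / ldx" sequence at
 * tlb_miss_huge_fsl_htw. */
static inline uint64_t *hugepd_pte_ptr(uint64_t pd, unsigned int *shift)
{
	*shift = pd & HUGEPD_SHIFT_MASK;
	return (uint64_t *)((pd & ~HUGEPD_SHIFT_MASK) | PD_HUGE);
}

Before this patch, the bge branches below went straight to tlb_miss_fault_fsl_htw, so every hugepage miss was resolved in the generic C hugetlb path.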
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index ce37943..422f49f 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -464,18 +464,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
clrrdi r15,r15,3
cmpdi cr0,r14,0
- bge tlb_miss_fault_fsl_htw /* Bad pgd entry or hugepage; bail */
+ bge tlb_miss_huge_fsl_htw /* Bad pgd entry or hugepage; bail */
ldx r14,r14,r15 /* grab pud entry */

rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
clrrdi r15,r15,3
cmpdi cr0,r14,0
- bge tlb_miss_fault_fsl_htw
+ bge tlb_miss_huge_fsl_htw
ldx r14,r14,r15 /* Grab pmd entry */

mfspr r10,SPRN_MAS0
cmpdi cr0,r14,0
- bge tlb_miss_fault_fsl_htw
+ bge tlb_miss_huge_fsl_htw

/* Now we build the MAS for a 2M indirect page:
*
@@ -490,12 +490,13 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
clrrdi r15,r16,21
mtspr SPRN_MAS2,r15

- rldicr r16,r11,0,62
- lwz r15,0(r16)
-
ori r14,r14,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
mtspr SPRN_MAS7_MAS3,r14

+tlb_miss_huge_done_fsl_htw:
+ rldicr r16,r11,0,62
+ lwz r15,0(r16)
+
/* Not MAS0_ESEL_MASK because source is smaller */
rlwimi r10,r15,24,0x00ff0000 /* insert esel_next into MAS0 */
addis r15,r15,0x0100 /* increment esel_next */
@@ -528,6 +529,50 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT)
tlb_epilog_bolted
rfi

+tlb_miss_huge_fsl_htw:
+ beq tlb_miss_fault_fsl_htw
+ li r10,1
+ andi. r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */
+ rldimi r14,r10,63,0 /* Set PD_HUGE */
+ xor r14,r14,r15 /* Clear size bits */
+ ldx r14,0,r14
+
+ /*
+ * Now we build the MAS for a huge page.
+ *
+ * MAS 0 : ESEL needs to be filled by software round-robin
+ * - can be handled by indirect code
+ * MAS 1 : Need to clear IND and set TSIZE
+ * MAS 2,3+7: Needs to be redone similar to non-tablewalk handler
+ */
+
+ subi r15,r15,10 /* Convert psize to tsize */
+ mfspr r10,SPRN_MAS1
+ rlwinm r10,r10,0,~MAS1_IND
+ rlwimi r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK
+ mtspr SPRN_MAS1,r10
+
+ li r10,-0x400
+ sld r15,r10,r15 /* Generate mask based on size */
+ and r10,r16,r15
+ rldicr r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
+ rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */
+ clrldi r15,r15,PAGE_SHIFT /* Clear crap at the top */
+ rlwimi r15,r14,32-8,22,25 /* Move in U bits */
+ mtspr SPRN_MAS2,r10
+ andi. r10,r14,_PAGE_DIRTY
+ rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */
+
+ /* Mask out SW and UW if !DIRTY (XXX optimize this !) */
+ bne 1f
+ li r10,MAS3_SW|MAS3_UW
+ andc r15,r15,r10
+1:
+ mtspr SPRN_MAS7_MAS3,r15
+
+ mfspr r10,SPRN_MAS0
+ b tlb_miss_huge_done_fsl_htw
+
tlb_miss_kernel_fsl_htw:
ld r14,PACA_KERNELPGD(r13)
cmpldi cr1,r15,8 /* Check for vmalloc region */
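
For reference, a C rendering of the MAS math in tlb_miss_huge_fsl_htw above. The function and parameter names (ea, pte, shift) are illustrative, and PTE_RPN_SHIFT varies by kernel config, so treat this as a sketch rather than kernel code.

#include <stdint.h>

#define PAGE_SHIFT    12
#define PTE_RPN_SHIFT 24              /* config-dependent; illustrative */
#define MAS3_SW       0x00000010UL    /* supervisor write permission */
#define MAS3_UW       0x00000002UL    /* user write permission */
#define _PAGE_DIRTY   0x00001000UL    /* C: page changed (pte-book3e.h) */

struct huge_mas { uint64_t tsize, mas2, mas3; };

/* ea: faulting effective address; pte: PTE loaded through the hugepd;
 * shift: hugepage shift from the hugepd (e.g. 21 for 2 MB). */
static struct huge_mas build_huge_mas(uint64_t ea, uint64_t pte,
				      unsigned int shift)
{
	struct huge_mas m;

	/* "subi r15,r15,10": Book3E MAV 2.0 TSIZE is log2(size in KB),
	 * so shift 21 (2 MB) becomes tsize 11. */
	m.tsize = shift - 10;

	/* "li r10,-0x400; sld r15,r10,r15": -0x400 << tsize is exactly
	 * -(1 << shift), a mask that rounds the EA down to the hugepage
	 * boundary for the MAS2 EPN. */
	uint64_t epn_mask = -(1UL << shift);

	/* "rlwimi r10,r14,32-19,27,31": WIMGE from PTE bits 19..23. */
	m.mas2 = (ea & epn_mask) | ((pte >> 19) & 0x1f);

	/* RPN ("rldicr ...; clrldi") plus U bits ("rlwimi ...,32-8,22,25")
	 * and BAP bits ("rlwimi ...,32-2,26,31"). */
	m.mas3 = ((pte >> PTE_RPN_SHIFT) << PAGE_SHIFT)
	       | ((pte >> 8) & 0x3c0)
	       | ((pte >> 2) & 0x3f);

	/* "Mask out SW and UW if !DIRTY": leave the entry write-protected
	 * until the first store faults, so C code can set _PAGE_DIRTY. */
	if (!(pte & _PAGE_DIRTY))
		m.mas3 &= ~(MAS3_SW | MAS3_UW);

	return m;
}

Note that a clean page deliberately gets no write permission here; the "XXX optimize this" in the patch comment marks that dirty-tracking step as a candidate for later improvement.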