author		Laurentiu Tudor <Laurentiu.Tudor@freescale.com>		2014-03-31 13:48:53 (GMT)
committer	Richard Schmitt <richard.schmitt@freescale.com>		2014-05-09 20:58:26 (GMT)
commit		30acac164faea93af2e9c5040e87bc62c442868a (patch)
tree		5175f52c51b1f36bb54ba1d6e4ea439493eabf19 /arch/powerpc/mm
parent		77ffa7282e5c7d02e4c2b04862658ae832a26fa8 (diff)
download	linux-fsl-qoriq-30acac164faea93af2e9c5040e87bc62c442868a.tar.xz
powerpc/booke64: wrap tlb lock and search in htw miss with FTR_SMT
Virtualized environments expose an e6500 dual-threaded core as two
single-threaded e6500 cores. Take advantage of this and get rid of the
tlb lock and the trap-causing tlbsx in the htw miss handler by guarding
with CPU_FTR_SMT, as is already done in the bolted tlb1 miss handler.

As the results below show, the lmbench random memory access latency test
shows an improvement of ~34%.

Memory latencies in nanoseconds - smaller is better
    (WARNING - may not be correct, check graphs)
----------------------------------------------------
Host       Mhz   L1 $    L2 $   Main mem   Rand mem
---------  ----  ------  -----  ---------  ---------
smt        1665  1.8020  13.2   83.0       1149.7
nosmt      1665  1.8020  13.2   83.0       758.1

Signed-off-by: Laurentiu Tudor <Laurentiu.Tudor@freescale.com>
Cc: Scott Wood <scottwood@freescale.com>
Change-Id: Ia6c028b8bb9c847d46d32f788a7257527cd6af09
Reviewed-on: http://git.am.freescale.net:8181/12089
Tested-by: Review Code-CDREVIEW <CDREVIEW@freescale.com>
Reviewed-by: Scott Wood <scottwood@freescale.com>
Reviewed-by: Richard Schmitt <richard.schmitt@freescale.com>
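For context, a minimal sketch of the feature-section pattern this patch relies on
(not taken from the patch itself; the tlbsx operand and the include lines are
illustrative assumptions): code bracketed by BEGIN_FTR_SECTION /
END_FTR_SECTION_IFSET(CPU_FTR_SMT) is kept only when the CPU_FTR_SMT bit is set
at boot and is overwritten with nops otherwise, so a guest that sees
single-threaded e6500 cores never executes the lock or the trapping tlbsx.

    #include <asm/cputable.h>
    #include <asm/feature-fixups.h>

    	/* ... handler code that runs in both configurations ... */
    BEGIN_FTR_SECTION
    	/* Kept only when CPU_FTR_SMT is set; otherwise the boot-time
    	 * feature-fixup pass rewrites this whole region to nops, so
    	 * neither the inter-thread lock nor the tlbsx is executed. */
    	tlbsx	0,r16		/* illustrative: probe for an existing entry */
    END_FTR_SECTION_IFSET(CPU_FTR_SMT)
    	/* ... rest of the miss handler ... */

In the patch below the same bracketing is wrapped around the existing lock/search
code and around the body of the tlb_unlock_fsl_htw macro, which is why the diff
adds only the four section-marker lines.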
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--   arch/powerpc/mm/tlb_low_64e.S   4
1 file changed, 4 insertions(+), 0 deletions(-)
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index 4090991..b9274d5 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -360,6 +360,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT)
* r10 = cpu number
*/
tlb_miss_common_fsl_htw:
+BEGIN_FTR_SECTION
/*
* Search if we already have an indirect entry for that virtual
* address, and if we do, bail out.
@@ -400,6 +401,7 @@ tlb_miss_common_fsl_htw:
// ori r10,r10,MAS1_IND
mtspr SPRN_MAS1,r10
mtspr SPRN_MAS2,r15
+END_FTR_SECTION_IFSET(CPU_FTR_SMT)
/* Now, we need to walk the page tables. First check if we are in
* range.
@@ -461,6 +463,7 @@ tlb_miss_common_fsl_htw:
tlb_miss_done_fsl_htw:
.macro tlb_unlock_fsl_htw
+BEGIN_FTR_SECTION
beq cr1,1f /* no unlock if lock was recursively grabbed */
mtocrf 0x01,r11
addi r10,r11,PACA_TLB_LOCK-1
@@ -469,6 +472,7 @@ tlb_miss_done_fsl_htw:
isync
stb r15,0(r10)
1:
+END_FTR_SECTION_IFSET(CPU_FTR_SMT)
.endm
tlb_unlock_fsl_htw