diff options
Diffstat (limited to 'arch/ia64/kernel/fsys.S')
-rw-r--r-- | arch/ia64/kernel/fsys.S | 88 |
1 files changed, 68 insertions, 20 deletions
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index 4484197..c1625c7 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -61,13 +61,29 @@ ENTRY(fsys_getpid) .prologue .altrp b6 .body + add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16 + ;; + ld8 r17=[r17] // r17 = current->group_leader add r9=TI_FLAGS+IA64_TASK_SIZE,r16 ;; ld4 r9=[r9] - add r8=IA64_TASK_TGID_OFFSET,r16 + add r17=IA64_TASK_TGIDLINK_OFFSET,r17 ;; and r9=TIF_ALLWORK_MASK,r9 - ld4 r8=[r8] // r8 = current->tgid + ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid + ;; + add r8=IA64_PID_LEVEL_OFFSET,r17 + ;; + ld4 r8=[r8] // r8 = pid->level + add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] + ;; + shl r8=r8,IA64_UPID_SHIFT + ;; + add r17=r17,r8 // r17 = &pid->numbers[pid->level] + ;; + ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr + ;; + mov r17=0 ;; cmp.ne p8,p0=0,r9 (p8) br.spnt.many fsys_fallback_syscall @@ -126,15 +142,25 @@ ENTRY(fsys_set_tid_address) .altrp b6 .body add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + add r17=IA64_TASK_TGIDLINK_OFFSET,r16 ;; ld4 r9=[r9] tnat.z p6,p7=r32 // check argument register for being NaT + ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid ;; and r9=TIF_ALLWORK_MASK,r9 - add r8=IA64_TASK_PID_OFFSET,r16 + add r8=IA64_PID_LEVEL_OFFSET,r17 add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 ;; - ld4 r8=[r8] + ld4 r8=[r8] // r8 = pid->level + add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] + ;; + shl r8=r8,IA64_UPID_SHIFT + ;; + add r17=r17,r8 // r17 = &pid->numbers[pid->level] + ;; + ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr + ;; cmp.ne p8,p0=0,r9 mov r17=-1 ;; @@ -210,27 +236,25 @@ ENTRY(fsys_gettimeofday) // Note that instructions are optimized for McKinley. McKinley can // process two bundles simultaneously and therefore we continuously // try to feed the CPU two bundles and then a stop. - // - // Additional note that code has changed a lot. Optimization is TBD. - // Comments begin with "?" are maybe outdated. - tnat.nz p6,p0 = r31 // ? branch deferred to fit later bundle - mov pr = r30,0xc000 // Set predicates according to function + add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 + tnat.nz p6,p0 = r31 // guard against Nat argument +(p6) br.cond.spnt.few .fail_einval movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address ;; + ld4 r2 = [r2] // process work pending flags movl r29 = itc_jitter_data // itc_jitter add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time - ld4 r2 = [r2] // process work pending flags - ;; -(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time add r21 = IA64_CLKSRC_MMIO_OFFSET,r20 - add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29 + mov pr = r30,0xc000 // Set predicates according to function + ;; and r2 = TIF_ALLWORK_MASK,r2 -(p6) br.cond.spnt.few .fail_einval // ? deferred branch + add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29 +(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time ;; - add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last + add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled -(p6) br.cond.spnt.many fsys_fallback_syscall +(p6) br.cond.spnt.many fsys_fallback_syscall ;; // Begin critical section .time_redo: @@ -258,7 +282,6 @@ ENTRY(fsys_gettimeofday) (p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!! (p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues.. (p13) ld8 r25 = [r19] // get itc_lastcycle value - ;; // ? could be removed by moving the last add upward ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec ;; ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec @@ -285,13 +308,12 @@ ENTRY(fsys_gettimeofday) EX(.fail_efault, probe.w.fault r31, 3) xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) ;; - // ? simulate tbit.nz.or p7,p0 = r28,0 getf.sig r2 = f8 mf ;; ld4 r10 = [r20] // gtod_lock.sequence shr.u r2 = r2,r23 // shift by factor - ;; // ? overloaded 3 bundles! + ;; add r8 = r8,r2 // Add xtime.nsecs cmp4.ne p7,p0 = r28,r10 (p7) br.cond.dpnt.few .time_redo // sequence number changed, redo @@ -319,9 +341,9 @@ EX(.fail_efault, probe.w.fault r31, 3) EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles (p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it ;; - mov r8 = r0 (p14) getf.sig r2 = f8 ;; + mov r8 = r0 (p14) shr.u r21 = r2, 4 ;; EX(.fail_efault, st8 [r31] = r9) @@ -660,7 +682,11 @@ GLOBAL_ENTRY(fsys_bubble_down) nop.i 0 ;; mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + mov.m r30=ar.itc // M get cycle for accounting +#else nop.m 0 +#endif nop.i 0 ;; mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore @@ -682,6 +708,28 @@ GLOBAL_ENTRY(fsys_bubble_down) cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 br.call.sptk.many b7=ia64_syscall_setup // B ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + // mov.m r30=ar.itc is called in advance + add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2 + add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2 + ;; + ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel + ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel + ;; + ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime + ld8 r21=[r17] // cumulated utime + sub r22=r19,r18 // stime before leave kernel + ;; + st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp + sub r18=r30,r19 // elapsed time in user mode + ;; + add r20=r20,r22 // sum stime + add r21=r21,r18 // sum utime + ;; + st8 [r16]=r20 // update stime + st8 [r17]=r21 // update utime + ;; +#endif mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 mov rp=r14 // I0 set the real return addr and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A |