/* * VMI paravirtual timer support routines. * * Copyright (C) 2005, VMware, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or * NON INFRINGEMENT. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * Send feedback to dhecht@vmware.com * */ /* * Portions of this code from arch/i386/kernel/timers/timer_tsc.c. * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c. * See comments there for proper credits. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CONFIG_X86_LOCAL_APIC #define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT #else #define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0 #endif /* Cached VMI operations */ struct vmi_timer_ops vmi_timer_ops; #ifdef CONFIG_NO_IDLE_HZ /* /proc/sys/kernel/hz_timer state. */ int sysctl_hz_timer; /* Some stats */ static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs); static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies); static DEFINE_PER_CPU(unsigned long, idle_start_jiffies); #endif /* CONFIG_NO_IDLE_HZ */ /* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */ static int alarm_hz = CONFIG_VMI_ALARM_HZ; /* Cache of the value get_cycle_frequency / HZ. */ static signed long long cycles_per_jiffy; /* Cache of the value get_cycle_frequency / alarm_hz. 
*/ static signed long long cycles_per_alarm; /* The number of cycles accounted for by the 'jiffies'/'xtime' count. * Protected by xtime_lock. */ static unsigned long long real_cycles_accounted_system; /* The number of cycles accounted for by update_process_times(), per cpu. */ static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu); /* The number of stolen cycles accounted, per cpu. */ static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu); /* Clock source. */ static cycle_t read_real_cycles(void) { return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); } static cycle_t read_available_cycles(void) { return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE); } #if 0 static cycle_t read_stolen_cycles(void) { return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN); } #endif /* 0 */ static struct clocksource clocksource_vmi = { .name = "vmi-timer", .rating = 450, .read = read_real_cycles, .mask = CLOCKSOURCE_MASK(64), .mult = 0, /* to be set */ .shift = 22, .is_continuous = 1, }; /* Timer interrupt handler. 
*/ static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id); static struct irqaction vmi_timer_irq = { vmi_timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "VMI-alarm", NULL, NULL }; /* Alarm rate */ static int __init vmi_timer_alarm_rate_setup(char* str) { int alarm_rate; if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) { alarm_hz = alarm_rate; printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz); } return 1; } __setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup); /* Initialization */ static void vmi_get_wallclock_ts(struct timespec *ts) { unsigned long long wallclock; wallclock = vmi_timer_ops.get_wallclock(); // nsec units ts->tv_nsec = do_div(wallclock, 1000000000); ts->tv_sec = wallclock; } static void update_xtime_from_wallclock(void) { struct timespec ts; vmi_get_wallclock_ts(&ts); do_settimeofday(&ts); } unsigned long vmi_get_wallclock(void) { struct timespec ts; vmi_get_wallclock_ts(&ts); return ts.tv_sec; } int vmi_set_wallclock(unsigned long now) { return -1; } unsigned long long vmi_sched_clock(void) { return read_available_cycles(); } void __init vmi_time_init(void) { unsigned long long cycles_per_sec, cycles_per_msec; setup_irq(0, &vmi_timer_irq); #ifdef CONFIG_X86_LOCAL_APIC set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt); #endif no_sync_cmos_clock = 1; vmi_get_wallclock_ts(&xtime); set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); real_cycles_accounted_system = read_real_cycles(); update_xtime_from_wallclock(); per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles(); cycles_per_sec = vmi_timer_ops.get_cycle_frequency(); cycles_per_jiffy = cycles_per_sec; (void)do_div(cycles_per_jiffy, HZ); cycles_per_alarm = cycles_per_sec; (void)do_div(cycles_per_alarm, alarm_hz); cycles_per_msec = cycles_per_sec; (void)do_div(cycles_per_msec, 1000); cpu_khz = cycles_per_msec; printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;" "cycles/alarm = %llu\n", 
cycles_per_sec, cycles_per_jiffy, cycles_per_alarm); clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec, clocksource_vmi.shift); if (clocksource_register(&clocksource_vmi)) printk(KERN_WARNING "Error registering VMITIME clocksource."); /* Disable PIT. */ outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ /* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu * reduce the latency calling update_process_times. */ vmi_timer_ops.set_alarm( VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm, cycles_per_alarm); } #ifdef CONFIG_X86_LOCAL_APIC void __init vmi_timer_setup_boot_alarm(void) { local_irq_disable(); /* Route the interrupt to the correct vector. */ apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR); /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */ vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); vmi_timer_ops.set_alarm( VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm, cycles_per_alarm); local_irq_enable(); } /* Initialize the time accounting variables for an AP on an SMP system. * Also, set the local alarm for the AP. */ void __init vmi_timer_setup_secondary_alarm(void) { int cpu = smp_processor_id(); /* Route the interrupt to the correct vector. */ apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR); per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles(); vmi_timer_ops.set_alarm( VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm, cycles_per_alarm); } #endif /* Update system wide (real) time accounting (e.g. jiffies, xtime). 
*/ static void vmi_account_real_cycles(unsigned long long cur_real_cycles) { long long cycles_not_accounted; write_seqlock(&xtime_lock); cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system; while (cycles_not_accounted >= cycles_per_jiffy) { /* systems wide jiffies and wallclock. */ do_timer(1); cycles_not_accounted -= cycles_per_jiffy; real_cycles_accounted_system += cycles_per_jiffy; } if (vmi_timer_ops.wallclock_updated()) update_xtime_from_wallclock(); write_sequnlock(&xtime_lock); } /* Update per-cpu process times. */ static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu, unsigned long long cur_process_times_cycles) { long long cycles_not_accounted; cycles_not_accounted = cur_process_times_cycles - per_cpu(process_times_cycles_accounted_cpu, cpu); while (cycles_not_accounted >= cycles_per_jiffy) { /* Account time to the current process. This includes * calling into the scheduler to decrement the timeslice * and possibly reschedule.*/ update_process_times(user_mode(regs)); /* XXX handle /proc/profile multiplier. */ profile_tick(CPU_PROFILING); cycles_not_accounted -= cycles_per_jiffy; per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy; } } #ifdef CONFIG_NO_IDLE_HZ /* Update per-cpu idle times. Used when a no-hz halt is ended. */ static void vmi_account_no_hz_idle_cycles(int cpu, unsigned long long cur_process_times_cycles) { long long cycles_not_accounted; unsigned long no_idle_hz_jiffies = 0; cycles_not_accounted = cur_process_times_cycles - per_cpu(process_times_cycles_accounted_cpu, cpu); while (cycles_not_accounted >= cycles_per_jiffy) { no_idle_hz_jiffies++; cycles_not_accounted -= cycles_per_jiffy; per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy; } /* Account time to the idle process. */ account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies)); } #endif /* Update per-cpu stolen time. 
*/ static void vmi_account_stolen_cycles(int cpu, unsigned long long cur_real_cycles, unsigned long long cur_avail_cycles) { long long stolen_cycles_not_accounted; unsigned long stolen_jiffies = 0; if (cur_real_cycles < cur_avail_cycles) return; stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles - per_cpu(stolen_cycles_accounted_cpu, cpu); while (stolen_cycles_not_accounted >= cycles_per_jiffy) { stolen_jiffies++; stolen_cycles_not_accounted -= cycles_per_jiffy; per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy; } /* HACK: pass NULL to force time onto cpustat->steal. */ account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies)); } /* Body of either IRQ0 interrupt handler (UP no local-APIC) or * local-APIC LVTT interrupt handler (UP & local-APIC or SMP). */ static void vmi_local_timer_interrupt(int cpu) { unsigned long long cur_real_cycles, cur_process_times_cycles; cur_real_cycles = read_real_cycles(); cur_process_times_cycles = read_available_cycles(); /* Update system wide (real) time state (xtime, jiffies). */ vmi_account_real_cycles(cur_real_cycles); /* Update per-cpu process times. */ vmi_account_process_times_cycles(get_irq_regs(), cpu, cur_process_times_cycles); /* Update time stolen from this cpu by the hypervisor. */ vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles); } #ifdef CONFIG_NO_IDLE_HZ /* Must be called only from idle loop, with interrupts disabled. */ int vmi_stop_hz_timer(void) { /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */ unsigned long seq, next; unsigned long long real_cycles_expiry; int cpu = smp_processor_id(); int idle; BUG_ON(!irqs_disabled()); if (sysctl_hz_timer != 0) return 0; cpu_set(cpu, nohz_cpu_mask); smp_mb(); if (rcu_needs_cpu(cpu) || local_softirq_pending() || (next = next_timer_interrupt(), time_before_eq(next, jiffies))) { cpu_clear(cpu, nohz_cpu_mask); next = jiffies; idle = 0; } else idle = 1; /* Convert jiffies to the real cycle counter. 
*/ do { seq = read_seqbegin(&xtime_lock); real_cycles_expiry = real_cycles_accounted_system + (long)(next - jiffies) * cycles_per_jiffy; } while (read_seqretry(&xtime_lock, seq)); /* This cpu is going idle. Disable the periodic alarm. */ if (idle) { vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); per_cpu(idle_start_jiffies, cpu) = jiffies; } /* Set the real time alarm to expire at the next event. */ vmi_timer_ops.set_alarm( VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL, real_cycles_expiry, 0); return idle; } static void vmi_reenable_hz_timer(int cpu) { /* For /proc/vmi/info idle_hz stat. */ per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu); per_cpu(vmi_idle_no_hz_irqs, cpu)++; /* Don't bother explicitly cancelling the one-shot alarm -- at * worse we will receive a spurious timer interrupt. */ vmi_timer_ops.set_alarm( VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm, cycles_per_alarm); /* Indicate this cpu is no longer nohz idle. */ cpu_clear(cpu, nohz_cpu_mask); } /* Called from interrupt handlers when (local) HZ timer is disabled. */ void vmi_account_time_restart_hz_timer(void) { unsigned long long cur_real_cycles, cur_process_times_cycles; int cpu = smp_processor_id(); BUG_ON(!irqs_disabled()); /* Account the time during which the HZ timer was disabled. */ cur_real_cycles = read_real_cycles(); cur_process_times_cycles = read_available_cycles(); /* Update system wide (real) time state (xtime, jiffies). */ vmi_account_real_cycles(cur_real_cycles); /* Update per-cpu idle times. */ vmi_account_no_hz_idle_cycles(cpu, cur_process_times_cycles); /* Update time stolen from this cpu by the hypervisor. */ vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles); /* Reenable the hz timer. */ vmi_reenable_hz_timer(cpu); } #endif /* CONFIG_NO_IDLE_HZ */ /* UP (and no local-APIC) VMI-timer alarm interrupt handler. 
* Handler for IRQ0.  Not used when SMP or X86_LOCAL_APIC after
* APIC setup and vmi_timer_setup_boot_alarm() is called.
*
* Runs the shared timer body for the current cpu and reports the
* interrupt as handled.
*/
static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
{
	int cpu = smp_processor_id();

	vmi_local_timer_interrupt(cpu);

	return IRQ_HANDLED;
}

#ifdef CONFIG_X86_LOCAL_APIC

/*
 * SMP VMI-timer alarm interrupt handler.  Handler for LVTT vector.
 * Also used in UP when CONFIG_X86_LOCAL_APIC.
 * The wrapper code is from
 * arch/i386/kernel/apic.c#smp_apic_timer_interrupt.
 *
 * @regs is installed as the current irq register set for the duration
 * of the handler and the previous set is put back before returning.
 */
void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
{
	struct pt_regs *saved_regs = set_irq_regs(regs);
	int cpu = smp_processor_id();

	/* The NMI deadlock-detector uses this counter. */
	per_cpu(irq_stat, cpu).apic_timer_irqs++;

	/*
	 * NOTE! We'd better ACK the irq immediately,
	 * because timer handling can be slow.
	 */
	ack_APIC_irq();

	/*
	 * update_process_times() expects us to have done irq_enter(),
	 * so the shared timer body is bracketed by irq_enter()/irq_exit().
	 */
	irq_enter();
	vmi_local_timer_interrupt(cpu);
	irq_exit();

	set_irq_regs(saved_regs);
}

#endif  /*  CONFIG_X86_LOCAL_APIC  */