diff options
39 files changed, 3466 insertions, 878 deletions
diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt index 52478c8..20746e5 100644 --- a/Documentation/devicetree/bindings/arm/arch_timer.txt +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt @@ -1,13 +1,14 @@ * ARM architected timer -ARM Cortex-A7 and Cortex-A15 have a per-core architected timer, which -provides per-cpu timers. +ARM cores may have a per-core architected timer, which provides per-cpu timers. The timer is attached to a GIC to deliver its per-processor interrupts. ** Timer node properties: -- compatible : Should at least contain "arm,armv7-timer". +- compatible : Should at least contain one of + "arm,armv7-timer" + "arm,armv8-timer" - interrupts : Interrupt list for secure, non-secure, virtual and hypervisor timers, in that order. diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index c25439a..e0fa0ea 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2210,6 +2210,44 @@ This ioctl returns the guest registers that are supported for the KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. +4.80 KVM_ARM_SET_DEVICE_ADDR + +Capability: KVM_CAP_ARM_SET_DEVICE_ADDR +Architectures: arm +Type: vm ioctl +Parameters: struct kvm_arm_device_address (in) +Returns: 0 on success, -1 on error +Errors: + ENODEV: The device id is unknown + ENXIO: Device not supported on current system + EEXIST: Address already set + E2BIG: Address outside guest physical address space + EBUSY: Address overlaps with other device range + +struct kvm_arm_device_addr { + __u64 id; + __u64 addr; +}; + +Specify a device address in the guest's physical address space where guests +can access emulated or directly exposed devices, which the host kernel needs +to know about. The id field is an architecture specific identifier for a +specific device. + +ARM divides the id field into two parts, a device id and an address type id +specific to the individual device. + + bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 | + field: | 0x00000000 | device id | addr type id | + +ARM currently only require this when using the in-kernel GIC support for the +hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2 as the device id. When +setting the base address for the guest's mapping of the VGIC virtual CPU +and distributor interface, the ioctl must be called after calling +KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling +this ioctl twice for any of the base addresses will return -EEXIST. + + 5. The kvm_run structure ------------------------ diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2cb9c35..38ec1f8 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -4,6 +4,7 @@ config ARM select ARCH_BINFMT_ELF_RANDOMIZE_PIE select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAVE_CUSTOM_GPIO_H + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT if MMU select CPU_PM if (SUSPEND || CPU_IDLE) @@ -1112,6 +1113,8 @@ source "arch/arm/mach-versatile/Kconfig" source "arch/arm/mach-vexpress/Kconfig" source "arch/arm/plat-versatile/Kconfig" +source "arch/arm/mach-virt/Kconfig" + source "arch/arm/mach-vt8500/Kconfig" source "arch/arm/mach-w90x900/Kconfig" @@ -1560,9 +1563,10 @@ config HAVE_ARM_SCU help This option enables support for the ARM system coherency unit -config ARM_ARCH_TIMER +config HAVE_ARM_ARCH_TIMER bool "Architected timer support" depends on CPU_V7 + select ARM_ARCH_TIMER help This option enables support for the ARM architected timer diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 1b70716..ee4605f 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -194,6 +194,7 @@ machine-$(CONFIG_ARCH_SOCFPGA) += socfpga machine-$(CONFIG_ARCH_SPEAR13XX) += spear13xx machine-$(CONFIG_ARCH_SPEAR3XX) += spear3xx machine-$(CONFIG_MACH_SPEAR600) += spear6xx +machine-$(CONFIG_ARCH_VIRT) += virt machine-$(CONFIG_ARCH_ZYNQ) += zynq machine-$(CONFIG_ARCH_SUNXI) += sunxi diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index d40229d..7ade91d 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -1,13 +1,115 @@ #ifndef __ASMARM_ARCH_TIMER_H #define __ASMARM_ARCH_TIMER_H +#include <asm/barrier.h> #include <asm/errno.h> #include <linux/clocksource.h> +#include <linux/init.h> +#include <linux/types.h> + +#include <clocksource/arm_arch_timer.h> #ifdef CONFIG_ARM_ARCH_TIMER int arch_timer_of_register(void); int arch_timer_sched_clock_init(void); -struct timecounter *arch_timer_get_timecounter(void); + +/* + * These register accessors are marked inline so the compiler can + * nicely work out which register we want, and chuck away the rest of + * the code. At least it does so with a recent GCC (4.6.3). + */ +static inline void arch_timer_reg_write(const int access, const int reg, u32 val) +{ + if (access == ARCH_TIMER_PHYS_ACCESS) { + switch (reg) { + case ARCH_TIMER_REG_CTRL: + asm volatile("mcr p15, 0, %0, c14, c2, 1" : : "r" (val)); + break; + case ARCH_TIMER_REG_TVAL: + asm volatile("mcr p15, 0, %0, c14, c2, 0" : : "r" (val)); + break; + } + } + + if (access == ARCH_TIMER_VIRT_ACCESS) { + switch (reg) { + case ARCH_TIMER_REG_CTRL: + asm volatile("mcr p15, 0, %0, c14, c3, 1" : : "r" (val)); + break; + case ARCH_TIMER_REG_TVAL: + asm volatile("mcr p15, 0, %0, c14, c3, 0" : : "r" (val)); + break; + } + } + + isb(); +} + +static inline u32 arch_timer_reg_read(const int access, const int reg) +{ + u32 val = 0; + + if (access == ARCH_TIMER_PHYS_ACCESS) { + switch (reg) { + case ARCH_TIMER_REG_CTRL: + asm volatile("mrc p15, 0, %0, c14, c2, 1" : "=r" (val)); + break; + case ARCH_TIMER_REG_TVAL: + asm volatile("mrc p15, 0, %0, c14, c2, 0" : "=r" (val)); + break; + } + } + + if (access == ARCH_TIMER_VIRT_ACCESS) { + switch (reg) { + case ARCH_TIMER_REG_CTRL: + asm volatile("mrc p15, 0, %0, c14, c3, 1" : "=r" (val)); + break; + case ARCH_TIMER_REG_TVAL: + asm volatile("mrc p15, 0, %0, c14, c3, 0" : "=r" (val)); + break; + } + } + + return val; +} + +static inline u32 arch_timer_get_cntfrq(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c14, c0, 0" : "=r" (val)); + return val; +} + +static inline u64 arch_counter_get_cntpct(void) +{ + u64 cval; + + isb(); + asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval)); + return cval; +} + +static inline u64 arch_counter_get_cntvct(void) +{ + u64 cval; + + isb(); + asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (cval)); + return cval; +} + +static inline void __cpuinit arch_counter_set_user_access(void) +{ + u32 cntkctl; + + asm volatile("mrc p15, 0, %0, c14, c1, 0" : "=r" (cntkctl)); + + /* disable user access to everything */ + cntkctl &= ~((3 << 8) | (7 << 0)); + + asm volatile("mcr p15, 0, %0, c14, c1, 0" : : "r" (cntkctl)); +} #else static inline int arch_timer_of_register(void) { @@ -18,11 +120,6 @@ static inline int arch_timer_sched_clock_init(void) { return -ENXIO; } - -static inline struct timecounter *arch_timer_get_timecounter(void) -{ - return NULL; -} #endif #endif diff --git a/arch/arm/include/asm/kvm_arch_timer.h b/arch/arm/include/asm/kvm_arch_timer.h new file mode 100644 index 0000000..68cb9e1 --- /dev/null +++ b/arch/arm/include/asm/kvm_arch_timer.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_ARM_KVM_ARCH_TIMER_H +#define __ASM_ARM_KVM_ARCH_TIMER_H + +#include <linux/clocksource.h> +#include <linux/hrtimer.h> +#include <linux/workqueue.h> + +struct arch_timer_kvm { +#ifdef CONFIG_KVM_ARM_TIMER + /* Is the timer enabled */ + bool enabled; + + /* Virtual offset */ + cycle_t cntvoff; +#endif +}; + +struct arch_timer_cpu { +#ifdef CONFIG_KVM_ARM_TIMER + /* Registers: control register, timer value */ + u32 cntv_ctl; /* Saved/restored */ + cycle_t cntv_cval; /* Saved/restored */ + + /* + * Anything that is not used directly from assembly code goes + * here. + */ + + /* Background timer used when the guest is not running */ + struct hrtimer timer; + + /* Work queued with the above timer expires */ + struct work_struct expired; + + /* Background timer active */ + bool armed; + + /* Timer IRQ */ + const struct kvm_irq_level *irq; +#endif +}; + +#ifdef CONFIG_KVM_ARM_TIMER +int kvm_timer_hyp_init(void); +int kvm_timer_init(struct kvm *kvm); +void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); +void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); +void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); +#else +static inline int kvm_timer_hyp_init(void) +{ + return 0; +}; + +static inline int kvm_timer_init(struct kvm *kvm) +{ + return 0; +} + +static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} +static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {} +static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {} +static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {} +#endif + +#endif diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 5e06e81..e4956f4 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -45,7 +45,8 @@ #define c13_TID_URW 23 /* Thread ID, User R/W */ #define c13_TID_URO 24 /* Thread ID, User R/O */ #define c13_TID_PRIV 25 /* Thread ID, Privileged */ -#define NR_CP15_REGS 26 /* Number of regs (incl. invalid) */ +#define c14_CNTKCTL 26 /* Timer Control Register (PL1) */ +#define NR_CP15_REGS 27 /* Number of regs (incl. invalid) */ #define ARM_EXCEPTION_RESET 0 #define ARM_EXCEPTION_UNDEFINED 1 diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 98b4d1a..dfe9886 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -23,6 +23,7 @@ #include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> #include <asm/fpstate.h> +#include <asm/kvm_arch_timer.h> #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS #define KVM_MEMORY_SLOTS 32 @@ -37,6 +38,8 @@ #define KVM_NR_PAGE_SIZES 1 #define KVM_PAGES_PER_HPAGE(x) (1UL<<31) +#include <asm/kvm_vgic.h> + struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); int kvm_target_cpu(void); @@ -47,6 +50,9 @@ struct kvm_arch { /* VTTBR value associated with below pgd and vmid */ u64 vttbr; + /* Timer */ + struct arch_timer_kvm timer; + /* * Anything that is not used directly from assembly code goes * here. @@ -58,6 +64,9 @@ struct kvm_arch { /* Stage-2 page table */ pgd_t *pgd; + + /* Interrupt controller */ + struct vgic_dist vgic; }; #define KVM_NR_MEM_OBJS 40 @@ -92,6 +101,10 @@ struct kvm_vcpu_arch { struct vfp_hard_struct vfp_guest; struct vfp_hard_struct *vfp_host; + /* VGIC state */ + struct vgic_cpu vgic_cpu; + struct arch_timer_cpu timer_cpu; + /* * Anything that is not used directly from assembly code goes * here. @@ -158,4 +171,14 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) { return 0; } + +struct kvm_vcpu *kvm_arm_get_running_vcpu(void); +struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); + +int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); +unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); +struct kvm_one_reg; +int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h new file mode 100644 index 0000000..ab97207 --- /dev/null +++ b/arch/arm/include/asm/kvm_vgic.h @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_ARM_KVM_VGIC_H +#define __ASM_ARM_KVM_VGIC_H + +#include <linux/kernel.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/irqreturn.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include <linux/irqchip/arm-gic.h> + +#define VGIC_NR_IRQS 128 +#define VGIC_NR_SGIS 16 +#define VGIC_NR_PPIS 16 +#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) +#define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS) +#define VGIC_MAX_CPUS KVM_MAX_VCPUS +#define VGIC_MAX_LRS (1 << 6) + +/* Sanity checks... */ +#if (VGIC_MAX_CPUS > 8) +#error Invalid number of CPU interfaces +#endif + +#if (VGIC_NR_IRQS & 31) +#error "VGIC_NR_IRQS must be a multiple of 32" +#endif + +#if (VGIC_NR_IRQS > 1024) +#error "VGIC_NR_IRQS must be <= 1024" +#endif + +/* + * The GIC distributor registers describing interrupts have two parts: + * - 32 per-CPU interrupts (SGI + PPI) + * - a bunch of shared interrupts (SPI) + */ +struct vgic_bitmap { + union { + u32 reg[VGIC_NR_PRIVATE_IRQS / 32]; + DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS); + } percpu[VGIC_MAX_CPUS]; + union { + u32 reg[VGIC_NR_SHARED_IRQS / 32]; + DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS); + } shared; +}; + +struct vgic_bytemap { + u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4]; + u32 shared[VGIC_NR_SHARED_IRQS / 4]; +}; + +struct vgic_dist { +#ifdef CONFIG_KVM_ARM_VGIC + spinlock_t lock; + bool ready; + + /* Virtual control interface mapping */ + void __iomem *vctrl_base; + + /* Distributor and vcpu interface mapping in the guest */ + phys_addr_t vgic_dist_base; + phys_addr_t vgic_cpu_base; + + /* Distributor enabled */ + u32 enabled; + + /* Interrupt enabled (one bit per IRQ) */ + struct vgic_bitmap irq_enabled; + + /* Interrupt 'pin' level */ + struct vgic_bitmap irq_state; + + /* Level-triggered interrupt in progress */ + struct vgic_bitmap irq_active; + + /* Interrupt priority. Not used yet. */ + struct vgic_bytemap irq_priority; + + /* Level/edge triggered */ + struct vgic_bitmap irq_cfg; + + /* Source CPU per SGI and target CPU */ + u8 irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS]; + + /* Target CPU for each IRQ */ + u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS]; + struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS]; + + /* Bitmap indicating which CPU has something pending */ + unsigned long irq_pending_on_cpu; +#endif +}; + +struct vgic_cpu { +#ifdef CONFIG_KVM_ARM_VGIC + /* per IRQ to LR mapping */ + u8 vgic_irq_lr_map[VGIC_NR_IRQS]; + + /* Pending interrupts on this VCPU */ + DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); + DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS); + + /* Bitmap of used/free list registers */ + DECLARE_BITMAP( lr_used, VGIC_MAX_LRS); + + /* Number of list registers on this CPU */ + int nr_lr; + + /* CPU vif control registers for world switch */ + u32 vgic_hcr; + u32 vgic_vmcr; + u32 vgic_misr; /* Saved only */ + u32 vgic_eisr[2]; /* Saved only */ + u32 vgic_elrsr[2]; /* Saved only */ + u32 vgic_apr; + u32 vgic_lr[VGIC_MAX_LRS]; +#endif +}; + +#define LR_EMPTY 0xff + +struct kvm; +struct kvm_vcpu; +struct kvm_run; +struct kvm_exit_mmio; + +#ifdef CONFIG_KVM_ARM_VGIC +int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr); +int kvm_vgic_hyp_init(void); +int kvm_vgic_init(struct kvm *kvm); +int kvm_vgic_create(struct kvm *kvm); +int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); +void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); +int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, + bool level); +int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); +bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_exit_mmio *mmio); + +#define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) +#define vgic_initialized(k) ((k)->arch.vgic.ready) + +#else +static inline int kvm_vgic_hyp_init(void) +{ + return 0; +} + +static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) +{ + return 0; +} + +static inline int kvm_vgic_init(struct kvm *kvm) +{ + return 0; +} + +static inline int kvm_vgic_create(struct kvm *kvm) +{ + return 0; +} + +static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {} +static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {} + +static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, + unsigned int irq_num, bool level) +{ + return 0; +} + +static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_exit_mmio *mmio) +{ + return false; +} + +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + return 0; +} + +static inline bool vgic_initialized(struct kvm *kvm) +{ + return true; +} +#endif + +#endif diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 3303ff5..023bfeb 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -65,6 +65,22 @@ struct kvm_regs { #define KVM_ARM_TARGET_CORTEX_A15 0 #define KVM_ARM_NUM_TARGETS 1 +/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ +#define KVM_ARM_DEVICE_TYPE_SHIFT 0 +#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_ID_SHIFT 16 +#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT) + +/* Supported device IDs */ +#define KVM_ARM_DEVICE_VGIC_V2 0 + +/* Supported VGIC address types */ +#define KVM_VGIC_V2_ADDR_TYPE_DIST 0 +#define KVM_VGIC_V2_ADDR_TYPE_CPU 1 + +#define KVM_VGIC_V2_DIST_SIZE 0x1000 +#define KVM_VGIC_V2_CPU_SIZE 0x2000 + #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ struct kvm_vcpu_init { diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c index c8ef207..d957a51 100644 --- a/arch/arm/kernel/arch_timer.c +++ b/arch/arm/kernel/arch_timer.c @@ -9,516 +9,53 @@ * published by the Free Software Foundation. */ #include <linux/init.h> -#include <linux/kernel.h> -#include <linux/delay.h> -#include <linux/device.h> -#include <linux/smp.h> -#include <linux/cpu.h> -#include <linux/jiffies.h> -#include <linux/clockchips.h> -#include <linux/interrupt.h> -#include <linux/of_irq.h> -#include <linux/io.h> +#include <linux/types.h> +#include <linux/errno.h> -#include <asm/cputype.h> #include <asm/delay.h> -#include <asm/localtimer.h> -#include <asm/arch_timer.h> -#include <asm/system_info.h> #include <asm/sched_clock.h> -static unsigned long arch_timer_rate; +#include <clocksource/arm_arch_timer.h> -enum ppi_nr { - PHYS_SECURE_PPI, - PHYS_NONSECURE_PPI, - VIRT_PPI, - HYP_PPI, - MAX_TIMER_PPI -}; - -static int arch_timer_ppi[MAX_TIMER_PPI]; - -static struct clock_event_device __percpu **arch_timer_evt; -static struct delay_timer arch_delay_timer; - -static bool arch_timer_use_virtual = true; - -/* - * Architected system timer support. - */ - -#define ARCH_TIMER_CTRL_ENABLE (1 << 0) -#define ARCH_TIMER_CTRL_IT_MASK (1 << 1) -#define ARCH_TIMER_CTRL_IT_STAT (1 << 2) - -#define ARCH_TIMER_REG_CTRL 0 -#define ARCH_TIMER_REG_FREQ 1 -#define ARCH_TIMER_REG_TVAL 2 - -#define ARCH_TIMER_PHYS_ACCESS 0 -#define ARCH_TIMER_VIRT_ACCESS 1 - -/* - * These register accessors are marked inline so the compiler can - * nicely work out which register we want, and chuck away the rest of - * the code. At least it does so with a recent GCC (4.6.3). - */ -static inline void arch_timer_reg_write(const int access, const int reg, u32 val) -{ - if (access == ARCH_TIMER_PHYS_ACCESS) { - switch (reg) { - case ARCH_TIMER_REG_CTRL: - asm volatile("mcr p15, 0, %0, c14, c2, 1" : : "r" (val)); - break; - case ARCH_TIMER_REG_TVAL: - asm volatile("mcr p15, 0, %0, c14, c2, 0" : : "r" (val)); - break; - } - } - - if (access == ARCH_TIMER_VIRT_ACCESS) { - switch (reg) { - case ARCH_TIMER_REG_CTRL: - asm volatile("mcr p15, 0, %0, c14, c3, 1" : : "r" (val)); - break; - case ARCH_TIMER_REG_TVAL: - asm volatile("mcr p15, 0, %0, c14, c3, 0" : : "r" (val)); - break; - } - } - - isb(); -} - -static inline u32 arch_timer_reg_read(const int access, const int reg) -{ - u32 val = 0; - - if (access == ARCH_TIMER_PHYS_ACCESS) { - switch (reg) { - case ARCH_TIMER_REG_CTRL: - asm volatile("mrc p15, 0, %0, c14, c2, 1" : "=r" (val)); - break; - case ARCH_TIMER_REG_TVAL: - asm volatile("mrc p15, 0, %0, c14, c2, 0" : "=r" (val)); - break; - case ARCH_TIMER_REG_FREQ: - asm volatile("mrc p15, 0, %0, c14, c0, 0" : "=r" (val)); - break; - } - } - - if (access == ARCH_TIMER_VIRT_ACCESS) { - switch (reg) { - case ARCH_TIMER_REG_CTRL: - asm volatile("mrc p15, 0, %0, c14, c3, 1" : "=r" (val)); - break; - case ARCH_TIMER_REG_TVAL: - asm volatile("mrc p15, 0, %0, c14, c3, 0" : "=r" (val)); - break; - } - } - - return val; -} - -static inline cycle_t arch_timer_counter_read(const int access) -{ - cycle_t cval = 0; - - if (access == ARCH_TIMER_PHYS_ACCESS) - asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval)); - - if (access == ARCH_TIMER_VIRT_ACCESS) - asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (cval)); - - return cval; -} - -static inline cycle_t arch_counter_get_cntpct(void) -{ - return arch_timer_counter_read(ARCH_TIMER_PHYS_ACCESS); -} - -static inline cycle_t arch_counter_get_cntvct(void) -{ - return arch_timer_counter_read(ARCH_TIMER_VIRT_ACCESS); -} - -static irqreturn_t inline timer_handler(const int access, - struct clock_event_device *evt) -{ - unsigned long ctrl; - ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL); - if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { - ctrl |= ARCH_TIMER_CTRL_IT_MASK; - arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl); - evt->event_handler(evt); - return IRQ_HANDLED; - } - - return IRQ_NONE; -} - -static irqreturn_t arch_timer_handler_virt(int irq, void *dev_id) -{ - struct clock_event_device *evt = *(struct clock_event_device **)dev_id; - - return timer_handler(ARCH_TIMER_VIRT_ACCESS, evt); -} - -static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id) -{ - struct clock_event_device *evt = *(struct clock_event_device **)dev_id; - - return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt); -} - -static inline void timer_set_mode(const int access, int mode) -{ - unsigned long ctrl; - switch (mode) { - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL); - ctrl &= ~ARCH_TIMER_CTRL_ENABLE; - arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl); - break; - default: - break; - } -} - -static void arch_timer_set_mode_virt(enum clock_event_mode mode, - struct clock_event_device *clk) -{ - timer_set_mode(ARCH_TIMER_VIRT_ACCESS, mode); -} - -static void arch_timer_set_mode_phys(enum clock_event_mode mode, - struct clock_event_device *clk) -{ - timer_set_mode(ARCH_TIMER_PHYS_ACCESS, mode); -} - -static inline void set_next_event(const int access, unsigned long evt) -{ - unsigned long ctrl; - ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL); - ctrl |= ARCH_TIMER_CTRL_ENABLE; - ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; - arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt); - arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl); -} - -static int arch_timer_set_next_event_virt(unsigned long evt, - struct clock_event_device *unused) -{ - set_next_event(ARCH_TIMER_VIRT_ACCESS, evt); - return 0; -} - -static int arch_timer_set_next_event_phys(unsigned long evt, - struct clock_event_device *unused) -{ - set_next_event(ARCH_TIMER_PHYS_ACCESS, evt); - return 0; -} - -static int __cpuinit arch_timer_setup(struct clock_event_device *clk) -{ - clk->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP; - clk->name = "arch_sys_timer"; - clk->rating = 450; - if (arch_timer_use_virtual) { - clk->irq = arch_timer_ppi[VIRT_PPI]; - clk->set_mode = arch_timer_set_mode_virt; - clk->set_next_event = arch_timer_set_next_event_virt; - } else { - clk->irq = arch_timer_ppi[PHYS_SECURE_PPI]; - clk->set_mode = arch_timer_set_mode_phys; - clk->set_next_event = arch_timer_set_next_event_phys; - } - - clk->set_mode(CLOCK_EVT_MODE_SHUTDOWN, NULL); - - clockevents_config_and_register(clk, arch_timer_rate, - 0xf, 0x7fffffff); - - *__this_cpu_ptr(arch_timer_evt) = clk; - - if (arch_timer_use_virtual) - enable_percpu_irq(arch_timer_ppi[VIRT_PPI], 0); - else { - enable_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], 0); - if (arch_timer_ppi[PHYS_NONSECURE_PPI]) - enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0); - } - - return 0; -} - -/* Is the optional system timer available? */ -static int local_timer_is_architected(void) -{ - return (cpu_architecture() >= CPU_ARCH_ARMv7) && - ((read_cpuid_ext(CPUID_EXT_PFR1) >> 16) & 0xf) == 1; -} - -static int arch_timer_available(void) -{ - unsigned long freq; - - if (!local_timer_is_architected()) - return -ENXIO; - - if (arch_timer_rate == 0) { - freq = arch_timer_reg_read(ARCH_TIMER_PHYS_ACCESS, - ARCH_TIMER_REG_FREQ); - - /* Check the timer frequency. */ - if (freq == 0) { - pr_warn("Architected timer frequency not available\n"); - return -EINVAL; - } - - arch_timer_rate = freq; - } - - pr_info_once("Architected local timer running at %lu.%02luMHz (%s).\n", - arch_timer_rate / 1000000, (arch_timer_rate / 10000) % 100, - arch_timer_use_virtual ? "virt" : "phys"); - return 0; -} - -static u32 notrace arch_counter_get_cntpct32(void) -{ - cycle_t cnt = arch_counter_get_cntpct(); - - /* - * The sched_clock infrastructure only knows about counters - * with at most 32bits. Forget about the upper 24 bits for the - * time being... - */ - return (u32)cnt; -} - -static u32 notrace arch_counter_get_cntvct32(void) -{ - cycle_t cnt = arch_counter_get_cntvct(); - - /* - * The sched_clock infrastructure only knows about counters - * with at most 32bits. Forget about the upper 24 bits for the - * time being... - */ - return (u32)cnt; -} - -static cycle_t arch_counter_read(struct clocksource *cs) -{ - /* - * Always use the physical counter for the clocksource. - * CNTHCTL.PL1PCTEN must be set to 1. - */ - return arch_counter_get_cntpct(); -} - -static unsigned long arch_timer_read_current_timer(void) +static unsigned long arch_timer_read_counter_long(void) { - return arch_counter_get_cntpct(); + return arch_timer_read_counter(); } -static cycle_t arch_counter_read_cc(const struct cyclecounter *cc) +static u32 arch_timer_read_counter_u32(void) { - /* - * Always use the physical counter for the clocksource. - * CNTHCTL.PL1PCTEN must be set to 1. - */ - return arch_counter_get_cntpct(); + return arch_timer_read_counter(); } -static struct clocksource clocksource_counter = { - .name = "arch_sys_counter", - .rating = 400, - .read = arch_counter_read, - .mask = CLOCKSOURCE_MASK(56), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; - -static struct cyclecounter cyclecounter = { - .read = arch_counter_read_cc, - .mask = CLOCKSOURCE_MASK(56), -}; - -static struct timecounter timecounter; - -struct timecounter *arch_timer_get_timecounter(void) -{ - return &timecounter; -} - -static void __cpuinit arch_timer_stop(struct clock_event_device *clk) -{ - pr_debug("arch_timer_teardown disable IRQ%d cpu #%d\n", - clk->irq, smp_processor_id()); - - if (arch_timer_use_virtual) - disable_percpu_irq(arch_timer_ppi[VIRT_PPI]); - else { - disable_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI]); - if (arch_timer_ppi[PHYS_NONSECURE_PPI]) - disable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI]); - } - - clk->set_mode(CLOCK_EVT_MODE_UNUSED, clk); -} - -static struct local_timer_ops arch_timer_ops __cpuinitdata = { - .setup = arch_timer_setup, - .stop = arch_timer_stop, -}; - -static struct clock_event_device arch_timer_global_evt; +static struct delay_timer arch_delay_timer; -static int __init arch_timer_register(void) +static void __init arch_timer_delay_timer_register(void) { - int err; - int ppi; - - err = arch_timer_available(); - if (err) - goto out; - - arch_timer_evt = alloc_percpu(struct clock_event_device *); - if (!arch_timer_evt) { - err = -ENOMEM; - goto out; - } - - clocksource_register_hz(&clocksource_counter, arch_timer_rate); - cyclecounter.mult = clocksource_counter.mult; - cyclecounter.shift = clocksource_counter.shift; - timecounter_init(&timecounter, &cyclecounter, - arch_counter_get_cntpct()); - - if (arch_timer_use_virtual) { - ppi = arch_timer_ppi[VIRT_PPI]; - err = request_percpu_irq(ppi, arch_timer_handler_virt, - "arch_timer", arch_timer_evt); - } else { - ppi = arch_timer_ppi[PHYS_SECURE_PPI]; - err = request_percpu_irq(ppi, arch_timer_handler_phys, - "arch_timer", arch_timer_evt); - if (!err && arch_timer_ppi[PHYS_NONSECURE_PPI]) { - ppi = arch_timer_ppi[PHYS_NONSECURE_PPI]; - err = request_percpu_irq(ppi, arch_timer_handler_phys, - "arch_timer", arch_timer_evt); - if (err) - free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], - arch_timer_evt); - } - } - - if (err) { - pr_err("arch_timer: can't register interrupt %d (%d)\n", - ppi, err); - goto out_free; - } - - err = local_timer_register(&arch_timer_ops); - if (err) { - /* - * We couldn't register as a local timer (could be - * because we're on a UP platform, or because some - * other local timer is already present...). Try as a - * global timer instead. - */ - arch_timer_global_evt.cpumask = cpumask_of(0); - err = arch_timer_setup(&arch_timer_global_evt); - } - if (err) - goto out_free_irq; - /* Use the architected timer for the delay loop. */ - arch_delay_timer.read_current_timer = &arch_timer_read_current_timer; - arch_delay_timer.freq = arch_timer_rate; + arch_delay_timer.read_current_timer = arch_timer_read_counter_long; + arch_delay_timer.freq = arch_timer_get_rate(); register_current_timer_delay(&arch_delay_timer); - return 0; - -out_free_irq: - if (arch_timer_use_virtual) - free_percpu_irq(arch_timer_ppi[VIRT_PPI], arch_timer_evt); - else { - free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], - arch_timer_evt); - if (arch_timer_ppi[PHYS_NONSECURE_PPI]) - free_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], - arch_timer_evt); - } - -out_free: - free_percpu(arch_timer_evt); -out: - return err; } -static const struct of_device_id arch_timer_of_match[] __initconst = { - { .compatible = "arm,armv7-timer", }, - {}, -}; - int __init arch_timer_of_register(void) { - struct device_node *np; - u32 freq; - int i; - - np = of_find_matching_node(NULL, arch_timer_of_match); - if (!np) { - pr_err("arch_timer: can't find DT node\n"); - return -ENODEV; - } - - /* Try to determine the frequency from the device tree or CNTFRQ */ - if (!of_property_read_u32(np, "clock-frequency", &freq)) - arch_timer_rate = freq; - - for (i = PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++) - arch_timer_ppi[i] = irq_of_parse_and_map(np, i); + int ret; - /* - * If no interrupt provided for virtual timer, we'll have to - * stick to the physical timer. It'd better be accessible... - */ - if (!arch_timer_ppi[VIRT_PPI]) { - arch_timer_use_virtual = false; + ret = arch_timer_init(); + if (ret) + return ret; - if (!arch_timer_ppi[PHYS_SECURE_PPI] || - !arch_timer_ppi[PHYS_NONSECURE_PPI]) { - pr_warn("arch_timer: No interrupt available, giving up\n"); - return -EINVAL; - } - } + arch_timer_delay_timer_register(); - return arch_timer_register(); + return 0; } int __init arch_timer_sched_clock_init(void) { - u32 (*cnt32)(void); - int err; - - err = arch_timer_available(); - if (err) - return err; - - if (arch_timer_use_virtual) - cnt32 = arch_counter_get_cntvct32; - else - cnt32 = arch_counter_get_cntpct32; + if (arch_timer_get_rate() == 0) + return -ENXIO; - setup_sched_clock(cnt32, 32, arch_timer_rate); + setup_sched_clock(arch_timer_read_counter_u32, + 32, arch_timer_get_rate()); return 0; } diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index c8b3272..5ce738b 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -169,6 +169,24 @@ int main(void) DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.hxfar)); DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.hpfar)); DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.hyp_pc)); +#ifdef CONFIG_KVM_ARM_VGIC + DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); + DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); + DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr)); + DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr)); + DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr)); + DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr)); + DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr)); + DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr)); + DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); +#ifdef CONFIG_KVM_ARM_TIMER + DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); + DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); + DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); + DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); +#endif + DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); +#endif DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); #endif return 0; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 60340fa..5f73f70 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -461,14 +461,8 @@ u64 smp_irq_stat_cpu(unsigned int cpu) */ static DEFINE_PER_CPU(struct clock_event_device, percpu_clockevent); -static void ipi_timer(void) -{ - struct clock_event_device *evt = &__get_cpu_var(percpu_clockevent); - evt->event_handler(evt); -} - #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -static void smp_timer_broadcast(const struct cpumask *mask) +void tick_broadcast(const struct cpumask *mask) { smp_cross_call(mask, IPI_TIMER); } @@ -516,7 +510,6 @@ static void __cpuinit percpu_timer_setup(void) struct clock_event_device *evt = &per_cpu(percpu_clockevent, cpu); evt->cpumask = cpumask_of(cpu); - evt->broadcast = smp_timer_broadcast; if (!lt_ops || lt_ops->setup(evt)) broadcast_timer_setup(evt); @@ -582,11 +575,13 @@ void handle_IPI(int ipinr, struct pt_regs *regs) case IPI_WAKEUP: break; +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST case IPI_TIMER: irq_enter(); - ipi_timer(); + tick_receive_broadcast(); irq_exit(); break; +#endif case IPI_RESCHEDULE: scheduler_ipi(); diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 05227cb..49dd64e 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -51,6 +51,22 @@ config KVM_ARM_MAX_VCPUS large, so only choose a reasonable number that you expect to actually use. +config KVM_ARM_VGIC + bool "KVM support for Virtual GIC" + depends on KVM_ARM_HOST && OF + select HAVE_KVM_IRQCHIP + default y + ---help--- + Adds support for a hardware assisted, in-kernel GIC emulation. + +config KVM_ARM_TIMER + bool "KVM support for Architected Timers" + depends on KVM_ARM_VGIC && ARM_ARCH_TIMER + select HAVE_KVM_IRQCHIP + default y + ---help--- + Adds support for the Architected Timers in virtual machines + source drivers/virtio/Kconfig endif # VIRTUALIZATION diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index ea27987..fc96ce6 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -19,3 +19,5 @@ kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o mmio.o psci.o +obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o +obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o diff --git a/arch/arm/kvm/arch_timer.c b/arch/arm/kvm/arch_timer.c new file mode 100644 index 0000000..6ac938d --- /dev/null +++ b/arch/arm/kvm/arch_timer.c @@ -0,0 +1,271 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/cpu.h> +#include <linux/of_irq.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/interrupt.h> + +#include <asm/arch_timer.h> + +#include <asm/kvm_vgic.h> +#include <asm/kvm_arch_timer.h> + +static struct timecounter *timecounter; +static struct workqueue_struct *wqueue; +static struct kvm_irq_level timer_irq = { + .level = 1, +}; + +static cycle_t kvm_phys_timer_read(void) +{ + return timecounter->cc->read(timecounter->cc); +} + +static bool timer_is_armed(struct arch_timer_cpu *timer) +{ + return timer->armed; +} + +/* timer_arm: as in "arm the timer", not as in ARM the company */ +static void timer_arm(struct arch_timer_cpu *timer, u64 ns) +{ + timer->armed = true; + hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns), + HRTIMER_MODE_ABS); +} + +static void timer_disarm(struct arch_timer_cpu *timer) +{ + if (timer_is_armed(timer)) { + hrtimer_cancel(&timer->timer); + cancel_work_sync(&timer->expired); + timer->armed = false; + } +} + +static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + timer->cntv_ctl |= 1 << 1; /* Mask the interrupt in the guest */ + kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + vcpu->arch.timer_cpu.irq->irq, + vcpu->arch.timer_cpu.irq->level); +} + +static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) +{ + struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; + + /* + * We disable the timer in the world switch and let it be + * handled by kvm_timer_sync_hwstate(). Getting a timer + * interrupt at this point is a sure sign of some major + * breakage. + */ + pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu); + return IRQ_HANDLED; +} + +static void kvm_timer_inject_irq_work(struct work_struct *work) +{ + struct kvm_vcpu *vcpu; + + vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); + vcpu->arch.timer_cpu.armed = false; + kvm_timer_inject_irq(vcpu); +} + +static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) +{ + struct arch_timer_cpu *timer; + timer = container_of(hrt, struct arch_timer_cpu, timer); + queue_work(wqueue, &timer->expired); + return HRTIMER_NORESTART; +} + +/** + * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu + * @vcpu: The vcpu pointer + * + * Disarm any pending soft timers, since the world-switch code will write the + * virtual timer state back to the physical CPU. + */ +void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + /* + * We're about to run this vcpu again, so there is no need to + * keep the background timer running, as we're about to + * populate the CPU timer again. + */ + timer_disarm(timer); +} + +/** + * kvm_timer_sync_hwstate - sync timer state from cpu + * @vcpu: The vcpu pointer + * + * Check if the virtual timer was armed and either schedule a corresponding + * soft timer or inject directly if already expired. + */ +void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + cycle_t cval, now; + u64 ns; + + /* Check if the timer is enabled and unmasked first */ + if ((timer->cntv_ctl & 3) != 1) + return; + + cval = timer->cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + + BUG_ON(timer_is_armed(timer)); + + if (cval <= now) { + /* + * Timer has already expired while we were not + * looking. Inject the interrupt and carry on. + */ + kvm_timer_inject_irq(vcpu); + return; + } + + ns = cyclecounter_cyc2ns(timecounter->cc, cval - now); + timer_arm(timer, ns); +} + +void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); + hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + timer->timer.function = kvm_timer_expire; + timer->irq = &timer_irq; +} + +static void kvm_timer_init_interrupt(void *info) +{ + enable_percpu_irq(timer_irq.irq, 0); +} + + +static int kvm_timer_cpu_notify(struct notifier_block *self, + unsigned long action, void *cpu) +{ + switch (action) { + case CPU_STARTING: + case CPU_STARTING_FROZEN: + kvm_timer_init_interrupt(NULL); + break; + case CPU_DYING: + case CPU_DYING_FROZEN: + disable_percpu_irq(timer_irq.irq); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block kvm_timer_cpu_nb = { + .notifier_call = kvm_timer_cpu_notify, +}; + +static const struct of_device_id arch_timer_of_match[] = { + { .compatible = "arm,armv7-timer", }, + {}, +}; + +int kvm_timer_hyp_init(void) +{ + struct device_node *np; + unsigned int ppi; + int err; + + timecounter = arch_timer_get_timecounter(); + if (!timecounter) + return -ENODEV; + + np = of_find_matching_node(NULL, arch_timer_of_match); + if (!np) { + kvm_err("kvm_arch_timer: can't find DT node\n"); + return -ENODEV; + } + + ppi = irq_of_parse_and_map(np, 2); + if (!ppi) { + kvm_err("kvm_arch_timer: no virtual timer interrupt\n"); + err = -EINVAL; + goto out; + } + + err = request_percpu_irq(ppi, kvm_arch_timer_handler, + "kvm guest timer", kvm_get_running_vcpus()); + if (err) { + kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", + ppi, err); + goto out; + } + + timer_irq.irq = ppi; + + err = register_cpu_notifier(&kvm_timer_cpu_nb); + if (err) { + kvm_err("Cannot register timer CPU notifier\n"); + goto out_free; + } + + wqueue = create_singlethread_workqueue("kvm_arch_timer"); + if (!wqueue) { + err = -ENOMEM; + goto out_free; + } + + kvm_info("%s IRQ%d\n", np->name, ppi); + on_each_cpu(kvm_timer_init_interrupt, NULL, 1); + + goto out; +out_free: + free_percpu_irq(ppi, kvm_get_running_vcpus()); +out: + of_node_put(np); + return err; +} + +void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + timer_disarm(timer); +} + +int kvm_timer_init(struct kvm *kvm) +{ + if (timecounter && wqueue) { + kvm->arch.timer.cntvoff = kvm_phys_timer_read(); + kvm->arch.timer.enabled = 1; + } + + return 0; +} diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 2d30e3a..9ada554 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -54,11 +54,40 @@ static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); static struct vfp_hard_struct __percpu *kvm_host_vfp_state; static unsigned long hyp_default_vectors; +/* Per-CPU variable containing the currently running vcpu. */ +static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); + /* The VMID used in the VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u8 kvm_next_vmid; static DEFINE_SPINLOCK(kvm_vmid_lock); +static bool vgic_present; + +static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) +{ + BUG_ON(preemptible()); + __get_cpu_var(kvm_arm_running_vcpu) = vcpu; +} + +/** + * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU. + * Must be called from non-preemptible context + */ +struct kvm_vcpu *kvm_arm_get_running_vcpu(void) +{ + BUG_ON(preemptible()); + return __get_cpu_var(kvm_arm_running_vcpu); +} + +/** + * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus. + */ +struct kvm_vcpu __percpu **kvm_get_running_vcpus(void) +{ + return &kvm_arm_running_vcpu; +} + int kvm_arch_hardware_enable(void *garbage) { return 0; @@ -157,6 +186,9 @@ int kvm_dev_ioctl_check_extension(long ext) { int r; switch (ext) { + case KVM_CAP_IRQCHIP: + r = vgic_present; + break; case KVM_CAP_USER_MEMORY: case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: @@ -167,6 +199,8 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; + case KVM_CAP_ARM_SET_DEVICE_ADDR: + r = 1; case KVM_CAP_NR_VCPUS: r = num_online_cpus(); break; @@ -255,6 +289,7 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { kvm_mmu_free_memory_caches(vcpu); + kvm_timer_vcpu_terminate(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } @@ -286,8 +321,19 @@ int __attribute_const__ kvm_target_cpu(void) int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { + int ret; + /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; + + /* Set up VGIC */ + ret = kvm_vgic_vcpu_init(vcpu); + if (ret) + return ret; + + /* Set up the timer */ + kvm_timer_vcpu_init(vcpu); + return 0; } @@ -308,10 +354,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) */ if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush)) flush_cache_all(); /* We'd really want v7_flush_dcache_all() */ + + kvm_arm_set_running_vcpu(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + kvm_arm_set_running_vcpu(NULL); } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, @@ -342,7 +391,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, */ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return !!v->arch.irq_lines; + return !!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v); } /* Just ensure a guest exit from a particular CPU */ @@ -597,6 +646,17 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) vcpu->arch.has_run_once = true; /* + * Initialize the VGIC before running a vcpu the first time on + * this VM. + */ + if (irqchip_in_kernel(vcpu->kvm) && + unlikely(!vgic_initialized(vcpu->kvm))) { + int ret = kvm_vgic_init(vcpu->kvm); + if (ret) + return ret; + } + + /* * Handle the "start in power-off" case by calling into the * PSCI code. */ @@ -661,6 +721,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (vcpu->arch.pause) vcpu_pause(vcpu); + kvm_vgic_flush_hwstate(vcpu); + kvm_timer_flush_hwstate(vcpu); + local_irq_disable(); /* @@ -673,6 +736,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { local_irq_enable(); + kvm_timer_sync_hwstate(vcpu); + kvm_vgic_sync_hwstate(vcpu); continue; } @@ -705,6 +770,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * Back from guest *************************************************************/ + kvm_timer_sync_hwstate(vcpu); + kvm_vgic_sync_hwstate(vcpu); + ret = handle_exit(vcpu, run, ret); } @@ -760,20 +828,49 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level) trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level); - if (irq_type != KVM_ARM_IRQ_TYPE_CPU) - return -EINVAL; + switch (irq_type) { + case KVM_ARM_IRQ_TYPE_CPU: + if (irqchip_in_kernel(kvm)) + return -ENXIO; - if (vcpu_idx >= nrcpus) - return -EINVAL; + if (vcpu_idx >= nrcpus) + return -EINVAL; - vcpu = kvm_get_vcpu(kvm, vcpu_idx); - if (!vcpu) - return -EINVAL; + vcpu = kvm_get_vcpu(kvm, vcpu_idx); + if (!vcpu) + return -EINVAL; - if (irq_num > KVM_ARM_IRQ_CPU_FIQ) - return -EINVAL; + if (irq_num > KVM_ARM_IRQ_CPU_FIQ) + return -EINVAL; + + return vcpu_interrupt_line(vcpu, irq_num, level); + case KVM_ARM_IRQ_TYPE_PPI: + if (!irqchip_in_kernel(kvm)) + return -ENXIO; - return vcpu_interrupt_line(vcpu, irq_num, level); + if (vcpu_idx >= nrcpus) + return -EINVAL; + + vcpu = kvm_get_vcpu(kvm, vcpu_idx); + if (!vcpu) + return -EINVAL; + + if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS) + return -EINVAL; + + return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level); + case KVM_ARM_IRQ_TYPE_SPI: + if (!irqchip_in_kernel(kvm)) + return -ENXIO; + + if (irq_num < VGIC_NR_PRIVATE_IRQS || + irq_num > KVM_ARM_IRQ_GIC_MAX) + return -EINVAL; + + return kvm_vgic_inject_irq(kvm, 0, irq_num, level); + } + + return -EINVAL; } long kvm_arch_vcpu_ioctl(struct file *filp, @@ -827,10 +924,49 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return -EINVAL; } +static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, + struct kvm_arm_device_addr *dev_addr) +{ + unsigned long dev_id, type; + + dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >> + KVM_ARM_DEVICE_ID_SHIFT; + type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >> + KVM_ARM_DEVICE_TYPE_SHIFT; + + switch (dev_id) { + case KVM_ARM_DEVICE_VGIC_V2: + if (!vgic_present) + return -ENXIO; + return kvm_vgic_set_addr(kvm, type, dev_addr->addr); + default: + return -ENODEV; + } +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { - return -EINVAL; + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + + switch (ioctl) { + case KVM_CREATE_IRQCHIP: { + if (vgic_present) + return kvm_vgic_create(kvm); + else + return -ENXIO; + } + case KVM_ARM_SET_DEVICE_ADDR: { + struct kvm_arm_device_addr dev_addr; + + if (copy_from_user(&dev_addr, argp, sizeof(dev_addr))) + return -EFAULT; + return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); + } + default: + return -EINVAL; + } } static void cpu_init_hyp_mode(void *vector) @@ -960,6 +1096,24 @@ static int init_hyp_mode(void) } } + /* + * Init HYP view of VGIC + */ + err = kvm_vgic_hyp_init(); + if (err) + goto out_free_vfp; + +#ifdef CONFIG_KVM_ARM_VGIC + vgic_present = true; +#endif + + /* + * Init HYP architected timer support + */ + err = kvm_timer_hyp_init(); + if (err) + goto out_free_mappings; + kvm_info("Hyp mode initialized successfully\n"); return 0; out_free_vfp: diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index d782638..4ea9a98 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -222,6 +222,10 @@ static const struct coproc_reg cp15_regs[] = { NULL, reset_unknown, c13_TID_URO }, { CRn(13), CRm( 0), Op1( 0), Op2( 4), is32, NULL, reset_unknown, c13_TID_PRIV }, + + /* CNTKCTL: swapped by interrupt.S. */ + { CRn(14), CRm( 1), Op1( 0), Op2( 0), is32, + NULL, reset_val, c14_CNTKCTL, 0x00000000 }, }; /* Target specific emulation tables */ diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index c5400d2..8ca87ab 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -94,6 +94,9 @@ ENTRY(__kvm_vcpu_run) save_host_regs + restore_vgic_state + restore_timer_state + @ Store hardware CP15 state and load guest state read_cp15_state store_to_vcpu = 0 write_cp15_state read_from_vcpu = 1 @@ -187,6 +190,9 @@ after_vfp_restore: read_cp15_state store_to_vcpu = 1 write_cp15_state read_from_vcpu = 0 + save_timer_state + save_vgic_state + restore_host_regs clrex @ Clear exclusive monitor mov r0, r1 @ Return the return code diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 6a95d34..3c8f2f0 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -1,3 +1,5 @@ +#include <linux/irqchip/arm-gic.h> + #define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4)) #define VCPU_USR_SP (VCPU_USR_REG(13)) #define VCPU_USR_LR (VCPU_USR_REG(14)) @@ -298,6 +300,14 @@ vcpu .req r0 @ vcpu pointer always in r0 str r11, [vcpu, #CP15_OFFSET(c6_IFAR)] str r12, [vcpu, #CP15_OFFSET(c12_VBAR)] .endif + + mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL + + .if \store_to_vcpu == 0 + push {r2} + .else + str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] + .endif .endm /* @@ -309,6 +319,14 @@ vcpu .req r0 @ vcpu pointer always in r0 */ .macro write_cp15_state read_from_vcpu .if \read_from_vcpu == 0 + pop {r2} + .else + ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)] + .endif + + mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL + + .if \read_from_vcpu == 0 pop {r2-r12} .else ldr r2, [vcpu, #CP15_OFFSET(c13_CID)] @@ -369,6 +387,49 @@ vcpu .req r0 @ vcpu pointer always in r0 * Assumes vcpu pointer in vcpu reg */ .macro save_vgic_state +#ifdef CONFIG_KVM_ARM_VGIC + /* Get VGIC VCTRL base into r2 */ + ldr r2, [vcpu, #VCPU_KVM] + ldr r2, [r2, #KVM_VGIC_VCTRL] + cmp r2, #0 + beq 2f + + /* Compute the address of struct vgic_cpu */ + add r11, vcpu, #VCPU_VGIC_CPU + + /* Save all interesting registers */ + ldr r3, [r2, #GICH_HCR] + ldr r4, [r2, #GICH_VMCR] + ldr r5, [r2, #GICH_MISR] + ldr r6, [r2, #GICH_EISR0] + ldr r7, [r2, #GICH_EISR1] + ldr r8, [r2, #GICH_ELRSR0] + ldr r9, [r2, #GICH_ELRSR1] + ldr r10, [r2, #GICH_APR] + + str r3, [r11, #VGIC_CPU_HCR] + str r4, [r11, #VGIC_CPU_VMCR] + str r5, [r11, #VGIC_CPU_MISR] + str r6, [r11, #VGIC_CPU_EISR] + str r7, [r11, #(VGIC_CPU_EISR + 4)] + str r8, [r11, #VGIC_CPU_ELRSR] + str r9, [r11, #(VGIC_CPU_ELRSR + 4)] + str r10, [r11, #VGIC_CPU_APR] + + /* Clear GICH_HCR */ + mov r5, #0 + str r5, [r2, #GICH_HCR] + + /* Save list registers */ + add r2, r2, #GICH_LR0 + add r3, r11, #VGIC_CPU_LR + ldr r4, [r11, #VGIC_CPU_NR_LR] +1: ldr r6, [r2], #4 + str r6, [r3], #4 + subs r4, r4, #1 + bne 1b +2: +#endif .endm /* @@ -377,6 +438,109 @@ vcpu .req r0 @ vcpu pointer always in r0 * Assumes vcpu pointer in vcpu reg */ .macro restore_vgic_state +#ifdef CONFIG_KVM_ARM_VGIC + /* Get VGIC VCTRL base into r2 */ + ldr r2, [vcpu, #VCPU_KVM] + ldr r2, [r2, #KVM_VGIC_VCTRL] + cmp r2, #0 + beq 2f + + /* Compute the address of struct vgic_cpu */ + add r11, vcpu, #VCPU_VGIC_CPU + + /* We only restore a minimal set of registers */ + ldr r3, [r11, #VGIC_CPU_HCR] + ldr r4, [r11, #VGIC_CPU_VMCR] + ldr r8, [r11, #VGIC_CPU_APR] + + str r3, [r2, #GICH_HCR] + str r4, [r2, #GICH_VMCR] + str r8, [r2, #GICH_APR] + + /* Restore list registers */ + add r2, r2, #GICH_LR0 + add r3, r11, #VGIC_CPU_LR + ldr r4, [r11, #VGIC_CPU_NR_LR] +1: ldr r6, [r3], #4 + str r6, [r2], #4 + subs r4, r4, #1 + bne 1b +2: +#endif +.endm + +#define CNTHCTL_PL1PCTEN (1 << 0) +#define CNTHCTL_PL1PCEN (1 << 1) + +/* + * Save the timer state onto the VCPU and allow physical timer/counter access + * for the host. + * + * Assumes vcpu pointer in vcpu reg + * Clobbers r2-r5 + */ +.macro save_timer_state +#ifdef CONFIG_KVM_ARM_TIMER + ldr r4, [vcpu, #VCPU_KVM] + ldr r2, [r4, #KVM_TIMER_ENABLED] + cmp r2, #0 + beq 1f + + mrc p15, 0, r2, c14, c3, 1 @ CNTV_CTL + str r2, [vcpu, #VCPU_TIMER_CNTV_CTL] + bic r2, #1 @ Clear ENABLE + mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL + isb + + mrrc p15, 3, r2, r3, c14 @ CNTV_CVAL + ldr r4, =VCPU_TIMER_CNTV_CVAL + add r5, vcpu, r4 + strd r2, r3, [r5] + +1: +#endif + @ Allow physical timer/counter access for the host + mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL + orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN) + mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL +.endm + +/* + * Load the timer state from the VCPU and deny physical timer/counter access + * for the host. + * + * Assumes vcpu pointer in vcpu reg + * Clobbers r2-r5 + */ +.macro restore_timer_state + @ Disallow physical timer access for the guest + @ Physical counter access is allowed + mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL + orr r2, r2, #CNTHCTL_PL1PCTEN + bic r2, r2, #CNTHCTL_PL1PCEN + mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL + +#ifdef CONFIG_KVM_ARM_TIMER + ldr r4, [vcpu, #VCPU_KVM] + ldr r2, [r4, #KVM_TIMER_ENABLED] + cmp r2, #0 + beq 1f + + ldr r2, [r4, #KVM_TIMER_CNTVOFF] + ldr r3, [r4, #(KVM_TIMER_CNTVOFF + 4)] + mcrr p15, 4, r2, r3, c14 @ CNTVOFF + + ldr r4, =VCPU_TIMER_CNTV_CVAL + add r5, vcpu, r4 + ldrd r2, r3, [r5] + mcrr p15, 3, r2, r3, c14 @ CNTV_CVAL + isb + + ldr r2, [vcpu, #VCPU_TIMER_CNTV_CTL] + and r2, r2, #3 + mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL +1: +#endif .endm .equ vmentry, 0 diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 0144baf..98a870f 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -148,6 +148,9 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, if (mmio.is_write) memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len); + if (vgic_handle_mmio(vcpu, run, &mmio)) + return 1; + kvm_prepare_mmio(run, &mmio); return 0; } diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c new file mode 100644 index 0000000..c9a1731 --- /dev/null +++ b/arch/arm/kvm/vgic.c @@ -0,0 +1,1506 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/cpu.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> + +#include <linux/irqchip/arm-gic.h> + +#include <asm/kvm_emulate.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> + +/* + * How the whole thing works (courtesy of Christoffer Dall): + * + * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if + * something is pending + * - VGIC pending interrupts are stored on the vgic.irq_state vgic + * bitmap (this bitmap is updated by both user land ioctls and guest + * mmio ops, and other in-kernel peripherals such as the + * arch. timers) and indicate the 'wire' state. + * - Every time the bitmap changes, the irq_pending_on_cpu oracle is + * recalculated + * - To calculate the oracle, we need info for each cpu from + * compute_pending_for_cpu, which considers: + * - PPI: dist->irq_state & dist->irq_enable + * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target + * - irq_spi_target is a 'formatted' version of the GICD_ICFGR + * registers, stored on each vcpu. We only keep one bit of + * information per interrupt, making sure that only one vcpu can + * accept the interrupt. + * - The same is true when injecting an interrupt, except that we only + * consider a single interrupt at a time. The irq_spi_cpu array + * contains the target CPU for each SPI. + * + * The handling of level interrupts adds some extra complexity. We + * need to track when the interrupt has been EOIed, so we can sample + * the 'line' again. This is achieved as such: + * + * - When a level interrupt is moved onto a vcpu, the corresponding + * bit in irq_active is set. As long as this bit is set, the line + * will be ignored for further interrupts. The interrupt is injected + * into the vcpu with the GICH_LR_EOI bit set (generate a + * maintenance interrupt on EOI). + * - When the interrupt is EOIed, the maintenance interrupt fires, + * and clears the corresponding bit in irq_active. This allow the + * interrupt line to be sampled again. + */ + +#define VGIC_ADDR_UNDEF (-1) +#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) + +/* Physical address of vgic virtual cpu interface */ +static phys_addr_t vgic_vcpu_base; + +/* Virtual control interface base address */ +static void __iomem *vgic_vctrl_base; + +static struct device_node *vgic_node; + +#define ACCESS_READ_VALUE (1 << 0) +#define ACCESS_READ_RAZ (0 << 0) +#define ACCESS_READ_MASK(x) ((x) & (1 << 0)) +#define ACCESS_WRITE_IGNORED (0 << 1) +#define ACCESS_WRITE_SETBIT (1 << 1) +#define ACCESS_WRITE_CLEARBIT (2 << 1) +#define ACCESS_WRITE_VALUE (3 << 1) +#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) + +static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); +static void vgic_update_state(struct kvm *kvm); +static void vgic_kick_vcpus(struct kvm *kvm); +static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); +static u32 vgic_nr_lr; + +static unsigned int vgic_maint_irq; + +static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, + int cpuid, u32 offset) +{ + offset >>= 2; + if (!offset) + return x->percpu[cpuid].reg; + else + return x->shared.reg + offset - 1; +} + +static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, + int cpuid, int irq) +{ + if (irq < VGIC_NR_PRIVATE_IRQS) + return test_bit(irq, x->percpu[cpuid].reg_ul); + + return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul); +} + +static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, + int irq, int val) +{ + unsigned long *reg; + + if (irq < VGIC_NR_PRIVATE_IRQS) { + reg = x->percpu[cpuid].reg_ul; + } else { + reg = x->shared.reg_ul; + irq -= VGIC_NR_PRIVATE_IRQS; + } + + if (val) + set_bit(irq, reg); + else + clear_bit(irq, reg); +} + +static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) +{ + if (unlikely(cpuid >= VGIC_MAX_CPUS)) + return NULL; + return x->percpu[cpuid].reg_ul; +} + +static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) +{ + return x->shared.reg_ul; +} + +static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) +{ + offset >>= 2; + BUG_ON(offset > (VGIC_NR_IRQS / 4)); + if (offset < 4) + return x->percpu[cpuid] + offset; + else + return x->shared + offset - 8; +} + +#define VGIC_CFG_LEVEL 0 +#define VGIC_CFG_EDGE 1 + +static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int irq_val; + + irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq); + return irq_val == VGIC_CFG_EDGE; +} + +static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); +} + +static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); +} + +static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); +} + +static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); +} + +static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq); +} + +static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1); +} + +static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0); +} + +static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) +{ + if (irq < VGIC_NR_PRIVATE_IRQS) + set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); + else + set_bit(irq - VGIC_NR_PRIVATE_IRQS, + vcpu->arch.vgic_cpu.pending_shared); +} + +static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) +{ + if (irq < VGIC_NR_PRIVATE_IRQS) + clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); + else + clear_bit(irq - VGIC_NR_PRIVATE_IRQS, + vcpu->arch.vgic_cpu.pending_shared); +} + +static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) +{ + return *((u32 *)mmio->data) & mask; +} + +static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) +{ + *((u32 *)mmio->data) = value & mask; +} + +/** + * vgic_reg_access - access vgic register + * @mmio: pointer to the data describing the mmio access + * @reg: pointer to the virtual backing of vgic distributor data + * @offset: least significant 2 bits used for word offset + * @mode: ACCESS_ mode (see defines above) + * + * Helper to make vgic register access easier using one of the access + * modes defined for vgic register access + * (read,raz,write-ignored,setbit,clearbit,write) + */ +static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, + phys_addr_t offset, int mode) +{ + int word_offset = (offset & 3) * 8; + u32 mask = (1UL << (mmio->len * 8)) - 1; + u32 regval; + + /* + * Any alignment fault should have been delivered to the guest + * directly (ARM ARM B3.12.7 "Prioritization of aborts"). + */ + + if (reg) { + regval = *reg; + } else { + BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED)); + regval = 0; + } + + if (mmio->is_write) { + u32 data = mmio_data_read(mmio, mask) << word_offset; + switch (ACCESS_WRITE_MASK(mode)) { + case ACCESS_WRITE_IGNORED: + return; + + case ACCESS_WRITE_SETBIT: + regval |= data; + break; + + case ACCESS_WRITE_CLEARBIT: + regval &= ~data; + break; + + case ACCESS_WRITE_VALUE: + regval = (regval & ~(mask << word_offset)) | data; + break; + } + *reg = regval; + } else { + switch (ACCESS_READ_MASK(mode)) { + case ACCESS_READ_RAZ: + regval = 0; + /* fall through */ + + case ACCESS_READ_VALUE: + mmio_data_write(mmio, mask, regval >> word_offset); + } + } +} + +static bool handle_mmio_misc(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + u32 reg; + u32 word_offset = offset & 3; + + switch (offset & ~3) { + case 0: /* CTLR */ + reg = vcpu->kvm->arch.vgic.enabled; + vgic_reg_access(mmio, ®, word_offset, + ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); + if (mmio->is_write) { + vcpu->kvm->arch.vgic.enabled = reg & 1; + vgic_update_state(vcpu->kvm); + return true; + } + break; + + case 4: /* TYPER */ + reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; + reg |= (VGIC_NR_IRQS >> 5) - 1; + vgic_reg_access(mmio, ®, word_offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + break; + + case 8: /* IIDR */ + reg = 0x4B00043B; + vgic_reg_access(mmio, ®, word_offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + break; + } + + return false; +} + +static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + return false; +} + +static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + if (mmio->is_write) { + vgic_update_state(vcpu->kvm); + return true; + } + + return false; +} + +static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); + if (mmio->is_write) { + if (offset < 4) /* Force SGI enabled */ + *reg |= 0xffff; + vgic_retire_disabled_irqs(vcpu); + vgic_update_state(vcpu->kvm); + return true; + } + + return false; +} + +static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + if (mmio->is_write) { + vgic_update_state(vcpu->kvm); + return true; + } + + return false; +} + +static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); + if (mmio->is_write) { + vgic_update_state(vcpu->kvm); + return true; + } + + return false; +} + +static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, + vcpu->vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); + return false; +} + +#define GICD_ITARGETSR_SIZE 32 +#define GICD_CPUTARGETS_BITS 8 +#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS) +static u32 vgic_get_target_reg(struct kvm *kvm, int irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int i, c; + unsigned long *bmap; + u32 val = 0; + + irq -= VGIC_NR_PRIVATE_IRQS; + + kvm_for_each_vcpu(c, vcpu, kvm) { + bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); + for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) + if (test_bit(irq + i, bmap)) + val |= 1 << (c + i * 8); + } + + return val; +} + +static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int i, c; + unsigned long *bmap; + u32 target; + + irq -= VGIC_NR_PRIVATE_IRQS; + + /* + * Pick the LSB in each byte. This ensures we target exactly + * one vcpu per IRQ. If the byte is null, assume we target + * CPU0. + */ + for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) { + int shift = i * GICD_CPUTARGETS_BITS; + target = ffs((val >> shift) & 0xffU); + target = target ? (target - 1) : 0; + dist->irq_spi_cpu[irq + i] = target; + kvm_for_each_vcpu(c, vcpu, kvm) { + bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); + if (c == target) + set_bit(irq + i, bmap); + else + clear_bit(irq + i, bmap); + } + } +} + +static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 reg; + + /* We treat the banked interrupts targets as read-only */ + if (offset < 32) { + u32 roreg = 1 << vcpu->vcpu_id; + roreg |= roreg << 8; + roreg |= roreg << 16; + + vgic_reg_access(mmio, &roreg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + return false; + } + + reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U); + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); + if (mmio->is_write) { + vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U); + vgic_update_state(vcpu->kvm); + return true; + } + + return false; +} + +static u32 vgic_cfg_expand(u16 val) +{ + u32 res = 0; + int i; + + /* + * Turn a 16bit value like abcd...mnop into a 32bit word + * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is. + */ + for (i = 0; i < 16; i++) + res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1); + + return res; +} + +static u16 vgic_cfg_compress(u32 val) +{ + u16 res = 0; + int i; + + /* + * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like + * abcd...mnop which is what we really care about. + */ + for (i = 0; i < 16; i++) + res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i; + + return res; +} + +/* + * The distributor uses 2 bits per IRQ for the CFG register, but the + * LSB is always 0. As such, we only keep the upper bit, and use the + * two above functions to compress/expand the bits + */ +static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + u32 val; + u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, + vcpu->vcpu_id, offset >> 1); + if (offset & 2) + val = *reg >> 16; + else + val = *reg & 0xffff; + + val = vgic_cfg_expand(val); + vgic_reg_access(mmio, &val, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); + if (mmio->is_write) { + if (offset < 4) { + *reg = ~0U; /* Force PPIs/SGIs to 1 */ + return false; + } + + val = vgic_cfg_compress(val); + if (offset & 2) { + *reg &= 0xffff; + *reg |= val << 16; + } else { + *reg &= 0xffff << 16; + *reg |= val; + } + } + + return false; +} + +static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + u32 reg; + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_VALUE); + if (mmio->is_write) { + vgic_dispatch_sgi(vcpu, reg); + vgic_update_state(vcpu->kvm); + return true; + } + + return false; +} + +/* + * I would have liked to use the kvm_bus_io_*() API instead, but it + * cannot cope with banked registers (only the VM pointer is passed + * around, and we need the vcpu). One of these days, someone please + * fix it! + */ +struct mmio_range { + phys_addr_t base; + unsigned long len; + bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, + phys_addr_t offset); +}; + +static const struct mmio_range vgic_ranges[] = { + { + .base = GIC_DIST_CTRL, + .len = 12, + .handle_mmio = handle_mmio_misc, + }, + { + .base = GIC_DIST_IGROUP, + .len = VGIC_NR_IRQS / 8, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GIC_DIST_ENABLE_SET, + .len = VGIC_NR_IRQS / 8, + .handle_mmio = handle_mmio_set_enable_reg, + }, + { + .base = GIC_DIST_ENABLE_CLEAR, + .len = VGIC_NR_IRQS / 8, + .handle_mmio = handle_mmio_clear_enable_reg, + }, + { + .base = GIC_DIST_PENDING_SET, + .len = VGIC_NR_IRQS / 8, + .handle_mmio = handle_mmio_set_pending_reg, + }, + { + .base = GIC_DIST_PENDING_CLEAR, + .len = VGIC_NR_IRQS / 8, + .handle_mmio = handle_mmio_clear_pending_reg, + }, + { + .base = GIC_DIST_ACTIVE_SET, + .len = VGIC_NR_IRQS / 8, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GIC_DIST_ACTIVE_CLEAR, + .len = VGIC_NR_IRQS / 8, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GIC_DIST_PRI, + .len = VGIC_NR_IRQS, + .handle_mmio = handle_mmio_priority_reg, + }, + { + .base = GIC_DIST_TARGET, + .len = VGIC_NR_IRQS, + .handle_mmio = handle_mmio_target_reg, + }, + { + .base = GIC_DIST_CONFIG, + .len = VGIC_NR_IRQS / 4, + .handle_mmio = handle_mmio_cfg_reg, + }, + { + .base = GIC_DIST_SOFTINT, + .len = 4, + .handle_mmio = handle_mmio_sgi_reg, + }, + {} +}; + +static const +struct mmio_range *find_matching_range(const struct mmio_range *ranges, + struct kvm_exit_mmio *mmio, + phys_addr_t base) +{ + const struct mmio_range *r = ranges; + phys_addr_t addr = mmio->phys_addr - base; + + while (r->len) { + if (addr >= r->base && + (addr + mmio->len) <= (r->base + r->len)) + return r; + r++; + } + + return NULL; +} + +/** + * vgic_handle_mmio - handle an in-kernel MMIO access + * @vcpu: pointer to the vcpu performing the access + * @run: pointer to the kvm_run structure + * @mmio: pointer to the data describing the access + * + * returns true if the MMIO access has been performed in kernel space, + * and false if it needs to be emulated in user space. + */ +bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_exit_mmio *mmio) +{ + const struct mmio_range *range; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long base = dist->vgic_dist_base; + bool updated_state; + unsigned long offset; + + if (!irqchip_in_kernel(vcpu->kvm) || + mmio->phys_addr < base || + (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE)) + return false; + + /* We don't support ldrd / strd or ldm / stm to the emulated vgic */ + if (mmio->len > 4) { + kvm_inject_dabt(vcpu, mmio->phys_addr); + return true; + } + + range = find_matching_range(vgic_ranges, mmio, base); + if (unlikely(!range || !range->handle_mmio)) { + pr_warn("Unhandled access %d %08llx %d\n", + mmio->is_write, mmio->phys_addr, mmio->len); + return false; + } + + spin_lock(&vcpu->kvm->arch.vgic.lock); + offset = mmio->phys_addr - range->base - base; + updated_state = range->handle_mmio(vcpu, mmio, offset); + spin_unlock(&vcpu->kvm->arch.vgic.lock); + kvm_prepare_mmio(run, mmio); + kvm_handle_mmio_return(vcpu, run); + + if (updated_state) + vgic_kick_vcpus(vcpu->kvm); + + return true; +} + +static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) +{ + struct kvm *kvm = vcpu->kvm; + struct vgic_dist *dist = &kvm->arch.vgic; + int nrcpus = atomic_read(&kvm->online_vcpus); + u8 target_cpus; + int sgi, mode, c, vcpu_id; + + vcpu_id = vcpu->vcpu_id; + + sgi = reg & 0xf; + target_cpus = (reg >> 16) & 0xff; + mode = (reg >> 24) & 3; + + switch (mode) { + case 0: + if (!target_cpus) + return; + + case 1: + target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; + break; + + case 2: + target_cpus = 1 << vcpu_id; + break; + } + + kvm_for_each_vcpu(c, vcpu, kvm) { + if (target_cpus & 1) { + /* Flag the SGI as pending */ + vgic_dist_irq_set(vcpu, sgi); + dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; + kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); + } + + target_cpus >>= 1; + } +} + +static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long *pending, *enabled, *pend_percpu, *pend_shared; + unsigned long pending_private, pending_shared; + int vcpu_id; + + vcpu_id = vcpu->vcpu_id; + pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; + pend_shared = vcpu->arch.vgic_cpu.pending_shared; + + pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id); + enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); + bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); + + pending = vgic_bitmap_get_shared_map(&dist->irq_state); + enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); + bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS); + bitmap_and(pend_shared, pend_shared, + vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), + VGIC_NR_SHARED_IRQS); + + pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); + pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS); + return (pending_private < VGIC_NR_PRIVATE_IRQS || + pending_shared < VGIC_NR_SHARED_IRQS); +} + +/* + * Update the interrupt state and determine which CPUs have pending + * interrupts. Must be called with distributor lock held. + */ +static void vgic_update_state(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int c; + + if (!dist->enabled) { + set_bit(0, &dist->irq_pending_on_cpu); + return; + } + + kvm_for_each_vcpu(c, vcpu, kvm) { + if (compute_pending_for_cpu(vcpu)) { + pr_debug("CPU%d has pending interrupts\n", c); + set_bit(c, &dist->irq_pending_on_cpu); + } + } +} + +#define LR_CPUID(lr) \ + (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) +#define MK_LR_PEND(src, irq) \ + (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq)) + +/* + * An interrupt may have been disabled after being made pending on the + * CPU interface (the classic case is a timer running while we're + * rebooting the guest - the interrupt would kick as soon as the CPU + * interface gets enabled, with deadly consequences). + * + * The solution is to examine already active LRs, and check the + * interrupt is still enabled. If not, just retire it. + */ +static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + int lr; + + for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) { + int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + + if (!vgic_irq_is_enabled(vcpu, irq)) { + vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; + clear_bit(lr, vgic_cpu->lr_used); + vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE; + if (vgic_irq_is_active(vcpu, irq)) + vgic_irq_clear_active(vcpu, irq); + } + } +} + +/* + * Queue an interrupt to a CPU virtual interface. Return true on success, + * or false if it wasn't possible to queue it. + */ +static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + int lr; + + /* Sanitize the input... */ + BUG_ON(sgi_source_id & ~7); + BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); + BUG_ON(irq >= VGIC_NR_IRQS); + + kvm_debug("Queue IRQ%d\n", irq); + + lr = vgic_cpu->vgic_irq_lr_map[irq]; + + /* Do we have an active interrupt for the same CPUID? */ + if (lr != LR_EMPTY && + (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) { + kvm_debug("LR%d piggyback for IRQ%d %x\n", + lr, irq, vgic_cpu->vgic_lr[lr]); + BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); + vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT; + + goto out; + } + + /* Try to use another LR for this interrupt */ + lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, + vgic_cpu->nr_lr); + if (lr >= vgic_cpu->nr_lr) + return false; + + kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); + vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); + vgic_cpu->vgic_irq_lr_map[irq] = lr; + set_bit(lr, vgic_cpu->lr_used); + +out: + if (!vgic_irq_is_edge(vcpu, irq)) + vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI; + + return true; +} + +static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long sources; + int vcpu_id = vcpu->vcpu_id; + int c; + + sources = dist->irq_sgi_sources[vcpu_id][irq]; + + for_each_set_bit(c, &sources, VGIC_MAX_CPUS) { + if (vgic_queue_irq(vcpu, c, irq)) + clear_bit(c, &sources); + } + + dist->irq_sgi_sources[vcpu_id][irq] = sources; + + /* + * If the sources bitmap has been cleared it means that we + * could queue all the SGIs onto link registers (see the + * clear_bit above), and therefore we are done with them in + * our emulated gic and can get rid of them. + */ + if (!sources) { + vgic_dist_irq_clear(vcpu, irq); + vgic_cpu_irq_clear(vcpu, irq); + return true; + } + + return false; +} + +static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) +{ + if (vgic_irq_is_active(vcpu, irq)) + return true; /* level interrupt, already queued */ + + if (vgic_queue_irq(vcpu, 0, irq)) { + if (vgic_irq_is_edge(vcpu, irq)) { + vgic_dist_irq_clear(vcpu, irq); + vgic_cpu_irq_clear(vcpu, irq); + } else { + vgic_irq_set_active(vcpu, irq); + } + + return true; + } + + return false; +} + +/* + * Fill the list registers with pending interrupts before running the + * guest. + */ +static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int i, vcpu_id; + int overflow = 0; + + vcpu_id = vcpu->vcpu_id; + + /* + * We may not have any pending interrupt, or the interrupts + * may have been serviced from another vcpu. In all cases, + * move along. + */ + if (!kvm_vgic_vcpu_pending_irq(vcpu)) { + pr_debug("CPU%d has no pending interrupt\n", vcpu_id); + goto epilog; + } + + /* SGIs */ + for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) { + if (!vgic_queue_sgi(vcpu, i)) + overflow = 1; + } + + /* PPIs */ + for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) { + if (!vgic_queue_hwirq(vcpu, i)) + overflow = 1; + } + + /* SPIs */ + for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) { + if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) + overflow = 1; + } + +epilog: + if (overflow) { + vgic_cpu->vgic_hcr |= GICH_HCR_UIE; + } else { + vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; + /* + * We're about to run this VCPU, and we've consumed + * everything the distributor had in store for + * us. Claim we don't have anything pending. We'll + * adjust that if needed while exiting. + */ + clear_bit(vcpu_id, &dist->irq_pending_on_cpu); + } +} + +static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + bool level_pending = false; + + kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr); + + /* + * We do not need to take the distributor lock here, since the only + * action we perform is clearing the irq_active_bit for an EOIed + * level interrupt. There is a potential race with + * the queuing of an interrupt in __kvm_vgic_flush_hwstate(), where we + * check if the interrupt is already active. Two possibilities: + * + * - The queuing is occurring on the same vcpu: cannot happen, + * as we're already in the context of this vcpu, and + * executing the handler + * - The interrupt has been migrated to another vcpu, and we + * ignore this interrupt for this run. Big deal. It is still + * pending though, and will get considered when this vcpu + * exits. + */ + if (vgic_cpu->vgic_misr & GICH_MISR_EOI) { + /* + * Some level interrupts have been EOIed. Clear their + * active bit. + */ + int lr, irq; + + for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr, + vgic_cpu->nr_lr) { + irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + + vgic_irq_clear_active(vcpu, irq); + vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI; + + /* Any additional pending interrupt? */ + if (vgic_dist_irq_is_pending(vcpu, irq)) { + vgic_cpu_irq_set(vcpu, irq); + level_pending = true; + } else { + vgic_cpu_irq_clear(vcpu, irq); + } + } + } + + if (vgic_cpu->vgic_misr & GICH_MISR_U) + vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE; + + return level_pending; +} + +/* + * Sync back the VGIC state after a guest run. We do not really touch + * the distributor here (the irq_pending_on_cpu bit is safe to set), + * so there is no need for taking its lock. + */ +static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int lr, pending; + bool level_pending; + + level_pending = vgic_process_maintenance(vcpu); + + /* Clear mappings for empty LRs */ + for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr, + vgic_cpu->nr_lr) { + int irq; + + if (!test_and_clear_bit(lr, vgic_cpu->lr_used)) + continue; + + irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; + + BUG_ON(irq >= VGIC_NR_IRQS); + vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; + } + + /* Check if we still have something up our sleeve... */ + pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr, + vgic_cpu->nr_lr); + if (level_pending || pending < vgic_cpu->nr_lr) + set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); +} + +void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + if (!irqchip_in_kernel(vcpu->kvm)) + return; + + spin_lock(&dist->lock); + __kvm_vgic_flush_hwstate(vcpu); + spin_unlock(&dist->lock); +} + +void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) +{ + if (!irqchip_in_kernel(vcpu->kvm)) + return; + + __kvm_vgic_sync_hwstate(vcpu); +} + +int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + if (!irqchip_in_kernel(vcpu->kvm)) + return 0; + + return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu); +} + +static void vgic_kick_vcpus(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + int c; + + /* + * We've injected an interrupt, time to find out who deserves + * a good kick... + */ + kvm_for_each_vcpu(c, vcpu, kvm) { + if (kvm_vgic_vcpu_pending_irq(vcpu)) + kvm_vcpu_kick(vcpu); + } +} + +static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) +{ + int is_edge = vgic_irq_is_edge(vcpu, irq); + int state = vgic_dist_irq_is_pending(vcpu, irq); + + /* + * Only inject an interrupt if: + * - edge triggered and we have a rising edge + * - level triggered and we change level + */ + if (is_edge) + return level > state; + else + return level != state; +} + +static bool vgic_update_irq_state(struct kvm *kvm, int cpuid, + unsigned int irq_num, bool level) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int is_edge, is_level; + int enabled; + bool ret = true; + + spin_lock(&dist->lock); + + vcpu = kvm_get_vcpu(kvm, cpuid); + is_edge = vgic_irq_is_edge(vcpu, irq_num); + is_level = !is_edge; + + if (!vgic_validate_injection(vcpu, irq_num, level)) { + ret = false; + goto out; + } + + if (irq_num >= VGIC_NR_PRIVATE_IRQS) { + cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS]; + vcpu = kvm_get_vcpu(kvm, cpuid); + } + + kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); + + if (level) + vgic_dist_irq_set(vcpu, irq_num); + else + vgic_dist_irq_clear(vcpu, irq_num); + + enabled = vgic_irq_is_enabled(vcpu, irq_num); + + if (!enabled) { + ret = false; + goto out; + } + + if (is_level && vgic_irq_is_active(vcpu, irq_num)) { + /* + * Level interrupt in progress, will be picked up + * when EOId. + */ + ret = false; + goto out; + } + + if (level) { + vgic_cpu_irq_set(vcpu, irq_num); + set_bit(cpuid, &dist->irq_pending_on_cpu); + } + +out: + spin_unlock(&dist->lock); + + return ret; +} + +/** + * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic + * @kvm: The VM structure pointer + * @cpuid: The CPU for PPIs + * @irq_num: The IRQ number that is assigned to the device + * @level: Edge-triggered: true: to trigger the interrupt + * false: to ignore the call + * Level-sensitive true: activates an interrupt + * false: deactivates an interrupt + * + * The GIC is not concerned with devices being active-LOW or active-HIGH for + * level-sensitive interrupts. You can think of the level parameter as 1 + * being HIGH and 0 being LOW and all devices being active-HIGH. + */ +int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, + bool level) +{ + if (vgic_update_irq_state(kvm, cpuid, irq_num, level)) + vgic_kick_vcpus(kvm); + + return 0; +} + +static irqreturn_t vgic_maintenance_handler(int irq, void *data) +{ + /* + * We cannot rely on the vgic maintenance interrupt to be + * delivered synchronously. This means we can only use it to + * exit the VM, and we perform the handling of EOIed + * interrupts on the exit path (see vgic_process_maintenance). + */ + return IRQ_HANDLED; +} + +int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int i; + + if (!irqchip_in_kernel(vcpu->kvm)) + return 0; + + if (vcpu->vcpu_id >= VGIC_MAX_CPUS) + return -EBUSY; + + for (i = 0; i < VGIC_NR_IRQS; i++) { + if (i < VGIC_NR_PPIS) + vgic_bitmap_set_irq_val(&dist->irq_enabled, + vcpu->vcpu_id, i, 1); + if (i < VGIC_NR_PRIVATE_IRQS) + vgic_bitmap_set_irq_val(&dist->irq_cfg, + vcpu->vcpu_id, i, VGIC_CFG_EDGE); + + vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; + } + + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vgic_cpu->vgic_vmcr = 0; + + vgic_cpu->nr_lr = vgic_nr_lr; + vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */ + + return 0; +} + +static void vgic_init_maintenance_interrupt(void *info) +{ + enable_percpu_irq(vgic_maint_irq, 0); +} + +static int vgic_cpu_notify(struct notifier_block *self, + unsigned long action, void *cpu) +{ + switch (action) { + case CPU_STARTING: + case CPU_STARTING_FROZEN: + vgic_init_maintenance_interrupt(NULL); + break; + case CPU_DYING: + case CPU_DYING_FROZEN: + disable_percpu_irq(vgic_maint_irq); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block vgic_cpu_nb = { + .notifier_call = vgic_cpu_notify, +}; + +int kvm_vgic_hyp_init(void) +{ + int ret; + struct resource vctrl_res; + struct resource vcpu_res; + + vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic"); + if (!vgic_node) { + kvm_err("error: no compatible vgic node in DT\n"); + return -ENODEV; + } + + vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0); + if (!vgic_maint_irq) { + kvm_err("error getting vgic maintenance irq from DT\n"); + ret = -ENXIO; + goto out; + } + + ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler, + "vgic", kvm_get_running_vcpus()); + if (ret) { + kvm_err("Cannot register interrupt %d\n", vgic_maint_irq); + goto out; + } + + ret = register_cpu_notifier(&vgic_cpu_nb); + if (ret) { + kvm_err("Cannot register vgic CPU notifier\n"); + goto out_free_irq; + } + + ret = of_address_to_resource(vgic_node, 2, &vctrl_res); + if (ret) { + kvm_err("Cannot obtain VCTRL resource\n"); + goto out_free_irq; + } + + vgic_vctrl_base = of_iomap(vgic_node, 2); + if (!vgic_vctrl_base) { + kvm_err("Cannot ioremap VCTRL\n"); + ret = -ENOMEM; + goto out_free_irq; + } + + vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR); + vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1; + + ret = create_hyp_io_mappings(vgic_vctrl_base, + vgic_vctrl_base + resource_size(&vctrl_res), + vctrl_res.start); + if (ret) { + kvm_err("Cannot map VCTRL into hyp\n"); + goto out_unmap; + } + + kvm_info("%s@%llx IRQ%d\n", vgic_node->name, + vctrl_res.start, vgic_maint_irq); + on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); + + if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { + kvm_err("Cannot obtain VCPU resource\n"); + ret = -ENXIO; + goto out_unmap; + } + vgic_vcpu_base = vcpu_res.start; + + goto out; + +out_unmap: + iounmap(vgic_vctrl_base); +out_free_irq: + free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus()); +out: + of_node_put(vgic_node); + return ret; +} + +int kvm_vgic_init(struct kvm *kvm) +{ + int ret = 0, i; + + mutex_lock(&kvm->lock); + + if (vgic_initialized(kvm)) + goto out; + + if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || + IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) { + kvm_err("Need to set vgic cpu and dist addresses first\n"); + ret = -ENXIO; + goto out; + } + + ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, + vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE); + if (ret) { + kvm_err("Unable to remap VGIC CPU to VCPU\n"); + goto out; + } + + for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) + vgic_set_target_reg(kvm, 0, i); + + kvm_timer_init(kvm); + kvm->arch.vgic.ready = true; +out: + mutex_unlock(&kvm->lock); + return ret; +} + +int kvm_vgic_create(struct kvm *kvm) +{ + int ret = 0; + + mutex_lock(&kvm->lock); + + if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) { + ret = -EEXIST; + goto out; + } + + spin_lock_init(&kvm->arch.vgic.lock); + kvm->arch.vgic.vctrl_base = vgic_vctrl_base; + kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; + kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; + +out: + mutex_unlock(&kvm->lock); + return ret; +} + +static bool vgic_ioaddr_overlap(struct kvm *kvm) +{ + phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; + phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; + + if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu)) + return 0; + if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) || + (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist)) + return -EBUSY; + return 0; +} + +static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, + phys_addr_t addr, phys_addr_t size) +{ + int ret; + + if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) + return -EEXIST; + if (addr + size < addr) + return -EINVAL; + + ret = vgic_ioaddr_overlap(kvm); + if (ret) + return ret; + *ioaddr = addr; + return ret; +} + +int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) +{ + int r = 0; + struct vgic_dist *vgic = &kvm->arch.vgic; + + if (addr & ~KVM_PHYS_MASK) + return -E2BIG; + + if (addr & ~PAGE_MASK) + return -EINVAL; + + mutex_lock(&kvm->lock); + switch (type) { + case KVM_VGIC_V2_ADDR_TYPE_DIST: + r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base, + addr, KVM_VGIC_V2_DIST_SIZE); + break; + case KVM_VGIC_V2_ADDR_TYPE_CPU: + r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base, + addr, KVM_VGIC_V2_CPU_SIZE); + break; + default: + r = -ENODEV; + } + + mutex_unlock(&kvm->lock); + return r; +} diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 22ad24e..49ac3df 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -99,12 +99,12 @@ config ARCH_OMAP4 config SOC_OMAP5 bool "TI OMAP5" - select ARM_ARCH_TIMER select ARM_CPU_SUSPEND if PM select ARM_GIC select CPU_V7 select HAVE_SMP select COMMON_CLK + select HAVE_ARM_ARCH_TIMER comment "OMAP Core Type" depends on ARCH_OMAP2 diff --git a/arch/arm/mach-virt/Kconfig b/arch/arm/mach-virt/Kconfig new file mode 100644 index 0000000..8958f0d --- /dev/null +++ b/arch/arm/mach-virt/Kconfig @@ -0,0 +1,10 @@ +config ARCH_VIRT + bool "Dummy Virtual Machine" if ARCH_MULTI_V7 + select ARCH_WANT_OPTIONAL_GPIOLIB + select ARM_GIC + select ARM_ARCH_TIMER + select ARM_PSCI + select HAVE_SMP + select CPU_V7 + select SPARSE_IRQ + select USE_OF diff --git a/arch/arm/mach-virt/Makefile b/arch/arm/mach-virt/Makefile new file mode 100644 index 0000000..042afc1 --- /dev/null +++ b/arch/arm/mach-virt/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for the linux kernel. +# + +obj-y := virt.o +obj-$(CONFIG_SMP) += platsmp.o diff --git a/arch/arm/mach-virt/platsmp.c b/arch/arm/mach-virt/platsmp.c new file mode 100644 index 0000000..8badaab --- /dev/null +++ b/arch/arm/mach-virt/platsmp.c @@ -0,0 +1,58 @@ +/* + * Dummy Virtual Machine - does what it says on the tin. + * + * Copyright (C) 2012 ARM Ltd + * Author: Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/of.h> + +#include <linux/irqchip/arm-gic.h> + +#include <asm/psci.h> +#include <asm/smp_plat.h> + +extern void secondary_startup(void); + +static void __init virt_smp_init_cpus(void) +{ +} + +static void __init virt_smp_prepare_cpus(unsigned int max_cpus) +{ +} + +static int __cpuinit virt_boot_secondary(unsigned int cpu, + struct task_struct *idle) +{ + if (psci_ops.cpu_on) + return psci_ops.cpu_on(cpu_logical_map(cpu), + __pa(secondary_startup)); + return -ENODEV; +} + +static void __cpuinit virt_secondary_init(unsigned int cpu) +{ + gic_secondary_init(0); +} + +struct smp_operations __initdata virt_smp_ops = { + .smp_init_cpus = virt_smp_init_cpus, + .smp_prepare_cpus = virt_smp_prepare_cpus, + .smp_secondary_init = virt_secondary_init, + .smp_boot_secondary = virt_boot_secondary, +}; diff --git a/arch/arm/mach-virt/virt.c b/arch/arm/mach-virt/virt.c new file mode 100644 index 0000000..31666f6 --- /dev/null +++ b/arch/arm/mach-virt/virt.c @@ -0,0 +1,54 @@ +/* + * Dummy Virtual Machine - does what it says on the tin. + * + * Copyright (C) 2012 ARM Ltd + * Authors: Will Deacon <will.deacon@arm.com>, + * Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/irqchip.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> +#include <linux/smp.h> + +#include <asm/arch_timer.h> +#include <asm/mach/arch.h> +#include <asm/mach/time.h> + +static void __init virt_init(void) +{ + of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); +} + +static void __init virt_timer_init(void) +{ + WARN_ON(arch_timer_of_register() != 0); + WARN_ON(arch_timer_sched_clock_init() != 0); +} + +static const char *virt_dt_match[] = { + "linux,dummy-virt", + NULL +}; + +extern struct smp_operations virt_smp_ops; + +DT_MACHINE_START(VIRT, "Dummy Virtual Machine") + .init_irq = irqchip_init, + .init_time = virt_timer_init, + .init_machine = virt_init, + .smp = smp_ops(virt_smp_ops), + .dt_compat = virt_dt_match, +MACHINE_END diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7c43569..ab4aa54 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -4,6 +4,7 @@ config ARM64 select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS select ARM_AMBA + select ARM_ARCH_TIMER select CLONE_BACKWARDS select COMMON_CLK select GENERIC_CLOCKEVENTS diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h new file mode 100644 index 0000000..91e2a6a --- /dev/null +++ b/arch/arm64/include/asm/arch_timer.h @@ -0,0 +1,133 @@ +/* + * arch/arm64/include/asm/arch_timer.h + * + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_ARCH_TIMER_H +#define __ASM_ARCH_TIMER_H + +#include <asm/barrier.h> + +#include <linux/init.h> +#include <linux/types.h> + +#include <clocksource/arm_arch_timer.h> + +static inline void arch_timer_reg_write(int access, int reg, u32 val) +{ + if (access == ARCH_TIMER_PHYS_ACCESS) { + switch (reg) { + case ARCH_TIMER_REG_CTRL: + asm volatile("msr cntp_ctl_el0, %0" : : "r" (val)); + break; + case ARCH_TIMER_REG_TVAL: + asm volatile("msr cntp_tval_el0, %0" : : "r" (val)); + break; + default: + BUILD_BUG(); + } + } else if (access == ARCH_TIMER_VIRT_ACCESS) { + switch (reg) { + case ARCH_TIMER_REG_CTRL: + asm volatile("msr cntv_ctl_el0, %0" : : "r" (val)); + break; + case ARCH_TIMER_REG_TVAL: + asm volatile("msr cntv_tval_el0, %0" : : "r" (val)); + break; + default: + BUILD_BUG(); + } + } else { + BUILD_BUG(); + } + + isb(); +} + +static inline u32 arch_timer_reg_read(int access, int reg) +{ + u32 val; + + if (access == ARCH_TIMER_PHYS_ACCESS) { + switch (reg) { + case ARCH_TIMER_REG_CTRL: + asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val)); + break; + case ARCH_TIMER_REG_TVAL: + asm volatile("mrs %0, cntp_tval_el0" : "=r" (val)); + break; + default: + BUILD_BUG(); + } + } else if (access == ARCH_TIMER_VIRT_ACCESS) { + switch (reg) { + case ARCH_TIMER_REG_CTRL: + asm volatile("mrs %0, cntv_ctl_el0" : "=r" (val)); + break; + case ARCH_TIMER_REG_TVAL: + asm volatile("mrs %0, cntv_tval_el0" : "=r" (val)); + break; + default: + BUILD_BUG(); + } + } else { + BUILD_BUG(); + } + + return val; +} + +static inline u32 arch_timer_get_cntfrq(void) +{ + u32 val; + asm volatile("mrs %0, cntfrq_el0" : "=r" (val)); + return val; +} + +static inline void __cpuinit arch_counter_set_user_access(void) +{ + u32 cntkctl; + + /* Disable user access to the timers and the physical counter. */ + asm volatile("mrs %0, cntkctl_el1" : "=r" (cntkctl)); + cntkctl &= ~((3 << 8) | (1 << 0)); + + /* Enable user access to the virtual counter and frequency. */ + cntkctl |= (1 << 1); + asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl)); +} + +static inline u64 arch_counter_get_cntpct(void) +{ + u64 cval; + + isb(); + asm volatile("mrs %0, cntpct_el0" : "=r" (cval)); + + return cval; +} + +static inline u64 arch_counter_get_cntvct(void) +{ + u64 cval; + + isb(); + asm volatile("mrs %0, cntvct_el0" : "=r" (cval)); + + return cval; +} + +#endif diff --git a/arch/arm64/include/asm/arm_generic.h b/arch/arm64/include/asm/arm_generic.h deleted file mode 100644 index df2aeb8..0000000 --- a/arch/arm64/include/asm/arm_generic.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * arch/arm64/include/asm/arm_generic.h - * - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_ARM_GENERIC_H -#define __ASM_ARM_GENERIC_H - -#include <linux/clocksource.h> - -#define ARCH_TIMER_CTRL_ENABLE (1 << 0) -#define ARCH_TIMER_CTRL_IMASK (1 << 1) -#define ARCH_TIMER_CTRL_ISTATUS (1 << 2) - -#define ARCH_TIMER_REG_CTRL 0 -#define ARCH_TIMER_REG_FREQ 1 -#define ARCH_TIMER_REG_TVAL 2 - -static inline void arch_timer_reg_write(int reg, u32 val) -{ - switch (reg) { - case ARCH_TIMER_REG_CTRL: - asm volatile("msr cntp_ctl_el0, %0" : : "r" (val)); - break; - case ARCH_TIMER_REG_TVAL: - asm volatile("msr cntp_tval_el0, %0" : : "r" (val)); - break; - default: - BUILD_BUG(); - } - - isb(); -} - -static inline u32 arch_timer_reg_read(int reg) -{ - u32 val; - - switch (reg) { - case ARCH_TIMER_REG_CTRL: - asm volatile("mrs %0, cntp_ctl_el0" : "=r" (val)); - break; - case ARCH_TIMER_REG_FREQ: - asm volatile("mrs %0, cntfrq_el0" : "=r" (val)); - break; - case ARCH_TIMER_REG_TVAL: - asm volatile("mrs %0, cntp_tval_el0" : "=r" (val)); - break; - default: - BUILD_BUG(); - } - - return val; -} - -static inline void __cpuinit arch_counter_enable_user_access(void) -{ - u32 cntkctl; - - /* Disable user access to the timers and the physical counter. */ - asm volatile("mrs %0, cntkctl_el1" : "=r" (cntkctl)); - cntkctl &= ~((3 << 8) | (1 << 0)); - - /* Enable user access to the virtual counter and frequency. */ - cntkctl |= (1 << 1); - asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl)); -} - -static inline cycle_t arch_counter_get_cntpct(void) -{ - cycle_t cval; - - asm volatile("mrs %0, cntpct_el0" : "=r" (cval)); - - return cval; -} - -static inline cycle_t arch_counter_get_cntvct(void) -{ - cycle_t cval; - - asm volatile("mrs %0, cntvct_el0" : "=r" (cval)); - - return cval; -} - -#endif diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 3b4b725..b0ef18d 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -31,8 +31,9 @@ #include <linux/syscore_ops.h> #include <linux/timer.h> #include <linux/irq.h> +#include <linux/delay.h> -#include <clocksource/arm_generic.h> +#include <clocksource/arm_arch_timer.h> #include <asm/thread_info.h> #include <asm/stacktrace.h> @@ -59,7 +60,31 @@ unsigned long profile_pc(struct pt_regs *regs) EXPORT_SYMBOL(profile_pc); #endif +static u64 sched_clock_mult __read_mostly; + +unsigned long long notrace sched_clock(void) +{ + return arch_timer_read_counter() * sched_clock_mult; +} + +int read_current_timer(unsigned long *timer_value) +{ + *timer_value = arch_timer_read_counter(); + return 0; +} + void __init time_init(void) { - arm_generic_timer_init(); + u32 arch_timer_rate; + + if (arch_timer_init()) + panic("Unable to initialise architected timer.\n"); + + arch_timer_rate = arch_timer_get_rate(); + + /* Cache the sched_clock multiplier to save a divide in the hot path. */ + sched_clock_mult = NSEC_PER_SEC / arch_timer_rate; + + /* Calibrate the delay loop directly */ + lpj_fine = arch_timer_rate / HZ; } diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 7d978c1..e920cbe 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -60,7 +60,5 @@ config CLKSRC_DBX500_PRCMU_SCHED_CLOCK help Use the always on PRCMU Timer as sched_clock -config CLKSRC_ARM_GENERIC - def_bool y if ARM64 - help - This option enables support for the ARM generic timer. +config ARM_ARCH_TIMER + bool diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 596c45c..7d671b8 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -20,4 +20,4 @@ obj-$(CONFIG_SUNXI_TIMER) += sunxi_timer.o obj-$(CONFIG_ARCH_TEGRA) += tegra20_timer.o obj-$(CONFIG_VT8500_TIMER) += vt8500_timer.o -obj-$(CONFIG_CLKSRC_ARM_GENERIC) += arm_generic.o +obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c new file mode 100644 index 0000000..d7ad425 --- /dev/null +++ b/drivers/clocksource/arm_arch_timer.c @@ -0,0 +1,391 @@ +/* + * linux/drivers/clocksource/arm_arch_timer.c + * + * Copyright (C) 2011 ARM Ltd. + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/smp.h> +#include <linux/cpu.h> +#include <linux/clockchips.h> +#include <linux/interrupt.h> +#include <linux/of_irq.h> +#include <linux/io.h> + +#include <asm/arch_timer.h> +#include <asm/virt.h> + +#include <clocksource/arm_arch_timer.h> + +static u32 arch_timer_rate; + +enum ppi_nr { + PHYS_SECURE_PPI, + PHYS_NONSECURE_PPI, + VIRT_PPI, + HYP_PPI, + MAX_TIMER_PPI +}; + +static int arch_timer_ppi[MAX_TIMER_PPI]; + +static struct clock_event_device __percpu *arch_timer_evt; + +static bool arch_timer_use_virtual = true; + +/* + * Architected system timer support. + */ + +static inline irqreturn_t timer_handler(const int access, + struct clock_event_device *evt) +{ + unsigned long ctrl; + ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL); + if (ctrl & ARCH_TIMER_CTRL_IT_STAT) { + ctrl |= ARCH_TIMER_CTRL_IT_MASK; + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl); + evt->event_handler(evt); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + +static irqreturn_t arch_timer_handler_virt(int irq, void *dev_id) +{ + struct clock_event_device *evt = dev_id; + + return timer_handler(ARCH_TIMER_VIRT_ACCESS, evt); +} + +static irqreturn_t arch_timer_handler_phys(int irq, void *dev_id) +{ + struct clock_event_device *evt = dev_id; + + return timer_handler(ARCH_TIMER_PHYS_ACCESS, evt); +} + +static inline void timer_set_mode(const int access, int mode) +{ + unsigned long ctrl; + switch (mode) { + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL); + ctrl &= ~ARCH_TIMER_CTRL_ENABLE; + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl); + break; + default: + break; + } +} + +static void arch_timer_set_mode_virt(enum clock_event_mode mode, + struct clock_event_device *clk) +{ + timer_set_mode(ARCH_TIMER_VIRT_ACCESS, mode); +} + +static void arch_timer_set_mode_phys(enum clock_event_mode mode, + struct clock_event_device *clk) +{ + timer_set_mode(ARCH_TIMER_PHYS_ACCESS, mode); +} + +static inline void set_next_event(const int access, unsigned long evt) +{ + unsigned long ctrl; + ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL); + ctrl |= ARCH_TIMER_CTRL_ENABLE; + ctrl &= ~ARCH_TIMER_CTRL_IT_MASK; + arch_timer_reg_write(access, ARCH_TIMER_REG_TVAL, evt); + arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl); +} + +static int arch_timer_set_next_event_virt(unsigned long evt, + struct clock_event_device *unused) +{ + set_next_event(ARCH_TIMER_VIRT_ACCESS, evt); + return 0; +} + +static int arch_timer_set_next_event_phys(unsigned long evt, + struct clock_event_device *unused) +{ + set_next_event(ARCH_TIMER_PHYS_ACCESS, evt); + return 0; +} + +static int __cpuinit arch_timer_setup(struct clock_event_device *clk) +{ + clk->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP; + clk->name = "arch_sys_timer"; + clk->rating = 450; + if (arch_timer_use_virtual) { + clk->irq = arch_timer_ppi[VIRT_PPI]; + clk->set_mode = arch_timer_set_mode_virt; + clk->set_next_event = arch_timer_set_next_event_virt; + } else { + clk->irq = arch_timer_ppi[PHYS_SECURE_PPI]; + clk->set_mode = arch_timer_set_mode_phys; + clk->set_next_event = arch_timer_set_next_event_phys; + } + + clk->cpumask = cpumask_of(smp_processor_id()); + + clk->set_mode(CLOCK_EVT_MODE_SHUTDOWN, NULL); + + clockevents_config_and_register(clk, arch_timer_rate, + 0xf, 0x7fffffff); + + if (arch_timer_use_virtual) + enable_percpu_irq(arch_timer_ppi[VIRT_PPI], 0); + else { + enable_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], 0); + if (arch_timer_ppi[PHYS_NONSECURE_PPI]) + enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0); + } + + arch_counter_set_user_access(); + + return 0; +} + +static int arch_timer_available(void) +{ + u32 freq; + + if (arch_timer_rate == 0) { + freq = arch_timer_get_cntfrq(); + + /* Check the timer frequency. */ + if (freq == 0) { + pr_warn("Architected timer frequency not available\n"); + return -EINVAL; + } + + arch_timer_rate = freq; + } + + pr_info_once("Architected local timer running at %lu.%02luMHz (%s).\n", + (unsigned long)arch_timer_rate / 1000000, + (unsigned long)(arch_timer_rate / 10000) % 100, + arch_timer_use_virtual ? "virt" : "phys"); + return 0; +} + +u32 arch_timer_get_rate(void) +{ + return arch_timer_rate; +} + +/* + * Some external users of arch_timer_read_counter (e.g. sched_clock) may try to + * call it before it has been initialised. Rather than incur a performance + * penalty checking for initialisation, provide a default implementation that + * won't lead to time appearing to jump backwards. + */ +static u64 arch_timer_read_zero(void) +{ + return 0; +} + +u64 (*arch_timer_read_counter)(void) = arch_timer_read_zero; + +static cycle_t arch_counter_read(struct clocksource *cs) +{ + return arch_timer_read_counter(); +} + +static cycle_t arch_counter_read_cc(const struct cyclecounter *cc) +{ + return arch_timer_read_counter(); +} + +static struct clocksource clocksource_counter = { + .name = "arch_sys_counter", + .rating = 400, + .read = arch_counter_read, + .mask = CLOCKSOURCE_MASK(56), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static struct cyclecounter cyclecounter = { + .read = arch_counter_read_cc, + .mask = CLOCKSOURCE_MASK(56), +}; + +static struct timecounter timecounter; + +struct timecounter *arch_timer_get_timecounter(void) +{ + return &timecounter; +} + +static void __cpuinit arch_timer_stop(struct clock_event_device *clk) +{ + pr_debug("arch_timer_teardown disable IRQ%d cpu #%d\n", + clk->irq, smp_processor_id()); + + if (arch_timer_use_virtual) + disable_percpu_irq(arch_timer_ppi[VIRT_PPI]); + else { + disable_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI]); + if (arch_timer_ppi[PHYS_NONSECURE_PPI]) + disable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI]); + } + + clk->set_mode(CLOCK_EVT_MODE_UNUSED, clk); +} + +static int __cpuinit arch_timer_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + struct clock_event_device *evt = this_cpu_ptr(arch_timer_evt); + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_STARTING: + arch_timer_setup(evt); + break; + case CPU_DYING: + arch_timer_stop(evt); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block arch_timer_cpu_nb __cpuinitdata = { + .notifier_call = arch_timer_cpu_notify, +}; + +static int __init arch_timer_register(void) +{ + int err; + int ppi; + + err = arch_timer_available(); + if (err) + goto out; + + arch_timer_evt = alloc_percpu(struct clock_event_device); + if (!arch_timer_evt) { + err = -ENOMEM; + goto out; + } + + clocksource_register_hz(&clocksource_counter, arch_timer_rate); + cyclecounter.mult = clocksource_counter.mult; + cyclecounter.shift = clocksource_counter.shift; + timecounter_init(&timecounter, &cyclecounter, + arch_counter_get_cntpct()); + + if (arch_timer_use_virtual) { + ppi = arch_timer_ppi[VIRT_PPI]; + err = request_percpu_irq(ppi, arch_timer_handler_virt, + "arch_timer", arch_timer_evt); + } else { + ppi = arch_timer_ppi[PHYS_SECURE_PPI]; + err = request_percpu_irq(ppi, arch_timer_handler_phys, + "arch_timer", arch_timer_evt); + if (!err && arch_timer_ppi[PHYS_NONSECURE_PPI]) { + ppi = arch_timer_ppi[PHYS_NONSECURE_PPI]; + err = request_percpu_irq(ppi, arch_timer_handler_phys, + "arch_timer", arch_timer_evt); + if (err) + free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], + arch_timer_evt); + } + } + + if (err) { + pr_err("arch_timer: can't register interrupt %d (%d)\n", + ppi, err); + goto out_free; + } + + err = register_cpu_notifier(&arch_timer_cpu_nb); + if (err) + goto out_free_irq; + + /* Immediately configure the timer on the boot CPU */ + arch_timer_setup(this_cpu_ptr(arch_timer_evt)); + + return 0; + +out_free_irq: + if (arch_timer_use_virtual) + free_percpu_irq(arch_timer_ppi[VIRT_PPI], arch_timer_evt); + else { + free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], + arch_timer_evt); + if (arch_timer_ppi[PHYS_NONSECURE_PPI]) + free_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], + arch_timer_evt); + } + +out_free: + free_percpu(arch_timer_evt); +out: + return err; +} + +static const struct of_device_id arch_timer_of_match[] __initconst = { + { .compatible = "arm,armv7-timer", }, + { .compatible = "arm,armv8-timer", }, + {}, +}; + +int __init arch_timer_init(void) +{ + struct device_node *np; + u32 freq; + int i; + + np = of_find_matching_node(NULL, arch_timer_of_match); + if (!np) { + pr_err("arch_timer: can't find DT node\n"); + return -ENODEV; + } + + /* Try to determine the frequency from the device tree or CNTFRQ */ + if (!of_property_read_u32(np, "clock-frequency", &freq)) + arch_timer_rate = freq; + + for (i = PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++) + arch_timer_ppi[i] = irq_of_parse_and_map(np, i); + + of_node_put(np); + + /* + * If HYP mode is available, we know that the physical timer + * has been configured to be accessible from PL1. Use it, so + * that a guest can use the virtual timer instead. + * + * If no interrupt provided for virtual timer, we'll have to + * stick to the physical timer. It'd better be accessible... + */ + if (is_hyp_mode_available() || !arch_timer_ppi[VIRT_PPI]) { + arch_timer_use_virtual = false; + + if (!arch_timer_ppi[PHYS_SECURE_PPI] || + !arch_timer_ppi[PHYS_NONSECURE_PPI]) { + pr_warn("arch_timer: No interrupt available, giving up\n"); + return -EINVAL; + } + } + + if (arch_timer_use_virtual) + arch_timer_read_counter = arch_counter_get_cntvct; + else + arch_timer_read_counter = arch_counter_get_cntpct; + + return arch_timer_register(); +} diff --git a/drivers/clocksource/arm_generic.c b/drivers/clocksource/arm_generic.c deleted file mode 100644 index 8ae1a61..0000000 --- a/drivers/clocksource/arm_generic.c +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Generic timers support - * - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/delay.h> -#include <linux/device.h> -#include <linux/smp.h> -#include <linux/cpu.h> -#include <linux/jiffies.h> -#include <linux/interrupt.h> -#include <linux/clockchips.h> -#include <linux/of_irq.h> -#include <linux/io.h> - -#include <clocksource/arm_generic.h> - -#include <asm/arm_generic.h> - -static u32 arch_timer_rate; -static u64 sched_clock_mult __read_mostly; -static DEFINE_PER_CPU(struct clock_event_device, arch_timer_evt); -static int arch_timer_ppi; - -static irqreturn_t arch_timer_handle_irq(int irq, void *dev_id) -{ - struct clock_event_device *evt = dev_id; - unsigned long ctrl; - - ctrl = arch_timer_reg_read(ARCH_TIMER_REG_CTRL); - if (ctrl & ARCH_TIMER_CTRL_ISTATUS) { - ctrl |= ARCH_TIMER_CTRL_IMASK; - arch_timer_reg_write(ARCH_TIMER_REG_CTRL, ctrl); - evt->event_handler(evt); - return IRQ_HANDLED; - } - - return IRQ_NONE; -} - -static void arch_timer_stop(void) -{ - unsigned long ctrl; - - ctrl = arch_timer_reg_read(ARCH_TIMER_REG_CTRL); - ctrl &= ~ARCH_TIMER_CTRL_ENABLE; - arch_timer_reg_write(ARCH_TIMER_REG_CTRL, ctrl); -} - -static void arch_timer_set_mode(enum clock_event_mode mode, - struct clock_event_device *clk) -{ - switch (mode) { - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - arch_timer_stop(); - break; - default: - break; - } -} - -static int arch_timer_set_next_event(unsigned long evt, - struct clock_event_device *unused) -{ - unsigned long ctrl; - - ctrl = arch_timer_reg_read(ARCH_TIMER_REG_CTRL); - ctrl |= ARCH_TIMER_CTRL_ENABLE; - ctrl &= ~ARCH_TIMER_CTRL_IMASK; - - arch_timer_reg_write(ARCH_TIMER_REG_TVAL, evt); - arch_timer_reg_write(ARCH_TIMER_REG_CTRL, ctrl); - - return 0; -} - -static void __cpuinit arch_timer_setup(struct clock_event_device *clk) -{ - /* Let's make sure the timer is off before doing anything else */ - arch_timer_stop(); - - clk->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP; - clk->name = "arch_sys_timer"; - clk->rating = 400; - clk->set_mode = arch_timer_set_mode; - clk->set_next_event = arch_timer_set_next_event; - clk->irq = arch_timer_ppi; - clk->cpumask = cpumask_of(smp_processor_id()); - - clockevents_config_and_register(clk, arch_timer_rate, - 0xf, 0x7fffffff); - - enable_percpu_irq(clk->irq, 0); - - /* Ensure the virtual counter is visible to userspace for the vDSO. */ - arch_counter_enable_user_access(); -} - -static void __init arch_timer_calibrate(void) -{ - if (arch_timer_rate == 0) { - arch_timer_reg_write(ARCH_TIMER_REG_CTRL, 0); - arch_timer_rate = arch_timer_reg_read(ARCH_TIMER_REG_FREQ); - - /* Check the timer frequency. */ - if (arch_timer_rate == 0) - panic("Architected timer frequency is set to zero.\n" - "You must set this in your .dts file\n"); - } - - /* Cache the sched_clock multiplier to save a divide in the hot path. */ - - sched_clock_mult = DIV_ROUND_CLOSEST(NSEC_PER_SEC, arch_timer_rate); - - pr_info("Architected local timer running at %u.%02uMHz.\n", - arch_timer_rate / 1000000, (arch_timer_rate / 10000) % 100); -} - -static cycle_t arch_counter_read(struct clocksource *cs) -{ - return arch_counter_get_cntpct(); -} - -static struct clocksource clocksource_counter = { - .name = "arch_sys_counter", - .rating = 400, - .read = arch_counter_read, - .mask = CLOCKSOURCE_MASK(56), - .flags = (CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_VALID_FOR_HRES), -}; - -int read_current_timer(unsigned long *timer_value) -{ - *timer_value = arch_counter_get_cntpct(); - return 0; -} - -unsigned long long notrace sched_clock(void) -{ - return arch_counter_get_cntvct() * sched_clock_mult; -} - -static int __cpuinit arch_timer_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - int cpu = (long)hcpu; - struct clock_event_device *clk = per_cpu_ptr(&arch_timer_evt, cpu); - - switch(action) { - case CPU_STARTING: - case CPU_STARTING_FROZEN: - arch_timer_setup(clk); - break; - - case CPU_DYING: - case CPU_DYING_FROZEN: - pr_debug("arch_timer_teardown disable IRQ%d cpu #%d\n", - clk->irq, cpu); - disable_percpu_irq(clk->irq); - arch_timer_set_mode(CLOCK_EVT_MODE_UNUSED, clk); - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata arch_timer_cpu_nb = { - .notifier_call = arch_timer_cpu_notify, -}; - -static const struct of_device_id arch_timer_of_match[] __initconst = { - { .compatible = "arm,armv8-timer" }, - {}, -}; - -int __init arm_generic_timer_init(void) -{ - struct device_node *np; - int err; - u32 freq; - - np = of_find_matching_node(NULL, arch_timer_of_match); - if (!np) { - pr_err("arch_timer: can't find DT node\n"); - return -ENODEV; - } - - /* Try to determine the frequency from the device tree or CNTFRQ */ - if (!of_property_read_u32(np, "clock-frequency", &freq)) - arch_timer_rate = freq; - arch_timer_calibrate(); - - arch_timer_ppi = irq_of_parse_and_map(np, 0); - pr_info("arch_timer: found %s irq %d\n", np->name, arch_timer_ppi); - - err = request_percpu_irq(arch_timer_ppi, arch_timer_handle_irq, - np->name, &arch_timer_evt); - if (err) { - pr_err("arch_timer: can't register interrupt %d (%d)\n", - arch_timer_ppi, err); - return err; - } - - clocksource_register_hz(&clocksource_counter, arch_timer_rate); - - /* Calibrate the delay loop directly */ - lpj_fine = DIV_ROUND_CLOSEST(arch_timer_rate, HZ); - - /* Immediately configure the timer on the boot CPU */ - arch_timer_setup(this_cpu_ptr(&arch_timer_evt)); - - register_cpu_notifier(&arch_timer_cpu_nb); - - return 0; -} diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h new file mode 100644 index 0000000..2603267 --- /dev/null +++ b/include/clocksource/arm_arch_timer.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __CLKSOURCE_ARM_ARCH_TIMER_H +#define __CLKSOURCE_ARM_ARCH_TIMER_H + +#include <linux/clocksource.h> +#include <linux/types.h> + +#define ARCH_TIMER_CTRL_ENABLE (1 << 0) +#define ARCH_TIMER_CTRL_IT_MASK (1 << 1) +#define ARCH_TIMER_CTRL_IT_STAT (1 << 2) + +#define ARCH_TIMER_REG_CTRL 0 +#define ARCH_TIMER_REG_TVAL 1 + +#define ARCH_TIMER_PHYS_ACCESS 0 +#define ARCH_TIMER_VIRT_ACCESS 1 + +#ifdef CONFIG_ARM_ARCH_TIMER + +extern int arch_timer_init(void); +extern u32 arch_timer_get_rate(void); +extern u64 (*arch_timer_read_counter)(void); +extern struct timecounter *arch_timer_get_timecounter(void); + +#else + +static inline int arch_timer_init(void) +{ + return -ENXIO; +} + +static inline u32 arch_timer_get_rate(void) +{ + return 0; +} + +static inline u64 arch_timer_read_counter(void) +{ + return 0; +} + +static inline struct timecounter *arch_timer_get_timecounter(void) +{ + return NULL; +} + +#endif + +#endif diff --git a/include/clocksource/arm_generic.h b/include/clocksource/arm_generic.h deleted file mode 100644 index 5b41b0d..0000000 --- a/include/clocksource/arm_generic.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __CLKSOURCE_ARM_GENERIC_H -#define __CLKSOURCE_ARM_GENERIC_H - -extern int arm_generic_timer_init(void); - -#endif diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index a67ca55..3fd8e42 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -20,16 +20,45 @@ #define GIC_DIST_CTRL 0x000 #define GIC_DIST_CTR 0x004 +#define GIC_DIST_IGROUP 0x080 #define GIC_DIST_ENABLE_SET 0x100 #define GIC_DIST_ENABLE_CLEAR 0x180 #define GIC_DIST_PENDING_SET 0x200 #define GIC_DIST_PENDING_CLEAR 0x280 -#define GIC_DIST_ACTIVE_BIT 0x300 +#define GIC_DIST_ACTIVE_SET 0x300 +#define GIC_DIST_ACTIVE_CLEAR 0x380 #define GIC_DIST_PRI 0x400 #define GIC_DIST_TARGET 0x800 #define GIC_DIST_CONFIG 0xc00 #define GIC_DIST_SOFTINT 0xf00 +#define GICH_HCR 0x0 +#define GICH_VTR 0x4 +#define GICH_VMCR 0x8 +#define GICH_MISR 0x10 +#define GICH_EISR0 0x20 +#define GICH_EISR1 0x24 +#define GICH_ELRSR0 0x30 +#define GICH_ELRSR1 0x34 +#define GICH_APR 0xf0 +#define GICH_LR0 0x100 + +#define GICH_HCR_EN (1 << 0) +#define GICH_HCR_UIE (1 << 1) + +#define GICH_LR_VIRTUALID (0x3ff << 0) +#define GICH_LR_PHYSID_CPUID_SHIFT (10) +#define GICH_LR_PHYSID_CPUID (7 << GICH_LR_PHYSID_CPUID_SHIFT) +#define GICH_LR_STATE (3 << 28) +#define GICH_LR_PENDING_BIT (1 << 28) +#define GICH_LR_ACTIVE_BIT (1 << 29) +#define GICH_LR_EOI (1 << 19) + +#define GICH_MISR_EOI (1 << 0) +#define GICH_MISR_U (1 << 1) + +#ifndef __ASSEMBLY__ + struct device_node; extern struct irq_chip gic_arch_extn; @@ -45,4 +74,6 @@ static inline void gic_init(unsigned int nr, int start, gic_init_bases(nr, start, dist, cpu, 0, NULL); } +#endif /* __ASSEMBLY */ + #endif diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7f2360a..c70577c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -637,6 +637,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #define KVM_CAP_PPC_HTAB_FD 84 #define KVM_CAP_ARM_PSCI 87 +#define KVM_CAP_ARM_SET_DEVICE_ADDR 88 #ifdef KVM_CAP_IRQ_ROUTING @@ -784,6 +785,11 @@ struct kvm_msi { __u8 pad[16]; }; +struct kvm_arm_device_addr { + __u64 id; + __u64 addr; +}; + /* * ioctls for VM fds */ @@ -869,6 +875,8 @@ struct kvm_s390_ucas_mapping { #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) /* Available with KVM_CAP_PPC_HTAB_FD */ #define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) +/* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */ +#define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) /* * ioctls for vcpu fds |