From 8fc153cda9c9e6aed2a4a7235970dd6c6cb6e954 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Jan 2016 18:29:19 +0000 Subject: arm64: KVM: Fix AArch64 guest userspace exception injection At the moment, our fault injection is pretty limited. We always generate a SYNC exception into EL1, as if the fault was actually from EL1h, no matter how it was generated. This is obviously wrong, as EL0 can generate faults of its own (not to mention the pretty-much unused EL1t mode). This patch fixes it by implementing section D1.10.2 of the ARMv8 ARM, and in particular table D1-7 ("Vector offsets from vector table base address"), which describes which vector to use depending on the source exception level and type (synchronous, IRQ, FIQ or SError). Reviewed-by: Christoffer Dall Tested-by: Shannon Zhao Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 648112e..4d1ac81 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -27,7 +27,11 @@ #define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \ PSR_I_BIT | PSR_D_BIT) -#define EL1_EXCEPT_SYNC_OFFSET 0x200 + +#define CURRENT_EL_SP_EL0_VECTOR 0x0 +#define CURRENT_EL_SP_ELx_VECTOR 0x200 +#define LOWER_EL_AArch64_VECTOR 0x400 +#define LOWER_EL_AArch32_VECTOR 0x600 static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) { @@ -97,6 +101,34 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, *fsr = 0x14; } +enum exception_type { + except_type_sync = 0, + except_type_irq = 0x80, + except_type_fiq = 0x100, + except_type_serror = 0x180, +}; + +static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) +{ + u64 exc_offset; + + switch (*vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT)) { + case PSR_MODE_EL1t: + exc_offset = CURRENT_EL_SP_EL0_VECTOR; + break; + case PSR_MODE_EL1h: + exc_offset = CURRENT_EL_SP_ELx_VECTOR; + break; + case PSR_MODE_EL0t: + exc_offset = LOWER_EL_AArch64_VECTOR; + break; + default: + exc_offset = LOWER_EL_AArch32_VECTOR; + } + + return vcpu_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; +} + static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -108,8 +140,8 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr *vcpu_spsr(vcpu) = cpsr; *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); + *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; - *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET; vcpu_sys_reg(vcpu, FAR_EL1) = addr; @@ -143,8 +175,8 @@ static void inject_undef64(struct kvm_vcpu *vcpu) *vcpu_spsr(vcpu) = cpsr; *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); + *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; - *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET; /* * Build an unknown exception, depending on the instruction -- cgit v0.10.2 From a7e0ac295d964086af3bf98352614f33c381213e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 19 Jan 2016 16:20:18 +0000 Subject: arm64: KVM: Obey RES0/1 reserved bits when setting CPTR_EL2 Some bits in CPTR are defined as RES1 in the architecture. Setting these bits to zero may unintentionally enable future architecture extensions, allowing guests to use them without supervision by the host. This would be bad: for forwards compatibility, this patch makes sure the affected bits are always written with 1, not 0. This patch only addresses CPTR_EL2. Initialisation of other system registers may still need review. Reviewed-by: Marc Zyngier Signed-off-by: Dave Martin Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 738a95f..bef6e92 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -182,6 +182,7 @@ #define CPTR_EL2_TCPAC (1 << 31) #define CPTR_EL2_TTA (1 << 20) #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) +#define CPTR_EL2_DEFAULT 0x000033ff /* Hyp Debug Configuration Register bits */ #define MDCR_EL2_TDRA (1 << 11) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index ca8f5a5..f0e7bdf 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -36,7 +36,11 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) write_sysreg(val, hcr_el2); /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ write_sysreg(1 << 15, hstr_el2); - write_sysreg(CPTR_EL2_TTA | CPTR_EL2_TFP, cptr_el2); + + val = CPTR_EL2_DEFAULT; + val |= CPTR_EL2_TTA | CPTR_EL2_TFP; + write_sysreg(val, cptr_el2); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); } @@ -45,7 +49,7 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(HCR_RW, hcr_el2); write_sysreg(0, hstr_el2); write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2); - write_sysreg(0, cptr_el2); + write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); } static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) -- cgit v0.10.2 From 9586a2ea6806599c819a9e800581c2a698ef7467 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Wed, 13 Jan 2016 17:16:39 +0800 Subject: arm64: KVM: Fix wrong use of the CPSR MODE mask for 32bit guests The values of CPSR MODE mask are different between aarch32 and aarch64. It should use the right one according to the execution state. Reviewed-by: Marc Zyngier Signed-off-by: Shannon Zhao Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 3066328..779a587 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -127,10 +127,14 @@ static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu) static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) { - u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK; + u32 mode; - if (vcpu_mode_is_32bit(vcpu)) + if (vcpu_mode_is_32bit(vcpu)) { + mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK; return mode > COMPAT_PSR_MODE_USR; + } + + mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK; return mode != PSR_MODE_EL0t; } -- cgit v0.10.2 From 7769db905bd2df08e844b645437f4729fc1c4d20 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Wed, 13 Jan 2016 17:16:40 +0800 Subject: arm64: KVM: Fix comments of the CP handler Make sure the documentation reflects the actual name of the functions. Acked-by: Marc Zyngier Signed-off-by: Shannon Zhao Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index eec3598..4048934 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1043,7 +1043,7 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu, } /** - * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP15 access + * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP14/CP15 access * @vcpu: The VCPU pointer * @run: The kvm_run struct */ @@ -1095,7 +1095,7 @@ out: } /** - * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access + * kvm_handle_cp_32 -- handles a mrc/mcr trap on a guest CP14/CP15 access * @vcpu: The VCPU pointer * @run: The kvm_run struct */ -- cgit v0.10.2 From 6327f35a2010c06a3bc2bfb14202a38764fb9920 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Wed, 13 Jan 2016 17:16:41 +0800 Subject: arm64: KVM: Fix guest dead loop when register accessor returns false Currently emulate_cp will return 0 (Handled) no matter what the accessor returns. If register accessor returns false, it will not skip current PC while emulate_cp return handled. Then guest will stuck in a dead loop. Reviewed-by: Marc Zyngier Signed-off-by: Shannon Zhao Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 4048934..2e90371 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1007,10 +1007,9 @@ static int emulate_cp(struct kvm_vcpu *vcpu, if (likely(r->access(vcpu, params, r))) { /* Skip instruction, since it was emulated */ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + /* Handled */ + return 0; } - - /* Handled */ - return 0; } /* Not handled */ -- cgit v0.10.2 From 320549a22484952d88d4e0320218765b16cd2174 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 5 Feb 2016 11:22:04 +0000 Subject: regmap: mmio: Revert to v4.4 endianness handling Commit 29bb45f25ff3 (regmap-mmio: Use native endianness for read/write) attempted to fix some long standing bugs in the MMIO implementation for big endian systems caused by duplicate byte swapping in both regmap and readl()/writel() which affected MIPS systems as when they are in big endian mode they flip the endianness of all registers in the system, not just the CPU. MIPS systems had worked around this by declaring regmap using IPs as little endian which is inaccurate, unfortunately the issue had not been reported. Sadly the fix makes things worse rather than better. By changing the behaviour to match the documentation it caused behaviour changes for other IPs which broke them and by using the __raw I/O accessors to avoid the endianness swapping in readl()/writel() it removed some memory ordering guarantees and could potentially generate unvirtualisable instructions on some architectures. Unfortunately sorting out all this mess in any half way sensible fashion was far too invasive to go in during an -rc cycle so instead let's go back to the old broken behaviour for v4.5, the better fixes are already queued for v4.6. This does mean that we keep the broken MIPS DTs for another release but that seems the least bad way of handling the situation. Reported-by: Johannes Berg Signed-off-by: Mark Brown diff --git a/arch/mips/boot/dts/brcm/bcm6328.dtsi b/arch/mips/boot/dts/brcm/bcm6328.dtsi index 459b9b2..d61b161 100644 --- a/arch/mips/boot/dts/brcm/bcm6328.dtsi +++ b/arch/mips/boot/dts/brcm/bcm6328.dtsi @@ -74,6 +74,7 @@ timer: timer@10000040 { compatible = "syscon"; reg = <0x10000040 0x2c>; + little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7125.dtsi b/arch/mips/boot/dts/brcm/bcm7125.dtsi index 4fc7ece..1a7efa8 100644 --- a/arch/mips/boot/dts/brcm/bcm7125.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7125.dtsi @@ -98,6 +98,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7125-sun-top-ctrl", "syscon"; reg = <0x404000 0x60c>; + little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7346.dtsi b/arch/mips/boot/dts/brcm/bcm7346.dtsi index a3039bb..d4bf52c 100644 --- a/arch/mips/boot/dts/brcm/bcm7346.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7346.dtsi @@ -118,6 +118,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7346-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; + little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7358.dtsi b/arch/mips/boot/dts/brcm/bcm7358.dtsi index 4274ff4..8e25016 100644 --- a/arch/mips/boot/dts/brcm/bcm7358.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7358.dtsi @@ -112,6 +112,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7358-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; + little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7360.dtsi b/arch/mips/boot/dts/brcm/bcm7360.dtsi index 0dcc9163..7e5f760 100644 --- a/arch/mips/boot/dts/brcm/bcm7360.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7360.dtsi @@ -112,6 +112,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7360-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; + little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7362.dtsi b/arch/mips/boot/dts/brcm/bcm7362.dtsi index 2f3f9fc..c739ea7 100644 --- a/arch/mips/boot/dts/brcm/bcm7362.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7362.dtsi @@ -118,6 +118,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7362-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; + little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7420.dtsi b/arch/mips/boot/dts/brcm/bcm7420.dtsi index bee221b..5f55d0a 100644 --- a/arch/mips/boot/dts/brcm/bcm7420.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7420.dtsi @@ -99,6 +99,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7420-sun-top-ctrl", "syscon"; reg = <0x404000 0x60c>; + little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7425.dtsi b/arch/mips/boot/dts/brcm/bcm7425.dtsi index 571f30f..e24d41a 100644 --- a/arch/mips/boot/dts/brcm/bcm7425.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7425.dtsi @@ -100,6 +100,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7425-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; + little-endian; }; reboot { diff --git a/arch/mips/boot/dts/brcm/bcm7435.dtsi b/arch/mips/boot/dts/brcm/bcm7435.dtsi index 614ee21..8b9432c 100644 --- a/arch/mips/boot/dts/brcm/bcm7435.dtsi +++ b/arch/mips/boot/dts/brcm/bcm7435.dtsi @@ -114,6 +114,7 @@ sun_top_ctrl: syscon@404000 { compatible = "brcm,bcm7425-sun-top-ctrl", "syscon"; reg = <0x404000 0x51c>; + little-endian; }; reboot { diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c index 8812bfb..eea5156 100644 --- a/drivers/base/regmap/regmap-mmio.c +++ b/drivers/base/regmap/regmap-mmio.c @@ -133,17 +133,17 @@ static int regmap_mmio_gather_write(void *context, while (val_size) { switch (ctx->val_bytes) { case 1: - __raw_writeb(*(u8 *)val, ctx->regs + offset); + writeb(*(u8 *)val, ctx->regs + offset); break; case 2: - __raw_writew(*(u16 *)val, ctx->regs + offset); + writew(*(u16 *)val, ctx->regs + offset); break; case 4: - __raw_writel(*(u32 *)val, ctx->regs + offset); + writel(*(u32 *)val, ctx->regs + offset); break; #ifdef CONFIG_64BIT case 8: - __raw_writeq(*(u64 *)val, ctx->regs + offset); + writeq(*(u64 *)val, ctx->regs + offset); break; #endif default: @@ -193,17 +193,17 @@ static int regmap_mmio_read(void *context, while (val_size) { switch (ctx->val_bytes) { case 1: - *(u8 *)val = __raw_readb(ctx->regs + offset); + *(u8 *)val = readb(ctx->regs + offset); break; case 2: - *(u16 *)val = __raw_readw(ctx->regs + offset); + *(u16 *)val = readw(ctx->regs + offset); break; case 4: - *(u32 *)val = __raw_readl(ctx->regs + offset); + *(u32 *)val = readl(ctx->regs + offset); break; #ifdef CONFIG_64BIT case 8: - *(u64 *)val = __raw_readq(ctx->regs + offset); + *(u64 *)val = readq(ctx->regs + offset); break; #endif default: -- cgit v0.10.2 From 4ba6a2b28f111e4c9621487612056d10f3f4a6ca Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 8 Feb 2016 16:09:08 +0900 Subject: scatterlist: fix a typo in comment block of sg_miter_stop() Fix the doubled "started" and tidy up the following sentences. Signed-off-by: Masahiro Yamada Signed-off-by: Linus Torvalds diff --git a/lib/scatterlist.c b/lib/scatterlist.c index bafa993..004fc70 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -598,9 +598,9 @@ EXPORT_SYMBOL(sg_miter_next); * * Description: * Stops mapping iterator @miter. @miter should have been started - * started using sg_miter_start(). A stopped iteration can be - * resumed by calling sg_miter_next() on it. This is useful when - * resources (kmap) need to be released during iteration. + * using sg_miter_start(). A stopped iteration can be resumed by + * calling sg_miter_next() on it. This is useful when resources (kmap) + * need to be released during iteration. * * Context: * Preemption disabled if the SG_MITER_ATOMIC is set. Don't care -- cgit v0.10.2 From 8a5fd56431fe1682e870bd6ab0c276e74befbeb9 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 4 Feb 2016 14:40:40 +0100 Subject: locking/lockdep: Fix stack trace caching logic check_prev_add() caches saved stack trace in static trace variable to avoid duplicate save_trace() calls in dependencies involving trylocks. But that caching logic contains a bug. We may not save trace on first iteration due to early return from check_prev_add(). Then on the second iteration when we actually need the trace we don't save it because we think that we've already saved it. Let check_prev_add() itself control when stack is saved. There is another bug. Trace variable is protected by graph lock. But we can temporary release graph lock during printing. Fix this by invalidating cached stack trace when we release graph lock. Signed-off-by: Dmitry Vyukov Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: glider@google.com Cc: kcc@google.com Cc: peter@hurleysoftware.com Cc: sasha.levin@oracle.com Link: http://lkml.kernel.org/r/1454593240-121647-1-git-send-email-dvyukov@google.com Signed-off-by: Ingo Molnar diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 60ace56..c7710e4 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1822,7 +1822,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next, */ static int check_prev_add(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next, int distance, int trylock_loop) + struct held_lock *next, int distance, int *stack_saved) { struct lock_list *entry; int ret; @@ -1883,8 +1883,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, } } - if (!trylock_loop && !save_trace(&trace)) - return 0; + if (!*stack_saved) { + if (!save_trace(&trace)) + return 0; + *stack_saved = 1; + } /* * Ok, all validations passed, add the new lock @@ -1907,6 +1910,8 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, * Debugging printouts: */ if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) { + /* We drop graph lock, so another thread can overwrite trace. */ + *stack_saved = 0; graph_unlock(); printk("\n new dependency: "); print_lock_name(hlock_class(prev)); @@ -1929,7 +1934,7 @@ static int check_prevs_add(struct task_struct *curr, struct held_lock *next) { int depth = curr->lockdep_depth; - int trylock_loop = 0; + int stack_saved = 0; struct held_lock *hlock; /* @@ -1956,7 +1961,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) */ if (hlock->read != 2 && hlock->check) { if (!check_prev_add(curr, hlock, next, - distance, trylock_loop)) + distance, &stack_saved)) return 0; /* * Stop after the first non-trylock entry, @@ -1979,7 +1984,6 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) if (curr->held_locks[depth].irq_context != curr->held_locks[depth-1].irq_context) break; - trylock_loop = 1; } return 1; out_bug: -- cgit v0.10.2