From 236cf17c2502007a9d2dda3c39fb0d9a6bd03cc2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 16 Feb 2016 14:47:31 +0000 Subject: KVM: arm/arm64: vgic: Ensure bitmaps are long enough When we allocate bitmaps in vgic_vcpu_init_maps, we divide the number of bits we need by 8 to figure out how many bytes to allocate. However, bitmap elements are always accessed as unsigned longs, and if we didn't happen to allocate a size such that size % sizeof(unsigned long) == 0, bitmap accesses may go past the end of the allocation. When using KASAN (which does byte-granular access checks), this results in a continuous stream of BUGs whenever these bitmaps are accessed: ============================================================================= BUG kmalloc-128 (Tainted: G B ): kasan: bad access detected ----------------------------------------------------------------------------- INFO: Allocated in vgic_init.part.25+0x55c/0x990 age=7493 cpu=3 pid=1730 INFO: Slab 0xffffffbde6d5da40 objects=16 used=15 fp=0xffffffc935769700 flags=0x4000000000000080 INFO: Object 0xffffffc935769500 @offset=1280 fp=0x (null) Bytes b4 ffffffc9357694f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769540: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769550: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769560: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769570: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Padding ffffffc9357695b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Padding ffffffc9357695c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Padding ffffffc9357695d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Padding ffffffc9357695e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Padding ffffffc9357695f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ CPU: 3 PID: 1740 Comm: kvm-vcpu-0 Tainted: G B 4.4.0+ #17 Hardware name: ARM Juno development board (r1) (DT) Call trace: [] dump_backtrace+0x0/0x280 [] show_stack+0x14/0x20 [] dump_stack+0x100/0x188 [] print_trailer+0xfc/0x168 [] object_err+0x3c/0x50 [] kasan_report_error+0x244/0x558 [] __asan_report_load8_noabort+0x48/0x50 [] __bitmap_or+0xc0/0xc8 [] kvm_vgic_flush_hwstate+0x1bc/0x650 [] kvm_arch_vcpu_ioctl_run+0x2ec/0xa60 [] kvm_vcpu_ioctl+0x474/0xa68 [] do_vfs_ioctl+0x5b8/0xcb0 [] SyS_ioctl+0x8c/0xa0 [] el0_svc_naked+0x24/0x28 Memory state around the buggy address: ffffffc935769400: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffffffc935769480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffffffc935769500: 04 fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffffffc935769580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffffffc935769600: 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Fix the issue by always allocating a multiple of sizeof(unsigned long), as we do elsewhere in the vgic code. Fixes: c1bfb577a ("arm/arm64: KVM: vgic: switch to dynamic allocation") Cc: stable@vger.kernel.org Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 043032c..00429b3 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1875,8 +1875,8 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - - int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; + int nr_longs = BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS); + int sz = nr_longs * sizeof(unsigned long); vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); -- cgit v0.10.2 From 1d6a821277aaa0cdd666278aaff93298df313d41 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Feb 2016 17:04:04 +0000 Subject: arm/arm64: KVM: Feed initialized memory to MMIO accesses On an MMIO access, we always copy the on-stack buffer info the shared "run" structure, even if this is a read access. This ends up leaking up to 8 bytes of uninitialized memory into userspace, depending on the size of the access. An obvious fix for this one is to only perform the copy if this is an actual write. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 7f33b20..0f6600f 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -206,7 +206,8 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, run->mmio.is_write = is_write; run->mmio.phys_addr = fault_ipa; run->mmio.len = len; - memcpy(run->mmio.data, data_buf, len); + if (is_write) + memcpy(run->mmio.data, data_buf, len); if (!ret) { /* We handled the access successfully in the kernel. */ -- cgit v0.10.2 From 172b2386ed16a9143d9a456aae5ec87275c61489 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 10 Feb 2016 17:50:23 +0100 Subject: KVM: x86: fix missed hardware breakpoints Sometimes when setting a breakpoint a process doesn't stop on it. This is because the debug registers are not loaded correctly on VCPU load. The following simple reproducer from Oleg Nesterov tries using debug registers in two threads. To see the bug, run a 2-VCPU guest with "taskset -c 0" and run "./bp 0 1" inside the guest. #include #include #include #include #include #include #include #include #include #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) unsigned long encode_dr7(int drnum, int enable, unsigned int type, unsigned int len) { unsigned long dr7; dr7 = ((len | type) & 0xf) << (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); if (enable) dr7 |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); return dr7; } int write_dr(int pid, int dr, unsigned long val) { return ptrace(PTRACE_POKEUSER, pid, offsetof (struct user, u_debugreg[dr]), val); } void set_bp(pid_t pid, void *addr) { unsigned long dr7; assert(write_dr(pid, 0, (long)addr) == 0); dr7 = encode_dr7(0, 1, DR_RW_EXECUTE, DR_LEN_1); assert(write_dr(pid, 7, dr7) == 0); } void *get_rip(int pid) { return (void*)ptrace(PTRACE_PEEKUSER, pid, offsetof(struct user, regs.rip), 0); } void test(int nr) { void *bp_addr = &&label + nr, *bp_hit; int pid; printf("test bp %d\n", nr); assert(nr < 16); // see 16 asm nops below pid = fork(); if (!pid) { assert(ptrace(PTRACE_TRACEME, 0,0,0) == 0); kill(getpid(), SIGSTOP); for (;;) { label: asm ( "nop; nop; nop; nop;" "nop; nop; nop; nop;" "nop; nop; nop; nop;" "nop; nop; nop; nop;" ); } } assert(pid == wait(NULL)); set_bp(pid, bp_addr); for (;;) { assert(ptrace(PTRACE_CONT, pid, 0, 0) == 0); assert(pid == wait(NULL)); bp_hit = get_rip(pid); if (bp_hit != bp_addr) fprintf(stderr, "ERR!! hit wrong bp %ld != %d\n", bp_hit - &&label, nr); } } int main(int argc, const char *argv[]) { while (--argc) { int nr = atoi(*++argv); if (!fork()) test(nr); } while (wait(NULL) > 0) ; return 0; } Cc: stable@vger.kernel.org Suggested-by: Nadav Amit Reported-by: Andrey Wagin Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4244c2b..f4891f2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2752,6 +2752,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); + vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) -- cgit v0.10.2 From 0c1d77f4ba5cc9c05a29adca3d6466cdf4969b70 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 19 Feb 2016 18:07:21 +0100 Subject: KVM: x86: fix conversion of addresses to linear in 32-bit protected mode Commit e8dd2d2d641c ("Silence compiler warning in arch/x86/kvm/emulate.c", 2015-09-06) broke boot of the Hurd. The bug is that the "default:" case actually could modify "la", but after the patch this change is not reflected in *linear. The bug is visible whenever a non-zero segment base causes the linear address to wrap around the 4GB mark. Fixes: e8dd2d2d641cb2724ee10e76c0ad02e04289c017 Cc: stable@vger.kernel.org Reported-by: Aurelien Jarno Tested-by: Aurelien Jarno Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1505587..b9b09fe 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -650,10 +650,10 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, u16 sel; la = seg_base(ctxt, addr.seg) + addr.ea; - *linear = la; *max_size = 0; switch (mode) { case X86EMUL_MODE_PROT64: + *linear = la; if (is_noncanonical_address(la)) goto bad; @@ -662,6 +662,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, goto bad; break; default: + *linear = la = (u32)la; usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL, addr.seg); if (!usable) @@ -689,7 +690,6 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, if (size > *max_size) goto bad; } - la &= (u32)-1; break; } if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0)) -- cgit v0.10.2 From d7444794a02ff655eda87e3cc54e86b940e7736f Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 19 Feb 2016 13:11:46 +0100 Subject: KVM: async_pf: do not warn on page allocation failures In async_pf we try to allocate with NOWAIT to get an element quickly or fail. This code also handle failures gracefully. Lets silence potential page allocation failures under load. qemu-system-s39: page allocation failure: order:0,mode:0x2200000 [...] Call Trace: ([<00000000001146b8>] show_trace+0xf8/0x148) [<000000000011476a>] show_stack+0x62/0xe8 [<00000000004a36b8>] dump_stack+0x70/0x98 [<0000000000272c3a>] warn_alloc_failed+0xd2/0x148 [<000000000027709e>] __alloc_pages_nodemask+0x94e/0xb38 [<00000000002cd36a>] new_slab+0x382/0x400 [<00000000002cf7ac>] ___slab_alloc.constprop.30+0x2dc/0x378 [<00000000002d03d0>] kmem_cache_alloc+0x160/0x1d0 [<0000000000133db4>] kvm_setup_async_pf+0x6c/0x198 [<000000000013dee8>] kvm_arch_vcpu_ioctl_run+0xd48/0xd58 [<000000000012fcaa>] kvm_vcpu_ioctl+0x372/0x690 [<00000000002f66f6>] do_vfs_ioctl+0x3be/0x510 [<00000000002f68ec>] SyS_ioctl+0xa4/0xb8 [<0000000000781c5e>] system_call+0xd6/0x264 [<000003ffa24fa06a>] 0x3ffa24fa06a Cc: stable@vger.kernel.org Signed-off-by: Christian Borntraeger Reviewed-by: Dominik Dingel Signed-off-by: Paolo Bonzini diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 3531599..db2dd33 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -172,7 +172,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, * do alloc nowait since if we are going to sleep anyway we * may as well sleep faulting in page */ - work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT); + work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN); if (!work) return 0; -- cgit v0.10.2 From fd451b90e78c4178bcfc5072f2b2b637500c109a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 17 Feb 2016 10:25:05 +0000 Subject: arm64: KVM: vgic-v3: Restore ICH_APR0Rn_EL2 before ICH_APR1Rn_EL2 The GICv3 architecture spec says: Writing to the active priority registers in any order other than the following order will result in UNPREDICTABLE behavior: - ICH_AP0R_EL2. - ICH_AP1R_EL2. So let's not pointlessly go against the rule... Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 9142e08..5dd2a26 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -149,16 +149,6 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) switch (nr_pri_bits) { case 7: - write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); - write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); - case 6: - write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); - default: - write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); - } - - switch (nr_pri_bits) { - case 7: write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); case 6: @@ -167,6 +157,16 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); } + switch (nr_pri_bits) { + case 7: + write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); + write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); + case 6: + write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); + default: + write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); + } + switch (max_lr_idx) { case 15: write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2); -- cgit v0.10.2 From 17e4bce0ae63c7e03f3c7fa8d80890e7af3d4971 Mon Sep 17 00:00:00 2001 From: Mike Krinkin Date: Wed, 24 Feb 2016 21:02:31 +0300 Subject: KVM: x86: MMU: fix ubsan index-out-of-range warning Ubsan reports the following warning due to a typo in update_accessed_dirty_bits template, the patch fixes the typo: [ 168.791851] ================================================================================ [ 168.791862] UBSAN: Undefined behaviour in arch/x86/kvm/paging_tmpl.h:252:15 [ 168.791866] index 4 is out of range for type 'u64 [4]' [ 168.791871] CPU: 0 PID: 2950 Comm: qemu-system-x86 Tainted: G O L 4.5.0-rc5-next-20160222 #7 [ 168.791873] Hardware name: LENOVO 23205NG/23205NG, BIOS G2ET95WW (2.55 ) 07/09/2013 [ 168.791876] 0000000000000000 ffff8801cfcaf208 ffffffff81c9f780 0000000041b58ab3 [ 168.791882] ffffffff82eb2cc1 ffffffff81c9f6b4 ffff8801cfcaf230 ffff8801cfcaf1e0 [ 168.791886] 0000000000000004 0000000000000001 0000000000000000 ffffffffa1981600 [ 168.791891] Call Trace: [ 168.791899] [] dump_stack+0xcc/0x12c [ 168.791904] [] ? _atomic_dec_and_lock+0xc4/0xc4 [ 168.791910] [] ubsan_epilogue+0xd/0x8a [ 168.791914] [] __ubsan_handle_out_of_bounds+0x15c/0x1a3 [ 168.791918] [] ? __ubsan_handle_shift_out_of_bounds+0x2bd/0x2bd [ 168.791922] [] ? get_user_pages_fast+0x2bf/0x360 [ 168.791954] [] ? kvm_largepages_enabled+0x30/0x30 [kvm] [ 168.791958] [] ? __get_user_pages_fast+0x360/0x360 [ 168.791987] [] paging64_walk_addr_generic+0x1b28/0x2600 [kvm] [ 168.792014] [] ? init_kvm_mmu+0x1100/0x1100 [kvm] [ 168.792019] [] ? debug_check_no_locks_freed+0x350/0x350 [ 168.792044] [] ? init_kvm_mmu+0x1100/0x1100 [kvm] [ 168.792076] [] paging64_gva_to_gpa+0x7d/0x110 [kvm] [ 168.792121] [] ? paging64_walk_addr_generic+0x2600/0x2600 [kvm] [ 168.792130] [] ? debug_lockdep_rcu_enabled+0x7b/0x90 [ 168.792178] [] emulator_read_write_onepage+0x27a/0x1150 [kvm] [ 168.792208] [] ? __kvm_read_guest_page+0x54/0x70 [kvm] [ 168.792234] [] ? kvm_task_switch+0x160/0x160 [kvm] [ 168.792238] [] ? debug_lockdep_rcu_enabled+0x7b/0x90 [ 168.792263] [] emulator_read_write+0xe7/0x6d0 [kvm] [ 168.792290] [] ? em_cr_write+0x230/0x230 [kvm] [ 168.792314] [] emulator_write_emulated+0x15/0x20 [kvm] [ 168.792340] [] segmented_write+0xf8/0x130 [kvm] [ 168.792367] [] ? em_lgdt+0x20/0x20 [kvm] [ 168.792374] [] ? vmx_read_guest_seg_ar+0x42/0x1e0 [kvm_intel] [ 168.792400] [] writeback+0x3f2/0x700 [kvm] [ 168.792424] [] ? em_sidt+0xa0/0xa0 [kvm] [ 168.792449] [] ? x86_decode_insn+0x1b3d/0x4f70 [kvm] [ 168.792474] [] x86_emulate_insn+0x572/0x3010 [kvm] [ 168.792499] [] x86_emulate_instruction+0x3bd/0x2110 [kvm] [ 168.792524] [] ? reexecute_instruction.part.110+0x2e0/0x2e0 [kvm] [ 168.792532] [] handle_ept_misconfig+0x61/0x460 [kvm_intel] [ 168.792539] [] ? handle_pause+0x450/0x450 [kvm_intel] [ 168.792546] [] vmx_handle_exit+0xd6a/0x1ad0 [kvm_intel] [ 168.792572] [] ? kvm_arch_vcpu_ioctl_run+0xbdc/0x6090 [kvm] [ 168.792597] [] kvm_arch_vcpu_ioctl_run+0xd3d/0x6090 [kvm] [ 168.792621] [] ? kvm_arch_vcpu_ioctl_run+0xbdc/0x6090 [kvm] [ 168.792627] [] ? __ww_mutex_lock_interruptible+0x1630/0x1630 [ 168.792651] [] ? kvm_arch_vcpu_runnable+0x4f0/0x4f0 [kvm] [ 168.792656] [] ? preempt_notifier_unregister+0x190/0x190 [ 168.792681] [] ? kvm_arch_vcpu_load+0x127/0x650 [kvm] [ 168.792704] [] kvm_vcpu_ioctl+0x553/0xda0 [kvm] [ 168.792727] [] ? vcpu_put+0x40/0x40 [kvm] [ 168.792732] [] ? debug_check_no_locks_freed+0x350/0x350 [ 168.792735] [] ? _raw_spin_unlock+0x27/0x40 [ 168.792740] [] ? handle_mm_fault+0x1673/0x2e40 [ 168.792744] [] ? trace_hardirqs_on_caller+0x478/0x6c0 [ 168.792747] [] ? trace_hardirqs_on+0xd/0x10 [ 168.792751] [] ? debug_lockdep_rcu_enabled+0x7b/0x90 [ 168.792756] [] do_vfs_ioctl+0x1b0/0x12b0 [ 168.792759] [] ? ioctl_preallocate+0x210/0x210 [ 168.792763] [] ? __fget+0x273/0x4a0 [ 168.792766] [] ? __fget+0x50/0x4a0 [ 168.792770] [] ? __fget_light+0x96/0x2b0 [ 168.792773] [] SyS_ioctl+0x79/0x90 [ 168.792777] [] entry_SYSCALL_64_fastpath+0x23/0xc1 [ 168.792780] ================================================================================ Signed-off-by: Mike Krinkin Reviewed-by: Xiao Guangrong Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6c9fed9..2ce4f05 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -249,7 +249,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, return ret; kvm_vcpu_mark_page_dirty(vcpu, table_gfn); - walker->ptes[level] = pte; + walker->ptes[level - 1] = pte; } return 0; } -- cgit v0.10.2