From 5094aeafbbd500509f648e3cd102b053bc7926b3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 14 Apr 2010 21:43:53 -0600 Subject: lguest: workaround cmpxchg8b_emu by ignoring cli in the guest. It's only used by cmpxchg8b_emu (see db677ffa5f5a for the gory details), and fixing that to be paravirt aware would be more work than simply ignoring it (and AFAICT only help lguest). This makes lguest work on machines which have cmpxchg8b, for kernels compiled for older processors. (We can't emulate it properly: the popf which expects to restore interrupts does not trap). Signed-off-by: Rusty Russell Cc: Jeremy Fitzhardinge Cc: virtualization@lists.osdl.org diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index fb2b7ef..b4eb675 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -288,6 +288,18 @@ static int emulate_insn(struct lg_cpu *cpu) insn = lgread(cpu, physaddr, u8); /* + * Around 2.6.33, the kernel started using an emulation for the + * cmpxchg8b instruction in early boot on many configurations. This + * code isn't paravirtualized, and it tries to disable interrupts. + * Ignore it, which will Mostly Work. + */ + if (insn == 0xfa) { + /* "cli", or Clear Interrupt Enable instruction. Skip it. */ + cpu->regs->eip++; + return 1; + } + + /* * 0x66 is an "operand prefix". It means it's using the upper 16 bits * of the eax register. */ -- cgit v0.10.2 From 091ebf07a2408f9a56634caa0f86d9360e9af23b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 14 Apr 2010 21:43:54 -0600 Subject: lguest: stop using KVM hypercall mechanism This is a partial revert of 4cd8b5e2a159 "lguest: use KVM hypercalls"; we revert to using (just as questionable but more reliable) int $15 for hypercalls. I didn't revert the register mapping, so we still use the same calling convention as kvm. KVM in more recent incarnations stopped injecting a fault when a guest tried to use the VMCALL instruction from ring 1, so lguest under kvm fails to make hypercalls. It was nice to share code with our KVM cousins, but this was overreach. Signed-off-by: Rusty Russell Cc: Matias Zabaljauregui Cc: Avi Kivity diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index ba0eed8..b60f292 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h @@ -28,22 +28,39 @@ #ifndef __ASSEMBLY__ #include -#include /*G:030 * But first, how does our Guest contact the Host to ask for privileged * operations? There are two ways: the direct way is to make a "hypercall", * to make requests of the Host Itself. * - * We use the KVM hypercall mechanism, though completely different hypercall - * numbers. Seventeen hypercalls are available: the hypercall number is put in - * the %eax register, and the arguments (when required) are placed in %ebx, - * %ecx, %edx and %esi. If a return value makes sense, it's returned in %eax. + * Our hypercall mechanism uses the highest unused trap code (traps 32 and + * above are used by real hardware interrupts). Seventeen hypercalls are + * available: the hypercall number is put in the %eax register, and the + * arguments (when required) are placed in %ebx, %ecx, %edx and %esi. + * If a return value makes sense, it's returned in %eax. * * Grossly invalid calls result in Sudden Death at the hands of the vengeful * Host, rather than returning failure. This reflects Winston Churchill's * definition of a gentleman: "someone who is only rude intentionally". -:*/ + */ +static inline unsigned long +hcall(unsigned long call, + unsigned long arg1, unsigned long arg2, unsigned long arg3, + unsigned long arg4) +{ + /* "int" is the Intel instruction to trigger a trap. */ + asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY) + /* The call in %eax (aka "a") might be overwritten */ + : "=a"(call) + /* The arguments are in %eax, %ebx, %ecx, %edx & %esi */ + : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4) + /* "memory" means this might write somewhere in memory. + * This isn't true for all calls, but it's safe to tell + * gcc that it might happen so it doesn't get clever. */ + : "memory"); + return call; +} /* Can't use our min() macro here: needs to be a constant */ #define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 7e59dc1..2bdf628 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -115,7 +115,7 @@ static void async_hcall(unsigned long call, unsigned long arg1, local_irq_save(flags); if (lguest_data.hcall_status[next_call] != 0xFF) { /* Table full, so do normal hcall which will flush table. */ - kvm_hypercall4(call, arg1, arg2, arg3, arg4); + hcall(call, arg1, arg2, arg3, arg4); } else { lguest_data.hcalls[next_call].arg0 = call; lguest_data.hcalls[next_call].arg1 = arg1; @@ -145,46 +145,45 @@ static void async_hcall(unsigned long call, unsigned long arg1, * So, when we're in lazy mode, we call async_hcall() to store the call for * future processing: */ -static void lazy_hcall1(unsigned long call, - unsigned long arg1) +static void lazy_hcall1(unsigned long call, unsigned long arg1) { if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) - kvm_hypercall1(call, arg1); + hcall(call, arg1, 0, 0, 0); else async_hcall(call, arg1, 0, 0, 0); } /* You can imagine what lazy_hcall2, 3 and 4 look like. :*/ static void lazy_hcall2(unsigned long call, - unsigned long arg1, - unsigned long arg2) + unsigned long arg1, + unsigned long arg2) { if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) - kvm_hypercall2(call, arg1, arg2); + hcall(call, arg1, arg2, 0, 0); else async_hcall(call, arg1, arg2, 0, 0); } static void lazy_hcall3(unsigned long call, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3) + unsigned long arg1, + unsigned long arg2, + unsigned long arg3) { if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) - kvm_hypercall3(call, arg1, arg2, arg3); + hcall(call, arg1, arg2, arg3, 0); else async_hcall(call, arg1, arg2, arg3, 0); } #ifdef CONFIG_X86_PAE static void lazy_hcall4(unsigned long call, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4) + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4) { if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) - kvm_hypercall4(call, arg1, arg2, arg3, arg4); + hcall(call, arg1, arg2, arg3, arg4); else async_hcall(call, arg1, arg2, arg3, arg4); } @@ -196,13 +195,13 @@ static void lazy_hcall4(unsigned long call, :*/ static void lguest_leave_lazy_mmu_mode(void) { - kvm_hypercall0(LHCALL_FLUSH_ASYNC); + hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0); paravirt_leave_lazy_mmu(); } static void lguest_end_context_switch(struct task_struct *next) { - kvm_hypercall0(LHCALL_FLUSH_ASYNC); + hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0); paravirt_end_context_switch(next); } @@ -286,7 +285,7 @@ static void lguest_write_idt_entry(gate_desc *dt, /* Keep the local copy up to date. */ native_write_idt_entry(dt, entrynum, g); /* Tell Host about this new entry. */ - kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]); + hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1], 0); } /* @@ -300,7 +299,7 @@ static void lguest_load_idt(const struct desc_ptr *desc) struct desc_struct *idt = (void *)desc->address; for (i = 0; i < (desc->size+1)/8; i++) - kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b); + hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b, 0); } /* @@ -321,7 +320,7 @@ static void lguest_load_gdt(const struct desc_ptr *desc) struct desc_struct *gdt = (void *)desc->address; for (i = 0; i < (desc->size+1)/8; i++) - kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b); + hcall(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b, 0); } /* @@ -334,8 +333,8 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, { native_write_gdt_entry(dt, entrynum, desc, type); /* Tell Host about this new entry. */ - kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, entrynum, - dt[entrynum].a, dt[entrynum].b); + hcall(LHCALL_LOAD_GDT_ENTRY, entrynum, + dt[entrynum].a, dt[entrynum].b, 0); } /* @@ -931,7 +930,7 @@ static int lguest_clockevent_set_next_event(unsigned long delta, } /* Please wake us this far in the future. */ - kvm_hypercall1(LHCALL_SET_CLOCKEVENT, delta); + hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0, 0); return 0; } @@ -942,7 +941,7 @@ static void lguest_clockevent_set_mode(enum clock_event_mode mode, case CLOCK_EVT_MODE_UNUSED: case CLOCK_EVT_MODE_SHUTDOWN: /* A 0 argument shuts the clock down. */ - kvm_hypercall0(LHCALL_SET_CLOCKEVENT); + hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0, 0); break; case CLOCK_EVT_MODE_ONESHOT: /* This is what we expect. */ @@ -1100,7 +1099,7 @@ static void set_lguest_basic_apic_ops(void) /* STOP! Until an interrupt comes in. */ static void lguest_safe_halt(void) { - kvm_hypercall0(LHCALL_HALT); + hcall(LHCALL_HALT, 0, 0, 0, 0); } /* @@ -1112,8 +1111,8 @@ static void lguest_safe_halt(void) */ static void lguest_power_off(void) { - kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"), - LGUEST_SHUTDOWN_POWEROFF); + hcall(LHCALL_SHUTDOWN, __pa("Power down"), + LGUEST_SHUTDOWN_POWEROFF, 0, 0); } /* @@ -1123,7 +1122,7 @@ static void lguest_power_off(void) */ static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) { - kvm_hypercall2(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF); + hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0, 0); /* The hcall won't return, but to keep gcc happy, we're "done". */ return NOTIFY_DONE; } @@ -1162,7 +1161,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) len = sizeof(scratch) - 1; scratch[len] = '\0'; memcpy(scratch, buf, len); - kvm_hypercall1(LHCALL_NOTIFY, __pa(scratch)); + hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0, 0); /* This routine returns the number of bytes actually written. */ return len; @@ -1174,7 +1173,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) */ static void lguest_restart(char *reason) { - kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART); + hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0, 0); } /*G:050 diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 27eac0f..4f420c2f 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -32,7 +32,7 @@ ENTRY(lguest_entry) */ movl $LHCALL_LGUEST_INIT, %eax movl $lguest_data - __PAGE_OFFSET, %ebx - .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ + int $LGUEST_TRAP_ENTRY /* Set up the initial stack so we can run C code. */ movl $(init_thread_union+THREAD_SIZE),%esp diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 07090f3..69c84a1 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -178,7 +178,7 @@ static void set_status(struct virtio_device *vdev, u8 status) /* We set the status. */ to_lgdev(vdev)->desc->status = status; - kvm_hypercall1(LHCALL_NOTIFY, (max_pfn << PAGE_SHIFT) + offset); + hcall(LHCALL_NOTIFY, (max_pfn << PAGE_SHIFT) + offset, 0, 0, 0); } static void lg_set_status(struct virtio_device *vdev, u8 status) @@ -229,7 +229,7 @@ static void lg_notify(struct virtqueue *vq) */ struct lguest_vq_info *lvq = vq->priv; - kvm_hypercall1(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT); + hcall(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT, 0, 0, 0); } /* An extern declaration inside a C file is bad form. Don't do it. */ -- cgit v0.10.2