From 7ea08093e0ccbad030188ded3cb082d8b8094d35 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Fri, 18 Jan 2008 00:16:43 -0200 Subject: lguest: fix drivers/lguest Makefile entry Parts depend on CONFIG_LGUEST, not just CONFIG_LGUEST_GUEST Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/Makefile b/drivers/Makefile index 8cb37e3..57fb145 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -73,7 +73,7 @@ obj-$(CONFIG_ISDN) += isdn/ obj-$(CONFIG_EDAC) += edac/ obj-$(CONFIG_MCA) += mca/ obj-$(CONFIG_EISA) += eisa/ -obj-$(CONFIG_LGUEST_GUEST) += lguest/ +obj-y += lguest/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_CPU_IDLE) += cpuidle/ obj-$(CONFIG_MMC) += mmc/ -- cgit v0.10.2 From 5c55841d16dbf7c759fa6fb2ecc5e615b86d17db Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Thu, 17 Jan 2008 22:32:50 -0200 Subject: lguest: remove pv_info dependency Currently, lguest module can't be compiled without the PARAVIRT flag being on. This is a fake dependency, since the module itself shouldn't need any paravirt override. Reason for that is the reference to pv_info structure in initial loading tests. This patch removes it in favour of a more generic error message. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index cb4c670..f10abc8 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -253,7 +253,7 @@ static int __init init(void) /* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */ if (paravirt_enabled()) { - printk("lguest is afraid of %s\n", pv_info.name); + printk("lguest is afraid of being a guest\n"); return -EPERM; } -- cgit v0.10.2 From ec04b13f67be3c90b38c625f4b8bdfea54c1ff60 Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Fri, 28 Dec 2007 14:26:24 +0530 Subject: lguest: Reboot support Reboot Implemented (Prevent fd leak, fix style and fix documentation --RR) Signed-off-by: Balaji Rao Signed-off-by: Rusty Russell diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 9b0e322..86cac3e 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -153,6 +153,9 @@ struct virtqueue void (*handle_output)(int fd, struct virtqueue *me); }; +/* Remember the arguments to the program so we can "reboot" */ +static char **main_args; + /* Since guest is UP and we don't run at the same time, we don't need barriers. * But I include them in the code in case others copy it. */ #define wmb() @@ -1489,7 +1492,9 @@ static void setup_block_file(const char *filename) /* Create stack for thread and run it */ stack = malloc(32768); - if (clone(io_thread, stack + 32768, CLONE_VM, dev) == -1) + /* SIGCHLD - We dont "wait" for our cloned thread, so prevent it from + * becoming a zombie. */ + if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1) err(1, "Creating clone"); /* We don't need to keep the I/O thread's end of the pipes open. */ @@ -1499,7 +1504,21 @@ static void setup_block_file(const char *filename) verbose("device %u: virtblock %llu sectors\n", devices.device_num, cap); } -/* That's the end of device setup. */ +/* That's the end of device setup. :*/ + +/* Reboot */ +static void __attribute__((noreturn)) restart_guest(void) +{ + unsigned int i; + + /* Closing pipes causes the waker thread and io_threads to die, and + * closing /dev/lguest cleans up the Guest. Since we don't track all + * open fds, we simply close everything beyond stderr. */ + for (i = 3; i < FD_SETSIZE; i++) + close(i); + execv(main_args[0], main_args); + err(1, "Could not exec %s", main_args[0]); +} /*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves * its input and output, and finally, lays it to rest. */ @@ -1523,6 +1542,9 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) char reason[1024] = { 0 }; read(lguest_fd, reason, sizeof(reason)-1); errx(1, "%s", reason); + /* ERESTART means that we need to reboot the guest */ + } else if (errno == ERESTART) { + restart_guest(); /* EAGAIN means the Waker wanted us to look at some input. * Anything else means a bug or incompatible change. */ } else if (errno != EAGAIN) @@ -1571,6 +1593,12 @@ int main(int argc, char *argv[]) /* If they specify an initrd file to load. */ const char *initrd_name = NULL; + /* Save the args: we "reboot" by execing ourselves again. */ + main_args = argv; + /* We don't "wait" for the children, so prevent them from becoming + * zombies. */ + signal(SIGCHLD, SIG_IGN); + /* First we initialize the device list. Since console and network * device receive input from a file descriptor, we keep an fdset * (infds) and the maximum fd number (max_infd) with the head of the diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 92c5611..d6b18e2 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -67,6 +67,7 @@ #include #include #include +#include /* for struct machine_ops */ /*G:010 Welcome to the Guest! * @@ -812,7 +813,7 @@ static void lguest_safe_halt(void) * rather than virtual addresses, so we use __pa() here. */ static void lguest_power_off(void) { - hcall(LHCALL_CRASH, __pa("Power down"), 0, 0); + hcall(LHCALL_SHUTDOWN, __pa("Power down"), LGUEST_SHUTDOWN_POWEROFF, 0); } /* @@ -822,7 +823,7 @@ static void lguest_power_off(void) */ static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) { - hcall(LHCALL_CRASH, __pa(p), 0, 0); + hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0); /* The hcall won't return, but to keep gcc happy, we're "done". */ return NOTIFY_DONE; } @@ -926,6 +927,11 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, return insn_len; } +static void lguest_restart(char *reason) +{ + hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0); +} + /*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops * structures in the kernel provide points for (almost) every routine we have * to override to avoid privileged instructions. */ @@ -1059,6 +1065,7 @@ __init void lguest_init(void) * the Guest routine to power off. */ pm_power_off = lguest_power_off; + machine_ops.restart = lguest_restart; /* Now we're set up, call start_kernel() in init/main.c and we proceed * to boot as normal. It never returns. */ start_kernel(); diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index f10abc8..c1069bc 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -235,6 +235,8 @@ int run_guest(struct lguest *lg, unsigned long __user *user) lguest_arch_handle_trap(lg); } + if (lg->dead == ERR_PTR(-ERESTART)) + return -ERESTART; /* The Guest is dead => "No such file or directory" */ return -ENOENT; } diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index b478aff..05fad6f 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -41,8 +41,8 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args) * do that. */ kill_guest(lg, "already have lguest_data"); break; - case LHCALL_CRASH: { - /* Crash is such a trivial hypercall that we do it in four + case LHCALL_SHUTDOWN: { + /* Shutdown is such a trivial hypercall that we do it in four * lines right here. */ char msg[128]; /* If the lgread fails, it will call kill_guest() itself; the @@ -50,6 +50,8 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args) __lgread(lg, msg, args->arg1, sizeof(msg)); msg[sizeof(msg)-1] = '\0'; kill_guest(lg, "CRASH: %s", msg); + if (args->arg2 == LGUEST_SHUTDOWN_RESTART) + lg->dead = ERR_PTR(-ERESTART); break; } case LHCALL_FLUSH_TLB: diff --git a/include/asm-x86/lguest_hcall.h b/include/asm-x86/lguest_hcall.h index 2091779..758b9a5 100644 --- a/include/asm-x86/lguest_hcall.h +++ b/include/asm-x86/lguest_hcall.h @@ -4,7 +4,7 @@ #define LHCALL_FLUSH_ASYNC 0 #define LHCALL_LGUEST_INIT 1 -#define LHCALL_CRASH 2 +#define LHCALL_SHUTDOWN 2 #define LHCALL_LOAD_GDT 3 #define LHCALL_NEW_PGTABLE 4 #define LHCALL_FLUSH_TLB 5 @@ -20,6 +20,10 @@ #define LGUEST_TRAP_ENTRY 0x1F +/* Argument number 3 to LHCALL_LGUEST_SHUTDOWN */ +#define LGUEST_SHUTDOWN_POWEROFF 1 +#define LGUEST_SHUTDOWN_RESTART 2 + #ifndef __ASSEMBLY__ #include -- cgit v0.10.2 From badb1e04028e3e029ff9447d4aeb162a84ad68c2 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Mon, 7 Jan 2008 11:05:22 -0200 Subject: lguest: introduce vcpu struct this patch introduces a vcpu struct for lguest. In upcoming patches, more and more fields will be moved from the lguest struct to the vcpu Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 8692489..5f73ddf 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -38,6 +38,13 @@ struct lguest_pages #define CHANGED_GDT_TLS 4 /* Actually a subset of CHANGED_GDT */ #define CHANGED_ALL 3 +struct lguest; + +struct lg_cpu { + unsigned int id; + struct lguest *lg; +}; + /* The private info the thread maintains about the guest. */ struct lguest { @@ -47,6 +54,9 @@ struct lguest struct lguest_data __user *lguest_data; struct task_struct *tsk; struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */ + struct lg_cpu cpus[NR_CPUS]; + unsigned int nr_cpus; + u32 pfn_limit; /* This provides the offset to the base of guest-physical * memory in the Launcher. */ -- cgit v0.10.2 From e3283fa0cc5c4f9bde52339a40da89297e51b481 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Mon, 7 Jan 2008 11:05:23 -0200 Subject: lguest: adapt launcher to per-cpuness This patch makes uses of pread() and pwrite() in lguest launcher to communicate the vcpu id to the lguest driver. The id is kept in a thread variable, which means we'll span in the future, vcpus as threads. But right now, only the infrastructure is out there. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 86cac3e..6c8a238 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -79,6 +79,9 @@ static void *guest_base; /* The maximum guest physical address allowed, and maximum possible. */ static unsigned long guest_limit, guest_max; +/* a per-cpu variable indicating whose vcpu is currently running */ +static unsigned int __thread cpu_id; + /* This is our list of devices. */ struct device_list { @@ -557,7 +560,7 @@ static void wake_parent(int pipefd, int lguest_fd) else FD_CLR(-fd - 1, &devices.infds); } else /* Send LHREQ_BREAK command. */ - write(lguest_fd, args, sizeof(args)); + pwrite(lguest_fd, args, sizeof(args), cpu_id); } } @@ -1530,7 +1533,8 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) int readval; /* We read from the /dev/lguest device to run the Guest. */ - readval = read(lguest_fd, ¬ify_addr, sizeof(notify_addr)); + readval = pread(lguest_fd, ¬ify_addr, + sizeof(notify_addr), cpu_id); /* One unsigned long means the Guest did HCALL_NOTIFY */ if (readval == sizeof(notify_addr)) { @@ -1540,7 +1544,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) /* ENOENT means the Guest died. Reading tells us why. */ } else if (errno == ENOENT) { char reason[1024] = { 0 }; - read(lguest_fd, reason, sizeof(reason)-1); + pread(lguest_fd, reason, sizeof(reason)-1, cpu_id); errx(1, "%s", reason); /* ERESTART means that we need to reboot the guest */ } else if (errno == ERESTART) { @@ -1550,9 +1554,13 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd) } else if (errno != EAGAIN) err(1, "Running guest failed"); + /* Only service input on thread for CPU 0. */ + if (cpu_id != 0) + continue; + /* Service input, then unset the BREAK to release the Waker. */ handle_input(lguest_fd); - if (write(lguest_fd, args, sizeof(args)) < 0) + if (pwrite(lguest_fd, args, sizeof(args), cpu_id) < 0) err(1, "Resetting break"); } } @@ -1610,6 +1618,7 @@ int main(int argc, char *argv[]) devices.lastdev = &devices.dev; devices.next_irq = 1; + cpu_id = 0; /* We need to know how much memory so we can set up the device * descriptor and memory pages for the devices as we parse the command * line. So we quickly look through the arguments to find the amount -- cgit v0.10.2 From 4dcc53da49c2387078fe8ceb7a420d125e027fc6 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Mon, 7 Jan 2008 11:05:24 -0200 Subject: lguest: initialize vcpu this patch initializes the first vcpu in the initialize() routing, which is responsible for starting the process of putting the guest up. right now, as much of the fields are still not per-vcpu, it does not do much. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 3b92a61..c4bfe5a 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -88,6 +88,18 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) return run_guest(lg, (unsigned long __user *)user); } +static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) +{ + if (id >= NR_CPUS) + return -EINVAL; + + cpu->id = id; + cpu->lg = container_of((cpu - id), struct lguest, cpus[0]); + cpu->lg->nr_cpus++; + + return 0; +} + /*L:020 The initialization write supplies 4 pointer sized (32 or 64 bit) * values (in addition to the LHREQ_INITIALIZE value). These are: * @@ -134,6 +146,11 @@ static int initialize(struct file *file, const unsigned long __user *input) lg->mem_base = (void __user *)(long)args[0]; lg->pfn_limit = args[1]; + /* This is the first cpu */ + err = cpu_start(&lg->cpus[0], 0, args[3]); + if (err) + goto release_guest; + /* We need a complete page for the Guest registers: they are accessible * to the Guest and we can only grant it access to whole pages. */ lg->regs_page = get_zeroed_page(GFP_KERNEL); -- cgit v0.10.2 From d0953d42c3445a120299fac9ad70e672d77898e9 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Mon, 7 Jan 2008 11:05:25 -0200 Subject: lguest: per-cpu run guest This patch makes the run_guest() routine use the lg_cpu struct. This is required since in a smp guest environment, there's no more the notion of "running the guest", but rather, it is "running the vcpu" Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index c1069bc..75b38f2 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -174,8 +174,10 @@ void __lgwrite(struct lguest *lg, unsigned long addr, const void *b, /*H:030 Let's jump straight to the the main loop which runs the Guest. * Remember, this is called by the Launcher reading /dev/lguest, and we keep * going around and around until something interesting happens. */ -int run_guest(struct lguest *lg, unsigned long __user *user) +int run_guest(struct lg_cpu *cpu, unsigned long __user *user) { + struct lguest *lg = cpu->lg; + /* We stop running once the Guest is dead. */ while (!lg->dead) { /* First we run any hypercalls the Guest wants done. */ @@ -226,7 +228,7 @@ int run_guest(struct lguest *lg, unsigned long __user *user) local_irq_disable(); /* Actually run the Guest until something happens. */ - lguest_arch_run_guest(lg); + lguest_arch_run_guest(cpu); /* Now we're ready to be interrupted or moved to other CPUs */ local_irq_enable(); diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 5f73ddf..bfca271 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -126,7 +126,7 @@ void __lgwrite(struct lguest *, unsigned long, const void *, unsigned); } while(0) /* (end of memory access helper routines) :*/ -int run_guest(struct lguest *lg, unsigned long __user *user); +int run_guest(struct lg_cpu *cpu, unsigned long __user *user); /* Helper macros to obtain the first 12 or the last 20 bits, this is only the * first step in the migration to the kernel types. pte_pfn is already defined @@ -177,7 +177,7 @@ void page_table_guest_data_init(struct lguest *lg); /* /core.c: */ void lguest_arch_host_init(void); void lguest_arch_host_fini(void); -void lguest_arch_run_guest(struct lguest *lg); +void lguest_arch_run_guest(struct lg_cpu *cpu); void lguest_arch_handle_trap(struct lguest *lg); int lguest_arch_init_hypercalls(struct lguest *lg); int lguest_arch_do_hcall(struct lguest *lg, struct hcall_args *args); diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index c4bfe5a..9f0a443 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -55,11 +55,19 @@ static int user_send_irq(struct lguest *lg, const unsigned long __user *input) static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) { struct lguest *lg = file->private_data; + struct lg_cpu *cpu; + unsigned int cpu_id = *o; /* You must write LHREQ_INITIALIZE first! */ if (!lg) return -EINVAL; + /* Watch out for arbitrary vcpu indexes! */ + if (cpu_id >= lg->nr_cpus) + return -EINVAL; + + cpu = &lg->cpus[cpu_id]; + /* If you're not the task which owns the Guest, go away. */ if (current != lg->tsk) return -EPERM; @@ -85,7 +93,7 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) lg->pending_notify = 0; /* Run the Guest until something interesting happens. */ - return run_guest(lg, (unsigned long __user *)user); + return run_guest(cpu, (unsigned long __user *)user); } static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) @@ -147,7 +155,7 @@ static int initialize(struct file *file, const unsigned long __user *input) lg->pfn_limit = args[1]; /* This is the first cpu */ - err = cpu_start(&lg->cpus[0], 0, args[3]); + err = lg_cpu_start(&lg->cpus[0], 0, args[3]); if (err) goto release_guest; diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 96d0fd0..3d2131e 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -73,8 +73,9 @@ static DEFINE_PER_CPU(struct lguest *, last_guest); * since it last ran. We saw this set in interrupts_and_traps.c and * segments.c. */ -static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages) +static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages) { + struct lguest *lg = cpu->lg; /* Copying all this data can be quite expensive. We usually run the * same Guest we ran last time (and that Guest hasn't run anywhere else * meanwhile). If that's not the case, we pretend everything in the @@ -113,14 +114,15 @@ static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages) } /* Finally: the code to actually call into the Switcher to run the Guest. */ -static void run_guest_once(struct lguest *lg, struct lguest_pages *pages) +static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) { /* This is a dummy value we need for GCC's sake. */ unsigned int clobber; + struct lguest *lg = cpu->lg; /* Copy the guest-specific information into this CPU's "struct * lguest_pages". */ - copy_in_guest_info(lg, pages); + copy_in_guest_info(cpu, pages); /* Set the trap number to 256 (impossible value). If we fault while * switching to the Guest (bad segment registers or bug), this will @@ -161,8 +163,10 @@ static void run_guest_once(struct lguest *lg, struct lguest_pages *pages) /*H:040 This is the i386-specific code to setup and run the Guest. Interrupts * are disabled: we own the CPU. */ -void lguest_arch_run_guest(struct lguest *lg) +void lguest_arch_run_guest(struct lg_cpu *cpu) { + struct lguest *lg = cpu->lg; + /* Remember the awfully-named TS bit? If the Guest has asked to set it * we set it now, so we can trap and pass that trap to the Guest if it * uses the FPU. */ @@ -180,7 +184,7 @@ void lguest_arch_run_guest(struct lguest *lg) /* Now we actually run the Guest. It will return when something * interesting happens, and we can examine its registers to see what it * was doing. */ - run_guest_once(lg, lguest_pages(raw_smp_processor_id())); + run_guest_once(cpu, lguest_pages(raw_smp_processor_id())); /* Note that the "regs" pointer contains two extra entries which are * not really registers: a trap number which says what interrupt or -- cgit v0.10.2 From 7ea07a1500f05e06ebf0136763c781244f77a2a1 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Mon, 7 Jan 2008 11:05:26 -0200 Subject: lguest: make write() operation smp aware This patch makes the write() file operation smp aware. Which means, receiving the vcpu_id value through the offset parameter, and being well aware to which vcpu we're talking to. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 9f0a443..2562082 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -227,14 +227,21 @@ static ssize_t write(struct file *file, const char __user *in, struct lguest *lg = file->private_data; const unsigned long __user *input = (const unsigned long __user *)in; unsigned long req; + struct lg_cpu *cpu; + unsigned int cpu_id = *off; if (get_user(req, input) != 0) return -EFAULT; input++; /* If you haven't initialized, you must do that first. */ - if (req != LHREQ_INITIALIZE && !lg) - return -EINVAL; + if (req != LHREQ_INITIALIZE) { + if (!lg || (cpu_id >= lg->nr_cpus)) + return -EINVAL; + cpu = &lg->cpus[cpu_id]; + if (!cpu) + return -EINVAL; + } /* Once the Guest is dead, all you can do is read() why it died. */ if (lg && lg->dead) -- cgit v0.10.2 From 73044f05a4ac65f2df42753e9566444b9d2a660f Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Mon, 7 Jan 2008 11:05:27 -0200 Subject: lguest: make hypercalls use the vcpu struct this patch changes do_hcall() and do_async_hcall() interfaces (and obviously their callers) to get a vcpu struct. Again, a vcpu services the hypercall, not the whole guest Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 75b38f2..0ea67cb 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -181,8 +181,8 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) /* We stop running once the Guest is dead. */ while (!lg->dead) { /* First we run any hypercalls the Guest wants done. */ - if (lg->hcall) - do_hypercalls(lg); + if (cpu->hcall) + do_hypercalls(cpu); /* It's possible the Guest did a NOTIFY hypercall to the * Launcher, in which case we return from the read() now. */ @@ -234,7 +234,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) local_irq_enable(); /* Now we deal with whatever happened to the Guest. */ - lguest_arch_handle_trap(lg); + lguest_arch_handle_trap(cpu); } if (lg->dead == ERR_PTR(-ERESTART)) diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 05fad6f..7827671 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -29,8 +29,10 @@ /*H:120 This is the core hypercall routine: where the Guest gets what it wants. * Or gets killed. Or, in the case of LHCALL_CRASH, both. */ -static void do_hcall(struct lguest *lg, struct hcall_args *args) +static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) { + struct lguest *lg = cpu->lg; + switch (args->arg0) { case LHCALL_FLUSH_ASYNC: /* This call does nothing, except by breaking out of the Guest @@ -93,7 +95,7 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args) break; default: /* It should be an architecture-specific hypercall. */ - if (lguest_arch_do_hcall(lg, args)) + if (lguest_arch_do_hcall(cpu, args)) kill_guest(lg, "Bad hypercall %li\n", args->arg0); } } @@ -106,10 +108,11 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args) * Guest put them in the ring, but we also promise the Guest that they will * happen before any normal hypercall (which is why we check this before * checking for a normal hcall). */ -static void do_async_hcalls(struct lguest *lg) +static void do_async_hcalls(struct lg_cpu *cpu) { unsigned int i; u8 st[LHCALL_RING_SIZE]; + struct lguest *lg = cpu->lg; /* For simplicity, we copy the entire call status array in at once. */ if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st))) @@ -121,7 +124,7 @@ static void do_async_hcalls(struct lguest *lg) /* We remember where we were up to from last time. This makes * sure that the hypercalls are done in the order the Guest * places them in the ring. */ - unsigned int n = lg->next_hcall; + unsigned int n = cpu->next_hcall; /* 0xFF means there's no call here (yet). */ if (st[n] == 0xFF) @@ -129,8 +132,8 @@ static void do_async_hcalls(struct lguest *lg) /* OK, we have hypercall. Increment the "next_hcall" cursor, * and wrap back to 0 if we reach the end. */ - if (++lg->next_hcall == LHCALL_RING_SIZE) - lg->next_hcall = 0; + if (++cpu->next_hcall == LHCALL_RING_SIZE) + cpu->next_hcall = 0; /* Copy the hypercall arguments into a local copy of * the hcall_args struct. */ @@ -141,7 +144,7 @@ static void do_async_hcalls(struct lguest *lg) } /* Do the hypercall, same as a normal one. */ - do_hcall(lg, &args); + do_hcall(cpu, &args); /* Mark the hypercall done. */ if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) { @@ -158,16 +161,17 @@ static void do_async_hcalls(struct lguest *lg) /* Last of all, we look at what happens first of all. The very first time the * Guest makes a hypercall, we end up here to set things up: */ -static void initialize(struct lguest *lg) +static void initialize(struct lg_cpu *cpu) { + struct lguest *lg = cpu->lg; /* You can't do anything until you're initialized. The Guest knows the * rules, so we're unforgiving here. */ - if (lg->hcall->arg0 != LHCALL_LGUEST_INIT) { - kill_guest(lg, "hypercall %li before INIT", lg->hcall->arg0); + if (cpu->hcall->arg0 != LHCALL_LGUEST_INIT) { + kill_guest(lg, "hypercall %li before INIT", cpu->hcall->arg0); return; } - if (lguest_arch_init_hypercalls(lg)) + if (lguest_arch_init_hypercalls(cpu)) kill_guest(lg, "bad guest page %p", lg->lguest_data); /* The Guest tells us where we're not to deliver interrupts by putting @@ -196,27 +200,27 @@ static void initialize(struct lguest *lg) * Remember from the Guest, hypercalls come in two flavors: normal and * asynchronous. This file handles both of types. */ -void do_hypercalls(struct lguest *lg) +void do_hypercalls(struct lg_cpu *cpu) { /* Not initialized yet? This hypercall must do it. */ - if (unlikely(!lg->lguest_data)) { + if (unlikely(!cpu->lg->lguest_data)) { /* Set up the "struct lguest_data" */ - initialize(lg); + initialize(cpu); /* Hcall is done. */ - lg->hcall = NULL; + cpu->hcall = NULL; return; } /* The Guest has initialized. * * Look in the hypercall ring for the async hypercalls: */ - do_async_hcalls(lg); + do_async_hcalls(cpu); /* If we stopped reading the hypercall ring because the Guest did a * NOTIFY to the Launcher, we want to return now. Otherwise we do * the hypercall. */ - if (!lg->pending_notify) { - do_hcall(lg, lg->hcall); + if (!cpu->lg->pending_notify) { + do_hcall(cpu, cpu->hcall); /* Tricky point: we reset the hcall pointer to mark the * hypercall as "done". We use the hcall pointer rather than * the trap number to indicate a hypercall is pending. @@ -227,7 +231,7 @@ void do_hypercalls(struct lguest *lg) * Launcher, the run_guest() loop will exit without running the * Guest. When it comes back it would try to re-run the * hypercall. */ - lg->hcall = NULL; + cpu->hcall = NULL; } } diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index bfca271..a4ebd41 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -43,6 +43,10 @@ struct lguest; struct lg_cpu { unsigned int id; struct lguest *lg; + + /* If a hypercall was asked for, this points to the arguments. */ + struct hcall_args *hcall; + u32 next_hcall; }; /* The private info the thread maintains about the guest. */ @@ -65,13 +69,9 @@ struct lguest u32 cr2; int halted; int ts; - u32 next_hcall; u32 esp1; u8 ss1; - /* If a hypercall was asked for, this points to the arguments. */ - struct hcall_args *hcall; - /* Do we need to stop what we're doing and return to userspace? */ int break_out; wait_queue_head_t break_wq; @@ -178,9 +178,9 @@ void page_table_guest_data_init(struct lguest *lg); void lguest_arch_host_init(void); void lguest_arch_host_fini(void); void lguest_arch_run_guest(struct lg_cpu *cpu); -void lguest_arch_handle_trap(struct lguest *lg); -int lguest_arch_init_hypercalls(struct lguest *lg); -int lguest_arch_do_hcall(struct lguest *lg, struct hcall_args *args); +void lguest_arch_handle_trap(struct lg_cpu *cpu); +int lguest_arch_init_hypercalls(struct lg_cpu *cpu); +int lguest_arch_do_hcall(struct lg_cpu *cpu, struct hcall_args *args); void lguest_arch_setup_regs(struct lguest *lg, unsigned long start); /* /switcher.S: */ @@ -191,7 +191,7 @@ int lguest_device_init(void); void lguest_device_remove(void); /* hypercalls.c: */ -void do_hypercalls(struct lguest *lg); +void do_hypercalls(struct lg_cpu *cpu); void write_timestamp(struct lguest *lg); /*L:035 diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 3d2131e..5962160 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -283,8 +283,9 @@ static int emulate_insn(struct lguest *lg) } /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */ -void lguest_arch_handle_trap(struct lguest *lg) +void lguest_arch_handle_trap(struct lg_cpu *cpu) { + struct lguest *lg = cpu->lg; switch (lg->regs->trapnum) { case 13: /* We've intercepted a General Protection Fault. */ /* Check if this was one of those annoying IN or OUT @@ -336,7 +337,7 @@ void lguest_arch_handle_trap(struct lguest *lg) case LGUEST_TRAP_ENTRY: /* Our 'struct hcall_args' maps directly over our regs: we set * up the pointer now to indicate a hypercall is pending. */ - lg->hcall = (struct hcall_args *)lg->regs; + cpu->hcall = (struct hcall_args *)lg->regs; return; } @@ -491,8 +492,10 @@ void __exit lguest_arch_host_fini(void) /*H:122 The i386-specific hypercalls simply farm out to the right functions. */ -int lguest_arch_do_hcall(struct lguest *lg, struct hcall_args *args) +int lguest_arch_do_hcall(struct lg_cpu *cpu, struct hcall_args *args) { + struct lguest *lg = cpu->lg; + switch (args->arg0) { case LHCALL_LOAD_GDT: load_guest_gdt(lg, args->arg1, args->arg2); @@ -511,13 +514,14 @@ int lguest_arch_do_hcall(struct lguest *lg, struct hcall_args *args) } /*H:126 i386-specific hypercall initialization: */ -int lguest_arch_init_hypercalls(struct lguest *lg) +int lguest_arch_init_hypercalls(struct lg_cpu *cpu) { u32 tsc_speed; + struct lguest *lg = cpu->lg; /* The pointer to the Guest's "struct lguest_data" is the only * argument. We check that address now. */ - if (!lguest_address_ok(lg, lg->hcall->arg1, sizeof(*lg->lguest_data))) + if (!lguest_address_ok(lg, cpu->hcall->arg1, sizeof(*lg->lguest_data))) return -EFAULT; /* Having checked it, we simply set lg->lguest_data to point straight @@ -525,7 +529,7 @@ int lguest_arch_init_hypercalls(struct lguest *lg) * copy_to_user/from_user from now on, instead of lgread/write. I put * this in to show that I'm not immune to writing stupid * optimizations. */ - lg->lguest_data = lg->mem_base + lg->hcall->arg1; + lg->lguest_data = lg->mem_base + cpu->hcall->arg1; /* We insist that the Time Stamp Counter exist and doesn't change with * cpu frequency. Some devious chip manufacturers decided that TSC -- cgit v0.10.2 From ad8d8f3bc61ec712dd141e1029ae68c47fadc4a7 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Mon, 7 Jan 2008 11:05:28 -0200 Subject: lguest: per-vcpu lguest timers Here, I introduce per-vcpu timers. With this, we can have local expiries, needed for accounting time in smp guests Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 7827671..6f8c70a 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -80,7 +80,7 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) guest_set_pmd(lg, args->arg1, args->arg2); break; case LHCALL_SET_CLOCKEVENT: - guest_set_clockevent(lg, args->arg1); + guest_set_clockevent(cpu, args->arg1); break; case LHCALL_TS: /* This sets the TS flag, as we saw used in run_guest(). */ diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 2b66f79..22c692a 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -470,13 +470,13 @@ void copy_traps(const struct lguest *lg, struct desc_struct *idt, * infrastructure to set a callback at that time. * * 0 means "turn off the clock". */ -void guest_set_clockevent(struct lguest *lg, unsigned long delta) +void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta) { ktime_t expires; if (unlikely(delta == 0)) { /* Clock event device is shutting down. */ - hrtimer_cancel(&lg->hrt); + hrtimer_cancel(&cpu->hrt); return; } @@ -484,25 +484,25 @@ void guest_set_clockevent(struct lguest *lg, unsigned long delta) * all the time between now and the timer interrupt it asked for. This * is almost always the right thing to do. */ expires = ktime_add_ns(ktime_get_real(), delta); - hrtimer_start(&lg->hrt, expires, HRTIMER_MODE_ABS); + hrtimer_start(&cpu->hrt, expires, HRTIMER_MODE_ABS); } /* This is the function called when the Guest's timer expires. */ static enum hrtimer_restart clockdev_fn(struct hrtimer *timer) { - struct lguest *lg = container_of(timer, struct lguest, hrt); + struct lg_cpu *cpu = container_of(timer, struct lg_cpu, hrt); /* Remember the first interrupt is the timer interrupt. */ - set_bit(0, lg->irqs_pending); + set_bit(0, cpu->lg->irqs_pending); /* If the Guest is actually stopped, we need to wake it up. */ - if (lg->halted) - wake_up_process(lg->tsk); + if (cpu->lg->halted) + wake_up_process(cpu->lg->tsk); return HRTIMER_NORESTART; } /* This sets up the timer for this Guest. */ -void init_clockdev(struct lguest *lg) +void init_clockdev(struct lg_cpu *cpu) { - hrtimer_init(&lg->hrt, CLOCK_REALTIME, HRTIMER_MODE_ABS); - lg->hrt.function = clockdev_fn; + hrtimer_init(&cpu->hrt, CLOCK_REALTIME, HRTIMER_MODE_ABS); + cpu->hrt.function = clockdev_fn; } diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index a4ebd41..7da7b3f 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -47,6 +47,9 @@ struct lg_cpu { /* If a hypercall was asked for, this points to the arguments. */ struct hcall_args *hcall; u32 next_hcall; + + /* Virtual clock device */ + struct hrtimer hrt; }; /* The private info the thread maintains about the guest. */ @@ -95,9 +98,6 @@ struct lguest struct lguest_arch arch; - /* Virtual clock device */ - struct hrtimer hrt; - /* Pending virtual interrupts */ DECLARE_BITMAP(irqs_pending, LGUEST_IRQS); }; @@ -145,8 +145,8 @@ void setup_default_idt_entries(struct lguest_ro_state *state, const unsigned long *def); void copy_traps(const struct lguest *lg, struct desc_struct *idt, const unsigned long *def); -void guest_set_clockevent(struct lguest *lg, unsigned long delta); -void init_clockdev(struct lguest *lg); +void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta); +void init_clockdev(struct lg_cpu *cpu); bool check_syscall_vector(struct lguest *lg); int init_interrupts(void); void free_interrupts(void); diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 2562082..f231b9b 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -104,6 +104,7 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) cpu->id = id; cpu->lg = container_of((cpu - id), struct lguest, cpus[0]); cpu->lg->nr_cpus++; + init_clockdev(cpu); return 0; } @@ -180,9 +181,6 @@ static int initialize(struct file *file, const unsigned long __user *input) * address. */ lguest_arch_setup_regs(lg, args[3]); - /* The timer for lguest's clock needs initialization. */ - init_clockdev(lg); - /* We keep a pointer to the Launcher task (ie. current task) for when * other Guests want to wake this one (inter-Guest I/O). */ lg->tsk = current; @@ -273,6 +271,7 @@ static ssize_t write(struct file *file, const char __user *in, static int close(struct inode *inode, struct file *file) { struct lguest *lg = file->private_data; + unsigned int i; /* If we never successfully initialized, there's nothing to clean up */ if (!lg) @@ -281,8 +280,9 @@ static int close(struct inode *inode, struct file *file) /* We need the big lock, to protect from inter-guest I/O and other * Launchers initializing guests. */ mutex_lock(&lguest_lock); - /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */ - hrtimer_cancel(&lg->hrt); + for (i = 0; i < lg->nr_cpus; i++) + /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */ + hrtimer_cancel(&lg->cpus[i].hrt); /* Free up the shadow page tables for the Guest. */ free_guest_pagetable(lg); /* Now all the memory cleanups are done, it's safe to release the -- cgit v0.10.2 From 177e449dc5bd4cf8dc48d66abee61ddf34b126b9 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Mon, 7 Jan 2008 11:05:29 -0200 Subject: lguest: per-vcpu interrupt processing. This patch adapts interrupt processing for using the vcpu struct. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 0ea67cb..d8e1ac3 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -203,7 +203,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) /* Check if there are any interrupts which can be delivered * now: if so, this sets up the hander to be executed when we * next run the Guest. */ - maybe_do_interrupt(lg); + maybe_do_interrupt(cpu); /* All long-lived kernel loops need to check with this horrible * thing called the freezer. If the Host is trying to suspend, diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 22c692a..8f59232 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -60,11 +60,12 @@ static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val) * We set up the stack just like the CPU does for a real interrupt, so it's * identical for the Guest (and the standard "iret" instruction will undo * it). */ -static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) +static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err) { unsigned long gstack, origstack; u32 eflags, ss, irq_enable; unsigned long virtstack; + struct lguest *lg = cpu->lg; /* There are two cases for interrupts: one where the Guest is already * in the kernel, and a more complex one where the Guest is in @@ -129,9 +130,10 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err) * * maybe_do_interrupt() gets called before every entry to the Guest, to see if * we should divert the Guest to running an interrupt handler. */ -void maybe_do_interrupt(struct lguest *lg) +void maybe_do_interrupt(struct lg_cpu *cpu) { unsigned int irq; + struct lguest *lg = cpu->lg; DECLARE_BITMAP(blk, LGUEST_IRQS); struct desc_struct *idt; @@ -145,7 +147,7 @@ void maybe_do_interrupt(struct lguest *lg) sizeof(blk))) return; - bitmap_andnot(blk, lg->irqs_pending, blk, LGUEST_IRQS); + bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS); /* Find the first interrupt. */ irq = find_first_bit(blk, LGUEST_IRQS); @@ -180,11 +182,11 @@ void maybe_do_interrupt(struct lguest *lg) /* If they don't have a handler (yet?), we just ignore it */ if (idt_present(idt->a, idt->b)) { /* OK, mark it no longer pending and deliver it. */ - clear_bit(irq, lg->irqs_pending); + clear_bit(irq, cpu->irqs_pending); /* set_guest_interrupt() takes the interrupt descriptor and a * flag to say whether this interrupt pushes an error code onto * the stack as well: virtual interrupts never do. */ - set_guest_interrupt(lg, idt->a, idt->b, 0); + set_guest_interrupt(cpu, idt->a, idt->b, 0); } /* Every time we deliver an interrupt, we update the timestamp in the @@ -245,19 +247,19 @@ static int has_err(unsigned int trap) } /* deliver_trap() returns true if it could deliver the trap. */ -int deliver_trap(struct lguest *lg, unsigned int num) +int deliver_trap(struct lg_cpu *cpu, unsigned int num) { /* Trap numbers are always 8 bit, but we set an impossible trap number * for traps inside the Switcher, so check that here. */ - if (num >= ARRAY_SIZE(lg->arch.idt)) + if (num >= ARRAY_SIZE(cpu->lg->arch.idt)) return 0; /* Early on the Guest hasn't set the IDT entries (or maybe it put a * bogus one in): if we fail here, the Guest will be killed. */ - if (!idt_present(lg->arch.idt[num].a, lg->arch.idt[num].b)) + if (!idt_present(cpu->lg->arch.idt[num].a, cpu->lg->arch.idt[num].b)) return 0; - set_guest_interrupt(lg, lg->arch.idt[num].a, lg->arch.idt[num].b, - has_err(num)); + set_guest_interrupt(cpu, cpu->lg->arch.idt[num].a, + cpu->lg->arch.idt[num].b, has_err(num)); return 1; } @@ -493,7 +495,7 @@ static enum hrtimer_restart clockdev_fn(struct hrtimer *timer) struct lg_cpu *cpu = container_of(timer, struct lg_cpu, hrt); /* Remember the first interrupt is the timer interrupt. */ - set_bit(0, cpu->lg->irqs_pending); + set_bit(0, cpu->irqs_pending); /* If the Guest is actually stopped, we need to wake it up. */ if (cpu->lg->halted) wake_up_process(cpu->lg->tsk); diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 7da7b3f..29e03d58 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -50,6 +50,9 @@ struct lg_cpu { /* Virtual clock device */ struct hrtimer hrt; + + /* Pending virtual interrupts */ + DECLARE_BITMAP(irqs_pending, LGUEST_IRQS); }; /* The private info the thread maintains about the guest. */ @@ -97,9 +100,6 @@ struct lguest const char *dead; struct lguest_arch arch; - - /* Pending virtual interrupts */ - DECLARE_BITMAP(irqs_pending, LGUEST_IRQS); }; extern struct mutex lguest_lock; @@ -136,8 +136,8 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user); #define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT) /* interrupts_and_traps.c: */ -void maybe_do_interrupt(struct lguest *lg); -int deliver_trap(struct lguest *lg, unsigned int num); +void maybe_do_interrupt(struct lg_cpu *cpu); +int deliver_trap(struct lg_cpu *cpu, unsigned int num); void load_guest_idt_entry(struct lguest *lg, unsigned int i, u32 low, u32 hi); void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages); void pin_stack_pages(struct lguest *lg); diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index f231b9b..605db5c 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -36,7 +36,7 @@ static int break_guest_out(struct lguest *lg, const unsigned long __user *input) /*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt * number to /dev/lguest. */ -static int user_send_irq(struct lguest *lg, const unsigned long __user *input) +static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) { unsigned long irq; @@ -46,7 +46,7 @@ static int user_send_irq(struct lguest *lg, const unsigned long __user *input) return -EINVAL; /* Next time the Guest runs, the core code will see if it can deliver * this interrupt. */ - set_bit(irq, lg->irqs_pending); + set_bit(irq, cpu->irqs_pending); return 0; } @@ -225,7 +225,7 @@ static ssize_t write(struct file *file, const char __user *in, struct lguest *lg = file->private_data; const unsigned long __user *input = (const unsigned long __user *)in; unsigned long req; - struct lg_cpu *cpu; + struct lg_cpu *uninitialized_var(cpu); unsigned int cpu_id = *off; if (get_user(req, input) != 0) @@ -253,7 +253,7 @@ static ssize_t write(struct file *file, const char __user *in, case LHREQ_INITIALIZE: return initialize(file, input); case LHREQ_IRQ: - return user_send_irq(lg, input); + return user_send_irq(cpu, input); case LHREQ_BREAK: return break_guest_out(lg, input); default: diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 5962160..66f48fc 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -342,7 +342,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) } /* We didn't handle the trap, so it needs to go to the Guest. */ - if (!deliver_trap(lg, lg->regs->trapnum)) + if (!deliver_trap(cpu, lg->regs->trapnum)) /* If the Guest doesn't have a handler (either it hasn't * registered any yet, or it's one of the faults we don't let * it handle), it dies with a cryptic error message. */ -- cgit v0.10.2 From 0c78441cf4dd66f66e23dc085f0cc1e3e8669b96 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Mon, 7 Jan 2008 11:05:30 -0200 Subject: lguest: map_switcher_in_guest() per-vcpu The switcher needs to be mapped per-vcpu, because different vcpus will potentially have different page tables (they don't have to, because threads will share the same). So our first step is the make the function receive a vcpu struct Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 29e03d58..072d0d4 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -168,7 +168,7 @@ void guest_pagetable_clear_all(struct lguest *lg); void guest_pagetable_flush_user(struct lguest *lg); void guest_set_pte(struct lguest *lg, unsigned long gpgdir, unsigned long vaddr, pte_t val); -void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages); +void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages); int demand_page(struct lguest *info, unsigned long cr2, int errcode); void pin_page(struct lguest *lg, unsigned long vaddr); unsigned long guest_pa(struct lguest *lg, unsigned long vaddr); diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index fffabb3..17d3329 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -634,8 +634,9 @@ void free_guest_pagetable(struct lguest *lg) * Guest (and not the pages for other CPUs). We have the appropriate PTE pages * for each CPU already set up, we just need to hook them in now we know which * Guest is about to run on this CPU. */ -void map_switcher_in_guest(struct lguest *lg, struct lguest_pages *pages) +void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) { + struct lguest *lg = cpu->lg; pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); pgd_t switcher_pgd; pte_t regs_pte; diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 66f48fc..d35f629 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -91,7 +91,7 @@ static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages) pages->state.host_cr3 = __pa(current->mm->pgd); /* Set up the Guest's page tables to see this CPU's pages (and no * other CPU's pages). */ - map_switcher_in_guest(lg, pages); + map_switcher_in_guest(cpu, pages); /* Set up the two "TSS" members which tell the CPU what stack to use * for traps which do directly into the Guest (ie. traps at privilege * level 1). */ -- cgit v0.10.2 From a3863f68b0d7fe2073c0f4efe534ec87a685c4fa Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Mon, 7 Jan 2008 11:05:31 -0200 Subject: lguest: make emulate_insn receive a vcpu struct. emulate_insn() needs to know about current eip, which will be, in the future, a per-vcpu thing. So in this patch, the function prototype is modified to receive a vcpu struct Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index d35f629..ae46c6b 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -218,8 +218,9 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) * When the Guest uses one of these instructions, we get a trap (General * Protection Fault) and come here. We see if it's one of those troublesome * instructions and skip over it. We return true if we did. */ -static int emulate_insn(struct lguest *lg) +static int emulate_insn(struct lg_cpu *cpu) { + struct lguest *lg = cpu->lg; u8 insn; unsigned int insnlen = 0, in = 0, shift = 0; /* The eip contains the *virtual* address of the Guest's instruction: @@ -292,7 +293,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) * instructions which we need to emulate. If so, we just go * back into the Guest after we've done it. */ if (lg->regs->errcode == 0) { - if (emulate_insn(lg)) + if (emulate_insn(cpu)) return; } break; -- cgit v0.10.2 From a53a35a8b485b9c16b73e5177bddaa4321971199 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Mon, 7 Jan 2008 11:05:32 -0200 Subject: lguest: make registers per-vcpu This is the most obvious per-vcpu field: registers. So this patch moves it from struct lguest to struct vcpu, and patch the places in which they are used, accordingly Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 8f59232..468faf8 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -70,7 +70,7 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err) /* There are two cases for interrupts: one where the Guest is already * in the kernel, and a more complex one where the Guest is in * userspace. We check the privilege level to find out. */ - if ((lg->regs->ss&0x3) != GUEST_PL) { + if ((cpu->regs->ss&0x3) != GUEST_PL) { /* The Guest told us their kernel stack with the SET_STACK * hypercall: both the virtual address and the segment */ virtstack = lg->esp1; @@ -81,12 +81,12 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err) * stack: when the Guest does an "iret" back from the interrupt * handler the CPU will notice they're dropping privilege * levels and expect these here. */ - push_guest_stack(lg, &gstack, lg->regs->ss); - push_guest_stack(lg, &gstack, lg->regs->esp); + push_guest_stack(lg, &gstack, cpu->regs->ss); + push_guest_stack(lg, &gstack, cpu->regs->esp); } else { /* We're staying on the same Guest (kernel) stack. */ - virtstack = lg->regs->esp; - ss = lg->regs->ss; + virtstack = cpu->regs->esp; + ss = cpu->regs->ss; origstack = gstack = guest_pa(lg, virtstack); } @@ -95,7 +95,7 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err) * the "Interrupt Flag" bit is always set. We copy that bit from the * Guest's "irq_enabled" field into the eflags word: we saw the Guest * copy it back in "lguest_iret". */ - eflags = lg->regs->eflags; + eflags = cpu->regs->eflags; if (get_user(irq_enable, &lg->lguest_data->irq_enabled) == 0 && !(irq_enable & X86_EFLAGS_IF)) eflags &= ~X86_EFLAGS_IF; @@ -104,19 +104,19 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err) * "eflags" word, the old code segment, and the old instruction * pointer. */ push_guest_stack(lg, &gstack, eflags); - push_guest_stack(lg, &gstack, lg->regs->cs); - push_guest_stack(lg, &gstack, lg->regs->eip); + push_guest_stack(lg, &gstack, cpu->regs->cs); + push_guest_stack(lg, &gstack, cpu->regs->eip); /* For the six traps which supply an error code, we push that, too. */ if (has_err) - push_guest_stack(lg, &gstack, lg->regs->errcode); + push_guest_stack(lg, &gstack, cpu->regs->errcode); /* Now we've pushed all the old state, we change the stack, the code * segment and the address to execute. */ - lg->regs->ss = ss; - lg->regs->esp = virtstack + (gstack - origstack); - lg->regs->cs = (__KERNEL_CS|GUEST_PL); - lg->regs->eip = idt_address(lo, hi); + cpu->regs->ss = ss; + cpu->regs->esp = virtstack + (gstack - origstack); + cpu->regs->cs = (__KERNEL_CS|GUEST_PL); + cpu->regs->eip = idt_address(lo, hi); /* There are two kinds of interrupt handlers: 0xE is an "interrupt * gate" which expects interrupts to be disabled on entry. */ @@ -157,7 +157,7 @@ void maybe_do_interrupt(struct lg_cpu *cpu) /* They may be in the middle of an iret, where they asked us never to * deliver interrupts. */ - if (lg->regs->eip >= lg->noirq_start && lg->regs->eip < lg->noirq_end) + if (cpu->regs->eip >= lg->noirq_start && cpu->regs->eip < lg->noirq_end) return; /* If they're halted, interrupts restart them. */ diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 072d0d4..35b3312 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -44,6 +44,10 @@ struct lg_cpu { unsigned int id; struct lguest *lg; + /* At end of a page shared mapped over lguest_pages in guest. */ + unsigned long regs_page; + struct lguest_regs *regs; + /* If a hypercall was asked for, this points to the arguments. */ struct hcall_args *hcall; u32 next_hcall; @@ -58,9 +62,6 @@ struct lg_cpu { /* The private info the thread maintains about the guest. */ struct lguest { - /* At end of a page shared mapped over lguest_pages in guest. */ - unsigned long regs_page; - struct lguest_regs *regs; struct lguest_data __user *lguest_data; struct task_struct *tsk; struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */ @@ -181,7 +182,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu); void lguest_arch_handle_trap(struct lg_cpu *cpu); int lguest_arch_init_hypercalls(struct lg_cpu *cpu); int lguest_arch_do_hcall(struct lg_cpu *cpu, struct hcall_args *args); -void lguest_arch_setup_regs(struct lguest *lg, unsigned long start); +void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start); /* /switcher.S: */ extern char start_switcher_text[], end_switcher_text[], switch_to_guest[]; diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 605db5c..d21d95b 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -106,6 +106,19 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) cpu->lg->nr_cpus++; init_clockdev(cpu); + /* We need a complete page for the Guest registers: they are accessible + * to the Guest and we can only grant it access to whole pages. */ + cpu->regs_page = get_zeroed_page(GFP_KERNEL); + if (!cpu->regs_page) + return -ENOMEM; + + /* We actually put the registers at the bottom of the page. */ + cpu->regs = (void *)cpu->regs_page + PAGE_SIZE - sizeof(*cpu->regs); + + /* Now we initialize the Guest's registers, handing it the start + * address. */ + lguest_arch_setup_regs(cpu, start_ip); + return 0; } @@ -160,16 +173,6 @@ static int initialize(struct file *file, const unsigned long __user *input) if (err) goto release_guest; - /* We need a complete page for the Guest registers: they are accessible - * to the Guest and we can only grant it access to whole pages. */ - lg->regs_page = get_zeroed_page(GFP_KERNEL); - if (!lg->regs_page) { - err = -ENOMEM; - goto release_guest; - } - /* We actually put the registers at the bottom of the page. */ - lg->regs = (void *)lg->regs_page + PAGE_SIZE - sizeof(*lg->regs); - /* Initialize the Guest's shadow page tables, using the toplevel * address the Launcher gave us. This allocates memory, so can * fail. */ @@ -177,10 +180,6 @@ static int initialize(struct file *file, const unsigned long __user *input) if (err) goto free_regs; - /* Now we initialize the Guest's registers, handing it the start - * address. */ - lguest_arch_setup_regs(lg, args[3]); - /* We keep a pointer to the Launcher task (ie. current task) for when * other Guests want to wake this one (inter-Guest I/O). */ lg->tsk = current; @@ -205,7 +204,8 @@ static int initialize(struct file *file, const unsigned long __user *input) return sizeof(args); free_regs: - free_page(lg->regs_page); + /* FIXME: This should be in free_vcpu */ + free_page(lg->cpus[0].regs_page); release_guest: kfree(lg); unlock: @@ -280,9 +280,12 @@ static int close(struct inode *inode, struct file *file) /* We need the big lock, to protect from inter-guest I/O and other * Launchers initializing guests. */ mutex_lock(&lguest_lock); - for (i = 0; i < lg->nr_cpus; i++) + for (i = 0; i < lg->nr_cpus; i++) { /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */ hrtimer_cancel(&lg->cpus[i].hrt); + /* We can free up the register page we allocated. */ + free_page(lg->cpus[i].regs_page); + } /* Free up the shadow page tables for the Guest. */ free_guest_pagetable(lg); /* Now all the memory cleanups are done, it's safe to release the @@ -292,8 +295,6 @@ static int close(struct inode *inode, struct file *file) * kmalloc()ed string, either of which is ok to hand to kfree(). */ if (!IS_ERR(lg->dead)) kfree(lg->dead); - /* We can free up the register page we allocated. */ - free_page(lg->regs_page); /* We clear the entire structure, which also marks it as free for the * next user. */ memset(lg, 0, sizeof(*lg)); diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 17d3329..f19add4 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -640,6 +640,7 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); pgd_t switcher_pgd; pte_t regs_pte; + unsigned long pfn; /* Make the last PGD entry for this Guest point to the Switcher's PTE * page for this CPU (with appropriate flags). */ @@ -654,7 +655,8 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) * CPU's "struct lguest_pages": if we make sure the Guest's register * page is already mapped there, we don't have to copy them out * again. */ - regs_pte = pfn_pte (__pa(lg->regs_page) >> PAGE_SHIFT, __pgprot(_PAGE_KERNEL)); + pfn = __pa(cpu->regs_page) >> PAGE_SHIFT; + regs_pte = pfn_pte(pfn, __pgprot(_PAGE_KERNEL)); switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte; } /*:*/ diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index ae46c6b..d96a93d9 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -127,7 +127,7 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) /* Set the trap number to 256 (impossible value). If we fault while * switching to the Guest (bad segment registers or bug), this will * cause us to abort the Guest. */ - lg->regs->trapnum = 256; + cpu->regs->trapnum = 256; /* Now: we push the "eflags" register on the stack, then do an "lcall". * This is how we change from using the kernel code segment to using @@ -195,11 +195,11 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) * bad virtual address. We have to grab this now, because once we * re-enable interrupts an interrupt could fault and thus overwrite * cr2, or we could even move off to a different CPU. */ - if (lg->regs->trapnum == 14) + if (cpu->regs->trapnum == 14) lg->arch.last_pagefault = read_cr2(); /* Similarly, if we took a trap because the Guest used the FPU, * we have to restore the FPU it expects to see. */ - else if (lg->regs->trapnum == 7) + else if (cpu->regs->trapnum == 7) math_state_restore(); /* Restore SYSENTER if it's supposed to be on. */ @@ -225,12 +225,12 @@ static int emulate_insn(struct lg_cpu *cpu) unsigned int insnlen = 0, in = 0, shift = 0; /* The eip contains the *virtual* address of the Guest's instruction: * guest_pa just subtracts the Guest's page_offset. */ - unsigned long physaddr = guest_pa(lg, lg->regs->eip); + unsigned long physaddr = guest_pa(lg, cpu->regs->eip); /* This must be the Guest kernel trying to do something, not userspace! * The bottom two bits of the CS segment register are the privilege * level. */ - if ((lg->regs->cs & 3) != GUEST_PL) + if ((cpu->regs->cs & 3) != GUEST_PL) return 0; /* Decoding x86 instructions is icky. */ @@ -273,12 +273,12 @@ static int emulate_insn(struct lg_cpu *cpu) if (in) { /* Lower bit tells is whether it's a 16 or 32 bit access */ if (insn & 0x1) - lg->regs->eax = 0xFFFFFFFF; + cpu->regs->eax = 0xFFFFFFFF; else - lg->regs->eax |= (0xFFFF << shift); + cpu->regs->eax |= (0xFFFF << shift); } /* Finally, we've "done" the instruction, so move past it. */ - lg->regs->eip += insnlen; + cpu->regs->eip += insnlen; /* Success! */ return 1; } @@ -287,12 +287,12 @@ static int emulate_insn(struct lg_cpu *cpu) void lguest_arch_handle_trap(struct lg_cpu *cpu) { struct lguest *lg = cpu->lg; - switch (lg->regs->trapnum) { + switch (cpu->regs->trapnum) { case 13: /* We've intercepted a General Protection Fault. */ /* Check if this was one of those annoying IN or OUT * instructions which we need to emulate. If so, we just go * back into the Guest after we've done it. */ - if (lg->regs->errcode == 0) { + if (cpu->regs->errcode == 0) { if (emulate_insn(cpu)) return; } @@ -307,7 +307,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) * * The errcode tells whether this was a read or a write, and * whether kernel or userspace code. */ - if (demand_page(lg, lg->arch.last_pagefault, lg->regs->errcode)) + if (demand_page(lg, lg->arch.last_pagefault, cpu->regs->errcode)) return; /* OK, it's really not there (or not OK): the Guest needs to @@ -338,19 +338,19 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) case LGUEST_TRAP_ENTRY: /* Our 'struct hcall_args' maps directly over our regs: we set * up the pointer now to indicate a hypercall is pending. */ - cpu->hcall = (struct hcall_args *)lg->regs; + cpu->hcall = (struct hcall_args *)cpu->regs; return; } /* We didn't handle the trap, so it needs to go to the Guest. */ - if (!deliver_trap(cpu, lg->regs->trapnum)) + if (!deliver_trap(cpu, cpu->regs->trapnum)) /* If the Guest doesn't have a handler (either it hasn't * registered any yet, or it's one of the faults we don't let * it handle), it dies with a cryptic error message. */ kill_guest(lg, "unhandled trap %li at %#lx (%#lx)", - lg->regs->trapnum, lg->regs->eip, - lg->regs->trapnum == 14 ? lg->arch.last_pagefault - : lg->regs->errcode); + cpu->regs->trapnum, cpu->regs->eip, + cpu->regs->trapnum == 14 ? lg->arch.last_pagefault + : cpu->regs->errcode); } /* Now we can look at each of the routines this calls, in increasing order of @@ -557,9 +557,9 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu) * * Most of the Guest's registers are left alone: we used get_zeroed_page() to * allocate the structure, so they will be 0. */ -void lguest_arch_setup_regs(struct lguest *lg, unsigned long start) +void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start) { - struct lguest_regs *regs = lg->regs; + struct lguest_regs *regs = cpu->regs; /* There are four "segment" registers which the Guest needs to boot: * The "code segment" register (cs) refers to the kernel code segment @@ -586,5 +586,5 @@ void lguest_arch_setup_regs(struct lguest *lg, unsigned long start) /* There are a couple of GDT entries the Guest expects when first * booting. */ - setup_guest_gdt(lg); + setup_guest_gdt(cpu->lg); } -- cgit v0.10.2 From fc708b3e407dfd2e12ba9a6cf35bd0bffad1796d Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Mon, 7 Jan 2008 11:05:33 -0200 Subject: lguest: replace lguest_arch with lg_cpu_arch. The fields found in lguest_arch are not really per-guest, but per-cpu (gdt, idt, etc). So this patch turns lguest_arch into lg_cpu_arch. It makes sense to have a per-guest per-arch struct, but this can be addressed later, when the need arrives. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 468faf8..306b93c 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -178,7 +178,7 @@ void maybe_do_interrupt(struct lg_cpu *cpu) /* Look at the IDT entry the Guest gave us for this interrupt. The * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip * over them. */ - idt = &lg->arch.idt[FIRST_EXTERNAL_VECTOR+irq]; + idt = &cpu->arch.idt[FIRST_EXTERNAL_VECTOR+irq]; /* If they don't have a handler (yet?), we just ignore it */ if (idt_present(idt->a, idt->b)) { /* OK, mark it no longer pending and deliver it. */ @@ -251,15 +251,15 @@ int deliver_trap(struct lg_cpu *cpu, unsigned int num) { /* Trap numbers are always 8 bit, but we set an impossible trap number * for traps inside the Switcher, so check that here. */ - if (num >= ARRAY_SIZE(cpu->lg->arch.idt)) + if (num >= ARRAY_SIZE(cpu->arch.idt)) return 0; /* Early on the Guest hasn't set the IDT entries (or maybe it put a * bogus one in): if we fail here, the Guest will be killed. */ - if (!idt_present(cpu->lg->arch.idt[num].a, cpu->lg->arch.idt[num].b)) + if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b)) return 0; - set_guest_interrupt(cpu, cpu->lg->arch.idt[num].a, - cpu->lg->arch.idt[num].b, has_err(num)); + set_guest_interrupt(cpu, cpu->arch.idt[num].a, + cpu->arch.idt[num].b, has_err(num)); return 1; } @@ -385,7 +385,7 @@ static void set_trap(struct lguest *lg, struct desc_struct *trap, * * We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. */ -void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi) +void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) { /* Guest never handles: NMI, doublefault, spurious interrupt or * hypercall. We ignore when it tries to set them. */ @@ -394,13 +394,13 @@ void load_guest_idt_entry(struct lguest *lg, unsigned int num, u32 lo, u32 hi) /* Mark the IDT as changed: next time the Guest runs we'll know we have * to copy this again. */ - lg->changed |= CHANGED_IDT; + cpu->lg->changed |= CHANGED_IDT; /* Check that the Guest doesn't try to step outside the bounds. */ - if (num >= ARRAY_SIZE(lg->arch.idt)) - kill_guest(lg, "Setting idt entry %u", num); + if (num >= ARRAY_SIZE(cpu->arch.idt)) + kill_guest(cpu->lg, "Setting idt entry %u", num); else - set_trap(lg, &lg->arch.idt[num], num, lo, hi); + set_trap(cpu->lg, &cpu->arch.idt[num], num, lo, hi); } /* The default entry for each interrupt points into the Switcher routines which @@ -436,14 +436,14 @@ void setup_default_idt_entries(struct lguest_ro_state *state, /*H:240 We don't use the IDT entries in the "struct lguest" directly, instead * we copy them into the IDT which we've set up for Guests on this CPU, just * before we run the Guest. This routine does that copy. */ -void copy_traps(const struct lguest *lg, struct desc_struct *idt, +void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, const unsigned long *def) { unsigned int i; /* We can simply copy the direct traps, otherwise we use the default * ones in the Switcher: they will return to the Host. */ - for (i = 0; i < ARRAY_SIZE(lg->arch.idt); i++) { + for (i = 0; i < ARRAY_SIZE(cpu->arch.idt); i++) { /* If no Guest can ever override this trap, leave it alone. */ if (!direct_trap(i)) continue; @@ -452,8 +452,8 @@ void copy_traps(const struct lguest *lg, struct desc_struct *idt, * Interrupt gates (type 14) disable interrupts as they are * entered, which we never let the Guest do. Not present * entries (type 0x0) also can't go direct, of course. */ - if (idt_type(lg->arch.idt[i].a, lg->arch.idt[i].b) == 0xF) - idt[i] = lg->arch.idt[i]; + if (idt_type(cpu->arch.idt[i].a, cpu->arch.idt[i].b) == 0xF) + idt[i] = cpu->arch.idt[i]; else /* Reset it to the default. */ default_idt_entry(&idt[i], i, def[i]); diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 35b3312..d08b853 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -57,6 +57,8 @@ struct lg_cpu { /* Pending virtual interrupts */ DECLARE_BITMAP(irqs_pending, LGUEST_IRQS); + + struct lg_cpu_arch arch; }; /* The private info the thread maintains about the guest. */ @@ -99,8 +101,6 @@ struct lguest /* Dead? */ const char *dead; - - struct lguest_arch arch; }; extern struct mutex lguest_lock; @@ -139,12 +139,13 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user); /* interrupts_and_traps.c: */ void maybe_do_interrupt(struct lg_cpu *cpu); int deliver_trap(struct lg_cpu *cpu, unsigned int num); -void load_guest_idt_entry(struct lguest *lg, unsigned int i, u32 low, u32 hi); +void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i, + u32 low, u32 hi); void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages); void pin_stack_pages(struct lguest *lg); void setup_default_idt_entries(struct lguest_ro_state *state, const unsigned long *def); -void copy_traps(const struct lguest *lg, struct desc_struct *idt, +void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, const unsigned long *def); void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta); void init_clockdev(struct lg_cpu *cpu); @@ -154,11 +155,11 @@ void free_interrupts(void); /* segments.c: */ void setup_default_gdt_entries(struct lguest_ro_state *state); -void setup_guest_gdt(struct lguest *lg); -void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num); -void guest_load_tls(struct lguest *lg, unsigned long tls_array); -void copy_gdt(const struct lguest *lg, struct desc_struct *gdt); -void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt); +void setup_guest_gdt(struct lg_cpu *cpu); +void load_guest_gdt(struct lg_cpu *cpu, unsigned long table, u32 num); +void guest_load_tls(struct lg_cpu *cpu, unsigned long tls_array); +void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt); +void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt); /* page_tables.c: */ int init_guest_pagetable(struct lguest *lg, unsigned long pgtable); diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c index 9e189cb..0213845 100644 --- a/drivers/lguest/segments.c +++ b/drivers/lguest/segments.c @@ -58,7 +58,7 @@ static int ignored_gdt(unsigned int num) * Protection Fault in the Switcher when it restores a Guest segment register * which tries to use that entry. Then we kill the Guest for causing such a * mess: the message will be "unhandled trap 256". */ -static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end) +static void fixup_gdt_table(struct lg_cpu *cpu, unsigned start, unsigned end) { unsigned int i; @@ -71,14 +71,14 @@ static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end) /* Segment descriptors contain a privilege level: the Guest is * sometimes careless and leaves this as 0, even though it's * running at privilege level 1. If so, we fix it here. */ - if ((lg->arch.gdt[i].b & 0x00006000) == 0) - lg->arch.gdt[i].b |= (GUEST_PL << 13); + if ((cpu->arch.gdt[i].b & 0x00006000) == 0) + cpu->arch.gdt[i].b |= (GUEST_PL << 13); /* Each descriptor has an "accessed" bit. If we don't set it * now, the CPU will try to set it when the Guest first loads * that entry into a segment register. But the GDT isn't * writable by the Guest, so bad things can happen. */ - lg->arch.gdt[i].b |= 0x00000100; + cpu->arch.gdt[i].b |= 0x00000100; } } @@ -109,31 +109,31 @@ void setup_default_gdt_entries(struct lguest_ro_state *state) /* This routine sets up the initial Guest GDT for booting. All entries start * as 0 (unusable). */ -void setup_guest_gdt(struct lguest *lg) +void setup_guest_gdt(struct lg_cpu *cpu) { /* Start with full 0-4G segments... */ - lg->arch.gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT; - lg->arch.gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT; + cpu->arch.gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT; + cpu->arch.gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT; /* ...except the Guest is allowed to use them, so set the privilege * level appropriately in the flags. */ - lg->arch.gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13); - lg->arch.gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13); + cpu->arch.gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13); + cpu->arch.gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13); } /*H:650 An optimization of copy_gdt(), for just the three "thead-local storage" * entries. */ -void copy_gdt_tls(const struct lguest *lg, struct desc_struct *gdt) +void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt) { unsigned int i; for (i = GDT_ENTRY_TLS_MIN; i <= GDT_ENTRY_TLS_MAX; i++) - gdt[i] = lg->arch.gdt[i]; + gdt[i] = cpu->arch.gdt[i]; } /*H:640 When the Guest is run on a different CPU, or the GDT entries have * changed, copy_gdt() is called to copy the Guest's GDT entries across to this * CPU's GDT. */ -void copy_gdt(const struct lguest *lg, struct desc_struct *gdt) +void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt) { unsigned int i; @@ -141,21 +141,22 @@ void copy_gdt(const struct lguest *lg, struct desc_struct *gdt) * replaced. See ignored_gdt() above. */ for (i = 0; i < GDT_ENTRIES; i++) if (!ignored_gdt(i)) - gdt[i] = lg->arch.gdt[i]; + gdt[i] = cpu->arch.gdt[i]; } /*H:620 This is where the Guest asks us to load a new GDT (LHCALL_LOAD_GDT). * We copy it from the Guest and tweak the entries. */ -void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num) +void load_guest_gdt(struct lg_cpu *cpu, unsigned long table, u32 num) { + struct lguest *lg = cpu->lg; /* We assume the Guest has the same number of GDT entries as the * Host, otherwise we'd have to dynamically allocate the Guest GDT. */ - if (num > ARRAY_SIZE(lg->arch.gdt)) + if (num > ARRAY_SIZE(cpu->arch.gdt)) kill_guest(lg, "too many gdt entries %i", num); /* We read the whole thing in, then fix it up. */ - __lgread(lg, lg->arch.gdt, table, num * sizeof(lg->arch.gdt[0])); - fixup_gdt_table(lg, 0, ARRAY_SIZE(lg->arch.gdt)); + __lgread(lg, cpu->arch.gdt, table, num * sizeof(cpu->arch.gdt[0])); + fixup_gdt_table(cpu, 0, ARRAY_SIZE(cpu->arch.gdt)); /* Mark that the GDT changed so the core knows it has to copy it again, * even if the Guest is run on the same CPU. */ lg->changed |= CHANGED_GDT; @@ -165,12 +166,13 @@ void load_guest_gdt(struct lguest *lg, unsigned long table, u32 num) * Remember that this happens on every context switch, so it's worth * optimizing. But wouldn't it be neater to have a single hypercall to cover * both cases? */ -void guest_load_tls(struct lguest *lg, unsigned long gtls) +void guest_load_tls(struct lg_cpu *cpu, unsigned long gtls) { - struct desc_struct *tls = &lg->arch.gdt[GDT_ENTRY_TLS_MIN]; + struct desc_struct *tls = &cpu->arch.gdt[GDT_ENTRY_TLS_MIN]; + struct lguest *lg = cpu->lg; __lgread(lg, tls, gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES); - fixup_gdt_table(lg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1); + fixup_gdt_table(cpu, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1); /* Note that just the TLS entries have changed. */ lg->changed |= CHANGED_GDT_TLS; } diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index d96a93d9..e989b83 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -100,14 +100,14 @@ static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages) /* Copy direct-to-Guest trap entries. */ if (lg->changed & CHANGED_IDT) - copy_traps(lg, pages->state.guest_idt, default_idt_entries); + copy_traps(cpu, pages->state.guest_idt, default_idt_entries); /* Copy all GDT entries which the Guest can change. */ if (lg->changed & CHANGED_GDT) - copy_gdt(lg, pages->state.guest_gdt); + copy_gdt(cpu, pages->state.guest_gdt); /* If only the TLS entries have changed, copy them. */ else if (lg->changed & CHANGED_GDT_TLS) - copy_gdt_tls(lg, pages->state.guest_gdt); + copy_gdt_tls(cpu, pages->state.guest_gdt); /* Mark the Guest as unchanged for next time. */ lg->changed = 0; @@ -196,7 +196,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) * re-enable interrupts an interrupt could fault and thus overwrite * cr2, or we could even move off to a different CPU. */ if (cpu->regs->trapnum == 14) - lg->arch.last_pagefault = read_cr2(); + cpu->arch.last_pagefault = read_cr2(); /* Similarly, if we took a trap because the Guest used the FPU, * we have to restore the FPU it expects to see. */ else if (cpu->regs->trapnum == 7) @@ -307,7 +307,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) * * The errcode tells whether this was a read or a write, and * whether kernel or userspace code. */ - if (demand_page(lg, lg->arch.last_pagefault, cpu->regs->errcode)) + if (demand_page(lg,cpu->arch.last_pagefault,cpu->regs->errcode)) return; /* OK, it's really not there (or not OK): the Guest needs to @@ -318,7 +318,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) * happen before it's done the LHCALL_LGUEST_INIT hypercall, so * lg->lguest_data could be NULL */ if (lg->lguest_data && - put_user(lg->arch.last_pagefault, &lg->lguest_data->cr2)) + put_user(cpu->arch.last_pagefault, &lg->lguest_data->cr2)) kill_guest(lg, "Writing cr2"); break; case 7: /* We've intercepted a Device Not Available fault. */ @@ -349,7 +349,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) * it handle), it dies with a cryptic error message. */ kill_guest(lg, "unhandled trap %li at %#lx (%#lx)", cpu->regs->trapnum, cpu->regs->eip, - cpu->regs->trapnum == 14 ? lg->arch.last_pagefault + cpu->regs->trapnum == 14 ? cpu->arch.last_pagefault : cpu->regs->errcode); } @@ -495,17 +495,15 @@ void __exit lguest_arch_host_fini(void) /*H:122 The i386-specific hypercalls simply farm out to the right functions. */ int lguest_arch_do_hcall(struct lg_cpu *cpu, struct hcall_args *args) { - struct lguest *lg = cpu->lg; - switch (args->arg0) { case LHCALL_LOAD_GDT: - load_guest_gdt(lg, args->arg1, args->arg2); + load_guest_gdt(cpu, args->arg1, args->arg2); break; case LHCALL_LOAD_IDT_ENTRY: - load_guest_idt_entry(lg, args->arg1, args->arg2, args->arg3); + load_guest_idt_entry(cpu, args->arg1, args->arg2, args->arg3); break; case LHCALL_LOAD_TLS: - guest_load_tls(lg, args->arg1); + guest_load_tls(cpu, args->arg1); break; default: /* Bad Guest. Bad! */ @@ -586,5 +584,5 @@ void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start) /* There are a couple of GDT entries the Guest expects when first * booting. */ - setup_guest_gdt(cpu->lg); + setup_guest_gdt(cpu); } diff --git a/include/asm-x86/lguest.h b/include/asm-x86/lguest.h index ccd3384..b9d003b 100644 --- a/include/asm-x86/lguest.h +++ b/include/asm-x86/lguest.h @@ -56,7 +56,7 @@ struct lguest_ro_state struct desc_struct guest_gdt[GDT_ENTRIES]; }; -struct lguest_arch +struct lg_cpu_arch { /* The GDT entries copied into lguest_ro_state when running. */ struct desc_struct gdt[GDT_ENTRIES]; -- cgit v0.10.2 From 66686c2ab08feb721ca4d98285fba64acdf6017f Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Mon, 7 Jan 2008 11:05:34 -0200 Subject: lguest: per-vcpu lguest task management lguest uses tasks to control its running behaviour (like sending breaks, controlling halted state, etc). In a per-vcpu environment, each vcpu will have its own underlying task. So this patch makes the infrastructure for that possible Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index d8e1ac3..66c3d3b 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -197,7 +197,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) return -ERESTARTSYS; /* If Waker set break_out, return to Launcher. */ - if (lg->break_out) + if (cpu->break_out) return -EAGAIN; /* Check if there are any interrupts which can be delivered @@ -217,7 +217,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) /* If the Guest asked to be stopped, we sleep. The Guest's * clock timer or LHCALL_BREAK from the Waker will wake us. */ - if (lg->halted) { + if (cpu->halted) { set_current_state(TASK_INTERRUPTIBLE); schedule(); continue; diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 6f8c70a..83323b1 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -88,7 +88,7 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) break; case LHCALL_HALT: /* Similarly, this sets the halted flag for run_guest(). */ - lg->halted = 1; + cpu->halted = 1; break; case LHCALL_NOTIFY: lg->pending_notify = args->arg1; diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 306b93c..9c1c479 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -161,11 +161,11 @@ void maybe_do_interrupt(struct lg_cpu *cpu) return; /* If they're halted, interrupts restart them. */ - if (lg->halted) { + if (cpu->halted) { /* Re-enable interrupts. */ if (put_user(X86_EFLAGS_IF, &lg->lguest_data->irq_enabled)) kill_guest(lg, "Re-enabling interrupts"); - lg->halted = 0; + cpu->halted = 0; } else { /* Otherwise we check if they have interrupts disabled. */ u32 irq_enabled; @@ -497,8 +497,8 @@ static enum hrtimer_restart clockdev_fn(struct hrtimer *timer) /* Remember the first interrupt is the timer interrupt. */ set_bit(0, cpu->irqs_pending); /* If the Guest is actually stopped, we need to wake it up. */ - if (cpu->lg->halted) - wake_up_process(cpu->lg->tsk); + if (cpu->halted) + wake_up_process(cpu->tsk); return HRTIMER_NORESTART; } diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index d08b853..e7123fa 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -43,6 +43,8 @@ struct lguest; struct lg_cpu { unsigned int id; struct lguest *lg; + struct task_struct *tsk; + struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */ /* At end of a page shared mapped over lguest_pages in guest. */ unsigned long regs_page; @@ -55,6 +57,11 @@ struct lg_cpu { /* Virtual clock device */ struct hrtimer hrt; + /* Do we need to stop what we're doing and return to userspace? */ + int break_out; + wait_queue_head_t break_wq; + int halted; + /* Pending virtual interrupts */ DECLARE_BITMAP(irqs_pending, LGUEST_IRQS); @@ -65,8 +72,6 @@ struct lg_cpu { struct lguest { struct lguest_data __user *lguest_data; - struct task_struct *tsk; - struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */ struct lg_cpu cpus[NR_CPUS]; unsigned int nr_cpus; @@ -76,15 +81,10 @@ struct lguest void __user *mem_base; unsigned long kernel_address; u32 cr2; - int halted; int ts; u32 esp1; u8 ss1; - /* Do we need to stop what we're doing and return to userspace? */ - int break_out; - wait_queue_head_t break_wq; - /* Bitmap of what has changed: see CHANGED_* above. */ int changed; struct lguest_pages *last_pages; diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index d21d95b..980b355 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -13,7 +13,7 @@ * LHREQ_BREAK and the value "1" to /dev/lguest to do this. Once the Launcher * has done whatever needs attention, it writes LHREQ_BREAK and "0" to release * the Waker. */ -static int break_guest_out(struct lguest *lg, const unsigned long __user *input) +static int break_guest_out(struct lg_cpu *cpu, const unsigned long __user*input) { unsigned long on; @@ -22,14 +22,14 @@ static int break_guest_out(struct lguest *lg, const unsigned long __user *input) return -EFAULT; if (on) { - lg->break_out = 1; + cpu->break_out = 1; /* Pop it out of the Guest (may be running on different CPU) */ - wake_up_process(lg->tsk); + wake_up_process(cpu->tsk); /* Wait for them to reset it */ - return wait_event_interruptible(lg->break_wq, !lg->break_out); + return wait_event_interruptible(cpu->break_wq, !cpu->break_out); } else { - lg->break_out = 0; - wake_up(&lg->break_wq); + cpu->break_out = 0; + wake_up(&cpu->break_wq); return 0; } } @@ -69,7 +69,7 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) cpu = &lg->cpus[cpu_id]; /* If you're not the task which owns the Guest, go away. */ - if (current != lg->tsk) + if (current != cpu->tsk) return -EPERM; /* If the guest is already dead, we indicate why */ @@ -119,6 +119,18 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) * address. */ lguest_arch_setup_regs(cpu, start_ip); + /* Initialize the queue for the waker to wait on */ + init_waitqueue_head(&cpu->break_wq); + + /* We keep a pointer to the Launcher task (ie. current task) for when + * other Guests want to wake this one (inter-Guest I/O). */ + cpu->tsk = current; + + /* We need to keep a pointer to the Launcher's memory map, because if + * the Launcher dies we need to clean it up. If we don't keep a + * reference, it is destroyed before close() is called. */ + cpu->mm = get_task_mm(cpu->tsk); + return 0; } @@ -180,17 +192,6 @@ static int initialize(struct file *file, const unsigned long __user *input) if (err) goto free_regs; - /* We keep a pointer to the Launcher task (ie. current task) for when - * other Guests want to wake this one (inter-Guest I/O). */ - lg->tsk = current; - /* We need to keep a pointer to the Launcher's memory map, because if - * the Launcher dies we need to clean it up. If we don't keep a - * reference, it is destroyed before close() is called. */ - lg->mm = get_task_mm(lg->tsk); - - /* Initialize the queue for the waker to wait on */ - init_waitqueue_head(&lg->break_wq); - /* We remember which CPU's pages this Guest used last, for optimization * when the same Guest runs on the same CPU twice. */ lg->last_pages = NULL; @@ -246,7 +247,7 @@ static ssize_t write(struct file *file, const char __user *in, return -ENOENT; /* If you're not the task which owns the Guest, you can only break */ - if (lg && current != lg->tsk && req != LHREQ_BREAK) + if (lg && current != cpu->tsk && req != LHREQ_BREAK) return -EPERM; switch (req) { @@ -255,7 +256,7 @@ static ssize_t write(struct file *file, const char __user *in, case LHREQ_IRQ: return user_send_irq(cpu, input); case LHREQ_BREAK: - return break_guest_out(lg, input); + return break_guest_out(cpu, input); default: return -EINVAL; } @@ -280,17 +281,19 @@ static int close(struct inode *inode, struct file *file) /* We need the big lock, to protect from inter-guest I/O and other * Launchers initializing guests. */ mutex_lock(&lguest_lock); + + /* Free up the shadow page tables for the Guest. */ + free_guest_pagetable(lg); + for (i = 0; i < lg->nr_cpus; i++) { /* Cancels the hrtimer set via LHCALL_SET_CLOCKEVENT. */ hrtimer_cancel(&lg->cpus[i].hrt); /* We can free up the register page we allocated. */ free_page(lg->cpus[i].regs_page); + /* Now all the memory cleanups are done, it's safe to release + * the Launcher's memory management structure. */ + mmput(lg->cpus[i].mm); } - /* Free up the shadow page tables for the Guest. */ - free_guest_pagetable(lg); - /* Now all the memory cleanups are done, it's safe to release the - * Launcher's memory management structure. */ - mmput(lg->mm); /* If lg->dead doesn't contain an error code it will be NULL or a * kmalloc()ed string, either of which is ok to hand to kfree(). */ if (!IS_ERR(lg->dead)) -- cgit v0.10.2 From 4665ac8e28c30c2a015c617c55783c0bf3a49c05 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Mon, 7 Jan 2008 11:05:35 -0200 Subject: lguest: makes special fields be per-vcpu lguest struct have room for some fields, namely, cr2, ts, esp1 and ss1, that are not really guest-wide, but rather, vcpu-wide. This patch puts it in the vcpu struct Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 83323b1..ab70bbe 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -60,7 +60,7 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) /* FLUSH_TLB comes in two flavors, depending on the * argument: */ if (args->arg1) - guest_pagetable_clear_all(lg); + guest_pagetable_clear_all(cpu); else guest_pagetable_flush_user(lg); break; @@ -68,10 +68,10 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) /* All these calls simply pass the arguments through to the right * routines. */ case LHCALL_NEW_PGTABLE: - guest_new_pagetable(lg, args->arg1); + guest_new_pagetable(cpu, args->arg1); break; case LHCALL_SET_STACK: - guest_set_stack(lg, args->arg1, args->arg2, args->arg3); + guest_set_stack(cpu, args->arg1, args->arg2, args->arg3); break; case LHCALL_SET_PTE: guest_set_pte(lg, args->arg1, args->arg2, __pte(args->arg3)); @@ -84,7 +84,7 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) break; case LHCALL_TS: /* This sets the TS flag, as we saw used in run_guest(). */ - lg->ts = args->arg1; + cpu->ts = args->arg1; break; case LHCALL_HALT: /* Similarly, this sets the halted flag for run_guest(). */ @@ -191,7 +191,7 @@ static void initialize(struct lg_cpu *cpu) * first write to a Guest page. This may have caused a copy-on-write * fault, but the old page might be (read-only) in the Guest * pagetable. */ - guest_pagetable_clear_all(lg); + guest_pagetable_clear_all(cpu); } /*H:100 diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 9c1c479..b87d9d6 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -73,8 +73,8 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err) if ((cpu->regs->ss&0x3) != GUEST_PL) { /* The Guest told us their kernel stack with the SET_STACK * hypercall: both the virtual address and the segment */ - virtstack = lg->esp1; - ss = lg->ss1; + virtstack = cpu->esp1; + ss = cpu->ss1; origstack = gstack = guest_pa(lg, virtstack); /* We push the old stack segment and pointer onto the new @@ -311,10 +311,11 @@ static int direct_trap(unsigned int num) * the Guest. * * Which is deeply unfair, because (literally!) it wasn't the Guests' fault. */ -void pin_stack_pages(struct lguest *lg) +void pin_stack_pages(struct lg_cpu *cpu) { unsigned int i; + struct lguest *lg = cpu->lg; /* Depending on the CONFIG_4KSTACKS option, the Guest can have one or * two pages of stack space. */ for (i = 0; i < lg->stack_pages; i++) @@ -322,7 +323,7 @@ void pin_stack_pages(struct lguest *lg) * start of the page after the kernel stack. Subtract one to * get back onto the first stack page, and keep subtracting to * get to the rest of the stack pages. */ - pin_page(lg, lg->esp1 - 1 - i * PAGE_SIZE); + pin_page(lg, cpu->esp1 - 1 - i * PAGE_SIZE); } /* Direct traps also mean that we need to know whenever the Guest wants to use @@ -333,21 +334,21 @@ void pin_stack_pages(struct lguest *lg) * * In Linux each process has its own kernel stack, so this happens a lot: we * change stacks on each context switch. */ -void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages) +void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages) { /* You are not allowed have a stack segment with privilege level 0: bad * Guest! */ if ((seg & 0x3) != GUEST_PL) - kill_guest(lg, "bad stack segment %i", seg); + kill_guest(cpu->lg, "bad stack segment %i", seg); /* We only expect one or two stack pages. */ if (pages > 2) - kill_guest(lg, "bad stack pages %u", pages); + kill_guest(cpu->lg, "bad stack pages %u", pages); /* Save where the stack is, and how many pages */ - lg->ss1 = seg; - lg->esp1 = esp; - lg->stack_pages = pages; + cpu->ss1 = seg; + cpu->esp1 = esp; + cpu->lg->stack_pages = pages; /* Make sure the new stack pages are mapped */ - pin_stack_pages(lg); + pin_stack_pages(cpu); } /* All this reference to mapping stacks leads us neatly into the other complex diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index e7123fa..0563764 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -46,6 +46,11 @@ struct lg_cpu { struct task_struct *tsk; struct mm_struct *mm; /* == tsk->mm, but that becomes NULL on exit */ + u32 cr2; + int ts; + u32 esp1; + u8 ss1; + /* At end of a page shared mapped over lguest_pages in guest. */ unsigned long regs_page; struct lguest_regs *regs; @@ -80,10 +85,6 @@ struct lguest * memory in the Launcher. */ void __user *mem_base; unsigned long kernel_address; - u32 cr2; - int ts; - u32 esp1; - u8 ss1; /* Bitmap of what has changed: see CHANGED_* above. */ int changed; @@ -141,8 +142,8 @@ void maybe_do_interrupt(struct lg_cpu *cpu); int deliver_trap(struct lg_cpu *cpu, unsigned int num); void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i, u32 low, u32 hi); -void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages); -void pin_stack_pages(struct lguest *lg); +void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages); +void pin_stack_pages(struct lg_cpu *cpu); void setup_default_idt_entries(struct lguest_ro_state *state, const unsigned long *def); void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, @@ -164,9 +165,9 @@ void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt); /* page_tables.c: */ int init_guest_pagetable(struct lguest *lg, unsigned long pgtable); void free_guest_pagetable(struct lguest *lg); -void guest_new_pagetable(struct lguest *lg, unsigned long pgtable); +void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable); void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i); -void guest_pagetable_clear_all(struct lguest *lg); +void guest_pagetable_clear_all(struct lg_cpu *cpu); void guest_pagetable_flush_user(struct lguest *lg); void guest_set_pte(struct lguest *lg, unsigned long gpgdir, unsigned long vaddr, pte_t val); diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index f19add4..e34c816 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -432,9 +432,10 @@ static unsigned int new_pgdir(struct lguest *lg, * Now we've seen all the page table setting and manipulation, let's see what * what happens when the Guest changes page tables (ie. changes the top-level * pgdir). This occurs on almost every context switch. */ -void guest_new_pagetable(struct lguest *lg, unsigned long pgtable) +void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) { int newpgdir, repin = 0; + struct lguest *lg = cpu->lg; /* Look to see if we have this one already. */ newpgdir = find_pgdir(lg, pgtable); @@ -446,7 +447,7 @@ void guest_new_pagetable(struct lguest *lg, unsigned long pgtable) lg->pgdidx = newpgdir; /* If it was completely blank, we map in the Guest kernel stack */ if (repin) - pin_stack_pages(lg); + pin_stack_pages(cpu); } /*H:470 Finally, a routine which throws away everything: all PGD entries in all @@ -468,11 +469,11 @@ static void release_all_pagetables(struct lguest *lg) * mapping. Since kernel mappings are in every page table, it's easiest to * throw them all away. This traps the Guest in amber for a while as * everything faults back in, but it's rare. */ -void guest_pagetable_clear_all(struct lguest *lg) +void guest_pagetable_clear_all(struct lg_cpu *cpu) { - release_all_pagetables(lg); + release_all_pagetables(cpu->lg); /* We need the Guest kernel stack mapped again. */ - pin_stack_pages(lg); + pin_stack_pages(cpu); } /*:*/ /*M:009 Since we throw away all mappings when a kernel mapping changes, our diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index e989b83..65f2e38 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -95,8 +95,8 @@ static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages) /* Set up the two "TSS" members which tell the CPU what stack to use * for traps which do directly into the Guest (ie. traps at privilege * level 1). */ - pages->state.guest_tss.esp1 = lg->esp1; - pages->state.guest_tss.ss1 = lg->ss1; + pages->state.guest_tss.esp1 = cpu->esp1; + pages->state.guest_tss.ss1 = cpu->ss1; /* Copy direct-to-Guest trap entries. */ if (lg->changed & CHANGED_IDT) @@ -165,12 +165,10 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) * are disabled: we own the CPU. */ void lguest_arch_run_guest(struct lg_cpu *cpu) { - struct lguest *lg = cpu->lg; - /* Remember the awfully-named TS bit? If the Guest has asked to set it * we set it now, so we can trap and pass that trap to the Guest if it * uses the FPU. */ - if (lg->ts) + if (cpu->ts) lguest_set_ts(); /* SYSENTER is an optimized way of doing system calls. We can't allow @@ -325,7 +323,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) /* If the Guest doesn't want to know, we already restored the * Floating Point Unit, so we just continue without telling * it. */ - if (!lg->ts) + if (!cpu->ts) return; break; case 32 ... 255: -- cgit v0.10.2 From 5e232f4f428c4266ba5cdae9f23ba19a0913dcf9 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Mon, 7 Jan 2008 11:05:36 -0200 Subject: lguest: make pending notifications per-vcpu this patch makes the pending_notify field, used to control pending notifications, per-vcpu, instead of per-guest Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 66c3d3b..6023872 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -186,10 +186,10 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) /* It's possible the Guest did a NOTIFY hypercall to the * Launcher, in which case we return from the read() now. */ - if (lg->pending_notify) { - if (put_user(lg->pending_notify, user)) + if (cpu->pending_notify) { + if (put_user(cpu->pending_notify, user)) return -EFAULT; - return sizeof(lg->pending_notify); + return sizeof(cpu->pending_notify); } /* Check for signals */ diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index ab70bbe..be8f046 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -91,7 +91,7 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) cpu->halted = 1; break; case LHCALL_NOTIFY: - lg->pending_notify = args->arg1; + cpu->pending_notify = args->arg1; break; default: /* It should be an architecture-specific hypercall. */ @@ -154,7 +154,7 @@ static void do_async_hcalls(struct lg_cpu *cpu) /* Stop doing hypercalls if they want to notify the Launcher: * it needs to service this first. */ - if (lg->pending_notify) + if (cpu->pending_notify) break; } } @@ -219,7 +219,7 @@ void do_hypercalls(struct lg_cpu *cpu) /* If we stopped reading the hypercall ring because the Guest did a * NOTIFY to the Launcher, we want to return now. Otherwise we do * the hypercall. */ - if (!cpu->lg->pending_notify) { + if (!cpu->pending_notify) { do_hcall(cpu, cpu->hcall); /* Tricky point: we reset the hcall pointer to mark the * hypercall as "done". We use the hcall pointer rather than diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 0563764..95b473c 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -51,6 +51,8 @@ struct lg_cpu { u32 esp1; u8 ss1; + unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */ + /* At end of a page shared mapped over lguest_pages in guest. */ unsigned long regs_page; struct lguest_regs *regs; @@ -95,7 +97,6 @@ struct lguest struct pgdir pgdirs[4]; unsigned long noirq_start, noirq_end; - unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */ unsigned int stack_pages; u32 tsc_khz; diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 980b355..f4f6df8 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -89,8 +89,8 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) /* If we returned from read() last time because the Guest notified, * clear the flag. */ - if (lg->pending_notify) - lg->pending_notify = 0; + if (cpu->pending_notify) + cpu->pending_notify = 0; /* Run the Guest until something interesting happens. */ return run_guest(cpu, (unsigned long __user *)user); -- cgit v0.10.2 From 1713608f280002d9ffc6de89d7de5cf367072d63 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Mon, 7 Jan 2008 11:05:37 -0200 Subject: lguest: per-vcpu lguest pgdir management this patch makes the pgdir management per-vcpu. The pgdirs pool is still guest-wide (although it'll probably need to grow when we are really executing more vcpus), but the pgdidx index is gone, since it makes no sense anymore. Instead, we use a per-vcpu index. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index be8f046..0471018 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -62,7 +62,7 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) if (args->arg1) guest_pagetable_clear_all(cpu); else - guest_pagetable_flush_user(lg); + guest_pagetable_flush_user(cpu); break; /* All these calls simply pass the arguments through to the right diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index b87d9d6..6bbfce4 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -76,7 +76,7 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err) virtstack = cpu->esp1; ss = cpu->ss1; - origstack = gstack = guest_pa(lg, virtstack); + origstack = gstack = guest_pa(cpu, virtstack); /* We push the old stack segment and pointer onto the new * stack: when the Guest does an "iret" back from the interrupt * handler the CPU will notice they're dropping privilege @@ -88,7 +88,7 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err) virtstack = cpu->regs->esp; ss = cpu->regs->ss; - origstack = gstack = guest_pa(lg, virtstack); + origstack = gstack = guest_pa(cpu, virtstack); } /* Remember that we never let the Guest actually disable interrupts, so @@ -323,7 +323,7 @@ void pin_stack_pages(struct lg_cpu *cpu) * start of the page after the kernel stack. Subtract one to * get back onto the first stack page, and keep subtracting to * get to the rest of the stack pages. */ - pin_page(lg, cpu->esp1 - 1 - i * PAGE_SIZE); + pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE); } /* Direct traps also mean that we need to know whenever the Guest wants to use diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 95b473c..94e518d 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -57,6 +57,8 @@ struct lg_cpu { unsigned long regs_page; struct lguest_regs *regs; + int cpu_pgd; /* which pgd this cpu is currently using */ + /* If a hypercall was asked for, this points to the arguments. */ struct hcall_args *hcall; u32 next_hcall; @@ -92,8 +94,6 @@ struct lguest int changed; struct lguest_pages *last_pages; - /* We keep a small number of these. */ - u32 pgdidx; struct pgdir pgdirs[4]; unsigned long noirq_start, noirq_end; @@ -169,13 +169,13 @@ void free_guest_pagetable(struct lguest *lg); void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable); void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i); void guest_pagetable_clear_all(struct lg_cpu *cpu); -void guest_pagetable_flush_user(struct lguest *lg); +void guest_pagetable_flush_user(struct lg_cpu *cpu); void guest_set_pte(struct lguest *lg, unsigned long gpgdir, unsigned long vaddr, pte_t val); void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages); -int demand_page(struct lguest *info, unsigned long cr2, int errcode); -void pin_page(struct lguest *lg, unsigned long vaddr); -unsigned long guest_pa(struct lguest *lg, unsigned long vaddr); +int demand_page(struct lg_cpu *cpu, unsigned long cr2, int errcode); +void pin_page(struct lg_cpu *cpu, unsigned long vaddr); +unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr); void page_table_guest_data_init(struct lguest *lg); /* /core.c: */ diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index e34c816..fb66561 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -94,10 +94,10 @@ static pte_t *spte_addr(struct lguest *lg, pgd_t spgd, unsigned long vaddr) /* These two functions just like the above two, except they access the Guest * page tables. Hence they return a Guest address. */ -static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr) +static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr) { unsigned int index = vaddr >> (PGDIR_SHIFT); - return lg->pgdirs[lg->pgdidx].gpgdir + index * sizeof(pgd_t); + return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t); } static unsigned long gpte_addr(struct lguest *lg, @@ -200,22 +200,23 @@ static void check_gpgd(struct lguest *lg, pgd_t gpgd) * * If we fixed up the fault (ie. we mapped the address), this routine returns * true. Otherwise, it was a real fault and we need to tell the Guest. */ -int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) +int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) { pgd_t gpgd; pgd_t *spgd; unsigned long gpte_ptr; pte_t gpte; pte_t *spte; + struct lguest *lg = cpu->lg; /* First step: get the top-level Guest page table entry. */ - gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t); + gpgd = lgread(lg, gpgd_addr(cpu, vaddr), pgd_t); /* Toplevel not present? We can't map it in. */ if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) return 0; /* Now look at the matching shadow entry. */ - spgd = spgd_addr(lg, lg->pgdidx, vaddr); + spgd = spgd_addr(lg, cpu->cpu_pgd, vaddr); if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { /* No shadow entry: allocate a new shadow PTE page. */ unsigned long ptepage = get_zeroed_page(GFP_KERNEL); @@ -297,19 +298,19 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) * * This is a quick version which answers the question: is this virtual address * mapped by the shadow page tables, and is it writable? */ -static int page_writable(struct lguest *lg, unsigned long vaddr) +static int page_writable(struct lg_cpu *cpu, unsigned long vaddr) { pgd_t *spgd; unsigned long flags; /* Look at the current top level entry: is it present? */ - spgd = spgd_addr(lg, lg->pgdidx, vaddr); + spgd = spgd_addr(cpu->lg, cpu->cpu_pgd, vaddr); if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) return 0; /* Check the flags on the pte entry itself: it must be present and * writable. */ - flags = pte_flags(*(spte_addr(lg, *spgd, vaddr))); + flags = pte_flags(*(spte_addr(cpu->lg, *spgd, vaddr))); return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); } @@ -317,10 +318,10 @@ static int page_writable(struct lguest *lg, unsigned long vaddr) /* So, when pin_stack_pages() asks us to pin a page, we check if it's already * in the page tables, and if not, we call demand_page() with error code 2 * (meaning "write"). */ -void pin_page(struct lguest *lg, unsigned long vaddr) +void pin_page(struct lg_cpu *cpu, unsigned long vaddr) { - if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2)) - kill_guest(lg, "bad stack page %#lx", vaddr); + if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2)) + kill_guest(cpu->lg, "bad stack page %#lx", vaddr); } /*H:450 If we chase down the release_pgd() code, it looks like this: */ @@ -358,28 +359,28 @@ static void flush_user_mappings(struct lguest *lg, int idx) * * The Guest has a hypercall to throw away the page tables: it's used when a * large number of mappings have been changed. */ -void guest_pagetable_flush_user(struct lguest *lg) +void guest_pagetable_flush_user(struct lg_cpu *cpu) { /* Drop the userspace part of the current page table. */ - flush_user_mappings(lg, lg->pgdidx); + flush_user_mappings(cpu->lg, cpu->cpu_pgd); } /*:*/ /* We walk down the guest page tables to get a guest-physical address */ -unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) +unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr) { pgd_t gpgd; pte_t gpte; /* First step: get the top-level Guest page table entry. */ - gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t); + gpgd = lgread(cpu->lg, gpgd_addr(cpu, vaddr), pgd_t); /* Toplevel not present? We can't map it in. */ if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) - kill_guest(lg, "Bad address %#lx", vaddr); + kill_guest(cpu->lg, "Bad address %#lx", vaddr); - gpte = lgread(lg, gpte_addr(lg, gpgd, vaddr), pte_t); + gpte = lgread(cpu->lg, gpte_addr(cpu->lg, gpgd, vaddr), pte_t); if (!(pte_flags(gpte) & _PAGE_PRESENT)) - kill_guest(lg, "Bad address %#lx", vaddr); + kill_guest(cpu->lg, "Bad address %#lx", vaddr); return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); } @@ -399,11 +400,12 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) /*H:435 And this is us, creating the new page directory. If we really do * allocate a new one (and so the kernel parts are not there), we set * blank_pgdir. */ -static unsigned int new_pgdir(struct lguest *lg, +static unsigned int new_pgdir(struct lg_cpu *cpu, unsigned long gpgdir, int *blank_pgdir) { unsigned int next; + struct lguest *lg = cpu->lg; /* We pick one entry at random to throw out. Choosing the Least * Recently Used might be better, but this is easy. */ @@ -413,7 +415,7 @@ static unsigned int new_pgdir(struct lguest *lg, lg->pgdirs[next].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); /* If the allocation fails, just keep using the one we have */ if (!lg->pgdirs[next].pgdir) - next = lg->pgdidx; + next = cpu->cpu_pgd; else /* This is a blank page, so there are no kernel * mappings: caller must map the stack! */ @@ -442,9 +444,9 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) /* If not, we allocate or mug an existing one: if it's a fresh one, * repin gets set to 1. */ if (newpgdir == ARRAY_SIZE(lg->pgdirs)) - newpgdir = new_pgdir(lg, pgtable, &repin); + newpgdir = new_pgdir(cpu, pgtable, &repin); /* Change the current pgd index to the new one. */ - lg->pgdidx = newpgdir; + cpu->cpu_pgd = newpgdir; /* If it was completely blank, we map in the Guest kernel stack */ if (repin) pin_stack_pages(cpu); @@ -591,11 +593,11 @@ int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) { /* We start on the first shadow page table, and give it a blank PGD * page. */ - lg->pgdidx = 0; - lg->pgdirs[lg->pgdidx].gpgdir = pgtable; - lg->pgdirs[lg->pgdidx].pgdir = (pgd_t*)get_zeroed_page(GFP_KERNEL); - if (!lg->pgdirs[lg->pgdidx].pgdir) + lg->pgdirs[0].gpgdir = pgtable; + lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); + if (!lg->pgdirs[0].pgdir) return -ENOMEM; + lg->cpus[0].cpu_pgd = 0; return 0; } @@ -607,7 +609,7 @@ void page_table_guest_data_init(struct lguest *lg) /* We tell the Guest that it can't use the top 4MB of virtual * addresses used by the Switcher. */ || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem) - || put_user(lg->pgdirs[lg->pgdidx].gpgdir,&lg->lguest_data->pgdir)) + || put_user(lg->pgdirs[0].gpgdir, &lg->lguest_data->pgdir)) kill_guest(lg, "bad guest page %p", lg->lguest_data); /* In flush_user_mappings() we loop from 0 to @@ -637,7 +639,6 @@ void free_guest_pagetable(struct lguest *lg) * Guest is about to run on this CPU. */ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) { - struct lguest *lg = cpu->lg; pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); pgd_t switcher_pgd; pte_t regs_pte; @@ -647,7 +648,7 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) * page for this CPU (with appropriate flags). */ switcher_pgd = __pgd(__pa(switcher_pte_page) | _PAGE_KERNEL); - lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; + cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; /* We also change the Switcher PTE page. When we're running the Guest, * we want the Guest's "regs" page to appear where the first Switcher diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 65f2e38..8c72355 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -145,7 +145,7 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) * 0-th argument above, ie "a"). %ebx contains the * physical address of the Guest's top-level page * directory. */ - : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir)) + : "0"(pages), "1"(__pa(lg->pgdirs[cpu->cpu_pgd].pgdir)) /* We tell gcc that all these registers could change, * which means we don't have to save and restore them in * the Switcher. */ @@ -223,7 +223,7 @@ static int emulate_insn(struct lg_cpu *cpu) unsigned int insnlen = 0, in = 0, shift = 0; /* The eip contains the *virtual* address of the Guest's instruction: * guest_pa just subtracts the Guest's page_offset. */ - unsigned long physaddr = guest_pa(lg, cpu->regs->eip); + unsigned long physaddr = guest_pa(cpu, cpu->regs->eip); /* This must be the Guest kernel trying to do something, not userspace! * The bottom two bits of the CS segment register are the privilege @@ -305,7 +305,8 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) * * The errcode tells whether this was a read or a write, and * whether kernel or userspace code. */ - if (demand_page(lg,cpu->arch.last_pagefault,cpu->regs->errcode)) + if (demand_page(cpu, cpu->arch.last_pagefault, + cpu->regs->errcode)) return; /* OK, it's really not there (or not OK): the Guest needs to -- cgit v0.10.2 From 2092aa277b0adfb8f4f47ab8a9ee00aff0ca7ed6 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Thu, 17 Jan 2008 19:09:49 -0200 Subject: lguest: change spte_addr header spte_addr does not depend on any guest information, so we wipe out the lg parameter completely. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index fb66561..c4b8eaf 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -84,7 +84,7 @@ static pgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr) /* This routine then takes the page directory entry returned above, which * contains the address of the page table entry (PTE) page. It then returns a * pointer to the PTE entry for the given address. */ -static pte_t *spte_addr(struct lguest *lg, pgd_t spgd, unsigned long vaddr) +static pte_t *spte_addr(pgd_t spgd, unsigned long vaddr) { pte_t *page = __va(pgd_pfn(spgd) << PAGE_SHIFT); /* You should never call this if the PGD entry wasn't valid */ @@ -261,7 +261,7 @@ int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) gpte = pte_mkdirty(gpte); /* Get the pointer to the shadow PTE entry we're going to set. */ - spte = spte_addr(lg, *spgd, vaddr); + spte = spte_addr(*spgd, vaddr); /* If there was a valid shadow PTE entry here before, we release it. * This can happen with a write to a previously read-only entry. */ release_pte(*spte); @@ -310,7 +310,7 @@ static int page_writable(struct lg_cpu *cpu, unsigned long vaddr) /* Check the flags on the pte entry itself: it must be present and * writable. */ - flags = pte_flags(*(spte_addr(cpu->lg, *spgd, vaddr))); + flags = pte_flags(*(spte_addr(*spgd, vaddr))); return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); } @@ -509,7 +509,7 @@ static void do_set_pte(struct lguest *lg, int idx, /* If the top level isn't present, there's no entry to update. */ if (pgd_flags(*spgd) & _PAGE_PRESENT) { /* Otherwise, we start by releasing the existing entry. */ - pte_t *spte = spte_addr(lg, *spgd, vaddr); + pte_t *spte = spte_addr(*spgd, vaddr); release_pte(*spte); /* If they're setting this entry as dirty or accessed, we might -- cgit v0.10.2 From c40a9f4719d36841a2d7ff4fe866dce7bfb454b7 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Thu, 17 Jan 2008 19:11:20 -0200 Subject: lguest: change last_guest to last_cpu in our model, a guest does not run in a cpu anymore: a virtual cpu does. So we change last_guest to last_cpu Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 8c72355..10eab67 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -60,7 +60,7 @@ static struct lguest_pages *lguest_pages(unsigned int cpu) (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]); } -static DEFINE_PER_CPU(struct lguest *, last_guest); +static DEFINE_PER_CPU(struct lg_cpu *, last_cpu); /*S:010 * We approach the Switcher. @@ -80,8 +80,8 @@ static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages) * same Guest we ran last time (and that Guest hasn't run anywhere else * meanwhile). If that's not the case, we pretend everything in the * Guest has changed. */ - if (__get_cpu_var(last_guest) != lg || lg->last_pages != pages) { - __get_cpu_var(last_guest) = lg; + if (__get_cpu_var(last_cpu) != cpu || lg->last_pages != pages) { + __get_cpu_var(last_cpu) = cpu; lg->last_pages = pages; lg->changed = CHANGED_ALL; } -- cgit v0.10.2 From f34f8c5fea079065671163c37d98328cff31980b Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Thu, 17 Jan 2008 19:13:26 -0200 Subject: lguest: move last_pages to lg_cpu in our new model, pages are assigned to a virtual cpu, not to a guest. We move it to the lg_cpu structure. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 94e518d..f1c4c33 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -57,6 +57,8 @@ struct lg_cpu { unsigned long regs_page; struct lguest_regs *regs; + struct lguest_pages *last_pages; + int cpu_pgd; /* which pgd this cpu is currently using */ /* If a hypercall was asked for, this points to the arguments. */ @@ -92,7 +94,6 @@ struct lguest /* Bitmap of what has changed: see CHANGED_* above. */ int changed; - struct lguest_pages *last_pages; struct pgdir pgdirs[4]; diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index f4f6df8..a87fca6 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -131,6 +131,10 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) * reference, it is destroyed before close() is called. */ cpu->mm = get_task_mm(cpu->tsk); + /* We remember which CPU's pages this Guest used last, for optimization + * when the same Guest runs on the same CPU twice. */ + cpu->last_pages = NULL; + return 0; } @@ -192,10 +196,6 @@ static int initialize(struct file *file, const unsigned long __user *input) if (err) goto free_regs; - /* We remember which CPU's pages this Guest used last, for optimization - * when the same Guest runs on the same CPU twice. */ - lg->last_pages = NULL; - /* We keep our "struct lguest" in the file's private_data. */ file->private_data = lg; diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 10eab67..f8dfdc9 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -80,9 +80,9 @@ static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages) * same Guest we ran last time (and that Guest hasn't run anywhere else * meanwhile). If that's not the case, we pretend everything in the * Guest has changed. */ - if (__get_cpu_var(last_cpu) != cpu || lg->last_pages != pages) { + if (__get_cpu_var(last_cpu) != cpu || cpu->last_pages != pages) { __get_cpu_var(last_cpu) = cpu; - lg->last_pages = pages; + cpu->last_pages = pages; lg->changed = CHANGED_ALL; } -- cgit v0.10.2 From ae3749dcd8c31dcfbab14ea28c68a944c93f418f Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Thu, 17 Jan 2008 19:14:46 -0200 Subject: lguest: move changed bitmap to lg_cpu events represented in the 'changed' bitmap are per-cpu, not per-guest. move it to the lg_cpu structure Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 6bbfce4..9ac7455 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -395,7 +395,7 @@ void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) /* Mark the IDT as changed: next time the Guest runs we'll know we have * to copy this again. */ - cpu->lg->changed |= CHANGED_IDT; + cpu->changed |= CHANGED_IDT; /* Check that the Guest doesn't try to step outside the bounds. */ if (num >= ARRAY_SIZE(cpu->arch.idt)) diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index f1c4c33..0d6f643 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -51,6 +51,9 @@ struct lg_cpu { u32 esp1; u8 ss1; + /* Bitmap of what has changed: see CHANGED_* above. */ + int changed; + unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */ /* At end of a page shared mapped over lguest_pages in guest. */ @@ -92,9 +95,6 @@ struct lguest void __user *mem_base; unsigned long kernel_address; - /* Bitmap of what has changed: see CHANGED_* above. */ - int changed; - struct pgdir pgdirs[4]; unsigned long noirq_start, noirq_end; diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c index 0213845..635f54c 100644 --- a/drivers/lguest/segments.c +++ b/drivers/lguest/segments.c @@ -159,7 +159,7 @@ void load_guest_gdt(struct lg_cpu *cpu, unsigned long table, u32 num) fixup_gdt_table(cpu, 0, ARRAY_SIZE(cpu->arch.gdt)); /* Mark that the GDT changed so the core knows it has to copy it again, * even if the Guest is run on the same CPU. */ - lg->changed |= CHANGED_GDT; + cpu->changed |= CHANGED_GDT; } /* This is the fast-track version for just changing the three TLS entries. @@ -174,7 +174,7 @@ void guest_load_tls(struct lg_cpu *cpu, unsigned long gtls) __lgread(lg, tls, gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES); fixup_gdt_table(cpu, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1); /* Note that just the TLS entries have changed. */ - lg->changed |= CHANGED_GDT_TLS; + cpu->changed |= CHANGED_GDT_TLS; } /*:*/ diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index f8dfdc9..fd6a851 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -75,7 +75,6 @@ static DEFINE_PER_CPU(struct lg_cpu *, last_cpu); */ static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages) { - struct lguest *lg = cpu->lg; /* Copying all this data can be quite expensive. We usually run the * same Guest we ran last time (and that Guest hasn't run anywhere else * meanwhile). If that's not the case, we pretend everything in the @@ -83,7 +82,7 @@ static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages) if (__get_cpu_var(last_cpu) != cpu || cpu->last_pages != pages) { __get_cpu_var(last_cpu) = cpu; cpu->last_pages = pages; - lg->changed = CHANGED_ALL; + cpu->changed = CHANGED_ALL; } /* These copies are pretty cheap, so we do them unconditionally: */ @@ -99,18 +98,18 @@ static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages) pages->state.guest_tss.ss1 = cpu->ss1; /* Copy direct-to-Guest trap entries. */ - if (lg->changed & CHANGED_IDT) + if (cpu->changed & CHANGED_IDT) copy_traps(cpu, pages->state.guest_idt, default_idt_entries); /* Copy all GDT entries which the Guest can change. */ - if (lg->changed & CHANGED_GDT) + if (cpu->changed & CHANGED_GDT) copy_gdt(cpu, pages->state.guest_gdt); /* If only the TLS entries have changed, copy them. */ - else if (lg->changed & CHANGED_GDT_TLS) + else if (cpu->changed & CHANGED_GDT_TLS) copy_gdt_tls(cpu, pages->state.guest_gdt); /* Mark the Guest as unchanged for next time. */ - lg->changed = 0; + cpu->changed = 0; } /* Finally: the code to actually call into the Switcher to run the Guest. */ -- cgit v0.10.2 From 934faab464c6a26ed1a226b6cf7111b35405dde1 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Thu, 17 Jan 2008 19:18:08 -0200 Subject: lguest: change gpte_addr header gpte_addr() does not depend on any guest information. So we wipe out the lg parameter from it completely. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index c4b8eaf..c9acafc 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -100,8 +100,7 @@ static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr) return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t); } -static unsigned long gpte_addr(struct lguest *lg, - pgd_t gpgd, unsigned long vaddr) +static unsigned long gpte_addr(pgd_t gpgd, unsigned long vaddr) { unsigned long gpage = pgd_pfn(gpgd) << PAGE_SHIFT; BUG_ON(!(pgd_flags(gpgd) & _PAGE_PRESENT)); @@ -235,7 +234,7 @@ int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) /* OK, now we look at the lower level in the Guest page table: keep its * address, because we might update it later. */ - gpte_ptr = gpte_addr(lg, gpgd, vaddr); + gpte_ptr = gpte_addr(gpgd, vaddr); gpte = lgread(lg, gpte_ptr, pte_t); /* If this page isn't in the Guest page tables, we can't page it in. */ @@ -378,7 +377,7 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr) if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) kill_guest(cpu->lg, "Bad address %#lx", vaddr); - gpte = lgread(cpu->lg, gpte_addr(cpu->lg, gpgd, vaddr), pte_t); + gpte = lgread(cpu->lg, gpte_addr(gpgd, vaddr), pte_t); if (!(pte_flags(gpte) & _PAGE_PRESENT)) kill_guest(cpu->lg, "Bad address %#lx", vaddr); -- cgit v0.10.2 From 382ac6b3fbc0ea6a5697fc6caaf7e7de12fa8b96 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Thu, 17 Jan 2008 19:19:42 -0200 Subject: lguest: get rid of lg variable assignments We can save some lines of code by getting rid of *lg = cpu... lines of code spread everywhere by now. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 6023872..7743d73 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -151,23 +151,23 @@ int lguest_address_ok(const struct lguest *lg, /* This routine copies memory from the Guest. Here we can see how useful the * kill_lguest() routine we met in the Launcher can be: we return a random * value (all zeroes) instead of needing to return an error. */ -void __lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes) +void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes) { - if (!lguest_address_ok(lg, addr, bytes) - || copy_from_user(b, lg->mem_base + addr, bytes) != 0) { + if (!lguest_address_ok(cpu->lg, addr, bytes) + || copy_from_user(b, cpu->lg->mem_base + addr, bytes) != 0) { /* copy_from_user should do this, but as we rely on it... */ memset(b, 0, bytes); - kill_guest(lg, "bad read address %#lx len %u", addr, bytes); + kill_guest(cpu, "bad read address %#lx len %u", addr, bytes); } } /* This is the write (copy into guest) version. */ -void __lgwrite(struct lguest *lg, unsigned long addr, const void *b, +void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b, unsigned bytes) { - if (!lguest_address_ok(lg, addr, bytes) - || copy_to_user(lg->mem_base + addr, b, bytes) != 0) - kill_guest(lg, "bad write address %#lx len %u", addr, bytes); + if (!lguest_address_ok(cpu->lg, addr, bytes) + || copy_to_user(cpu->lg->mem_base + addr, b, bytes) != 0) + kill_guest(cpu, "bad write address %#lx len %u", addr, bytes); } /*:*/ @@ -176,10 +176,8 @@ void __lgwrite(struct lguest *lg, unsigned long addr, const void *b, * going around and around until something interesting happens. */ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) { - struct lguest *lg = cpu->lg; - /* We stop running once the Guest is dead. */ - while (!lg->dead) { + while (!cpu->lg->dead) { /* First we run any hypercalls the Guest wants done. */ if (cpu->hcall) do_hypercalls(cpu); @@ -212,7 +210,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) /* Just make absolutely sure the Guest is still alive. One of * those hypercalls could have been fatal, for example. */ - if (lg->dead) + if (cpu->lg->dead) break; /* If the Guest asked to be stopped, we sleep. The Guest's @@ -237,7 +235,7 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) lguest_arch_handle_trap(cpu); } - if (lg->dead == ERR_PTR(-ERESTART)) + if (cpu->lg->dead == ERR_PTR(-ERESTART)) return -ERESTART; /* The Guest is dead => "No such file or directory" */ return -ENOENT; diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 0471018..32666d0 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -31,8 +31,6 @@ * Or gets killed. Or, in the case of LHCALL_CRASH, both. */ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) { - struct lguest *lg = cpu->lg; - switch (args->arg0) { case LHCALL_FLUSH_ASYNC: /* This call does nothing, except by breaking out of the Guest @@ -41,7 +39,7 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) case LHCALL_LGUEST_INIT: /* You can't get here unless you're already initialized. Don't * do that. */ - kill_guest(lg, "already have lguest_data"); + kill_guest(cpu, "already have lguest_data"); break; case LHCALL_SHUTDOWN: { /* Shutdown is such a trivial hypercall that we do it in four @@ -49,11 +47,11 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) char msg[128]; /* If the lgread fails, it will call kill_guest() itself; the * kill_guest() with the message will be ignored. */ - __lgread(lg, msg, args->arg1, sizeof(msg)); + __lgread(cpu, msg, args->arg1, sizeof(msg)); msg[sizeof(msg)-1] = '\0'; - kill_guest(lg, "CRASH: %s", msg); + kill_guest(cpu, "CRASH: %s", msg); if (args->arg2 == LGUEST_SHUTDOWN_RESTART) - lg->dead = ERR_PTR(-ERESTART); + cpu->lg->dead = ERR_PTR(-ERESTART); break; } case LHCALL_FLUSH_TLB: @@ -74,10 +72,10 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) guest_set_stack(cpu, args->arg1, args->arg2, args->arg3); break; case LHCALL_SET_PTE: - guest_set_pte(lg, args->arg1, args->arg2, __pte(args->arg3)); + guest_set_pte(cpu, args->arg1, args->arg2, __pte(args->arg3)); break; case LHCALL_SET_PMD: - guest_set_pmd(lg, args->arg1, args->arg2); + guest_set_pmd(cpu->lg, args->arg1, args->arg2); break; case LHCALL_SET_CLOCKEVENT: guest_set_clockevent(cpu, args->arg1); @@ -96,7 +94,7 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) default: /* It should be an architecture-specific hypercall. */ if (lguest_arch_do_hcall(cpu, args)) - kill_guest(lg, "Bad hypercall %li\n", args->arg0); + kill_guest(cpu, "Bad hypercall %li\n", args->arg0); } } /*:*/ @@ -112,10 +110,9 @@ static void do_async_hcalls(struct lg_cpu *cpu) { unsigned int i; u8 st[LHCALL_RING_SIZE]; - struct lguest *lg = cpu->lg; /* For simplicity, we copy the entire call status array in at once. */ - if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st))) + if (copy_from_user(&st, &cpu->lg->lguest_data->hcall_status, sizeof(st))) return; /* We process "struct lguest_data"s hcalls[] ring once. */ @@ -137,9 +134,9 @@ static void do_async_hcalls(struct lg_cpu *cpu) /* Copy the hypercall arguments into a local copy of * the hcall_args struct. */ - if (copy_from_user(&args, &lg->lguest_data->hcalls[n], + if (copy_from_user(&args, &cpu->lg->lguest_data->hcalls[n], sizeof(struct hcall_args))) { - kill_guest(lg, "Fetching async hypercalls"); + kill_guest(cpu, "Fetching async hypercalls"); break; } @@ -147,8 +144,8 @@ static void do_async_hcalls(struct lg_cpu *cpu) do_hcall(cpu, &args); /* Mark the hypercall done. */ - if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) { - kill_guest(lg, "Writing result for async hypercall"); + if (put_user(0xFF, &cpu->lg->lguest_data->hcall_status[n])) { + kill_guest(cpu, "Writing result for async hypercall"); break; } @@ -163,29 +160,28 @@ static void do_async_hcalls(struct lg_cpu *cpu) * Guest makes a hypercall, we end up here to set things up: */ static void initialize(struct lg_cpu *cpu) { - struct lguest *lg = cpu->lg; /* You can't do anything until you're initialized. The Guest knows the * rules, so we're unforgiving here. */ if (cpu->hcall->arg0 != LHCALL_LGUEST_INIT) { - kill_guest(lg, "hypercall %li before INIT", cpu->hcall->arg0); + kill_guest(cpu, "hypercall %li before INIT", cpu->hcall->arg0); return; } if (lguest_arch_init_hypercalls(cpu)) - kill_guest(lg, "bad guest page %p", lg->lguest_data); + kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); /* The Guest tells us where we're not to deliver interrupts by putting * the range of addresses into "struct lguest_data". */ - if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start) - || get_user(lg->noirq_end, &lg->lguest_data->noirq_end)) - kill_guest(lg, "bad guest page %p", lg->lguest_data); + if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start) + || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end)) + kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); /* We write the current time into the Guest's data page once so it can * set its clock. */ - write_timestamp(lg); + write_timestamp(cpu); /* page_tables.c will also do some setup. */ - page_table_guest_data_init(lg); + page_table_guest_data_init(cpu); /* This is the one case where the above accesses might have been the * first write to a Guest page. This may have caused a copy-on-write @@ -237,10 +233,11 @@ void do_hypercalls(struct lg_cpu *cpu) /* This routine supplies the Guest with time: it's used for wallclock time at * initial boot and as a rough time source if the TSC isn't available. */ -void write_timestamp(struct lguest *lg) +void write_timestamp(struct lg_cpu *cpu) { struct timespec now; ktime_get_real_ts(&now); - if (copy_to_user(&lg->lguest_data->time, &now, sizeof(struct timespec))) - kill_guest(lg, "Writing timestamp"); + if (copy_to_user(&cpu->lg->lguest_data->time, + &now, sizeof(struct timespec))) + kill_guest(cpu, "Writing timestamp"); } diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 9ac7455..32e97c1 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -41,11 +41,11 @@ static int idt_present(u32 lo, u32 hi) /* We need a helper to "push" a value onto the Guest's stack, since that's a * big part of what delivering an interrupt does. */ -static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val) +static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) { /* Stack grows upwards: move stack then write value. */ *gstack -= 4; - lgwrite(lg, *gstack, u32, val); + lgwrite(cpu, *gstack, u32, val); } /*H:210 The set_guest_interrupt() routine actually delivers the interrupt or @@ -65,7 +65,6 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err) unsigned long gstack, origstack; u32 eflags, ss, irq_enable; unsigned long virtstack; - struct lguest *lg = cpu->lg; /* There are two cases for interrupts: one where the Guest is already * in the kernel, and a more complex one where the Guest is in @@ -81,8 +80,8 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err) * stack: when the Guest does an "iret" back from the interrupt * handler the CPU will notice they're dropping privilege * levels and expect these here. */ - push_guest_stack(lg, &gstack, cpu->regs->ss); - push_guest_stack(lg, &gstack, cpu->regs->esp); + push_guest_stack(cpu, &gstack, cpu->regs->ss); + push_guest_stack(cpu, &gstack, cpu->regs->esp); } else { /* We're staying on the same Guest (kernel) stack. */ virtstack = cpu->regs->esp; @@ -96,20 +95,20 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err) * Guest's "irq_enabled" field into the eflags word: we saw the Guest * copy it back in "lguest_iret". */ eflags = cpu->regs->eflags; - if (get_user(irq_enable, &lg->lguest_data->irq_enabled) == 0 + if (get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled) == 0 && !(irq_enable & X86_EFLAGS_IF)) eflags &= ~X86_EFLAGS_IF; /* An interrupt is expected to push three things on the stack: the old * "eflags" word, the old code segment, and the old instruction * pointer. */ - push_guest_stack(lg, &gstack, eflags); - push_guest_stack(lg, &gstack, cpu->regs->cs); - push_guest_stack(lg, &gstack, cpu->regs->eip); + push_guest_stack(cpu, &gstack, eflags); + push_guest_stack(cpu, &gstack, cpu->regs->cs); + push_guest_stack(cpu, &gstack, cpu->regs->eip); /* For the six traps which supply an error code, we push that, too. */ if (has_err) - push_guest_stack(lg, &gstack, cpu->regs->errcode); + push_guest_stack(cpu, &gstack, cpu->regs->errcode); /* Now we've pushed all the old state, we change the stack, the code * segment and the address to execute. */ @@ -121,8 +120,8 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err) /* There are two kinds of interrupt handlers: 0xE is an "interrupt * gate" which expects interrupts to be disabled on entry. */ if (idt_type(lo, hi) == 0xE) - if (put_user(0, &lg->lguest_data->irq_enabled)) - kill_guest(lg, "Disabling interrupts"); + if (put_user(0, &cpu->lg->lguest_data->irq_enabled)) + kill_guest(cpu, "Disabling interrupts"); } /*H:205 @@ -133,17 +132,16 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err) void maybe_do_interrupt(struct lg_cpu *cpu) { unsigned int irq; - struct lguest *lg = cpu->lg; DECLARE_BITMAP(blk, LGUEST_IRQS); struct desc_struct *idt; /* If the Guest hasn't even initialized yet, we can do nothing. */ - if (!lg->lguest_data) + if (!cpu->lg->lguest_data) return; /* Take our "irqs_pending" array and remove any interrupts the Guest * wants blocked: the result ends up in "blk". */ - if (copy_from_user(&blk, lg->lguest_data->blocked_interrupts, + if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts, sizeof(blk))) return; @@ -157,19 +155,20 @@ void maybe_do_interrupt(struct lg_cpu *cpu) /* They may be in the middle of an iret, where they asked us never to * deliver interrupts. */ - if (cpu->regs->eip >= lg->noirq_start && cpu->regs->eip < lg->noirq_end) + if (cpu->regs->eip >= cpu->lg->noirq_start && + (cpu->regs->eip < cpu->lg->noirq_end)) return; /* If they're halted, interrupts restart them. */ if (cpu->halted) { /* Re-enable interrupts. */ - if (put_user(X86_EFLAGS_IF, &lg->lguest_data->irq_enabled)) - kill_guest(lg, "Re-enabling interrupts"); + if (put_user(X86_EFLAGS_IF, &cpu->lg->lguest_data->irq_enabled)) + kill_guest(cpu, "Re-enabling interrupts"); cpu->halted = 0; } else { /* Otherwise we check if they have interrupts disabled. */ u32 irq_enabled; - if (get_user(irq_enabled, &lg->lguest_data->irq_enabled)) + if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled)) irq_enabled = 0; if (!irq_enabled) return; @@ -194,7 +193,7 @@ void maybe_do_interrupt(struct lg_cpu *cpu) * did this more often, but it can actually be quite slow: doing it * here is a compromise which means at least it gets updated every * timer interrupt. */ - write_timestamp(lg); + write_timestamp(cpu); } /*:*/ @@ -315,10 +314,9 @@ void pin_stack_pages(struct lg_cpu *cpu) { unsigned int i; - struct lguest *lg = cpu->lg; /* Depending on the CONFIG_4KSTACKS option, the Guest can have one or * two pages of stack space. */ - for (i = 0; i < lg->stack_pages; i++) + for (i = 0; i < cpu->lg->stack_pages; i++) /* The stack grows *upwards*, so the address we're given is the * start of the page after the kernel stack. Subtract one to * get back onto the first stack page, and keep subtracting to @@ -339,10 +337,10 @@ void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages) /* You are not allowed have a stack segment with privilege level 0: bad * Guest! */ if ((seg & 0x3) != GUEST_PL) - kill_guest(cpu->lg, "bad stack segment %i", seg); + kill_guest(cpu, "bad stack segment %i", seg); /* We only expect one or two stack pages. */ if (pages > 2) - kill_guest(cpu->lg, "bad stack pages %u", pages); + kill_guest(cpu, "bad stack pages %u", pages); /* Save where the stack is, and how many pages */ cpu->ss1 = seg; cpu->esp1 = esp; @@ -356,7 +354,7 @@ void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages) /*H:235 This is the routine which actually checks the Guest's IDT entry and * transfers it into the entry in "struct lguest": */ -static void set_trap(struct lguest *lg, struct desc_struct *trap, +static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap, unsigned int num, u32 lo, u32 hi) { u8 type = idt_type(lo, hi); @@ -369,7 +367,7 @@ static void set_trap(struct lguest *lg, struct desc_struct *trap, /* We only support interrupt and trap gates. */ if (type != 0xE && type != 0xF) - kill_guest(lg, "bad IDT type %i", type); + kill_guest(cpu, "bad IDT type %i", type); /* We only copy the handler address, present bit, privilege level and * type. The privilege level controls where the trap can be triggered @@ -399,9 +397,9 @@ void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) /* Check that the Guest doesn't try to step outside the bounds. */ if (num >= ARRAY_SIZE(cpu->arch.idt)) - kill_guest(cpu->lg, "Setting idt entry %u", num); + kill_guest(cpu, "Setting idt entry %u", num); else - set_trap(cpu->lg, &cpu->arch.idt[num], num, lo, hi); + set_trap(cpu, &cpu->arch.idt[num], num, lo, hi); } /* The default entry for each interrupt points into the Switcher routines which diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 0d6f643..b75ce3b 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -111,22 +111,22 @@ extern struct mutex lguest_lock; /* core.c: */ int lguest_address_ok(const struct lguest *lg, unsigned long addr, unsigned long len); -void __lgread(struct lguest *, void *, unsigned long, unsigned); -void __lgwrite(struct lguest *, unsigned long, const void *, unsigned); +void __lgread(struct lg_cpu *, void *, unsigned long, unsigned); +void __lgwrite(struct lg_cpu *, unsigned long, const void *, unsigned); /*H:035 Using memory-copy operations like that is usually inconvient, so we * have the following helper macros which read and write a specific type (often * an unsigned long). * * This reads into a variable of the given type then returns that. */ -#define lgread(lg, addr, type) \ - ({ type _v; __lgread((lg), &_v, (addr), sizeof(_v)); _v; }) +#define lgread(cpu, addr, type) \ + ({ type _v; __lgread((cpu), &_v, (addr), sizeof(_v)); _v; }) /* This checks that the variable is of the given type, then writes it out. */ -#define lgwrite(lg, addr, type, val) \ +#define lgwrite(cpu, addr, type, val) \ do { \ typecheck(type, val); \ - __lgwrite((lg), (addr), &(val), sizeof(val)); \ + __lgwrite((cpu), (addr), &(val), sizeof(val)); \ } while(0) /* (end of memory access helper routines) :*/ @@ -171,13 +171,13 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable); void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i); void guest_pagetable_clear_all(struct lg_cpu *cpu); void guest_pagetable_flush_user(struct lg_cpu *cpu); -void guest_set_pte(struct lguest *lg, unsigned long gpgdir, +void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir, unsigned long vaddr, pte_t val); void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages); int demand_page(struct lg_cpu *cpu, unsigned long cr2, int errcode); void pin_page(struct lg_cpu *cpu, unsigned long vaddr); unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr); -void page_table_guest_data_init(struct lguest *lg); +void page_table_guest_data_init(struct lg_cpu *cpu); /* /core.c: */ void lguest_arch_host_init(void); @@ -197,7 +197,7 @@ void lguest_device_remove(void); /* hypercalls.c: */ void do_hypercalls(struct lg_cpu *cpu); -void write_timestamp(struct lguest *lg); +void write_timestamp(struct lg_cpu *cpu); /*L:035 * Let's step aside for the moment, to study one important routine that's used @@ -223,12 +223,12 @@ void write_timestamp(struct lguest *lg); * Like any macro which uses an "if", it is safely wrapped in a run-once "do { * } while(0)". */ -#define kill_guest(lg, fmt...) \ +#define kill_guest(cpu, fmt...) \ do { \ - if (!(lg)->dead) { \ - (lg)->dead = kasprintf(GFP_ATOMIC, fmt); \ - if (!(lg)->dead) \ - (lg)->dead = ERR_PTR(-ENOMEM); \ + if (!(cpu)->lg->dead) { \ + (cpu)->lg->dead = kasprintf(GFP_ATOMIC, fmt); \ + if (!(cpu)->lg->dead) \ + (cpu)->lg->dead = ERR_PTR(-ENOMEM); \ } \ } while(0) /* (End of aside) :*/ diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index c9acafc..983e902 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -68,17 +68,17 @@ static DEFINE_PER_CPU(pte_t *, switcher_pte_pages); * page directory entry (PGD) for that address. Since we keep track of several * page tables, the "i" argument tells us which one we're interested in (it's * usually the current one). */ -static pgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr) +static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr) { unsigned int index = pgd_index(vaddr); /* We kill any Guest trying to touch the Switcher addresses. */ if (index >= SWITCHER_PGD_INDEX) { - kill_guest(lg, "attempt to access switcher pages"); + kill_guest(cpu, "attempt to access switcher pages"); index = 0; } /* Return a pointer index'th pgd entry for the i'th page table. */ - return &lg->pgdirs[i].pgdir[index]; + return &cpu->lg->pgdirs[i].pgdir[index]; } /* This routine then takes the page directory entry returned above, which @@ -137,7 +137,7 @@ static unsigned long get_pfn(unsigned long virtpfn, int write) * entry can be a little tricky. The flags are (almost) the same, but the * Guest PTE contains a virtual page number: the CPU needs the real page * number. */ -static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write) +static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write) { unsigned long pfn, base, flags; @@ -148,7 +148,7 @@ static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write) flags = (pte_flags(gpte) & ~_PAGE_GLOBAL); /* The Guest's pages are offset inside the Launcher. */ - base = (unsigned long)lg->mem_base / PAGE_SIZE; + base = (unsigned long)cpu->lg->mem_base / PAGE_SIZE; /* We need a temporary "unsigned long" variable to hold the answer from * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't @@ -156,7 +156,7 @@ static pte_t gpte_to_spte(struct lguest *lg, pte_t gpte, int write) * page, given the virtual number. */ pfn = get_pfn(base + pte_pfn(gpte), write); if (pfn == -1UL) { - kill_guest(lg, "failed to get page %lu", pte_pfn(gpte)); + kill_guest(cpu, "failed to get page %lu", pte_pfn(gpte)); /* When we destroy the Guest, we'll go through the shadow page * tables and release_pte() them. Make sure we don't think * this one is valid! */ @@ -176,17 +176,18 @@ static void release_pte(pte_t pte) } /*:*/ -static void check_gpte(struct lguest *lg, pte_t gpte) +static void check_gpte(struct lg_cpu *cpu, pte_t gpte) { if ((pte_flags(gpte) & (_PAGE_PWT|_PAGE_PSE)) - || pte_pfn(gpte) >= lg->pfn_limit) - kill_guest(lg, "bad page table entry"); + || pte_pfn(gpte) >= cpu->lg->pfn_limit) + kill_guest(cpu, "bad page table entry"); } -static void check_gpgd(struct lguest *lg, pgd_t gpgd) +static void check_gpgd(struct lg_cpu *cpu, pgd_t gpgd) { - if ((pgd_flags(gpgd) & ~_PAGE_TABLE) || pgd_pfn(gpgd) >= lg->pfn_limit) - kill_guest(lg, "bad page directory entry"); + if ((pgd_flags(gpgd) & ~_PAGE_TABLE) || + (pgd_pfn(gpgd) >= cpu->lg->pfn_limit)) + kill_guest(cpu, "bad page directory entry"); } /*H:330 @@ -206,27 +207,26 @@ int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) unsigned long gpte_ptr; pte_t gpte; pte_t *spte; - struct lguest *lg = cpu->lg; /* First step: get the top-level Guest page table entry. */ - gpgd = lgread(lg, gpgd_addr(cpu, vaddr), pgd_t); + gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t); /* Toplevel not present? We can't map it in. */ if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) return 0; /* Now look at the matching shadow entry. */ - spgd = spgd_addr(lg, cpu->cpu_pgd, vaddr); + spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr); if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { /* No shadow entry: allocate a new shadow PTE page. */ unsigned long ptepage = get_zeroed_page(GFP_KERNEL); /* This is not really the Guest's fault, but killing it is * simple for this corner case. */ if (!ptepage) { - kill_guest(lg, "out of memory allocating pte page"); + kill_guest(cpu, "out of memory allocating pte page"); return 0; } /* We check that the Guest pgd is OK. */ - check_gpgd(lg, gpgd); + check_gpgd(cpu, gpgd); /* And we copy the flags to the shadow PGD entry. The page * number in the shadow PGD is the page we just allocated. */ *spgd = __pgd(__pa(ptepage) | pgd_flags(gpgd)); @@ -235,7 +235,7 @@ int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) /* OK, now we look at the lower level in the Guest page table: keep its * address, because we might update it later. */ gpte_ptr = gpte_addr(gpgd, vaddr); - gpte = lgread(lg, gpte_ptr, pte_t); + gpte = lgread(cpu, gpte_ptr, pte_t); /* If this page isn't in the Guest page tables, we can't page it in. */ if (!(pte_flags(gpte) & _PAGE_PRESENT)) @@ -252,7 +252,7 @@ int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) /* Check that the Guest PTE flags are OK, and the page number is below * the pfn_limit (ie. not mapping the Launcher binary). */ - check_gpte(lg, gpte); + check_gpte(cpu, gpte); /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */ gpte = pte_mkyoung(gpte); @@ -268,17 +268,17 @@ int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) /* If this is a write, we insist that the Guest page is writable (the * final arg to gpte_to_spte()). */ if (pte_dirty(gpte)) - *spte = gpte_to_spte(lg, gpte, 1); + *spte = gpte_to_spte(cpu, gpte, 1); else /* If this is a read, don't set the "writable" bit in the page * table entry, even if the Guest says it's writable. That way * we will come back here when a write does actually occur, so * we can update the Guest's _PAGE_DIRTY flag. */ - *spte = gpte_to_spte(lg, pte_wrprotect(gpte), 0); + *spte = gpte_to_spte(cpu, pte_wrprotect(gpte), 0); /* Finally, we write the Guest PTE entry back: we've set the * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */ - lgwrite(lg, gpte_ptr, pte_t, gpte); + lgwrite(cpu, gpte_ptr, pte_t, gpte); /* The fault is fixed, the page table is populated, the mapping * manipulated, the result returned and the code complete. A small @@ -303,7 +303,7 @@ static int page_writable(struct lg_cpu *cpu, unsigned long vaddr) unsigned long flags; /* Look at the current top level entry: is it present? */ - spgd = spgd_addr(cpu->lg, cpu->cpu_pgd, vaddr); + spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr); if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) return 0; @@ -320,7 +320,7 @@ static int page_writable(struct lg_cpu *cpu, unsigned long vaddr) void pin_page(struct lg_cpu *cpu, unsigned long vaddr) { if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2)) - kill_guest(cpu->lg, "bad stack page %#lx", vaddr); + kill_guest(cpu, "bad stack page %#lx", vaddr); } /*H:450 If we chase down the release_pgd() code, it looks like this: */ @@ -372,14 +372,14 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr) pte_t gpte; /* First step: get the top-level Guest page table entry. */ - gpgd = lgread(cpu->lg, gpgd_addr(cpu, vaddr), pgd_t); + gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t); /* Toplevel not present? We can't map it in. */ if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) - kill_guest(cpu->lg, "Bad address %#lx", vaddr); + kill_guest(cpu, "Bad address %#lx", vaddr); - gpte = lgread(cpu->lg, gpte_addr(gpgd, vaddr), pte_t); + gpte = lgread(cpu, gpte_addr(gpgd, vaddr), pte_t); if (!(pte_flags(gpte) & _PAGE_PRESENT)) - kill_guest(cpu->lg, "Bad address %#lx", vaddr); + kill_guest(cpu, "Bad address %#lx", vaddr); return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); } @@ -404,16 +404,16 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, int *blank_pgdir) { unsigned int next; - struct lguest *lg = cpu->lg; /* We pick one entry at random to throw out. Choosing the Least * Recently Used might be better, but this is easy. */ - next = random32() % ARRAY_SIZE(lg->pgdirs); + next = random32() % ARRAY_SIZE(cpu->lg->pgdirs); /* If it's never been allocated at all before, try now. */ - if (!lg->pgdirs[next].pgdir) { - lg->pgdirs[next].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); + if (!cpu->lg->pgdirs[next].pgdir) { + cpu->lg->pgdirs[next].pgdir = + (pgd_t *)get_zeroed_page(GFP_KERNEL); /* If the allocation fails, just keep using the one we have */ - if (!lg->pgdirs[next].pgdir) + if (!cpu->lg->pgdirs[next].pgdir) next = cpu->cpu_pgd; else /* This is a blank page, so there are no kernel @@ -421,9 +421,9 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, *blank_pgdir = 1; } /* Record which Guest toplevel this shadows. */ - lg->pgdirs[next].gpgdir = gpgdir; + cpu->lg->pgdirs[next].gpgdir = gpgdir; /* Release all the non-kernel mappings. */ - flush_user_mappings(lg, next); + flush_user_mappings(cpu->lg, next); return next; } @@ -436,13 +436,12 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) { int newpgdir, repin = 0; - struct lguest *lg = cpu->lg; /* Look to see if we have this one already. */ - newpgdir = find_pgdir(lg, pgtable); + newpgdir = find_pgdir(cpu->lg, pgtable); /* If not, we allocate or mug an existing one: if it's a fresh one, * repin gets set to 1. */ - if (newpgdir == ARRAY_SIZE(lg->pgdirs)) + if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs)) newpgdir = new_pgdir(cpu, pgtable, &repin); /* Change the current pgd index to the new one. */ cpu->cpu_pgd = newpgdir; @@ -499,11 +498,11 @@ void guest_pagetable_clear_all(struct lg_cpu *cpu) * _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, and if * they set _PAGE_DIRTY then we can put a writable PTE entry in immediately. */ -static void do_set_pte(struct lguest *lg, int idx, +static void do_set_pte(struct lg_cpu *cpu, int idx, unsigned long vaddr, pte_t gpte) { /* Look up the matching shadow page directory entry. */ - pgd_t *spgd = spgd_addr(lg, idx, vaddr); + pgd_t *spgd = spgd_addr(cpu, idx, vaddr); /* If the top level isn't present, there's no entry to update. */ if (pgd_flags(*spgd) & _PAGE_PRESENT) { @@ -515,8 +514,8 @@ static void do_set_pte(struct lguest *lg, int idx, * as well put that entry they've given us in now. This shaves * 10% off a copy-on-write micro-benchmark. */ if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { - check_gpte(lg, gpte); - *spte = gpte_to_spte(lg, gpte, + check_gpte(cpu, gpte); + *spte = gpte_to_spte(cpu, gpte, pte_flags(gpte) & _PAGE_DIRTY); } else /* Otherwise kill it and we can demand_page() it in @@ -535,22 +534,22 @@ static void do_set_pte(struct lguest *lg, int idx, * * The benefit is that when we have to track a new page table, we can copy keep * all the kernel mappings. This speeds up context switch immensely. */ -void guest_set_pte(struct lguest *lg, +void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir, unsigned long vaddr, pte_t gpte) { /* Kernel mappings must be changed on all top levels. Slow, but * doesn't happen often. */ - if (vaddr >= lg->kernel_address) { + if (vaddr >= cpu->lg->kernel_address) { unsigned int i; - for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) - if (lg->pgdirs[i].pgdir) - do_set_pte(lg, i, vaddr, gpte); + for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++) + if (cpu->lg->pgdirs[i].pgdir) + do_set_pte(cpu, i, vaddr, gpte); } else { /* Is this page table one we have a shadow for? */ - int pgdir = find_pgdir(lg, gpgdir); - if (pgdir != ARRAY_SIZE(lg->pgdirs)) + int pgdir = find_pgdir(cpu->lg, gpgdir); + if (pgdir != ARRAY_SIZE(cpu->lg->pgdirs)) /* If so, do the update. */ - do_set_pte(lg, pgdir, vaddr, gpte); + do_set_pte(cpu, pgdir, vaddr, gpte); } } @@ -601,21 +600,23 @@ int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) } /* When the Guest calls LHCALL_LGUEST_INIT we do more setup. */ -void page_table_guest_data_init(struct lguest *lg) +void page_table_guest_data_init(struct lg_cpu *cpu) { /* We get the kernel address: above this is all kernel memory. */ - if (get_user(lg->kernel_address, &lg->lguest_data->kernel_address) + if (get_user(cpu->lg->kernel_address, + &cpu->lg->lguest_data->kernel_address) /* We tell the Guest that it can't use the top 4MB of virtual * addresses used by the Switcher. */ - || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem) - || put_user(lg->pgdirs[0].gpgdir, &lg->lguest_data->pgdir)) - kill_guest(lg, "bad guest page %p", lg->lguest_data); + || put_user(4U*1024*1024, &cpu->lg->lguest_data->reserve_mem) + || put_user(cpu->lg->pgdirs[0].gpgdir, &cpu->lg->lguest_data->pgdir)) + kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); /* In flush_user_mappings() we loop from 0 to * "pgd_index(lg->kernel_address)". This assumes it won't hit the * Switcher mappings, so check that now. */ - if (pgd_index(lg->kernel_address) >= SWITCHER_PGD_INDEX) - kill_guest(lg, "bad kernel address %#lx", lg->kernel_address); + if (pgd_index(cpu->lg->kernel_address) >= SWITCHER_PGD_INDEX) + kill_guest(cpu, "bad kernel address %#lx", + cpu->lg->kernel_address); } /* When a Guest dies, our cleanup is fairly simple. */ diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c index 635f54c..ec6aa3f 100644 --- a/drivers/lguest/segments.c +++ b/drivers/lguest/segments.c @@ -148,14 +148,13 @@ void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt) * We copy it from the Guest and tweak the entries. */ void load_guest_gdt(struct lg_cpu *cpu, unsigned long table, u32 num) { - struct lguest *lg = cpu->lg; /* We assume the Guest has the same number of GDT entries as the * Host, otherwise we'd have to dynamically allocate the Guest GDT. */ if (num > ARRAY_SIZE(cpu->arch.gdt)) - kill_guest(lg, "too many gdt entries %i", num); + kill_guest(cpu, "too many gdt entries %i", num); /* We read the whole thing in, then fix it up. */ - __lgread(lg, cpu->arch.gdt, table, num * sizeof(cpu->arch.gdt[0])); + __lgread(cpu, cpu->arch.gdt, table, num * sizeof(cpu->arch.gdt[0])); fixup_gdt_table(cpu, 0, ARRAY_SIZE(cpu->arch.gdt)); /* Mark that the GDT changed so the core knows it has to copy it again, * even if the Guest is run on the same CPU. */ @@ -169,9 +168,8 @@ void load_guest_gdt(struct lg_cpu *cpu, unsigned long table, u32 num) void guest_load_tls(struct lg_cpu *cpu, unsigned long gtls) { struct desc_struct *tls = &cpu->arch.gdt[GDT_ENTRY_TLS_MIN]; - struct lguest *lg = cpu->lg; - __lgread(lg, tls, gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES); + __lgread(cpu, tls, gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES); fixup_gdt_table(cpu, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1); /* Note that just the TLS entries have changed. */ cpu->changed |= CHANGED_GDT_TLS; diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index fd6a851..e9c3ba8 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -117,7 +117,6 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) { /* This is a dummy value we need for GCC's sake. */ unsigned int clobber; - struct lguest *lg = cpu->lg; /* Copy the guest-specific information into this CPU's "struct * lguest_pages". */ @@ -144,7 +143,7 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) * 0-th argument above, ie "a"). %ebx contains the * physical address of the Guest's top-level page * directory. */ - : "0"(pages), "1"(__pa(lg->pgdirs[cpu->cpu_pgd].pgdir)) + : "0"(pages), "1"(__pa(cpu->lg->pgdirs[cpu->cpu_pgd].pgdir)) /* We tell gcc that all these registers could change, * which means we don't have to save and restore them in * the Switcher. */ @@ -217,7 +216,6 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) * instructions and skip over it. We return true if we did. */ static int emulate_insn(struct lg_cpu *cpu) { - struct lguest *lg = cpu->lg; u8 insn; unsigned int insnlen = 0, in = 0, shift = 0; /* The eip contains the *virtual* address of the Guest's instruction: @@ -231,7 +229,7 @@ static int emulate_insn(struct lg_cpu *cpu) return 0; /* Decoding x86 instructions is icky. */ - insn = lgread(lg, physaddr, u8); + insn = lgread(cpu, physaddr, u8); /* 0x66 is an "operand prefix". It means it's using the upper 16 bits of the eax register. */ @@ -239,7 +237,7 @@ static int emulate_insn(struct lg_cpu *cpu) shift = 16; /* The instruction is 1 byte so far, read the next byte. */ insnlen = 1; - insn = lgread(lg, physaddr + insnlen, u8); + insn = lgread(cpu, physaddr + insnlen, u8); } /* We can ignore the lower bit for the moment and decode the 4 opcodes @@ -283,7 +281,6 @@ static int emulate_insn(struct lg_cpu *cpu) /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */ void lguest_arch_handle_trap(struct lg_cpu *cpu) { - struct lguest *lg = cpu->lg; switch (cpu->regs->trapnum) { case 13: /* We've intercepted a General Protection Fault. */ /* Check if this was one of those annoying IN or OUT @@ -315,9 +312,10 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) * Note that if the Guest were really messed up, this could * happen before it's done the LHCALL_LGUEST_INIT hypercall, so * lg->lguest_data could be NULL */ - if (lg->lguest_data && - put_user(cpu->arch.last_pagefault, &lg->lguest_data->cr2)) - kill_guest(lg, "Writing cr2"); + if (cpu->lg->lguest_data && + put_user(cpu->arch.last_pagefault, + &cpu->lg->lguest_data->cr2)) + kill_guest(cpu, "Writing cr2"); break; case 7: /* We've intercepted a Device Not Available fault. */ /* If the Guest doesn't want to know, we already restored the @@ -345,7 +343,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) /* If the Guest doesn't have a handler (either it hasn't * registered any yet, or it's one of the faults we don't let * it handle), it dies with a cryptic error message. */ - kill_guest(lg, "unhandled trap %li at %#lx (%#lx)", + kill_guest(cpu, "unhandled trap %li at %#lx (%#lx)", cpu->regs->trapnum, cpu->regs->eip, cpu->regs->trapnum == 14 ? cpu->arch.last_pagefault : cpu->regs->errcode); @@ -514,11 +512,11 @@ int lguest_arch_do_hcall(struct lg_cpu *cpu, struct hcall_args *args) int lguest_arch_init_hypercalls(struct lg_cpu *cpu) { u32 tsc_speed; - struct lguest *lg = cpu->lg; /* The pointer to the Guest's "struct lguest_data" is the only * argument. We check that address now. */ - if (!lguest_address_ok(lg, cpu->hcall->arg1, sizeof(*lg->lguest_data))) + if (!lguest_address_ok(cpu->lg, cpu->hcall->arg1, + sizeof(*cpu->lg->lguest_data))) return -EFAULT; /* Having checked it, we simply set lg->lguest_data to point straight @@ -526,7 +524,7 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu) * copy_to_user/from_user from now on, instead of lgread/write. I put * this in to show that I'm not immune to writing stupid * optimizations. */ - lg->lguest_data = lg->mem_base + cpu->hcall->arg1; + cpu->lg->lguest_data = cpu->lg->mem_base + cpu->hcall->arg1; /* We insist that the Time Stamp Counter exist and doesn't change with * cpu frequency. Some devious chip manufacturers decided that TSC @@ -539,12 +537,12 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu) tsc_speed = tsc_khz; else tsc_speed = 0; - if (put_user(tsc_speed, &lg->lguest_data->tsc_khz)) + if (put_user(tsc_speed, &cpu->lg->lguest_data->tsc_khz)) return -EFAULT; /* The interrupt code might not like the system call vector. */ - if (!check_syscall_vector(lg)) - kill_guest(lg, "bad syscall vector"); + if (!check_syscall_vector(cpu->lg)) + kill_guest(cpu, "bad syscall vector"); return 0; } -- cgit v0.10.2 From ca94f2bdd1be626361fcfbd474d6b8823ed39f74 Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Fri, 18 Jan 2008 23:59:07 -0200 Subject: lguest: Use explicit includes rateher than indirect explicitly use ktime.h include explicitly use hrtimer.h include explicitly use sched.h include This patch adds headers explicitly to lguest sources file, to avoid depending on them being included somewhere else. Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index 32666d0..0f2cb4f 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include "lg.h" diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index b75ce3b..2337e1a 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index a87fca6..85d42d3 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "lg.h" /*L:055 When something happens, the Waker process needs a way to stop the -- cgit v0.10.2 From 84f12e39c856a8b1ab407f8216ecebaf4204b94d Mon Sep 17 00:00:00 2001 From: Glauber de Oliveira Costa Date: Fri, 18 Jan 2008 23:59:08 -0200 Subject: lguest: use __PAGE_KERNEL instead of _PAGE_KERNEL x86_64 don't expose the intermediate representation with one underline, _PAGE_KERNEL, just the double-underlined one. Use it, to get a common ground between 32 and 64-bit Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Rusty Russell diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 983e902..74b4cf2 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -646,7 +646,7 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) /* Make the last PGD entry for this Guest point to the Switcher's PTE * page for this CPU (with appropriate flags). */ - switcher_pgd = __pgd(__pa(switcher_pte_page) | _PAGE_KERNEL); + switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL); cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; @@ -658,7 +658,7 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) * page is already mapped there, we don't have to copy them out * again. */ pfn = __pa(cpu->regs_page) >> PAGE_SHIFT; - regs_pte = pfn_pte(pfn, __pgprot(_PAGE_KERNEL)); + regs_pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL)); switcher_pte_page[(unsigned long)pages/PAGE_SIZE%PTRS_PER_PTE] = regs_pte; } /*:*/ -- cgit v0.10.2