From 22a9d645677feefd402befd02edd59b122289ef1 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 7 Jan 2009 08:45:46 -0800 Subject: async: Asynchronous function calls to speed up kernel boot Right now, most of the kernel boot is strictly synchronous, such that various hardware delays are done sequentially. In order to make the kernel boot faster, this patch introduces infrastructure to allow doing some of the initialization steps asynchronously, which will hide significant portions of the hardware delays in practice. In order to not change device order and other similar observables, this patch does NOT do full parallel initialization. Rather, it operates more in the way an out of order CPU does; the work may be done out of order and asynchronous, but the observable effects (instruction retiring for the CPU) are still done in the original sequence. Signed-off-by: Arjan van de Ven diff --git a/include/linux/async.h b/include/linux/async.h new file mode 100644 index 0000000..c4ecacd --- /dev/null +++ b/include/linux/async.h @@ -0,0 +1,25 @@ +/* + * async.h: Asynchronous function calls for boot performance + * + * (C) Copyright 2009 Intel Corporation + * Author: Arjan van de Ven + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include + +typedef u64 async_cookie_t; +typedef void (async_func_ptr) (void *data, async_cookie_t cookie); + +extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data); +extern async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *list); +extern void async_synchronize_full(void); +extern void async_synchronize_full_special(struct list_head *list); +extern void async_synchronize_cookie(async_cookie_t cookie); +extern void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *list); + diff --git a/init/do_mounts.c b/init/do_mounts.c index 5efca73..708105e 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -372,6 +373,7 @@ void __init prepare_namespace(void) /* wait for the known devices to complete their probing */ while (driver_probe_done() != 0) msleep(100); + async_synchronize_full(); md_run_setup(); diff --git a/init/main.c b/init/main.c index b5a892c..f66715d 100644 --- a/init/main.c +++ b/init/main.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -684,7 +685,7 @@ asmlinkage void __init start_kernel(void) rest_init(); } -static int initcall_debug; +int initcall_debug; core_param(initcall_debug, initcall_debug, bool, 0644); int do_one_initcall(initcall_t fn) @@ -785,6 +786,8 @@ static void run_init_process(char *init_filename) */ static noinline int init_post(void) { + /* need to finish all async __init code before freeing the memory */ + async_synchronize_full(); free_initmem(); unlock_kernel(); mark_rodata_ro(); diff --git a/kernel/Makefile b/kernel/Makefile index e1c5bf3..2921d90 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ - notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o + notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ + async.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files diff --git a/kernel/async.c b/kernel/async.c new file mode 100644 index 0000000..afaa8a6 --- /dev/null +++ b/kernel/async.c @@ -0,0 +1,321 @@ +/* + * async.c: Asynchronous function calls for boot performance + * + * (C) Copyright 2009 Intel Corporation + * Author: Arjan van de Ven + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + + +/* + +Goals and Theory of Operation + +The primary goal of this feature is to reduce the kernel boot time, +by doing various independent hardware delays and discovery operations +decoupled and not strictly serialized. + +More specifically, the asynchronous function call concept allows +certain operations (primarily during system boot) to happen +asynchronously, out of order, while these operations still +have their externally visible parts happen sequentially and in-order. +(not unlike how out-of-order CPUs retire their instructions in order) + +Key to the asynchronous function call implementation is the concept of +a "sequence cookie" (which, although it has an abstracted type, can be +thought of as a monotonically incrementing number). + +The async core will assign each scheduled event such a sequence cookie and +pass this to the called functions. + +The asynchronously called function should before doing a globally visible +operation, such as registering device numbers, call the +async_synchronize_cookie() function and pass in its own cookie. The +async_synchronize_cookie() function will make sure that all asynchronous +operations that were scheduled prior to the operation corresponding with the +cookie have completed. + +Subsystem/driver initialization code that scheduled asynchronous probe +functions, but which shares global resources with other drivers/subsystems +that do not use the asynchronous call feature, need to do a full +synchronization with the async_synchronize_full() function, before returning +from their init function. This is to maintain strict ordering between the +asynchronous and synchronous parts of the kernel. + +*/ + +#include +#include +#include +#include +#include +#include +#include + +static async_cookie_t next_cookie = 1; + +#define MAX_THREADS 256 +#define MAX_WORK 32768 + +static LIST_HEAD(async_pending); +static LIST_HEAD(async_running); +static DEFINE_SPINLOCK(async_lock); + +struct async_entry { + struct list_head list; + async_cookie_t cookie; + async_func_ptr *func; + void *data; + struct list_head *running; +}; + +static DECLARE_WAIT_QUEUE_HEAD(async_done); +static DECLARE_WAIT_QUEUE_HEAD(async_new); + +static atomic_t entry_count; +static atomic_t thread_count; + +extern int initcall_debug; + + +/* + * MUST be called with the lock held! + */ +static async_cookie_t __lowest_in_progress(struct list_head *running) +{ + struct async_entry *entry; + if (!list_empty(&async_pending)) { + entry = list_first_entry(&async_pending, + struct async_entry, list); + return entry->cookie; + } else if (!list_empty(running)) { + entry = list_first_entry(running, + struct async_entry, list); + return entry->cookie; + } else { + /* nothing in progress... next_cookie is "infinity" */ + return next_cookie; + } + +} +/* + * pick the first pending entry and run it + */ +static void run_one_entry(void) +{ + unsigned long flags; + struct async_entry *entry; + ktime_t calltime, delta, rettime; + + /* 1) pick one task from the pending queue */ + + spin_lock_irqsave(&async_lock, flags); + if (list_empty(&async_pending)) + goto out; + entry = list_first_entry(&async_pending, struct async_entry, list); + + /* 2) move it to the running queue */ + list_del(&entry->list); + list_add_tail(&entry->list, &async_running); + spin_unlock_irqrestore(&async_lock, flags); + + /* 3) run it (and print duration)*/ + if (initcall_debug) { + printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current)); + calltime = ktime_get(); + } + entry->func(entry->data, entry->cookie); + if (initcall_debug) { + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie, + entry->func, ktime_to_ns(delta) >> 10); + } + + /* 4) remove it from the running queue */ + spin_lock_irqsave(&async_lock, flags); + list_del(&entry->list); + + /* 5) free the entry */ + kfree(entry); + atomic_dec(&entry_count); + + spin_unlock_irqrestore(&async_lock, flags); + + /* 6) wake up any waiters. */ + wake_up(&async_done); + return; + +out: + spin_unlock_irqrestore(&async_lock, flags); +} + + +static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct list_head *running) +{ + struct async_entry *entry; + unsigned long flags; + async_cookie_t newcookie; + + + /* allow irq-off callers */ + entry = kzalloc(sizeof(struct async_entry), GFP_ATOMIC); + + /* + * If we're out of memory or if there's too much work + * pending already, we execute synchronously. + */ + if (!entry || atomic_read(&entry_count) > MAX_WORK) { + kfree(entry); + spin_lock_irqsave(&async_lock, flags); + newcookie = next_cookie++; + spin_unlock_irqrestore(&async_lock, flags); + + /* low on memory.. run synchronously */ + ptr(data, newcookie); + return newcookie; + } + entry->func = ptr; + entry->data = data; + entry->running = running; + + spin_lock_irqsave(&async_lock, flags); + newcookie = entry->cookie = next_cookie++; + list_add_tail(&entry->list, &async_pending); + atomic_inc(&entry_count); + spin_unlock_irqrestore(&async_lock, flags); + wake_up(&async_new); + return newcookie; +} + +async_cookie_t async_schedule(async_func_ptr *ptr, void *data) +{ + return __async_schedule(ptr, data, &async_pending); +} +EXPORT_SYMBOL_GPL(async_schedule); + +async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *running) +{ + return __async_schedule(ptr, data, running); +} +EXPORT_SYMBOL_GPL(async_schedule_special); + +void async_synchronize_full(void) +{ + async_synchronize_cookie(next_cookie); +} +EXPORT_SYMBOL_GPL(async_synchronize_full); + +void async_synchronize_full_special(struct list_head *list) +{ + async_synchronize_cookie_special(next_cookie, list); +} +EXPORT_SYMBOL_GPL(async_synchronize_full_special); + +void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running) +{ + ktime_t starttime, delta, endtime; + + if (initcall_debug) { + printk("async_waiting @ %i\n", task_pid_nr(current)); + starttime = ktime_get(); + } + + wait_event(async_done, __lowest_in_progress(running) >= cookie); + + if (initcall_debug) { + endtime = ktime_get(); + delta = ktime_sub(endtime, starttime); + + printk("async_continuing @ %i after %lli usec\n", + task_pid_nr(current), ktime_to_ns(delta) >> 10); + } +} +EXPORT_SYMBOL_GPL(async_synchronize_cookie_special); + +void async_synchronize_cookie(async_cookie_t cookie) +{ + async_synchronize_cookie_special(cookie, &async_running); +} +EXPORT_SYMBOL_GPL(async_synchronize_cookie); + + +static int async_thread(void *unused) +{ + DECLARE_WAITQUEUE(wq, current); + add_wait_queue(&async_new, &wq); + + while (!kthread_should_stop()) { + int ret = HZ; + set_current_state(TASK_INTERRUPTIBLE); + /* + * check the list head without lock.. false positives + * are dealt with inside run_one_entry() while holding + * the lock. + */ + rmb(); + if (!list_empty(&async_pending)) + run_one_entry(); + else + ret = schedule_timeout(HZ); + + if (ret == 0) { + /* + * we timed out, this means we as thread are redundant. + * we sign off and die, but we to avoid any races there + * is a last-straw check to see if work snuck in. + */ + atomic_dec(&thread_count); + wmb(); /* manager must see our departure first */ + if (list_empty(&async_pending)) + break; + /* + * woops work came in between us timing out and us + * signing off; we need to stay alive and keep working. + */ + atomic_inc(&thread_count); + } + } + remove_wait_queue(&async_new, &wq); + + return 0; +} + +static int async_manager_thread(void *unused) +{ + DECLARE_WAITQUEUE(wq, current); + add_wait_queue(&async_new, &wq); + + while (!kthread_should_stop()) { + int tc, ec; + + set_current_state(TASK_INTERRUPTIBLE); + + tc = atomic_read(&thread_count); + rmb(); + ec = atomic_read(&entry_count); + + while (tc < ec && tc < MAX_THREADS) { + kthread_run(async_thread, NULL, "async/%i", tc); + atomic_inc(&thread_count); + tc++; + } + + schedule(); + } + remove_wait_queue(&async_new, &wq); + + return 0; +} + +static int __init async_init(void) +{ + kthread_run(async_manager_thread, NULL, "async/mgr"); + return 0; +} + +core_initcall(async_init); diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index cc0f732..1de9700 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "internals.h" @@ -34,6 +35,10 @@ unsigned long probe_irq_on(void) unsigned int status; int i; + /* + * quiesce the kernel, or at least the asynchronous portion + */ + async_synchronize_full(); mutex_lock(&probing_active); /* * something may have generated an irq long ago and we want to diff --git a/kernel/module.c b/kernel/module.c index 496dcb5..c9332c9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -50,6 +50,7 @@ #include #include #include +#include #if 0 #define DEBUGP printk @@ -816,6 +817,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags) mod->exit(); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); + async_synchronize_full(); mutex_lock(&module_mutex); /* Store the name of the last unloaded module for diagnostic purposes */ strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); -- cgit v0.10.2 From 4ace92fc112c6069b4fcb95a31d3142d4a43ff2a Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 4 Jan 2009 05:32:28 -0800 Subject: fastboot: make scsi probes asynchronous This patch makes part of the scsi probe (which is mostly device spin up and the partition scan) asynchronous. Only the part that runs after getting the device number allocated is asynchronous, ensuring that device numbering remains stable. Signed-off-by: Arjan van de Ven diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 18486b5..17914a3 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -179,6 +180,8 @@ int scsi_complete_async_scans(void) spin_unlock(&async_scan_lock); kfree(data); + /* Synchronize async operations globally */ + async_synchronize_full(); return 0; } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 62b28d5..e035c11 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -1802,6 +1803,71 @@ static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen) return 0; } +/* + * The asynchronous part of sd_probe + */ +static void sd_probe_async(void *data, async_cookie_t cookie) +{ + struct scsi_disk *sdkp = data; + struct scsi_device *sdp; + struct gendisk *gd; + u32 index; + struct device *dev; + + sdp = sdkp->device; + gd = sdkp->disk; + index = sdkp->index; + dev = &sdp->sdev_gendev; + + if (!sdp->request_queue->rq_timeout) { + if (sdp->type != TYPE_MOD) + blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT); + else + blk_queue_rq_timeout(sdp->request_queue, + SD_MOD_TIMEOUT); + } + + device_initialize(&sdkp->dev); + sdkp->dev.parent = &sdp->sdev_gendev; + sdkp->dev.class = &sd_disk_class; + strncpy(sdkp->dev.bus_id, sdp->sdev_gendev.bus_id, BUS_ID_SIZE); + + if (device_add(&sdkp->dev)) + goto out_free_index; + + get_device(&sdp->sdev_gendev); + + if (index < SD_MAX_DISKS) { + gd->major = sd_major((index & 0xf0) >> 4); + gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); + gd->minors = SD_MINORS; + } + gd->fops = &sd_fops; + gd->private_data = &sdkp->driver; + gd->queue = sdkp->device->request_queue; + + sd_revalidate_disk(gd); + + blk_queue_prep_rq(sdp->request_queue, sd_prep_fn); + + gd->driverfs_dev = &sdp->sdev_gendev; + gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS; + if (sdp->removable) + gd->flags |= GENHD_FL_REMOVABLE; + + dev_set_drvdata(dev, sdkp); + add_disk(gd); + sd_dif_config_host(sdkp); + + sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n", + sdp->removable ? "removable " : ""); + + return; + + out_free_index: + ida_remove(&sd_index_ida, index); +} + /** * sd_probe - called during driver initialization and whenever a * new scsi device is attached to the system. It is called once @@ -1865,48 +1931,7 @@ static int sd_probe(struct device *dev) sdkp->openers = 0; sdkp->previous_state = 1; - if (!sdp->request_queue->rq_timeout) { - if (sdp->type != TYPE_MOD) - blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT); - else - blk_queue_rq_timeout(sdp->request_queue, - SD_MOD_TIMEOUT); - } - - device_initialize(&sdkp->dev); - sdkp->dev.parent = &sdp->sdev_gendev; - sdkp->dev.class = &sd_disk_class; - strncpy(sdkp->dev.bus_id, sdp->sdev_gendev.bus_id, BUS_ID_SIZE); - - if (device_add(&sdkp->dev)) - goto out_free_index; - - get_device(&sdp->sdev_gendev); - - if (index < SD_MAX_DISKS) { - gd->major = sd_major((index & 0xf0) >> 4); - gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); - gd->minors = SD_MINORS; - } - gd->fops = &sd_fops; - gd->private_data = &sdkp->driver; - gd->queue = sdkp->device->request_queue; - - sd_revalidate_disk(gd); - - blk_queue_prep_rq(sdp->request_queue, sd_prep_fn); - - gd->driverfs_dev = &sdp->sdev_gendev; - gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS; - if (sdp->removable) - gd->flags |= GENHD_FL_REMOVABLE; - - dev_set_drvdata(dev, sdkp); - add_disk(gd); - sd_dif_config_host(sdkp); - - sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n", - sdp->removable ? "removable " : ""); + async_schedule(sd_probe_async, sdkp); return 0; -- cgit v0.10.2 From 793180570ff2530d133343ceea85648de5f01b02 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 4 Jan 2009 05:32:28 -0800 Subject: fastboot: make the libata port scan asynchronous This patch makes the libata port scanning asynchronous (per device). There is a synchronization point before doing the actual disk scan so that device ordering is not affected. Signed-off-by: Arjan van de Ven diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index fecca42..7d3ae6a 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -5909,6 +5910,48 @@ void ata_host_init(struct ata_host *host, struct device *dev, host->ops = ops; } + +static void async_port_probe(void *data, async_cookie_t cookie) +{ + int rc; + struct ata_port *ap = data; + /* probe */ + if (ap->ops->error_handler) { + struct ata_eh_info *ehi = &ap->link.eh_info; + unsigned long flags; + + ata_port_probe(ap); + + /* kick EH for boot probing */ + spin_lock_irqsave(ap->lock, flags); + + ehi->probe_mask |= ATA_ALL_DEVICES; + ehi->action |= ATA_EH_RESET | ATA_EH_LPM; + ehi->flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET; + + ap->pflags &= ~ATA_PFLAG_INITIALIZING; + ap->pflags |= ATA_PFLAG_LOADING; + ata_port_schedule_eh(ap); + + spin_unlock_irqrestore(ap->lock, flags); + + /* wait for EH to finish */ + ata_port_wait_eh(ap); + } else { + DPRINTK("ata%u: bus probe begin\n", ap->print_id); + rc = ata_bus_probe(ap); + DPRINTK("ata%u: bus probe end\n", ap->print_id); + + if (rc) { + /* FIXME: do something useful here? + * Current libata behavior will + * tear down everything when + * the module is removed + * or the h/w is unplugged. + */ + } + } +} /** * ata_host_register - register initialized ATA host * @host: ATA host to register @@ -5988,45 +6031,9 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht) DPRINTK("probe begin\n"); for (i = 0; i < host->n_ports; i++) { struct ata_port *ap = host->ports[i]; - - /* probe */ - if (ap->ops->error_handler) { - struct ata_eh_info *ehi = &ap->link.eh_info; - unsigned long flags; - - ata_port_probe(ap); - - /* kick EH for boot probing */ - spin_lock_irqsave(ap->lock, flags); - - ehi->probe_mask |= ATA_ALL_DEVICES; - ehi->action |= ATA_EH_RESET | ATA_EH_LPM; - ehi->flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET; - - ap->pflags &= ~ATA_PFLAG_INITIALIZING; - ap->pflags |= ATA_PFLAG_LOADING; - ata_port_schedule_eh(ap); - - spin_unlock_irqrestore(ap->lock, flags); - - /* wait for EH to finish */ - ata_port_wait_eh(ap); - } else { - DPRINTK("ata%u: bus probe begin\n", ap->print_id); - rc = ata_bus_probe(ap); - DPRINTK("ata%u: bus probe end\n", ap->print_id); - - if (rc) { - /* FIXME: do something useful here? - * Current libata behavior will - * tear down everything when - * the module is removed - * or the h/w is unplugged. - */ - } - } + async_schedule(async_port_probe, ap); } - + async_synchronize_full(); /* probes are done, now scan each port's disk(s) */ DPRINTK("host probe begin\n"); for (i = 0; i < host->n_ports; i++) { @@ -6034,6 +6041,7 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht) ata_scsi_scan_host(ap, 1); } + DPRINTK("host probe end\n"); return 0; } -- cgit v0.10.2 From f29d3b23238e1955a8094e038c72546e99308e61 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 5 Jan 2009 15:07:07 -0800 Subject: fastboot: Make libata initialization even more async As suggested by Linus: Don't do the libata init in 2 separate steps with a global sync inbetween, but do it as one async step, with a local sync before registering the device. This cuts the boottime on my machine with 2 sata controllers down significantly, and it seems to work. Would be nice if the libata folks take a good look at this patch though.. Signed-off-by: Arjan van de Ven diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 7d3ae6a..f178a45 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5951,6 +5951,12 @@ static void async_port_probe(void *data, async_cookie_t cookie) */ } } + + /* in order to keep device order, we need to synchronize at this point */ + async_synchronize_cookie(cookie); + + ata_scsi_scan_host(ap, 1); + } /** * ata_host_register - register initialized ATA host @@ -6033,15 +6039,7 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht) struct ata_port *ap = host->ports[i]; async_schedule(async_port_probe, ap); } - async_synchronize_full(); - /* probes are done, now scan each port's disk(s) */ - DPRINTK("host probe begin\n"); - for (i = 0; i < host->n_ports; i++) { - struct ata_port *ap = host->ports[i]; - - ata_scsi_scan_host(ap, 1); - } - DPRINTK("host probe end\n"); + DPRINTK("probe end\n"); return 0; } -- cgit v0.10.2 From efaee192063a54749c56b7383803e16fe553630e Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 6 Jan 2009 07:20:54 -0800 Subject: async: make the final inode deletion an asynchronous event this makes "rm -rf" on a (names cached) kernel tree go from 11.6 to 8.6 seconds on an ext3 filesystem Signed-off-by: Arjan van de Ven diff --git a/fs/inode.c b/fs/inode.c index 7a6e8c2..0013ac1 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -22,6 +22,7 @@ #include #include #include +#include /* * This is needed for the following functions: @@ -1138,16 +1139,11 @@ EXPORT_SYMBOL(remove_inode_hash); * I_FREEING is set so that no-one will take a new reference to the inode while * it is being deleted. */ -void generic_delete_inode(struct inode *inode) +static void generic_delete_inode_async(void *data, async_cookie_t cookie) { + struct inode *inode = data; const struct super_operations *op = inode->i_sb->s_op; - list_del_init(&inode->i_list); - list_del_init(&inode->i_sb_list); - inode->i_state |= I_FREEING; - inodes_stat.nr_inodes--; - spin_unlock(&inode_lock); - security_inode_delete(inode); if (op->delete_inode) { @@ -1171,6 +1167,16 @@ void generic_delete_inode(struct inode *inode) destroy_inode(inode); } +void generic_delete_inode(struct inode *inode) +{ + list_del_init(&inode->i_list); + list_del_init(&inode->i_sb_list); + inode->i_state |= I_FREEING; + inodes_stat.nr_inodes--; + spin_unlock(&inode_lock); + async_schedule_special(generic_delete_inode_async, inode, &inode->i_sb->s_async_list); +} + EXPORT_SYMBOL(generic_delete_inode); static void generic_forget_inode(struct inode *inode) diff --git a/fs/super.c b/fs/super.c index ddba069..cb20744 100644 --- a/fs/super.c +++ b/fs/super.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "internal.h" @@ -71,6 +72,7 @@ static struct super_block *alloc_super(struct file_system_type *type) INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); INIT_LIST_HEAD(&s->s_dentry_lru); + INIT_LIST_HEAD(&s->s_async_list); init_rwsem(&s->s_umount); mutex_init(&s->s_lock); lockdep_set_class(&s->s_umount, &type->s_umount_key); @@ -289,11 +291,18 @@ void generic_shutdown_super(struct super_block *sb) { const struct super_operations *sop = sb->s_op; + if (sb->s_root) { shrink_dcache_for_umount(sb); fsync_super(sb); lock_super(sb); sb->s_flags &= ~MS_ACTIVE; + + /* + * wait for asynchronous fs operations to finish before going further + */ + async_synchronize_full_special(&sb->s_async_list); + /* bad name - it should be evict_inodes() */ invalidate_inodes(sb); lock_kernel(); @@ -449,6 +458,7 @@ void sync_filesystems(int wait) if (sb->s_flags & MS_RDONLY) continue; sb->s_need_sync_fs = 1; + async_synchronize_full_special(&sb->s_async_list); } restart: diff --git a/include/linux/fs.h b/include/linux/fs.h index d7eba77..e38a64d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1184,6 +1184,11 @@ struct super_block { * generic_show_options() */ char *s_options; + + /* + * storage for asynchronous operations + */ + struct list_head s_async_list; }; extern struct timespec current_fs_time(struct super_block *sb); -- cgit v0.10.2 From 24b0ecad07ac4d7ef74cb6f7da08c449fa9f6a4f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 4 Jan 2009 10:59:18 -0800 Subject: bootchart: improve output based on Dave Jones' feedback Dave Jones, in his blog, had some feedback about the bootchart script: Primarily his complaint was that shorter delays weren't visualized. The reason for that was that too small delays will have their labels mixed up in the graph in an unreadable mess. This patch has a fix for this; for one, it makes the output wider, so more will fit. The second part is that smaller delays are now shown with a much smaller font for the label; while this isn't per se readable at a 1:1 zoom, at least you can zoom in with most SVG viewing applications and see what it is you are looking at. Signed-off-by: Arjan van de Ven diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl index f0af9aa..0a498e3 100644 --- a/scripts/bootgraph.pl +++ b/scripts/bootgraph.pl @@ -88,7 +88,7 @@ END } print " \n"; -print "\n"; +print "\n"; my @styles; @@ -105,8 +105,9 @@ $styles[9] = "fill:rgb(255,255,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0 $styles[10] = "fill:rgb(255,128,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; $styles[11] = "fill:rgb(128,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; -my $mult = 950.0 / ($maxtime - $firsttime); -my $threshold = ($maxtime - $firsttime) / 60.0; +my $mult = 1950.0 / ($maxtime - $firsttime); +my $threshold2 = ($maxtime - $firsttime) / 120.0; +my $threshold = $threshold2/10; my $stylecounter = 0; my %rows; my $rowscount = 1; @@ -116,7 +117,7 @@ foreach my $key (@initcalls) { my $duration = $end{$key} - $start{$key}; if ($duration >= $threshold) { - my ($s, $s2, $e, $w, $y, $y2, $style); + my ($s, $s2, $s3, $e, $w, $y, $y2, $style); my $pid = $pids{$key}; if (!defined($rows{$pid})) { @@ -125,6 +126,7 @@ foreach my $key (@initcalls) { } $s = ($start{$key} - $firsttime) * $mult; $s2 = $s + 6; + $s3 = $s + 1; $e = ($end{$key} - $firsttime) * $mult; $w = $e - $s; @@ -138,7 +140,11 @@ foreach my $key (@initcalls) { }; print "\n"; - print "$key\n"; + if ($duration >= $threshold2) { + print "$key\n"; + } else { + print "$key\n"; + } } } -- cgit v0.10.2 From ad160d23198193135cb2bcc75222e0816b5838c0 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 7 Jan 2009 09:28:53 -0800 Subject: async: don't do the initcall stuff post boot while tracking the asynchronous calls during boot using the initcall_debug convention is useful, doing it once the kernel is done is actually bad now that we use asynchronous operations post boot as well... Signed-off-by: Arjan van de Ven diff --git a/kernel/async.c b/kernel/async.c index afaa8a6..9737338 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -124,12 +124,12 @@ static void run_one_entry(void) spin_unlock_irqrestore(&async_lock, flags); /* 3) run it (and print duration)*/ - if (initcall_debug) { + if (initcall_debug && system_state == SYSTEM_BOOTING) { printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current)); calltime = ktime_get(); } entry->func(entry->data, entry->cookie); - if (initcall_debug) { + if (initcall_debug && system_state == SYSTEM_BOOTING) { rettime = ktime_get(); delta = ktime_sub(rettime, calltime); printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie, @@ -220,14 +220,14 @@ void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *r { ktime_t starttime, delta, endtime; - if (initcall_debug) { + if (initcall_debug && system_state == SYSTEM_BOOTING) { printk("async_waiting @ %i\n", task_pid_nr(current)); starttime = ktime_get(); } wait_event(async_done, __lowest_in_progress(running) >= cookie); - if (initcall_debug) { + if (initcall_debug && system_state == SYSTEM_BOOTING) { endtime = ktime_get(); delta = ktime_sub(endtime, starttime); -- cgit v0.10.2