From 2226b1c219a18804bc40e32a5d53c287a6c925d9 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 6 Jan 2009 17:55:32 +0000 Subject: uwb: safely remove all reservations When removing all reservations during shutdown, terminate them first and then wait for any pending timeout work to complete. This prevents the timeout work from running after the reservation has been freed. Signed-off-by: David Vrabel diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c index ec6eecb..886f977 100644 --- a/drivers/uwb/rsv.c +++ b/drivers/uwb/rsv.c @@ -114,7 +114,8 @@ void uwb_rsv_dump(char *text, struct uwb_rsv *rsv) devaddr = rsv->target.devaddr; uwb_dev_addr_print(target, sizeof(target), &devaddr); - dev_dbg(dev, "rsv %s -> %s: %s\n", owner, target, uwb_rsv_state_str(rsv->state)); + dev_dbg(dev, "rsv %s %s -> %s: %s\n", + text, owner, target, uwb_rsv_state_str(rsv->state)); } static void uwb_rsv_release(struct kref *kref) @@ -511,8 +512,7 @@ void uwb_rsv_remove(struct uwb_rsv *rsv) if (uwb_rsv_is_owner(rsv)) uwb_rsv_put_stream(rsv); - - del_timer_sync(&rsv->timer); + uwb_dev_put(rsv->owner); if (rsv->target.type == UWB_RSV_TARGET_DEV) uwb_dev_put(rsv->target.dev); @@ -943,13 +943,22 @@ void uwb_rsv_remove_all(struct uwb_rc *rc) mutex_lock(&rc->rsvs_mutex); list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) { - uwb_rsv_remove(rsv); + if (rsv->state != UWB_RSV_STATE_NONE) + uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE); + del_timer_sync(&rsv->timer); } /* Cancel any postponed update. */ rc->set_drp_ie_pending = 0; mutex_unlock(&rc->rsvs_mutex); cancel_delayed_work_sync(&rc->rsv_update_work); + flush_workqueue(rc->rsv_workq); + + mutex_lock(&rc->rsvs_mutex); + list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) { + uwb_rsv_remove(rsv); + } + mutex_unlock(&rc->rsvs_mutex); } void uwb_rsv_init(struct uwb_rc *rc) -- cgit v0.10.2 From 9a9b1d17ba59b78e4bae67f7a7cf546986a42e7d Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 6 Jan 2009 17:58:02 +0000 Subject: wusb: return -ENOTCONN when resetting a port with no connected device If reading the device descriptor fails during hub_port_init() fails, then the port is disabled, disconnecting the device. The port is then reset at the start of the next init attempt but there is no device to reset. Signed-off-by: David Vrabel diff --git a/drivers/usb/wusbcore/devconnect.c b/drivers/usb/wusbcore/devconnect.c index e2e7e4b..8e18141 100644 --- a/drivers/usb/wusbcore/devconnect.c +++ b/drivers/usb/wusbcore/devconnect.c @@ -386,6 +386,7 @@ static void __wusbhc_dev_disconnect(struct wusbhc *wusbhc, | USB_PORT_STAT_LOW_SPEED | USB_PORT_STAT_HIGH_SPEED); port->change |= USB_PORT_STAT_C_CONNECTION | USB_PORT_STAT_C_ENABLE; if (wusb_dev) { + dev_dbg(wusbhc->dev, "disconnecting device from port %d\n", wusb_dev->port_idx); if (!list_empty(&wusb_dev->cack_node)) list_del_init(&wusb_dev->cack_node); /* For the one in cack_add() */ diff --git a/drivers/usb/wusbcore/rh.c b/drivers/usb/wusbcore/rh.c index 95c6fa3..407a9fc 100644 --- a/drivers/usb/wusbcore/rh.c +++ b/drivers/usb/wusbcore/rh.c @@ -100,6 +100,9 @@ static int wusbhc_rh_port_reset(struct wusbhc *wusbhc, u8 port_idx) struct wusb_port *port = wusb_port_by_idx(wusbhc, port_idx); struct wusb_dev *wusb_dev = port->wusb_dev; + if (wusb_dev == NULL) + return -ENOTCONN; + port->status |= USB_PORT_STAT_RESET; port->change |= USB_PORT_STAT_C_RESET; -- cgit v0.10.2 From 04c470adb01c62bb9bd663cfc4875cf0a4eb01ab Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Sun, 4 Jan 2009 11:13:50 +0800 Subject: uwb: remove unused #include 's Remove unused #include 's in file(s) below, drivers/uwb/allocator.c Signed-off-by: Huang Weiyi Signed-off-by: David Vrabel diff --git a/drivers/uwb/allocator.c b/drivers/uwb/allocator.c index c8185e6..c13cec7 100644 --- a/drivers/uwb/allocator.c +++ b/drivers/uwb/allocator.c @@ -15,7 +15,6 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -#include #include #include -- cgit v0.10.2 From a5e6ced58d423cb09c4fc0087dcfdb0b5deb5e1c Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 7 Jan 2009 10:54:22 +0000 Subject: wusb: timeout when waiting for ASL/PZL updates in whci-hcd Timeout if an ASL or PZL update doesn't not complete and reset the hardware. Signed-off-by: David Vrabel diff --git a/drivers/usb/host/whci/asl.c b/drivers/usb/host/whci/asl.c index 577c0d2..2291c5f 100644 --- a/drivers/usb/host/whci/asl.c +++ b/drivers/usb/host/whci/asl.c @@ -170,12 +170,17 @@ void asl_stop(struct whc *whc) void asl_update(struct whc *whc, uint32_t wusbcmd) { struct wusbhc *wusbhc = &whc->wusbhc; + long t; mutex_lock(&wusbhc->mutex); if (wusbhc->active) { whc_write_wusbcmd(whc, wusbcmd, wusbcmd); - wait_event(whc->async_list_wq, - (le_readl(whc->base + WUSBCMD) & WUSBCMD_ASYNC_UPDATED) == 0); + t = wait_event_timeout( + whc->async_list_wq, + (le_readl(whc->base + WUSBCMD) & WUSBCMD_ASYNC_UPDATED) == 0, + msecs_to_jiffies(1000)); + if (t == 0) + whc_hw_error(whc, "ASL update timeout"); } mutex_unlock(&wusbhc->mutex); } diff --git a/drivers/usb/host/whci/hw.c b/drivers/usb/host/whci/hw.c index d498e72..6afa2e3 100644 --- a/drivers/usb/host/whci/hw.c +++ b/drivers/usb/host/whci/hw.c @@ -87,3 +87,18 @@ out: return ret; } + +/** + * whc_hw_error - recover from a hardware error + * @whc: the WHCI HC that broke. + * @reason: a description of the failure. + * + * Recover from broken hardware with a full reset. + */ +void whc_hw_error(struct whc *whc, const char *reason) +{ + struct wusbhc *wusbhc = &whc->wusbhc; + + dev_err(&whc->umc->dev, "hardware error: %s\n", reason); + wusbhc_reset_all(wusbhc); +} diff --git a/drivers/usb/host/whci/pzl.c b/drivers/usb/host/whci/pzl.c index 2ae5abf..7dc85a0 100644 --- a/drivers/usb/host/whci/pzl.c +++ b/drivers/usb/host/whci/pzl.c @@ -183,12 +183,17 @@ void pzl_stop(struct whc *whc) void pzl_update(struct whc *whc, uint32_t wusbcmd) { struct wusbhc *wusbhc = &whc->wusbhc; + long t; mutex_lock(&wusbhc->mutex); if (wusbhc->active) { whc_write_wusbcmd(whc, wusbcmd, wusbcmd); - wait_event(whc->periodic_list_wq, - (le_readl(whc->base + WUSBCMD) & WUSBCMD_PERIODIC_UPDATED) == 0); + t = wait_event_timeout( + whc->periodic_list_wq, + (le_readl(whc->base + WUSBCMD) & WUSBCMD_PERIODIC_UPDATED) == 0, + msecs_to_jiffies(1000)); + if (t == 0) + whc_hw_error(whc, "PZL update timeout"); } mutex_unlock(&wusbhc->mutex); } diff --git a/drivers/usb/host/whci/whcd.h b/drivers/usb/host/whci/whcd.h index 0f3540f..d3543a1 100644 --- a/drivers/usb/host/whci/whcd.h +++ b/drivers/usb/host/whci/whcd.h @@ -137,6 +137,7 @@ void whc_clean_up(struct whc *whc); /* hw.c */ void whc_write_wusbcmd(struct whc *whc, u32 mask, u32 val); int whc_do_gencmd(struct whc *whc, u32 cmd, u32 params, void *addr, size_t len); +void whc_hw_error(struct whc *whc, const char *reason); /* wusb.c */ int whc_wusbhc_start(struct wusbhc *wusbhc); -- cgit v0.10.2 From c9cbf3d3b35f198fab39e98d696312dd0b97a69a Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Sat, 10 Jan 2009 23:44:22 -0800 Subject: Input: usbtouchscreen - allow reporting calibrated data This patch adds a module parameter to report either the raw coordinate data or the hardware-calibrated coordinate data for MicroTouch/3M touchscreens. The default is set to the raw coordinates for backwards compatibilty. Signed-off-by: Dan Streetman Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c index 5080b26..6d27a1d 100644 --- a/drivers/input/touchscreen/usbtouchscreen.c +++ b/drivers/input/touchscreen/usbtouchscreen.c @@ -60,6 +60,10 @@ static int swap_xy; module_param(swap_xy, bool, 0644); MODULE_PARM_DESC(swap_xy, "If set X and Y axes are swapped."); +static int hwcalib_xy; +module_param(hwcalib_xy, bool, 0644); +MODULE_PARM_DESC(hwcalib_xy, "If set hw-calibrated X/Y are used if available"); + /* device specifc data/functions */ struct usbtouch_usb; struct usbtouch_device_info { @@ -260,8 +264,13 @@ static int panjit_read_data(struct usbtouch_usb *dev, unsigned char *pkt) static int mtouch_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { - dev->x = (pkt[8] << 8) | pkt[7]; - dev->y = (pkt[10] << 8) | pkt[9]; + if (hwcalib_xy) { + dev->x = (pkt[4] << 8) | pkt[3]; + dev->y = 0xffff - ((pkt[6] << 8) | pkt[5]); + } else { + dev->x = (pkt[8] << 8) | pkt[7]; + dev->y = (pkt[10] << 8) | pkt[9]; + } dev->touch = (pkt[2] & 0x40) ? 1 : 0; return 1; @@ -294,6 +303,12 @@ static int mtouch_init(struct usbtouch_usb *usbtouch) return ret; } + /* Default min/max xy are the raw values, override if using hw-calib */ + if (hwcalib_xy) { + input_set_abs_params(usbtouch->input, ABS_X, 0, 0xffff, 0, 0); + input_set_abs_params(usbtouch->input, ABS_Y, 0, 0xffff, 0, 0); + } + return 0; } #endif -- cgit v0.10.2 From 520abcdeb13de23e22b7d4367b1c3b136ef3b108 Mon Sep 17 00:00:00 2001 From: Daniel Mierswa Date: Sat, 10 Jan 2009 23:44:22 -0800 Subject: Input: atkbd - make forced_release_keys[] static Signed-off-by: Andrew Morton Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index f6e9f39..6df9ba1 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -839,7 +839,7 @@ static void atkbd_disconnect(struct serio *serio) */ static void atkbd_dell_laptop_keymap_fixup(struct atkbd *atkbd) { - const unsigned int forced_release_keys[] = { + static const unsigned int forced_release_keys[] = { 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8f, 0x93, }; int i; @@ -856,7 +856,7 @@ static void atkbd_dell_laptop_keymap_fixup(struct atkbd *atkbd) */ static void atkbd_hp_keymap_fixup(struct atkbd *atkbd) { - const unsigned int forced_release_keys[] = { + static const unsigned int forced_release_keys[] = { 0x94, }; int i; -- cgit v0.10.2 From 3c0340b774916caa4149892504169d290c8a720a Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 10 Jan 2009 23:44:22 -0800 Subject: Input: psmouse - make MOUSE_PS2_LIFEBOOK depend on X86 All Fujitsu-Siemens Lifebook systems are x86-based, so we might as well make MOUSE_PS2_LIFEBOOK depend on X86. This will avoid surprising things like: arch/arm/configs/s3c2410_defconfig:CONFIG_MOUSE_PS2_LIFEBOOK=y Signed-off-by: Jean Delvare Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig index 093c8c1..9705f3a 100644 --- a/drivers/input/mouse/Kconfig +++ b/drivers/input/mouse/Kconfig @@ -70,7 +70,7 @@ config MOUSE_PS2_SYNAPTICS config MOUSE_PS2_LIFEBOOK bool "Fujitsu Lifebook PS/2 mouse protocol extension" if EMBEDDED default y - depends on MOUSE_PS2 + depends on MOUSE_PS2 && X86 help Say Y here if you have a Fujitsu B-series Lifebook PS/2 TouchScreen connected to your system. -- cgit v0.10.2 From 74ca11c2056d01d9ebb3615cd781a148450c3c82 Mon Sep 17 00:00:00 2001 From: Bastien Nocera Date: Sat, 10 Jan 2009 23:44:22 -0800 Subject: Input: uvc - the button on the camera is KEY_CAMERA Cameras should generate KEY_CAMERA, not BTN_0. Also call input_sync() on the device once the button has been pressed. Signed-off-by: Andrew Morton Signed-off-by: Dmitry Torokhov diff --git a/drivers/media/video/uvc/uvc_status.c b/drivers/media/video/uvc/uvc_status.c index 5d60b26..fdf7b41 100644 --- a/drivers/media/video/uvc/uvc_status.c +++ b/drivers/media/video/uvc/uvc_status.c @@ -47,8 +47,8 @@ static int uvc_input_init(struct uvc_device *dev) usb_to_input_id(udev, &input->id); input->dev.parent = &dev->intf->dev; - set_bit(EV_KEY, input->evbit); - set_bit(BTN_0, input->keybit); + __set_bit(EV_KEY, input->evbit); + __set_bit(KEY_CAMERA, input->keybit); if ((ret = input_register_device(input)) < 0) goto error; @@ -71,8 +71,10 @@ static void uvc_input_cleanup(struct uvc_device *dev) static void uvc_input_report_key(struct uvc_device *dev, unsigned int code, int value) { - if (dev->input) + if (dev->input) { input_report_key(dev->input, code, value); + input_sync(dev->input); + } } #else @@ -97,7 +99,7 @@ static void uvc_event_streaming(struct uvc_device *dev, __u8 *data, int len) return; uvc_trace(UVC_TRACE_STATUS, "Button (intf %u) %s len %d\n", data[1], data[3] ? "pressed" : "released", len); - uvc_input_report_key(dev, BTN_0, data[3]); + uvc_input_report_key(dev, KEY_CAMERA, data[3]); } else { uvc_trace(UVC_TRACE_STATUS, "Stream %u error event %02x %02x " "len %d.\n", data[1], data[2], data[3], len); -- cgit v0.10.2 From 57c1a24ee276a33190008243f986419e122c0dea Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 6 Jan 2009 17:36:09 +0000 Subject: [MTD] [LPDDR] qinfo_probe depends on lpddr Signed-off-by: Alexey Korolev Signed-off-by: David Woodhouse diff --git a/drivers/mtd/lpddr/Kconfig b/drivers/mtd/lpddr/Kconfig index acd4ea9..5a401d8 100644 --- a/drivers/mtd/lpddr/Kconfig +++ b/drivers/mtd/lpddr/Kconfig @@ -12,6 +12,7 @@ config MTD_LPDDR DDR memories, intended for battery-operated systems. config MTD_QINFO_PROBE + depends on MTD_LPDDR tristate "Detect flash chips by QINFO probe" help Device Information for LPDDR chips is offered through the Overlay -- cgit v0.10.2 From 5f877607cdfe8b60bf96fb96e527e0ce2a21e68b Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Sun, 11 Jan 2009 19:52:19 +0000 Subject: [MTD] map_rom has NULL erase pointer Which means if inftl or similar are loaded with it (which is a dumb thing to do admittedly) it may oops. Closes #8108 [dwmw2: change error to -EROFS to match write-protected flash] Signed-off-by: Alan Cox Signed-off-by: David Woodhouse diff --git a/drivers/mtd/chips/map_rom.c b/drivers/mtd/chips/map_rom.c index 821d0ed..c76d6e5 100644 --- a/drivers/mtd/chips/map_rom.c +++ b/drivers/mtd/chips/map_rom.c @@ -19,6 +19,7 @@ static int maprom_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *); static int maprom_write (struct mtd_info *, loff_t, size_t, size_t *, const u_char *); static void maprom_nop (struct mtd_info *); static struct mtd_info *map_rom_probe(struct map_info *map); +static int maprom_erase (struct mtd_info *mtd, struct erase_info *info); static struct mtd_chip_driver maprom_chipdrv = { .probe = map_rom_probe, @@ -42,6 +43,7 @@ static struct mtd_info *map_rom_probe(struct map_info *map) mtd->read = maprom_read; mtd->write = maprom_write; mtd->sync = maprom_nop; + mtd->erase = maprom_erase; mtd->flags = MTD_CAP_ROM; mtd->erasesize = map->size; mtd->writesize = 1; @@ -71,6 +73,12 @@ static int maprom_write (struct mtd_info *mtd, loff_t to, size_t len, size_t *re return -EIO; } +static int maprom_erase (struct mtd_info *mtd, struct erase_info *info) +{ + /* We do our best 8) */ + return -EROFS; +} + static int __init map_rom_init(void) { register_mtd_chip_driver(&maprom_chipdrv); -- cgit v0.10.2 From c225aa57ff4ffe715df4692676b77c815a337236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Holm=20Th=C3=B8gersen?= Date: Sun, 11 Jan 2009 22:34:01 -0500 Subject: ext4: fix wrong use of do_div MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the following warning: fs/jbd2/journal.c: In function ‘jbd2_seq_info_show’: fs/jbd2/journal.c:850: warning: format ‘%lu’ expects type ‘long unsigned int’, but argument 3 has type ‘uint32_t’ is caused by wrong usage of do_div that modifies the dividend in-place and returns the quotient. So not only would an incorrect value be displayed, but s->journal->j_average_commit_time would also be changed to a wrong value! Fix it by using div_u64 instead. Signed-off-by: Simon Holm Thøgersen Signed-off-by: "Theodore Ts'o" diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 5667530..eb34300 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -37,10 +37,10 @@ #include #include #include +#include #include #include -#include EXPORT_SYMBOL(jbd2_journal_start); EXPORT_SYMBOL(jbd2_journal_restart); @@ -846,8 +846,8 @@ static int jbd2_seq_info_show(struct seq_file *seq, void *v) jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid)); seq_printf(seq, " %ums logging transaction\n", jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid)); - seq_printf(seq, " %luus average transaction commit time\n", - do_div(s->journal->j_average_commit_time, 1000)); + seq_printf(seq, " %lluus average transaction commit time\n", + div_u64(s->journal->j_average_commit_time, 1000)); seq_printf(seq, " %lu handles per transaction\n", s->stats->u.run.rs_handle_count / s->stats->ts_tid); seq_printf(seq, " %lu blocks per transaction\n", -- cgit v0.10.2 From f2d8dc75a14479f8803a70cf637b5d79a3bb87f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 12 Jan 2009 22:29:23 -0800 Subject: Input: corgikbd - mark probe function as __devinit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A pointer to corgikbd_probe is passed to the core via platform_driver_register and so the function must not disappear when the .init sections are discarded. Otherwise (if also having HOTPLUG=y) unbinding and binding a device to the driver via sysfs will result in an oops as does a device being registered late. An alternative to this patch is using platform_driver_probe instead of platform_driver_register plus removing the pointer to the probe function from the struct platform_driver. [dtor@mail.ru: fixed some more section markups] Signed-off-by: Uwe Kleine-König Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/keyboard/corgikbd.c b/drivers/input/keyboard/corgikbd.c index c8ed065..abb04c8 100644 --- a/drivers/input/keyboard/corgikbd.c +++ b/drivers/input/keyboard/corgikbd.c @@ -288,7 +288,7 @@ static int corgikbd_resume(struct platform_device *dev) #define corgikbd_resume NULL #endif -static int __init corgikbd_probe(struct platform_device *pdev) +static int __devinit corgikbd_probe(struct platform_device *pdev) { struct corgikbd *corgikbd; struct input_dev *input_dev; @@ -368,7 +368,7 @@ static int __init corgikbd_probe(struct platform_device *pdev) return err; } -static int corgikbd_remove(struct platform_device *pdev) +static int __devexit corgikbd_remove(struct platform_device *pdev) { int i; struct corgikbd *corgikbd = platform_get_drvdata(pdev); @@ -388,7 +388,7 @@ static int corgikbd_remove(struct platform_device *pdev) static struct platform_driver corgikbd_driver = { .probe = corgikbd_probe, - .remove = corgikbd_remove, + .remove = __devexit_p(corgikbd_remove), .suspend = corgikbd_suspend, .resume = corgikbd_resume, .driver = { @@ -397,7 +397,7 @@ static struct platform_driver corgikbd_driver = { }, }; -static int __devinit corgikbd_init(void) +static int __init corgikbd_init(void) { return platform_driver_register(&corgikbd_driver); } -- cgit v0.10.2 From 840207edfa8b5e5b46e0d268bf33efe71fecea20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 12 Jan 2009 22:32:17 -0800 Subject: Input: corgi_ts - mark probe function as __devinit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A pointer to corgits_probe is passed to the core via platform_driver_register and so the function must not disappear when the .init sections are discarded. Otherwise (if also having HOTPLUG=y) unbinding and binding a device to the driver via sysfs will result in an oops as does a device being registered late. An alternative to this patch is using platform_driver_probe instead of platform_driver_register plus removing the pointer to the probe function from the struct platform_driver. [dtor@mail.ru: fixed some more section markups] Signed-off-by: Uwe Kleine-König Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/touchscreen/corgi_ts.c b/drivers/input/touchscreen/corgi_ts.c index 65202c9..3fb51b5 100644 --- a/drivers/input/touchscreen/corgi_ts.c +++ b/drivers/input/touchscreen/corgi_ts.c @@ -268,7 +268,7 @@ static int corgits_resume(struct platform_device *dev) #define corgits_resume NULL #endif -static int __init corgits_probe(struct platform_device *pdev) +static int __devinit corgits_probe(struct platform_device *pdev) { struct corgi_ts *corgi_ts; struct input_dev *input_dev; @@ -343,7 +343,7 @@ static int __init corgits_probe(struct platform_device *pdev) return err; } -static int corgits_remove(struct platform_device *pdev) +static int __devexit corgits_remove(struct platform_device *pdev) { struct corgi_ts *corgi_ts = platform_get_drvdata(pdev); @@ -352,12 +352,13 @@ static int corgits_remove(struct platform_device *pdev) corgi_ts->machinfo->put_hsync(); input_unregister_device(corgi_ts->input); kfree(corgi_ts); + return 0; } static struct platform_driver corgits_driver = { .probe = corgits_probe, - .remove = corgits_remove, + .remove = __devexit_p(corgits_remove), .suspend = corgits_suspend, .resume = corgits_resume, .driver = { @@ -366,7 +367,7 @@ static struct platform_driver corgits_driver = { }, }; -static int __devinit corgits_init(void) +static int __init corgits_init(void) { return platform_driver_register(&corgits_driver); } -- cgit v0.10.2 From d63dab00e85641515a0a060e6de93e6b7f450665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 13 Jan 2009 18:20:20 -0800 Subject: Input: omap-keypad - mark probe function as __devinit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A pointer to omap_kp_probe is passed to the core via platform_driver_register and so the function must not disappear when the .init sections are discarded. Otherwise (if also having HOTPLUG=y) unbinding and binding a device to the driver via sysfs will result in an oops as does a device being registered late. An alternative to this patch is using platform_driver_probe instead of platform_driver_register plus removing the pointer to the probe function from the struct platform_driver. [dtor@mail.ru: fixed some more section markups] Signed-off-by: Uwe Kleine-König Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/keyboard/omap-keypad.c b/drivers/input/keyboard/omap-keypad.c index ec0ebee..23b4fd1 100644 --- a/drivers/input/keyboard/omap-keypad.c +++ b/drivers/input/keyboard/omap-keypad.c @@ -279,7 +279,7 @@ static int omap_kp_resume(struct platform_device *dev) #define omap_kp_resume NULL #endif -static int __init omap_kp_probe(struct platform_device *pdev) +static int __devinit omap_kp_probe(struct platform_device *pdev) { struct omap_kp *omap_kp; struct input_dev *input_dev; @@ -422,7 +422,7 @@ err1: return -EINVAL; } -static int omap_kp_remove(struct platform_device *pdev) +static int __devexit omap_kp_remove(struct platform_device *pdev) { struct omap_kp *omap_kp = platform_get_drvdata(pdev); @@ -454,7 +454,7 @@ static int omap_kp_remove(struct platform_device *pdev) static struct platform_driver omap_kp_driver = { .probe = omap_kp_probe, - .remove = omap_kp_remove, + .remove = __devexit_p(omap_kp_remove), .suspend = omap_kp_suspend, .resume = omap_kp_resume, .driver = { @@ -463,7 +463,7 @@ static struct platform_driver omap_kp_driver = { }, }; -static int __devinit omap_kp_init(void) +static int __init omap_kp_init(void) { printk(KERN_INFO "OMAP Keypad Driver\n"); return platform_driver_register(&omap_kp_driver); -- cgit v0.10.2 From 27f23336bcc1b08512751c878b3e05ed6b815e86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 13 Jan 2009 18:20:31 -0800 Subject: Input: spitzkbd - mark probe function as __devinit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A pointer to spitzkbd_probe is passed to the core via platform_driver_register and so the function must not disappear when the .init sections are discarded. Otherwise (if also having HOTPLUG=y) unbinding and binding a device to the driver via sysfs will result in an oops as does a device being registered late. An alternative to this patch is using platform_driver_probe instead of platform_driver_register plus removing the pointer to the probe function from the struct platform_driver. [dtor@mail.ru: fixed some more section markups] Signed-off-by: Uwe Kleine-König Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/keyboard/spitzkbd.c b/drivers/input/keyboard/spitzkbd.c index c48b76a..9d1781a 100644 --- a/drivers/input/keyboard/spitzkbd.c +++ b/drivers/input/keyboard/spitzkbd.c @@ -343,7 +343,7 @@ static int spitzkbd_resume(struct platform_device *dev) #define spitzkbd_resume NULL #endif -static int __init spitzkbd_probe(struct platform_device *dev) +static int __devinit spitzkbd_probe(struct platform_device *dev) { struct spitzkbd *spitzkbd; struct input_dev *input_dev; @@ -444,7 +444,7 @@ static int __init spitzkbd_probe(struct platform_device *dev) return err; } -static int spitzkbd_remove(struct platform_device *dev) +static int __devexit spitzkbd_remove(struct platform_device *dev) { int i; struct spitzkbd *spitzkbd = platform_get_drvdata(dev); @@ -470,7 +470,7 @@ static int spitzkbd_remove(struct platform_device *dev) static struct platform_driver spitzkbd_driver = { .probe = spitzkbd_probe, - .remove = spitzkbd_remove, + .remove = __devexit_p(spitzkbd_remove), .suspend = spitzkbd_suspend, .resume = spitzkbd_resume, .driver = { @@ -479,7 +479,7 @@ static struct platform_driver spitzkbd_driver = { }, }; -static int __devinit spitzkbd_init(void) +static int __init spitzkbd_init(void) { return platform_driver_register(&spitzkbd_driver); } -- cgit v0.10.2 From 14819ea1e0bcbdc9b084cd60a6a24d5d786324ef Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 14 Jan 2009 12:34:21 +0100 Subject: irq: export __set_irq_handler() and handle_level_irq() Impact: build fix ARM updates broke x86 allmodconfig builds: ERROR: "__set_irq_handler" [drivers/mfd/pcf50633-core.ko] undefined! ERROR: "handle_level_irq" [drivers/mfd/pcf50633-core.ko] undefined! Signed-off-by: Ingo Molnar diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index f63c706..7de11bd 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -383,6 +383,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) out_unlock: spin_unlock(&desc->lock); } +EXPORT_SYMBOL_GPL(handle_level_irq); /** * handle_fasteoi_irq - irq handler for transparent controllers @@ -593,6 +594,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, } spin_unlock_irqrestore(&desc->lock, flags); } +EXPORT_SYMBOL_GPL(__set_irq_handler); void set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip, -- cgit v0.10.2 From 9e6f8ed7c3a303d37eb119847dd3029701e37e28 Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Wed, 14 Jan 2009 13:02:00 +0100 Subject: mfd: Remove non exported references from pcf50633 Remove references to set_irq_type and handle_level_irq which are not exported to modules Signed-off-by: Balaji Rao Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/pcf50633-core.c b/drivers/mfd/pcf50633-core.c index 24508e2..ea9488e 100644 --- a/drivers/mfd/pcf50633-core.c +++ b/drivers/mfd/pcf50633-core.c @@ -626,7 +626,6 @@ static int __devinit pcf50633_probe(struct i2c_client *client, } if (client->irq) { - set_irq_handler(client->irq, handle_level_irq); ret = request_irq(client->irq, pcf50633_irq, IRQF_TRIGGER_LOW, "pcf50633", pcf); -- cgit v0.10.2 From 06a279d636734da32bb62dd2f7b0ade666f65d7c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 17 Jan 2009 18:41:37 -0500 Subject: ext4: only use i_size_high for regular files Directories are not allowed to be bigger than 2GB, so don't use i_size_high for anything other than regular files. E2fsck should complain about these inodes, but the simplest thing to do for the kernel is to only use i_size_high for regular files. This prevents an intentially corrupted filesystem from causing the kernel to burn a huge amount of CPU and issuing error messages such as: EXT4-fs warning (device loop0): ext4_block_to_path: block 135090028 > max Thanks to David Maciejak from Fortinet's FortiGuard Global Security Research Team for reporting this issue. http://bugzilla.kernel.org/show_bug.cgi?id=12375 Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c668e43..aafc9eb 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1206,8 +1206,11 @@ static inline void ext4_r_blocks_count_set(struct ext4_super_block *es, static inline loff_t ext4_isize(struct ext4_inode *raw_inode) { - return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | - le32_to_cpu(raw_inode->i_size_lo); + if (S_ISREG(le16_to_cpu(raw_inode->i_mode))) + return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) | + le32_to_cpu(raw_inode->i_size_lo); + else + return (loff_t) le32_to_cpu(raw_inode->i_size_lo); } static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a6444ce..49484ba 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -360,9 +360,9 @@ static int ext4_block_to_path(struct inode *inode, final = ptrs; } else { ext4_warning(inode->i_sb, "ext4_block_to_path", - "block %lu > max", + "block %lu > max in inode %lu", i_block + direct_blocks + - indirect_blocks + double_blocks); + indirect_blocks + double_blocks, inode->i_ino); } if (boundary) *boundary = final - 1 - (i_block & (ptrs - 1)); -- cgit v0.10.2 From e6b8bc09ba2075cd91fbffefcd2778b1a00bd76f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 16 Jan 2009 11:13:40 -0500 Subject: ext4: Add sanity check to make_indexed_dir Make sure the rec_len field in the '..' entry is sane, lest we overrun the directory block and cause a kernel oops on a purposefully corrupted filesystem. Thanks to Sami Liedes for reporting this bug. http://bugzilla.kernel.org/show_bug.cgi?id=12430 Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index fec0b4c..ba702bd 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1368,7 +1368,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, struct fake_dirent *fde; blocksize = dir->i_sb->s_blocksize; - dxtrace(printk(KERN_DEBUG "Creating index\n")); + dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); retval = ext4_journal_get_write_access(handle, bh); if (retval) { ext4_std_error(dir->i_sb, retval); @@ -1377,6 +1377,20 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, } root = (struct dx_root *) bh->b_data; + /* The 0th block becomes the root, move the dirents out */ + fde = &root->dotdot; + de = (struct ext4_dir_entry_2 *)((char *)fde + + ext4_rec_len_from_disk(fde->rec_len)); + if ((char *) de >= (((char *) root) + blocksize)) { + ext4_error(dir->i_sb, __func__, + "invalid rec_len for '..' in inode %lu", + dir->i_ino); + brelse(bh); + return -EIO; + } + len = ((char *) root) + blocksize - (char *) de; + + /* Allocate new block for the 0th block's dirents */ bh2 = ext4_append(handle, dir, &block, &retval); if (!(bh2)) { brelse(bh); @@ -1385,11 +1399,6 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, EXT4_I(dir)->i_flags |= EXT4_INDEX_FL; data1 = bh2->b_data; - /* The 0th block becomes the root, move the dirents out */ - fde = &root->dotdot; - de = (struct ext4_dir_entry_2 *)((char *)fde + - ext4_rec_len_from_disk(fde->rec_len)); - len = ((char *) root) + blocksize - (char *) de; memcpy (data1, de, len); de = (struct ext4_dir_entry_2 *) data1; top = data1 + len; -- cgit v0.10.2 From a21102b55c4f8dfd3adb4a15a34cd62237b46039 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 16 Jan 2009 11:13:47 -0500 Subject: ext3: Add sanity check to make_indexed_dir Make sure the rec_len field in the '..' entry is sane, lest we overrun the directory block and cause a kernel oops on a purposefully corrupted filesystem. This fixes a bug related to a bug originally reported by Sami Liedes for ext4 at: http://bugzilla.kernel.org/show_bug.cgi?id=12430 Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 69a3d19..4db4ffa 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1358,7 +1358,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, struct fake_dirent *fde; blocksize = dir->i_sb->s_blocksize; - dxtrace(printk("Creating index\n")); + dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); retval = ext3_journal_get_write_access(handle, bh); if (retval) { ext3_std_error(dir->i_sb, retval); @@ -1367,6 +1367,19 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, } root = (struct dx_root *) bh->b_data; + /* The 0th block becomes the root, move the dirents out */ + fde = &root->dotdot; + de = (struct ext3_dir_entry_2 *)((char *)fde + + ext3_rec_len_from_disk(fde->rec_len)); + if ((char *) de >= (((char *) root) + blocksize)) { + ext3_error(dir->i_sb, __func__, + "invalid rec_len for '..' in inode %lu", + dir->i_ino); + brelse(bh); + return -EIO; + } + len = ((char *) root) + blocksize - (char *) de; + bh2 = ext3_append (handle, dir, &block, &retval); if (!(bh2)) { brelse(bh); @@ -1375,11 +1388,6 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; data1 = bh2->b_data; - /* The 0th block becomes the root, move the dirents out */ - fde = &root->dotdot; - de = (struct ext3_dir_entry_2 *)((char *)fde + - ext3_rec_len_from_disk(fde->rec_len)); - len = ((char *) root) + blocksize - (char *) de; memcpy (data1, de, len); de = (struct ext3_dir_entry_2 *) data1; top = data1 + len; -- cgit v0.10.2 From 08ec8c3878cea0bf91f2ba3c0badf44b383752d0 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 16 Jan 2009 11:57:00 -0500 Subject: jbd2: On a __journal_expect() assertion failure printk "JBD2", not "EXT3-fs" Otherwise it can be very confusing to find a "EXT3-fs: " failure in the middle of EXT4-fs failures, and it makes it harder to track the source of the failure. Signed-off-by: "Theodore Ts'o" diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b45109c..b28b37e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -308,7 +308,8 @@ void buffer_assertion_failure(struct buffer_head *bh); int val = (expr); \ if (!val) { \ printk(KERN_ERR \ - "EXT3-fs unexpected failure: %s;\n",# expr); \ + "JBD2 unexpected failure: %s: %s;\n", \ + __func__, #expr); \ printk(KERN_ERR why "\n"); \ } \ val; \ -- cgit v0.10.2 From 091d71e023557136e96f0e54f301497a3fc95dc3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 17 Jan 2009 00:10:45 +0100 Subject: PM: Fix compilation warning in kernel/power/main.c Reorder the code in kernel/power/main.c to fix compilation warning triggered by unsetting CONFIG_SUSPEND. Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown diff --git a/kernel/power/main.c b/kernel/power/main.c index 2399888..b4d2190 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -57,16 +57,6 @@ int pm_notifier_call_chain(unsigned long val) #ifdef CONFIG_PM_DEBUG int pm_test_level = TEST_NONE; -static int suspend_test(int level) -{ - if (pm_test_level == level) { - printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n"); - mdelay(5000); - return 1; - } - return 0; -} - static const char * const pm_tests[__TEST_AFTER_LAST] = { [TEST_NONE] = "none", [TEST_CORE] = "core", @@ -125,14 +115,24 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, } power_attr(pm_test); -#else /* !CONFIG_PM_DEBUG */ -static inline int suspend_test(int level) { return 0; } -#endif /* !CONFIG_PM_DEBUG */ +#endif /* CONFIG_PM_DEBUG */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_SUSPEND +static int suspend_test(int level) +{ +#ifdef CONFIG_PM_DEBUG + if (pm_test_level == level) { + printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n"); + mdelay(5000); + return 1; + } +#endif /* !CONFIG_PM_DEBUG */ + return 0; +} + #ifdef CONFIG_PM_TEST_SUSPEND /* -- cgit v0.10.2 From 5d8b532af9e52ea89208f5ef31889f646e67ba28 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 16 Jan 2009 23:09:14 +0100 Subject: ACPI suspend: Fix compilation warnings in drivers/acpi/sleep.c Fix two compilation warnings in drivers/acpi/sleep.c, one triggered by unsetting CONFIG_SUSPEND and the other triggered by unsetting CONFIG_HIBERNATION, by moving some code under the appropriate #ifdefs . Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 707c1f6..a60c1f3 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -156,11 +156,11 @@ static int __init acpi_sleep_setup(char *str) #ifdef CONFIG_HIBERNATION if (strncmp(str, "s4_nohwsig", 10) == 0) acpi_no_s4_hw_signature(); + if (strncmp(str, "s4_nonvs", 8) == 0) + acpi_s4_no_nvs(); #endif if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); - if (strncmp(str, "s4_nonvs", 8) == 0) - acpi_s4_no_nvs(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 7e3c609..af85f5b 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -90,31 +90,6 @@ void __init acpi_old_suspend_ordering(void) old_suspend_ordering = true; } -/* - * According to the ACPI specification the BIOS should make sure that ACPI is - * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, - * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI - * on such systems during resume. Unfortunately that doesn't help in - * particularly pathological cases in which SCI_EN has to be set directly on - * resume, although the specification states very clearly that this flag is - * owned by the hardware. The set_sci_en_on_resume variable will be set in such - * cases. - */ -static bool set_sci_en_on_resume; -/* - * The ACPI specification wants us to save NVS memory regions during hibernation - * and to restore them during the subsequent resume. However, it is not certain - * if this mechanism is going to work on all machines, so we allow the user to - * disable this mechanism using the 'acpi_sleep=s4_nonvs' kernel command line - * option. - */ -static bool s4_no_nvs; - -void __init acpi_s4_no_nvs(void) -{ - s4_no_nvs = true; -} - /** * acpi_pm_disable_gpes - Disable the GPEs. */ @@ -193,6 +168,18 @@ static void acpi_pm_end(void) #endif /* CONFIG_ACPI_SLEEP */ #ifdef CONFIG_SUSPEND +/* + * According to the ACPI specification the BIOS should make sure that ACPI is + * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, + * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI + * on such systems during resume. Unfortunately that doesn't help in + * particularly pathological cases in which SCI_EN has to be set directly on + * resume, although the specification states very clearly that this flag is + * owned by the hardware. The set_sci_en_on_resume variable will be set in such + * cases. + */ +static bool set_sci_en_on_resume; + extern void do_suspend_lowlevel(void); static u32 acpi_suspend_states[] = { @@ -396,6 +383,20 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATION +/* + * The ACPI specification wants us to save NVS memory regions during hibernation + * and to restore them during the subsequent resume. However, it is not certain + * if this mechanism is going to work on all machines, so we allow the user to + * disable this mechanism using the 'acpi_sleep=s4_nonvs' kernel command line + * option. + */ +static bool s4_no_nvs; + +void __init acpi_s4_no_nvs(void) +{ + s4_no_nvs = true; +} + static unsigned long s4_hardware_signature; static struct acpi_table_facs *facs; static bool nosigcheck; -- cgit v0.10.2 From 4312495f7db63d27ef52ec83dab55f14a8c43827 Mon Sep 17 00:00:00 2001 From: Tero Roponen Date: Sat, 17 Jan 2009 13:06:02 +0200 Subject: ACPI: Fix crash on ASUS laptops This patch fixes the crash I experienced in 2.6.29-rc2. Tested on ASUS M50vm. Signed-off-by: Tero Roponen Acked-by: Alexey Starikovskiy Signed-off-by: Len Brown diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index a2b82c9..5c2f5d3 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -982,7 +982,7 @@ int __init acpi_ec_ecdt_probe(void) saved_ec = kmalloc(sizeof(struct acpi_ec), GFP_KERNEL); if (!saved_ec) return -ENOMEM; - memcpy(&saved_ec, boot_ec, sizeof(saved_ec)); + memcpy(saved_ec, boot_ec, sizeof(*saved_ec)); /* fall through */ } /* This workaround is needed only on some broken machines, -- cgit v0.10.2 From 2b190e76def5233c542f6025b4a133b1d4bd1a37 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sat, 17 Jan 2009 15:51:27 +0100 Subject: panasonic-laptop: fix X[ ARRAY_SIZE(X) ] Ensure pcc->keymap[ ARRAY_SIZE(pcc->keymap) ] does not occur. Signed-off-by: Roel Kluin Signed-off-by: Len Brown diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index f30db36..c47a44d 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -507,7 +507,7 @@ static void acpi_pcc_generate_keyinput(struct pcc_acpi *pcc) hkey_num = result & 0xf; - if (hkey_num < 0 || hkey_num > ARRAY_SIZE(pcc->keymap)) { + if (hkey_num < 0 || hkey_num >= ARRAY_SIZE(pcc->keymap)) { ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "hotkey number out of range: %d\n", hkey_num)); -- cgit v0.10.2 From dedb0d48a9d4d57086526b94a4b64da789a646e4 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 9 Jan 2009 21:02:37 +0200 Subject: UBIFS: do not commit twice VFS calls '->sync_fs()' twice - first time with @wait = 0, second time with @wait = 1. As a result, we may commit and synchronize write-buffers twice. Avoid doing this by returning immediatelly if @wait = 0. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 89556ee..a7fc97f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -432,18 +432,19 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) int i, err; struct ubifs_info *c = sb->s_fs_info; struct writeback_control wbc = { - .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, + .sync_mode = WB_SYNC_ALL, .range_start = 0, .range_end = LLONG_MAX, .nr_to_write = LONG_MAX, }; /* - * Note by akpm about WB_SYNC_NONE used above: zero @wait is just an - * advisory thing to help the file system shove lots of data into the - * queues. If some gets missed then it'll be picked up on the second + * Zero @wait is just an advisory thing to help the file system shove + * lots of data into the queues, and there will be the second * '->sync_fs()' call, with non-zero @wait. */ + if (!wait) + return 0; if (sb->s_flags & MS_RDONLY) return 0; -- cgit v0.10.2 From e8b815663b1bfd9c255af5176604ec0eafdf6ed7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 15 Jan 2009 17:43:23 +0200 Subject: UBIFS: constify operations Mark super, file, and inode operation structcutes with 'const'. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index f448ab1..d29b771 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1199,7 +1199,7 @@ int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, return 0; } -struct inode_operations ubifs_dir_inode_operations = { +const struct inode_operations ubifs_dir_inode_operations = { .lookup = ubifs_lookup, .create = ubifs_create, .link = ubifs_link, @@ -1219,7 +1219,7 @@ struct inode_operations ubifs_dir_inode_operations = { #endif }; -struct file_operations ubifs_dir_operations = { +const struct file_operations ubifs_dir_operations = { .llseek = ubifs_dir_llseek, .release = ubifs_dir_release, .read = generic_read_dir, diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index bf37374..17443d9 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1541,7 +1541,7 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -struct address_space_operations ubifs_file_address_operations = { +const struct address_space_operations ubifs_file_address_operations = { .readpage = ubifs_readpage, .writepage = ubifs_writepage, .write_begin = ubifs_write_begin, @@ -1551,7 +1551,7 @@ struct address_space_operations ubifs_file_address_operations = { .releasepage = ubifs_releasepage, }; -struct inode_operations ubifs_file_inode_operations = { +const struct inode_operations ubifs_file_inode_operations = { .setattr = ubifs_setattr, .getattr = ubifs_getattr, #ifdef CONFIG_UBIFS_FS_XATTR @@ -1562,14 +1562,14 @@ struct inode_operations ubifs_file_inode_operations = { #endif }; -struct inode_operations ubifs_symlink_inode_operations = { +const struct inode_operations ubifs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = ubifs_follow_link, .setattr = ubifs_setattr, .getattr = ubifs_getattr, }; -struct file_operations ubifs_file_operations = { +const struct file_operations ubifs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index a7fc97f..53811e5 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1778,7 +1778,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) return 0; } -struct super_operations ubifs_super_operations = { +const struct super_operations ubifs_super_operations = { .alloc_inode = ubifs_alloc_inode, .destroy_inode = ubifs_destroy_inode, .put_super = ubifs_put_super, diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index fc2a4cc..0881897 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1405,13 +1405,13 @@ extern struct list_head ubifs_infos; extern spinlock_t ubifs_infos_lock; extern atomic_long_t ubifs_clean_zn_cnt; extern struct kmem_cache *ubifs_inode_slab; -extern struct super_operations ubifs_super_operations; -extern struct address_space_operations ubifs_file_address_operations; -extern struct file_operations ubifs_file_operations; -extern struct inode_operations ubifs_file_inode_operations; -extern struct file_operations ubifs_dir_operations; -extern struct inode_operations ubifs_dir_inode_operations; -extern struct inode_operations ubifs_symlink_inode_operations; +extern const struct super_operations ubifs_super_operations; +extern const struct address_space_operations ubifs_file_address_operations; +extern const struct file_operations ubifs_file_operations; +extern const struct inode_operations ubifs_file_inode_operations; +extern const struct file_operations ubifs_dir_operations; +extern const struct inode_operations ubifs_dir_inode_operations; +extern const struct inode_operations ubifs_symlink_inode_operations; extern struct backing_dev_info ubifs_backing_dev_info; extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; -- cgit v0.10.2 From 3afd522de8d8ec446efe957b86e4f63e3dd8ce9d Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Mon, 19 Jan 2009 00:15:13 +0100 Subject: [MTD] slram: Handle negative devlength correctly A negative devlength won't get noticed and clean up: Signed-off-by: Roel Kluin Signed-off-by: David Woodhouse diff --git a/drivers/mtd/devices/slram.c b/drivers/mtd/devices/slram.c index a425d09..00248e8 100644 --- a/drivers/mtd/devices/slram.c +++ b/drivers/mtd/devices/slram.c @@ -267,22 +267,28 @@ static int parse_cmdline(char *devname, char *szstart, char *szlength) if (*(szlength) != '+') { devlength = simple_strtoul(szlength, &buffer, 0); devlength = handle_unit(devlength, buffer) - devstart; + if (devlength < devstart) + goto err_out; + + devlength -= devstart; } else { devlength = simple_strtoul(szlength + 1, &buffer, 0); devlength = handle_unit(devlength, buffer); } T("slram: devname=%s, devstart=0x%lx, devlength=0x%lx\n", devname, devstart, devlength); - if ((devstart < 0) || (devlength < 0) || (devlength % SLRAM_BLK_SZ != 0)) { - E("slram: Illegal start / length parameter.\n"); - return(-EINVAL); - } + if (devlength % SLRAM_BLK_SZ != 0) + goto err_out; if ((devstart = register_device(devname, devstart, devlength))){ unregister_devices(); return((int)devstart); } return(0); + +err_out: + E("slram: Illegal length parameter.\n"); + return(-EINVAL); } #ifndef MODULE -- cgit v0.10.2 From f90d4118bacef87894621a3e8aba853fa0c89abc Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 16 Jan 2009 10:24:10 +0800 Subject: cpuset: fix possible deadlock in async_rebuild_sched_domains Lockdep reported some possible circular locking info when we tested cpuset on NUMA/fake NUMA box. ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.29-rc1-00224-ga652504 #111 ------------------------------------------------------- bash/2968 is trying to acquire lock: (events){--..}, at: [] flush_work+0x24/0xd8 but task is already holding lock: (cgroup_mutex){--..}, at: [] cgroup_lock_live_group+0x12/0x29 which lock already depends on the new lock. ...... ------------------------------------------------------- Steps to reproduce: # mkdir /dev/cpuset # mount -t cpuset xxx /dev/cpuset # mkdir /dev/cpuset/0 # echo 0 > /dev/cpuset/0/cpus # echo 0 > /dev/cpuset/0/mems # echo 1 > /dev/cpuset/0/memory_migrate # cat /dev/zero > /dev/null & # echo $! > /dev/cpuset/0/tasks This is because async_rebuild_sched_domains has the following lock sequence: run_workqueue(async_rebuild_sched_domains) -> do_rebuild_sched_domains -> cgroup_lock But, attaching tasks when memory_migrate is set has following: cgroup_lock_live_group(cgroup_tasks_write) -> do_migrate_pages -> flush_work This patch fixes it by using a separate workqueue thread. Signed-off-by: Miao Xie Signed-off-by: Lai Jiangshan Signed-off-by: Ingo Molnar diff --git a/kernel/cpuset.c b/kernel/cpuset.c index a856788..f76db9d 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -61,6 +61,14 @@ #include /* + * Workqueue for cpuset related tasks. + * + * Using kevent workqueue may cause deadlock when memory_migrate + * is set. So we create a separate workqueue thread for cpuset. + */ +static struct workqueue_struct *cpuset_wq; + +/* * Tracks how many cpusets are currently defined in system. * When there is only one cpuset (the root cpuset) we can * short circuit some hooks. @@ -831,7 +839,7 @@ static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains); */ static void async_rebuild_sched_domains(void) { - schedule_work(&rebuild_sched_domains_work); + queue_work(cpuset_wq, &rebuild_sched_domains_work); } /* @@ -2111,6 +2119,9 @@ void __init cpuset_init_smp(void) hotcpu_notifier(cpuset_track_online_cpus, 0); hotplug_memory_notifier(cpuset_track_online_nodes, 10); + + cpuset_wq = create_singlethread_workqueue("cpuset"); + BUG_ON(!cpuset_wq); } /** -- cgit v0.10.2 From 8d69abb08343706f88380edb83927ffc0fc83098 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 18 Jan 2009 11:55:19 +0200 Subject: [ARM] pxa: fix NAND and MMC clock initialization for pxa3xx After commit 8c3abc7d903df492a7394b0adae4349d9a381aaf ("[ARM] pxa: convert to clkdev and match clocks by struct device where possible") get_clk in pxa3xx_nand fails with -ENOENT. Apparently, clk_get in pxamci will also fail for MCI2 on PXA310. The 'clk_find' and therefore 'clk_get' require driver to supply both 'dev_id' and 'con_id' if they are not NULL in the 'strcut clk_lookup', but neither pxa3xx_nand nor pxamci supply 'con_id'. This patch sets 'con_id' to NULL in NAND clock and MCI2 clock registration. Signed-off-by: Mike Rapoport Signed-off-by: Eric Miao diff --git a/arch/arm/mach-pxa/pxa300.c b/arch/arm/mach-pxa/pxa300.c index f735e58..83fb609 100644 --- a/arch/arm/mach-pxa/pxa300.c +++ b/arch/arm/mach-pxa/pxa300.c @@ -88,13 +88,13 @@ static struct pxa3xx_mfp_addr_map pxa310_mfp_addr_map[] __initdata = { static DEFINE_PXA3_CKEN(common_nand, NAND, 156000000, 0); static struct clk_lookup common_clkregs[] = { - INIT_CLKREG(&clk_common_nand, "pxa3xx-nand", "NANDCLK"), + INIT_CLKREG(&clk_common_nand, "pxa3xx-nand", NULL), }; static DEFINE_PXA3_CKEN(pxa310_mmc3, MMC3, 19500000, 0); static struct clk_lookup pxa310_clkregs[] = { - INIT_CLKREG(&clk_pxa310_mmc3, "pxa2xx-mci.2", "MMCCLK"), + INIT_CLKREG(&clk_pxa310_mmc3, "pxa2xx-mci.2", NULL), }; static int __init pxa300_init(void) diff --git a/arch/arm/mach-pxa/pxa320.c b/arch/arm/mach-pxa/pxa320.c index effe408..36f0661 100644 --- a/arch/arm/mach-pxa/pxa320.c +++ b/arch/arm/mach-pxa/pxa320.c @@ -83,7 +83,7 @@ static struct pxa3xx_mfp_addr_map pxa320_mfp_addr_map[] __initdata = { static DEFINE_PXA3_CKEN(pxa320_nand, NAND, 104000000, 0); static struct clk_lookup pxa320_clkregs[] = { - INIT_CLKREG(&clk_pxa320_nand, "pxa3xx-nand", "NANDCLK"), + INIT_CLKREG(&clk_pxa320_nand, "pxa3xx-nand", NULL), }; static int __init pxa320_init(void) -- cgit v0.10.2 From ec971c91c55b8e2e2609f8d61eb34da13aedb37d Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Mon, 19 Jan 2009 11:39:36 +0800 Subject: [ARM] pxa: fix missing of __REG() definition for ac97 registers access This currently happens for 'drivers/input/touch/mainstone-wm97xx.c'. Signed-off-by: Eric Miao diff --git a/arch/arm/mach-pxa/include/mach/regs-ac97.h b/arch/arm/mach-pxa/include/mach/regs-ac97.h index e41b9d2..b8d14bd 100644 --- a/arch/arm/mach-pxa/include/mach/regs-ac97.h +++ b/arch/arm/mach-pxa/include/mach/regs-ac97.h @@ -1,6 +1,8 @@ #ifndef __ASM_ARCH_REGS_AC97_H #define __ASM_ARCH_REGS_AC97_H +#include + /* * AC97 Controller registers */ -- cgit v0.10.2 From b6729deb26a131083add5b7238c7b7478ef6b502 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Mon, 19 Jan 2009 11:42:32 +0800 Subject: [ARM] pxa: make more SSCR0 bit definitions visible on multiple processors The only exclusive definitions are SSCR0_SCR and SSCR0_SerClkDiv(), loosen that exclusive #ifdef .. #else .. #endif to allow other definitions to be visible when slected multiple processors. This helps to pass the building of pxa-ssp.c. Signed-off-by: Eric Miao diff --git a/arch/arm/mach-pxa/include/mach/regs-ssp.h b/arch/arm/mach-pxa/include/mach/regs-ssp.h index 3c04cde..cf31986 100644 --- a/arch/arm/mach-pxa/include/mach/regs-ssp.h +++ b/arch/arm/mach-pxa/include/mach/regs-ssp.h @@ -41,6 +41,9 @@ #elif defined(CONFIG_PXA27x) || defined(CONFIG_PXA3xx) #define SSCR0_SCR (0x000fff00) /* Serial Clock Rate (mask) */ #define SSCR0_SerClkDiv(x) (((x) - 1) << 8) /* Divisor [1..4096] */ +#endif + +#if defined(CONFIG_PXA27x) || defined(CONFIG_PXA3xx) #define SSCR0_EDSS (1 << 20) /* Extended data size select */ #define SSCR0_NCS (1 << 21) /* Network clock select */ #define SSCR0_RIM (1 << 22) /* Receive FIFO overrrun interrupt mask */ -- cgit v0.10.2 From a50412e3f8ce95d7ed558370d7dde5171fd04283 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 6 Jan 2009 19:54:02 +0200 Subject: UBIFS: do not treat all data as short term UBIFS wrongly tells UBI that all data is short term. Use proper hints instead. Thanks to Xiaochuan-Xu for noticing this. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 9b7c54e..a11ca09 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -208,7 +208,7 @@ again: offs = 0; out: - err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, UBI_SHORTTERM); + err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); if (err) goto out_unlock; -- cgit v0.10.2 From 7078202e55b565582fcbd831a8dd3069bdc72610 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 19 Jan 2009 19:57:27 +0200 Subject: UBIFS: document dark_wm and dead_wm better Just add more commentaries. Also some commentary fixes for lprops flags. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 9832f9a..b2e5f11 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -31,6 +31,26 @@ * to be reused. Garbage collection will cause the number of dirty index nodes * to grow, however sufficient space is reserved for the index to ensure the * commit will never run out of space. + * + * Notes about dead watermark. At current UBIFS implementation we assume that + * LEBs which have less than @c->dead_wm bytes of free + dirty space are full + * and not worth garbage-collecting. The dead watermark is one min. I/O unit + * size, or min. UBIFS node size, depending on what is greater. Indeed, UBIFS + * Garbage Collector has to synchronize the GC head's write buffer before + * returning, so this is about wasting one min. I/O unit. However, UBIFS GC can + * actually reclaim even very small pieces of dirty space by garbage collecting + * enough dirty LEBs, but we do not bother doing this at this implementation. + * + * Notes about dark watermark. The results of GC work depends on how big are + * the UBIFS nodes GC deals with. Large nodes make GC waste more space. Indeed, + * if GC move data from LEB A to LEB B and nodes in LEB A are large, GC would + * have to waste large pieces of free space at the end of LEB B, because nodes + * from LEB A would not fit. And the worst situation is when all nodes are of + * maximum size. So dark watermark is the amount of free + dirty space in LEB + * which are guaranteed to be reclaimable. If LEB has less space, the GC migh + * be unable to reclaim it. So, LEBs with free + dirty greater than dark + * watermark are "good" LEBs from GC's point of few. The other LEBs are not so + * good, and GC takes extra care when moving them. */ #include diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 53811e5..da99da0 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -573,15 +573,8 @@ static int init_constants_early(struct ubifs_info *c) c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX; /* - * Initialize dead and dark LEB space watermarks. - * - * Dead space is the space which cannot be used. Its watermark is - * equivalent to min. I/O unit or minimum node size if it is greater - * then min. I/O unit. - * - * Dark space is the space which might be used, or might not, depending - * on which node should be written to the LEB. Its watermark is - * equivalent to maximum UBIFS node size. + * Initialize dead and dark LEB space watermarks. See gc.c for comments + * about these values. */ c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 0881897..2e78d6a 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -426,9 +426,9 @@ struct ubifs_unclean_leb { * LEB properties flags. * * LPROPS_UNCAT: not categorized - * LPROPS_DIRTY: dirty > 0, not index + * LPROPS_DIRTY: dirty > free, dirty >= @c->dead_wm, not index * LPROPS_DIRTY_IDX: dirty + free > @c->min_idx_node_sze and index - * LPROPS_FREE: free > 0, not empty, not index + * LPROPS_FREE: free > 0, dirty < @c->dead_wm, not empty, not index * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs * LPROPS_EMPTY: LEB is empty, not taken * LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken -- cgit v0.10.2 From 082605de5f82eb692cc90f7fda071cc01bb5ac34 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 19 Jan 2009 14:32:51 -0500 Subject: ring-buffer: fix alignment problem Impact: fix to allow some archs to use the ring buffer Commits in the ring buffer are checked by pointer arithmetic. If the calculation is incorrect, then the commits will never take place and the buffer will simply fill up and report an error. Each page in the ring buffer has a small header: struct buffer_data_page { u64 time_stamp; local_t commit; unsigned char data[]; }; Unfortuntely, some of the calculations used sizeof(struct buffer_data_page) to know the size of the header. But this is incorrect on some archs, where sizeof(struct buffer_data_page) does not equal offsetof(struct buffer_data_page, data), and on those archs, the commits are never processed. This patch replaces the sizeof with offsetof. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 8b0daf0..1d65263 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -246,7 +246,7 @@ static inline int test_time_stamp(u64 delta) return 0; } -#define BUF_PAGE_SIZE (PAGE_SIZE - sizeof(struct buffer_data_page)) +#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data)) /* * head_page == tail_page && head == tail then buffer is empty. -- cgit v0.10.2 From e7f07968c16bdd9480001c0a9de013ba56889cf9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 20 Jan 2009 09:50:19 -0500 Subject: ext4: Fix ext4_free_blocks() w/o a journal when files have indirect blocks When trying to unlink a file with indirect blocks on a filesystem without a journal, the "circular indirect block" sanity test was getting falsely triggered. Signed-off-by: "Theodore Ts'o" diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 49484ba..b4386da 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3622,7 +3622,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, * block pointed to itself, it would have been detached when * the block was cleared. Check for this instead of OOPSing. */ - if (bh2jh(this_bh)) + if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) ext4_handle_dirty_metadata(handle, inode, this_bh); else ext4_error(inode->i_sb, __func__, -- cgit v0.10.2 From a9df80c5094ed2bac94f4a0d085651f44d549854 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Tue, 20 Jan 2009 16:17:40 +0100 Subject: eeepc-laptop: split eeepc_backlight_exit() eeepc_backlight_exit() was doing rfkill and input stuff, which is a nonsense. This patch add two specific exit functions, one for input and one for rfkill. Signed-off-by: Corentin Chary Signed-off-by: Len Brown diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index 9d93cb9..66d611b 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -737,13 +737,21 @@ static void eeepc_backlight_exit(void) { if (eeepc_backlight_device) backlight_device_unregister(eeepc_backlight_device); - if (ehotk->inputdev) - input_unregister_device(ehotk->inputdev); + eeepc_backlight_device = NULL; +} + +static void eeepc_rfkill_exit(void) +{ if (ehotk->eeepc_wlan_rfkill) rfkill_unregister(ehotk->eeepc_wlan_rfkill); if (ehotk->eeepc_bluetooth_rfkill) rfkill_unregister(ehotk->eeepc_bluetooth_rfkill); - eeepc_backlight_device = NULL; +} + +static void eeepc_input_exit(void) +{ + if (ehotk->inputdev) + input_unregister_device(ehotk->inputdev); } static void eeepc_hwmon_exit(void) @@ -762,6 +770,8 @@ static void eeepc_hwmon_exit(void) static void __exit eeepc_laptop_exit(void) { eeepc_backlight_exit(); + eeepc_rfkill_exit(); + eeepc_input_exit(); eeepc_hwmon_exit(); acpi_bus_unregister_driver(&eeepc_hotk_driver); sysfs_remove_group(&platform_device->dev.kobj, @@ -865,6 +875,8 @@ fail_platform_driver: fail_hwmon: eeepc_backlight_exit(); fail_backlight: + eeepc_input_exit(); + eeepc_rfkill_exit(); return result; } -- cgit v0.10.2 From 1021e2119eb33a990a2b9ff1410805dd9bdf7997 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Tue, 20 Jan 2009 16:17:41 +0100 Subject: asus_acpi: Add R1F support Add R1F support Signed-off-by: Corentin Chary Signed-off-by: Len Brown diff --git a/drivers/platform/x86/asus_acpi.c b/drivers/platform/x86/asus_acpi.c index 1e74988..d63f26e 100644 --- a/drivers/platform/x86/asus_acpi.c +++ b/drivers/platform/x86/asus_acpi.c @@ -143,6 +143,7 @@ struct asus_hotk { S1300N, S5200N*/ A4S, /* Z81sp */ F3Sa, /* (Centrino) */ + R1F, END_MODEL } model; /* Models currently supported */ u16 event_count[128]; /* Count for each event TODO make this better */ @@ -420,7 +421,18 @@ static struct model_data model_conf[END_MODEL] = { .display_get = "\\ADVG", .display_set = "SDSP", }, - + { + .name = "R1F", + .mt_bt_switch = "BLED", + .mt_mled = "MLED", + .mt_wled = "WLED", + .mt_lcd_switch = "\\Q10", + .lcd_status = "\\GP06", + .brightness_set = "SPLV", + .brightness_get = "GPLV", + .display_set = "SDSP", + .display_get = "\\INFB" + } }; /* procdir we use */ @@ -1165,6 +1177,8 @@ static int asus_model_match(char *model) return W3V; else if (strncmp(model, "W5A", 3) == 0) return W5A; + else if (strncmp(model, "R1F", 3) == 0) + return R1F; else if (strncmp(model, "A4S", 3) == 0) return A4S; else if (strncmp(model, "F3Sa", 4) == 0) -- cgit v0.10.2 From 2a7dc0d8c60325e9bf820900bf919430e5a419ab Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Tue, 20 Jan 2009 16:17:42 +0100 Subject: asus-laptop: use generic netlink interface To be prepared for /proc/acpi/event removal we export events also through generic netlink interface. Signed-off-by: Corentin Chary Signed-off-by: Len Brown diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c index 8fb8b35..1b7a28c 100644 --- a/drivers/platform/x86/asus-laptop.c +++ b/drivers/platform/x86/asus-laptop.c @@ -738,8 +738,9 @@ static void asus_hotk_notify(acpi_handle handle, u32 event, void *data) lcd_blank(FB_BLANK_POWERDOWN); } - acpi_bus_generate_proc_event(hotk->device, event, - hotk->event_count[event % 128]++); + acpi_bus_generate_netlink_event(hotk->device->pnp.device_class, + dev_name(&hotk->device->dev), event, + hotk->event_count[event % 128]++); return; } -- cgit v0.10.2 From 034ce90a8d1051deaeb31bae7f26ff1440a5b988 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Tue, 20 Jan 2009 16:17:43 +0100 Subject: asus-laptop: hotkeys via the generic input interface This patch is based on eeepc-laptop.c and the patchs from Nicolas Trangez and Daniel Nascimento (mainly for the keymap). Signed-off-by: Corentin Chary Signed-off-by: Len Brown diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c index 1b7a28c..53dbfb4 100644 --- a/drivers/platform/x86/asus-laptop.c +++ b/drivers/platform/x86/asus-laptop.c @@ -46,6 +46,7 @@ #include #include #include +#include #define ASUS_LAPTOP_VERSION "0.42" @@ -181,6 +182,8 @@ struct asus_hotk { u8 light_level; //light sensor level u8 light_switch; //light sensor switch value u16 event_count[128]; //count for each event TODO make this better + struct input_dev *inputdev; + u16 *keycode_map; }; /* @@ -250,6 +253,37 @@ ASUS_LED(rled, "record"); ASUS_LED(pled, "phone"); ASUS_LED(gled, "gaming"); +struct key_entry { + char type; + u8 code; + u16 keycode; +}; + +enum { KE_KEY, KE_END }; + +static struct key_entry asus_keymap[] = { + {KE_KEY, 0x30, KEY_VOLUMEUP}, + {KE_KEY, 0x31, KEY_VOLUMEDOWN}, + {KE_KEY, 0x32, KEY_MUTE}, + {KE_KEY, 0x33, KEY_SWITCHVIDEOMODE}, + {KE_KEY, 0x34, KEY_SWITCHVIDEOMODE}, + {KE_KEY, 0x40, KEY_PREVIOUSSONG}, + {KE_KEY, 0x41, KEY_NEXTSONG}, + {KE_KEY, 0x43, KEY_STOP}, + {KE_KEY, 0x45, KEY_PLAYPAUSE}, + {KE_KEY, 0x50, KEY_EMAIL}, + {KE_KEY, 0x51, KEY_WWW}, + {KE_KEY, 0x5C, BTN_EXTRA}, /* Performance */ + {KE_KEY, 0x5D, KEY_WLAN}, + {KE_KEY, 0x61, KEY_SWITCHVIDEOMODE}, + {KE_KEY, 0x6B, BTN_TOUCH}, /* Lock Mouse */ + {KE_KEY, 0x82, KEY_CAMERA}, + {KE_KEY, 0x8A, KEY_TV}, + {KE_KEY, 0x95, KEY_MEDIA}, + {KE_KEY, 0x99, KEY_PHONE}, + {KE_END, 0}, +}; + /* * This function evaluates an ACPI method, given an int as parameter, the * method is searched within the scope of the handle, can be NULL. The output @@ -720,8 +754,68 @@ static ssize_t store_gps(struct device *dev, struct device_attribute *attr, return store_status(buf, count, NULL, GPS_ON); } +/* + * Hotkey functions + */ +static struct key_entry *asus_get_entry_by_scancode(int code) +{ + struct key_entry *key; + + for (key = asus_keymap; key->type != KE_END; key++) + if (code == key->code) + return key; + + return NULL; +} + +static struct key_entry *asus_get_entry_by_keycode(int code) +{ + struct key_entry *key; + + for (key = asus_keymap; key->type != KE_END; key++) + if (code == key->keycode && key->type == KE_KEY) + return key; + + return NULL; +} + +static int asus_getkeycode(struct input_dev *dev, int scancode, int *keycode) +{ + struct key_entry *key = asus_get_entry_by_scancode(scancode); + + if (key && key->type == KE_KEY) { + *keycode = key->keycode; + return 0; + } + + return -EINVAL; +} + +static int asus_setkeycode(struct input_dev *dev, int scancode, int keycode) +{ + struct key_entry *key; + int old_keycode; + + if (keycode < 0 || keycode > KEY_MAX) + return -EINVAL; + + key = asus_get_entry_by_scancode(scancode); + if (key && key->type == KE_KEY) { + old_keycode = key->keycode; + key->keycode = keycode; + set_bit(keycode, dev->keybit); + if (!asus_get_entry_by_keycode(old_keycode)) + clear_bit(old_keycode, dev->keybit); + return 0; + } + + return -EINVAL; +} + static void asus_hotk_notify(acpi_handle handle, u32 event, void *data) { + static struct key_entry *key; + /* TODO Find a better way to handle events count. */ if (!hotk) return; @@ -742,7 +836,20 @@ static void asus_hotk_notify(acpi_handle handle, u32 event, void *data) dev_name(&hotk->device->dev), event, hotk->event_count[event % 128]++); - return; + if (hotk->inputdev) { + key = asus_get_entry_by_scancode(event); + if (!key) + return ; + + switch (key->type) { + case KE_KEY: + input_report_key(hotk->inputdev, key->keycode, 1); + input_sync(hotk->inputdev); + input_report_key(hotk->inputdev, key->keycode, 0); + input_sync(hotk->inputdev); + break; + } + } } #define ASUS_CREATE_DEVICE_ATTR(_name) \ @@ -960,6 +1067,38 @@ static int asus_hotk_get_info(void) return AE_OK; } +static int asus_input_init(void) +{ + const struct key_entry *key; + int result; + + hotk->inputdev = input_allocate_device(); + if (!hotk->inputdev) { + printk(ASUS_INFO "Unable to allocate input device\n"); + return 0; + } + hotk->inputdev->name = "Asus Laptop extra buttons"; + hotk->inputdev->phys = ASUS_HOTK_FILE "/input0"; + hotk->inputdev->id.bustype = BUS_HOST; + hotk->inputdev->getkeycode = asus_getkeycode; + hotk->inputdev->setkeycode = asus_setkeycode; + + for (key = asus_keymap; key->type != KE_END; key++) { + switch (key->type) { + case KE_KEY: + set_bit(EV_KEY, hotk->inputdev->evbit); + set_bit(key->keycode, hotk->inputdev->keybit); + break; + } + } + result = input_register_device(hotk->inputdev); + if (result) { + printk(ASUS_INFO "Unable to register input device\n"); + input_free_device(hotk->inputdev); + } + return result; +} + static int asus_hotk_check(void) { int result = 0; @@ -1092,10 +1231,17 @@ static void asus_led_exit(void) ASUS_LED_UNREGISTER(gled); } +static void asus_input_exit(void) +{ + if (hotk->inputdev) + input_unregister_device(hotk->inputdev); +} + static void __exit asus_laptop_exit(void) { asus_backlight_exit(); asus_led_exit(); + asus_input_exit(); acpi_bus_unregister_driver(&asus_hotk_driver); sysfs_remove_group(&asuspf_device->dev.kobj, &asuspf_attribute_group); @@ -1217,6 +1363,10 @@ static int __init asus_laptop_init(void) printk(ASUS_INFO "Brightness ignored, must be controlled by " "ACPI video driver\n"); + result = asus_input_init(); + if (result) + goto fail_input; + result = asus_led_init(dev); if (result) goto fail_led; @@ -1256,6 +1406,9 @@ static int __init asus_laptop_init(void) asus_led_exit(); fail_led: + asus_input_exit(); + + fail_input: asus_backlight_exit(); fail_backlight: -- cgit v0.10.2 From 12d6f35b0ff1f446d465e95e9a2fe187263479ef Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Tue, 20 Jan 2009 16:17:44 +0100 Subject: asus-laptop: update Kconfig for input layer Update Kconfig, now asus-laptop use the input layer. Signed-off-by: Corentin Chary Signed-off-by: Len Brown diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 1a266d4..9436311 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -42,6 +42,7 @@ config ASUS_LAPTOP depends on LEDS_CLASS depends on NEW_LEDS depends on BACKLIGHT_CLASS_DEVICE + depends on INPUT ---help--- This is the new Linux driver for Asus laptops. It may also support some MEDION, JVC or VICTOR laptops. It makes all the extra buttons generate -- cgit v0.10.2 From ed6f44215374d94c35cbe98b582d004b9a3f5fbe Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Tue, 20 Jan 2009 16:17:45 +0100 Subject: asus-laptop: fix label indentation Fix the label indentation Signed-off-by: Corentin Chary Signed-off-by: Len Brown diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c index 53dbfb4..56af6cf 100644 --- a/drivers/platform/x86/asus-laptop.c +++ b/drivers/platform/x86/asus-laptop.c @@ -1184,7 +1184,7 @@ static int asus_hotk_add(struct acpi_device *device) /* GPS is on by default */ write_status(NULL, 1, GPS_ON); - end: +end: if (result) { kfree(hotk->name); kfree(hotk); @@ -1393,25 +1393,25 @@ static int __init asus_laptop_init(void) return 0; - fail_sysfs: +fail_sysfs: platform_device_del(asuspf_device); - fail_platform_device2: +fail_platform_device2: platform_device_put(asuspf_device); - fail_platform_device1: +fail_platform_device1: platform_driver_unregister(&asuspf_driver); - fail_platform_driver: +fail_platform_driver: asus_led_exit(); - fail_led: +fail_led: asus_input_exit(); - fail_input: +fail_input: asus_backlight_exit(); - fail_backlight: +fail_backlight: return result; } -- cgit v0.10.2 From b5f6f26550700445dcc125bbf75b9104e779d353 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 20 Jan 2009 16:17:46 +0100 Subject: eeepc-laptop: Add support for extended hotkeys Newer Eees have extra hotkeys above the function keys. This patch adds support for sending them through the input layer. Signed-off-by: Matthew Garrett Signed-off-by: Corentin Chary Signed-off-by: Len Brown diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index 66d611b..d153871 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -161,6 +161,10 @@ static struct key_entry eeepc_keymap[] = { {KE_KEY, 0x13, KEY_MUTE }, {KE_KEY, 0x14, KEY_VOLUMEDOWN }, {KE_KEY, 0x15, KEY_VOLUMEUP }, + {KE_KEY, 0x1a, KEY_COFFEE }, + {KE_KEY, 0x1b, KEY_ZOOM }, + {KE_KEY, 0x1c, KEY_PROG2 }, + {KE_KEY, 0x1d, KEY_PROG3 }, {KE_KEY, 0x30, KEY_SWITCHVIDEOMODE }, {KE_KEY, 0x31, KEY_SWITCHVIDEOMODE }, {KE_KEY, 0x32, KEY_SWITCHVIDEOMODE }, -- cgit v0.10.2 From c9ddf8fede1271bde0a512fa94f77c4cb1ef4040 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 20 Jan 2009 16:17:47 +0100 Subject: eeepc-laptop: Check return values from rfkill_register Error out if rfkill registration fails, and also set the default system state appropriately on boot Signed-off-by: Matthew Garrett Signed-off-by: Corentin Chary Signed-off-by: Len Brown diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index d153871..21e5206 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -562,7 +562,7 @@ static int eeepc_hotk_add(struct acpi_device *device) ehotk->device = device; result = eeepc_hotk_check(); if (result) - goto end; + goto ehotk_fail; status = acpi_install_notify_handler(ehotk->handle, ACPI_SYSTEM_NOTIFY, eeepc_hotk_notify, ehotk); if (ACPI_FAILURE(status)) @@ -573,18 +573,25 @@ static int eeepc_hotk_add(struct acpi_device *device) RFKILL_TYPE_WLAN); if (!ehotk->eeepc_wlan_rfkill) - goto end; + goto wlan_fail; ehotk->eeepc_wlan_rfkill->name = "eeepc-wlan"; ehotk->eeepc_wlan_rfkill->toggle_radio = eeepc_wlan_rfkill_set; ehotk->eeepc_wlan_rfkill->get_state = eeepc_wlan_rfkill_state; - if (get_acpi(CM_ASL_WLAN) == 1) + if (get_acpi(CM_ASL_WLAN) == 1) { ehotk->eeepc_wlan_rfkill->state = RFKILL_STATE_UNBLOCKED; - else + rfkill_set_default(RFKILL_TYPE_WLAN, + RFKILL_STATE_UNBLOCKED); + } else { ehotk->eeepc_wlan_rfkill->state = RFKILL_STATE_SOFT_BLOCKED; - rfkill_register(ehotk->eeepc_wlan_rfkill); + rfkill_set_default(RFKILL_TYPE_WLAN, + RFKILL_STATE_SOFT_BLOCKED); + } + result = rfkill_register(ehotk->eeepc_wlan_rfkill); + if (result) + goto wlan_fail; } if (get_acpi(CM_ASL_BLUETOOTH) != -1) { @@ -592,27 +599,43 @@ static int eeepc_hotk_add(struct acpi_device *device) rfkill_allocate(&device->dev, RFKILL_TYPE_BLUETOOTH); if (!ehotk->eeepc_bluetooth_rfkill) - goto end; + goto bluetooth_fail; ehotk->eeepc_bluetooth_rfkill->name = "eeepc-bluetooth"; ehotk->eeepc_bluetooth_rfkill->toggle_radio = eeepc_bluetooth_rfkill_set; ehotk->eeepc_bluetooth_rfkill->get_state = eeepc_bluetooth_rfkill_state; - if (get_acpi(CM_ASL_BLUETOOTH) == 1) + if (get_acpi(CM_ASL_BLUETOOTH) == 1) { ehotk->eeepc_bluetooth_rfkill->state = RFKILL_STATE_UNBLOCKED; - else + rfkill_set_default(RFKILL_TYPE_BLUETOOTH, + RFKILL_STATE_UNBLOCKED); + } else { ehotk->eeepc_bluetooth_rfkill->state = RFKILL_STATE_SOFT_BLOCKED; - rfkill_register(ehotk->eeepc_bluetooth_rfkill); - } + rfkill_set_default(RFKILL_TYPE_BLUETOOTH, + RFKILL_STATE_SOFT_BLOCKED); + } - end: - if (result) { - kfree(ehotk); - ehotk = NULL; + result = rfkill_register(ehotk->eeepc_bluetooth_rfkill); + if (result) + goto bluetooth_fail; } + return 0; + + bluetooth_fail: + if (ehotk->eeepc_bluetooth_rfkill) + rfkill_free(ehotk->eeepc_bluetooth_rfkill); + rfkill_unregister(ehotk->eeepc_wlan_rfkill); + ehotk->eeepc_wlan_rfkill = NULL; + wlan_fail: + if (ehotk->eeepc_wlan_rfkill) + rfkill_free(ehotk->eeepc_wlan_rfkill); + ehotk_fail: + kfree(ehotk); + ehotk = NULL; + return result; } -- cgit v0.10.2 From 5740294ca3a9b113fe146f2826effb69ca50008d Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 20 Jan 2009 16:17:48 +0100 Subject: eeepc-laptop: Implement rfkill hotplugging in eeepc-laptop The Eee implements rfkill by logically unplugging the wireless card from the PCI bus. Despite sending ACPI notifications, this does not appear to be implemented using standard ACPI hotplug - nor does the firmware provide the _OSC method required to support native PCIe hotplug. The only sensible choice appears to be to handle the hotplugging directly in the eeepc-laptop driver. Tested successfully on a 700, 900 and 901. Signed-off-by: Matthew Garrett Signed-off-by: Corentin Chary Signed-off-by: Len Brown diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index 21e5206..66655d2 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -30,6 +30,7 @@ #include #include #include +#include #define EEEPC_LAPTOP_VERSION "0.1" @@ -517,6 +518,41 @@ static void notify_brn(void) bd->props.brightness = read_brightness(bd); } +static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data) +{ + struct pci_dev *dev; + struct pci_bus *bus = pci_find_bus(0, 1); + + if (event != ACPI_NOTIFY_BUS_CHECK) + return; + + if (!bus) { + printk(EEEPC_WARNING "Unable to find PCI bus 1?\n"); + return; + } + + if (get_acpi(CM_ASL_WLAN) == 1) { + dev = pci_get_slot(bus, 0); + if (dev) { + /* Device already present */ + pci_dev_put(dev); + return; + } + dev = pci_scan_single_device(bus, 0); + if (dev) { + pci_bus_assign_resources(bus); + if (pci_bus_add_device(dev)) + printk(EEEPC_ERR "Unable to hotplug wifi\n"); + } + } else { + dev = pci_get_slot(bus, 0); + if (dev) { + pci_remove_bus_device(dev); + pci_dev_put(dev); + } + } +} + static void eeepc_hotk_notify(acpi_handle handle, u32 event, void *data) { static struct key_entry *key; @@ -543,6 +579,45 @@ static void eeepc_hotk_notify(acpi_handle handle, u32 event, void *data) } } +static int eeepc_register_rfkill_notifier(char *node) +{ + acpi_status status = AE_OK; + acpi_handle handle; + + status = acpi_get_handle(NULL, node, &handle); + + if (ACPI_SUCCESS(status)) { + status = acpi_install_notify_handler(handle, + ACPI_SYSTEM_NOTIFY, + eeepc_rfkill_notify, + NULL); + if (ACPI_FAILURE(status)) + printk(EEEPC_WARNING + "Failed to register notify on %s\n", node); + } else + return -ENODEV; + + return 0; +} + +static void eeepc_unregister_rfkill_notifier(char *node) +{ + acpi_status status = AE_OK; + acpi_handle handle; + + status = acpi_get_handle(NULL, node, &handle); + + if (ACPI_SUCCESS(status)) { + status = acpi_remove_notify_handler(handle, + ACPI_SYSTEM_NOTIFY, + eeepc_rfkill_notify); + if (ACPI_FAILURE(status)) + printk(EEEPC_ERR + "Error removing rfkill notify handler %s\n", + node); + } +} + static int eeepc_hotk_add(struct acpi_device *device) { acpi_status status = AE_OK; @@ -622,6 +697,10 @@ static int eeepc_hotk_add(struct acpi_device *device) if (result) goto bluetooth_fail; } + + eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P6"); + eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7"); + return 0; bluetooth_fail: @@ -649,6 +728,10 @@ static int eeepc_hotk_remove(struct acpi_device *device, int type) eeepc_hotk_notify); if (ACPI_FAILURE(status)) printk(EEEPC_ERR "Error removing notify handler\n"); + + eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P6"); + eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P7"); + kfree(ehotk); return 0; } -- cgit v0.10.2 From 2b25c9f01aa58d48129b2f93748dfb5d1f7ab0a2 Mon Sep 17 00:00:00 2001 From: Corentin Chary Date: Tue, 20 Jan 2009 16:17:49 +0100 Subject: eeepc-laptop: use netlink interface To be prepared for /proc/acpi/event removal we export events also through generic netlink interface. Signed-off-by: Corentin Chary Signed-off-by: Len Brown diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index 66655d2..4348d99 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -560,8 +560,9 @@ static void eeepc_hotk_notify(acpi_handle handle, u32 event, void *data) return; if (event >= NOTIFY_BRN_MIN && event <= NOTIFY_BRN_MAX) notify_brn(); - acpi_bus_generate_proc_event(ehotk->device, event, - ehotk->event_count[event % 128]++); + acpi_bus_generate_netlink_event(ehotk->device->pnp.device_class, + dev_name(&ehotk->device->dev), event, + ehotk->event_count[event % 128]++); if (ehotk->inputdev) { key = eepc_get_entry_by_scancode(event); if (key) { -- cgit v0.10.2 From 26a552264bc92d2ec4747b1babb7cb1264908018 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Wed, 21 Jan 2009 11:29:19 +0800 Subject: [ARM] pxa: stop and disable IRQ for each DMA channels at startup Some broken bootloaders will leave the DMA channel state unclean, which we should really initialize correctly here. Signed-off-by: Eric Miao diff --git a/arch/arm/mach-pxa/dma.c b/arch/arm/mach-pxa/dma.c index b1514fb..7de17fc 100644 --- a/arch/arm/mach-pxa/dma.c +++ b/arch/arm/mach-pxa/dma.c @@ -121,20 +121,22 @@ int __init pxa_init_dma(int num_ch) if (dma_channels == NULL) return -ENOMEM; - ret = request_irq(IRQ_DMA, dma_irq_handler, IRQF_DISABLED, "DMA", NULL); - if (ret) { - printk (KERN_CRIT "Wow! Can't register IRQ for DMA\n"); - kfree(dma_channels); - return ret; - } - /* dma channel priorities on pxa2xx processors: * ch 0 - 3, 16 - 19 <--> (0) DMA_PRIO_HIGH * ch 4 - 7, 20 - 23 <--> (1) DMA_PRIO_MEDIUM * ch 8 - 15, 24 - 31 <--> (2) DMA_PRIO_LOW */ - for (i = 0; i < num_ch; i++) + for (i = 0; i < num_ch; i++) { + DCSR(i) = 0; dma_channels[i].prio = min((i & 0xf) >> 2, DMA_PRIO_LOW); + } + + ret = request_irq(IRQ_DMA, dma_irq_handler, IRQF_DISABLED, "DMA", NULL); + if (ret) { + printk (KERN_CRIT "Wow! Can't register IRQ for DMA\n"); + kfree(dma_channels); + return ret; + } num_dma_channels = num_ch; return 0; -- cgit v0.10.2 From 00f57f545afa422db3003b0d0b30a30f8de7ecb2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 14 Jan 2009 13:33:27 -0800 Subject: tracing/function-graph-tracer: fix a regression while suspend to disk Impact: fix a crash while kernel image restore When the function graph tracer is running and while suspend to disk, some racy and dangerous things happen against this tracer. The current task will save its registers including the stack pointer which contains the return address hooked by the tracer. But the current task will continue to enter other functions after that to save the memory, and then it will store other return addresses, and finally loose the old depth which matches the return address saved in the old stack (during the registers saving). So on image restore, the code will return to wrong addresses. And there are other things: on restore, the task will have it's "current" pointer overwritten during registers restoring....switching from one task to another... That would be insane to try to trace function graphs at these stages. This patch makes the function graph tracer listening on power events, making it's tracing disabled for the current task (the one that performs the hibernation work) while suspend/resume to disk, making the tracing safe during hibernation. Signed-off-by: Frederic Weisbecker Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2f32969..7dcf6e9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1965,6 +1966,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, #ifdef CONFIG_FUNCTION_GRAPH_TRACER static atomic_t ftrace_graph_active; +static struct notifier_block ftrace_suspend_notifier; int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) { @@ -2043,6 +2045,27 @@ static int start_graph_tracing(void) return ret; } +/* + * Hibernation protection. + * The state of the current task is too much unstable during + * suspend/restore to disk. We want to protect against that. + */ +static int +ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state, + void *unused) +{ + switch (state) { + case PM_HIBERNATION_PREPARE: + pause_graph_tracing(); + break; + + case PM_POST_HIBERNATION: + unpause_graph_tracing(); + break; + } + return NOTIFY_DONE; +} + int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc) { @@ -2050,6 +2073,9 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, mutex_lock(&ftrace_sysctl_lock); + ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; + register_pm_notifier(&ftrace_suspend_notifier); + atomic_inc(&ftrace_graph_active); ret = start_graph_tracing(); if (ret) { @@ -2075,6 +2101,7 @@ void unregister_ftrace_graph(void) ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(FTRACE_STOP_FUNC_RET); + unregister_pm_notifier(&ftrace_suspend_notifier); mutex_unlock(&ftrace_sysctl_lock); } -- cgit v0.10.2 From 551b4048b3d4acf15aff9fe4aed89b892c135b02 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 12 Jan 2009 11:06:18 +0800 Subject: ring_buffer: reset write when reserve buffer fail Impact: reset struct buffer_page.write when interrupt storm if struct buffer_page.write is not reset, any succedent committing will corrupted ring_buffer: static inline void rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) { ...... cpu_buffer->commit_page->commit = cpu_buffer->commit_page->write; ...... } when "if (RB_WARN_ON(cpu_buffer, next_page == reader_page))", ring_buffer is disabled, but some reserved buffers may haven't been committed. we need reset struct buffer_page.write. when "if (unlikely(next_page == cpu_buffer->commit_page))", ring_buffer is still available, we should not corrupt it. Signed-off-by: Lai Jiangshan Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1d65263..9c1e73d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1025,12 +1025,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, } if (next_page == head_page) { - if (!(buffer->flags & RB_FL_OVERWRITE)) { - /* reset write */ - if (tail <= BUF_PAGE_SIZE) - local_set(&tail_page->write, tail); + if (!(buffer->flags & RB_FL_OVERWRITE)) goto out_unlock; - } /* tail_page has not moved yet? */ if (tail_page == cpu_buffer->tail_page) { @@ -1105,6 +1101,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, return event; out_unlock: + /* reset write */ + if (tail <= BUF_PAGE_SIZE) + local_set(&tail_page->write, tail); + __raw_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); return NULL; -- cgit v0.10.2 From faf6861ebd776871e77b761c43ec045cd20b5716 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Jan 2009 12:24:42 -0500 Subject: trace: print ftrace_dump at KERN_EMERG log level Impact: fix to print out ftrace_dump when expected I was debugging a hard race condition to only find out that after I hit the race, my log level was not at level to show KERN_INFO. The time it took to trigger the race was wasted because I did not capture the trace. Since ftrace_dump is only called from kernel oops (and only when it is set in the kernel command line to do so), or when a developer adds it to their own local tree, the log level of the print should be at KERN_EMERG to make sure the print appears. ftrace_dump is not called by a normal user setup, and will not add extra unwanted print out to the console. There is no reason it should be at KERN_INFO. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c580233..1a1c5a6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3736,7 +3736,7 @@ static struct notifier_block trace_die_notifier = { * it if we decide to change what log level the ftrace dump * should be at. */ -#define KERN_TRACE KERN_INFO +#define KERN_TRACE KERN_EMERG static void trace_printk_seq(struct trace_seq *s) -- cgit v0.10.2 From a442e5e0a2011af5b2d1f118fee0a8f9079f1d88 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Jan 2009 14:50:19 -0500 Subject: trace: stop all recording to ring buffer on ftrace_dump Impact: limit ftrace dump output Currently ftrace_dump only calls ftrace_kill that is a fast way to prevent the function tracer functions from being called (just sets a flag and clears the function to call, nothing else). It is better to also turn off any recording to the ring buffers as well. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1a1c5a6..4d89e84 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3770,6 +3770,7 @@ void ftrace_dump(void) dump_ran = 1; /* No turning back! */ + tracing_off(); ftrace_kill(); for_each_tracing_cpu(cpu) { -- cgit v0.10.2 From 1092307d582a7566d23779c304cf86f3075ac5f0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 15 Jan 2009 23:40:11 -0500 Subject: trace: set max latency variable to zero on default Impact: trace max latencies on start of latency tracing This patch sets the max latency to zero whenever one of the irq variant tracers or the wakeup tracer is set to current tracer. Most developers expect to see output when starting up a latency tracer. But since the max_latency is already set to max, and it takes a latency greater than max_latency to be recorded, there is no trace. This is not the expected behavior and has even confused myself. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4d89e84..17bb88d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -40,7 +40,7 @@ #define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) -unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; +unsigned long __read_mostly tracing_max_latency; unsigned long __read_mostly tracing_thresh; /* diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 7c2e326..62a78d9 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -380,6 +380,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr) static void __irqsoff_tracer_init(struct trace_array *tr) { + tracing_max_latency = 0; irqsoff_trace = tr; /* make sure that the tracer is visible */ smp_wmb(); diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 43586b6..42ae1e7 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -333,6 +333,7 @@ static void stop_wakeup_tracer(struct trace_array *tr) static int wakeup_tracer_init(struct trace_array *tr) { + tracing_max_latency = 0; wakeup_trace = tr; start_wakeup_tracer(tr); return 0; -- cgit v0.10.2 From 19d00cc196a3a66fd074f62b39d219f743b92338 Mon Sep 17 00:00:00 2001 From: Wang Cong Date: Wed, 21 Jan 2009 10:49:16 -0500 Subject: Btrfs: cleanup fs/btrfs/super.c::btrfs_control_ioctl() - Remove the unused local variable 'len'; - Check return value of kmalloc(). Signed-off-by: Wang Cong Signed-off-by: Chris Mason diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 92c9b54..795b624 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -583,17 +583,18 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, struct btrfs_ioctl_vol_args *vol; struct btrfs_fs_devices *fs_devices; int ret = -ENOTTY; - int len; if (!capable(CAP_SYS_ADMIN)) return -EPERM; vol = kmalloc(sizeof(*vol), GFP_KERNEL); + if (!vol) + return -ENOMEM; + if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { ret = -EFAULT; goto out; } - len = strnlen(vol->name, BTRFS_PATH_NAME_MAX); switch (cmd) { case BTRFS_IOC_SCAN_DEV: -- cgit v0.10.2 From eb1eb04fdfbd9e1c9c40d072ab1b82fe593eeb0f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 21 Jan 2009 10:49:16 -0500 Subject: Btrfs: MAINTAINERS entry Signed-off-by: Chris Mason diff --git a/MAINTAINERS b/MAINTAINERS index fbc8fa5..e1b7426 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1021,6 +1021,14 @@ M: mb@bu3sch.de W: http://bu3sch.de/btgpio.php S: Maintained +BTRFS FILE SYSTEM +P: Chris Mason +M: chris.mason@oracle.com +L: linux-btrfs@vger.kernel.org +W: http://btrfs.wiki.kernel.org/ +T: git kernel.org:/pub/scm/linux/kernel/git/mason/btrfs-unstable.git +S: Maintained + BTTV VIDEO4LINUX DRIVER P: Mauro Carvalho Chehab M: mchehab@infradead.org -- cgit v0.10.2 From 070604040b86511cc2df0f25f98e26c5529bd928 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 21 Jan 2009 10:49:16 -0500 Subject: Btrfs: cleanup xattr code Andrew's review of the xattr code revealed some minor issues that this patch addresses. Just an error return fix, got rid of a useless statement and commented one of the trickier parts of __btrfs_getxattr. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 7f332e2..b4fa5f4 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -45,9 +45,12 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name, /* lookup the xattr by name */ di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, strlen(name), 0); - if (!di || IS_ERR(di)) { + if (!di) { ret = -ENODATA; goto out; + } else if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; } leaf = path->nodes[0]; @@ -62,6 +65,14 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name, ret = -ERANGE; goto out; } + + /* + * The way things are packed into the leaf is like this + * |struct btrfs_dir_item|name|data| + * where name is the xattr name, so security.foo, and data is the + * content of the xattr. data_ptr points to the location in memory + * where the data starts in the in memory leaf + */ data_ptr = (unsigned long)((char *)(di + 1) + btrfs_dir_name_len(leaf, di)); read_extent_buffer(leaf, buffer, data_ptr, @@ -176,7 +187,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; - ret = 0; advance = 0; while (1) { leaf = path->nodes[0]; -- cgit v0.10.2 From 7eaebe7d503c3ef240ac7b3efc5433fe647c0298 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Wed, 21 Jan 2009 10:49:16 -0500 Subject: Btrfs: removed unused #include 's Removed unused #include 's in btrfs Signed-off-by: Huang Weiyi Signed-off-by: Chris Mason diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 8e2fec0..d5f4e94 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include #include #include diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index ee848d8..ab07627 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include "compat.h" #include "ctree.h" diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 81a3138..37e12f6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include #include #include diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 293da65..cdc961e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -19,7 +19,6 @@ #include #include #include -#include #include "compat.h" #include "hash.h" #include "crc32c.h" diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e086d40..c9446d4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include "extent_io.h" diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 4a83e33..50da69d 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -3,7 +3,6 @@ #include #include #include -#include #include #include "extent_map.h" diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9026833..fbcbf43 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -29,7 +29,6 @@ #include #include #include -#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8adfe05..44dbd55 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c2aa33e..988fdc8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include "compat.h" diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 795b624..4c0b756 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -37,7 +37,6 @@ #include #include #include -#include #include "compat.h" #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3451e1c..f6e1fc5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include "compat.h" #include "ctree.h" -- cgit v0.10.2 From 119e10cf1b2f6a6cafff74f32373d631489f54c2 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 21 Jan 2009 10:49:16 -0500 Subject: Btrfs: Remove extra KERN_INFO in the middle of a line The "devid transid " printk in btrfs_scan_one_device() actually follows another printk that doesn't end in a newline (since the intention is for the two printks to make one line of output), so the KERN_INFO just ends up messing up the output: device label exp <6>devid 1 transid 9 /dev/sda5 Fix this by changing the extra KERN_INFO to KERN_CONT. Signed-off-by: Roland Dreier Signed-off-by: Chris Mason diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f6e1fc5..397c8db 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -577,7 +577,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, *(unsigned long long *)disk_super->fsid, *(unsigned long long *)(disk_super->fsid + 8)); } - printk(KERN_INFO "devid %llu transid %llu %s\n", + printk(KERN_CONT "devid %llu transid %llu %s\n", (unsigned long long)devid, (unsigned long long)transid, path); ret = device_list_add(path, disk_super, devid, fs_devices_ret); -- cgit v0.10.2 From 57506d50ed6db7b0e7ddc9845e86e81f140983d5 Mon Sep 17 00:00:00 2001 From: Qinghuang Feng Date: Wed, 21 Jan 2009 10:49:16 -0500 Subject: Btrfs: check return value for kthread_run() correctly kthread_run() returns the kthread or ERR_PTR(-ENOMEM), not NULL. Signed-off-by: Qinghuang Feng Signed-off-by: Chris Mason diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 37e12f6..0d8ccd6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1739,13 +1739,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, "btrfs-cleaner"); - if (!fs_info->cleaner_kthread) + if (IS_ERR(fs_info->cleaner_kthread)) goto fail_csum_root; fs_info->transaction_kthread = kthread_run(transaction_kthread, tree_root, "btrfs-transaction"); - if (!fs_info->transaction_kthread) + if (IS_ERR(fs_info->transaction_kthread)) goto fail_cleaner; if (btrfs_super_log_root(disk_super) != 0) { -- cgit v0.10.2 From c6e308713a47527f88a277ee95b7c5d1db80f77b Mon Sep 17 00:00:00 2001 From: Qinghuang Feng Date: Wed, 21 Jan 2009 10:59:08 -0500 Subject: Btrfs: simplify iteration codes Merge list_for_each* and list_entry to list_for_each_entry* Signed-off-by: Qinghuang Feng Signed-off-by: Chris Mason diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0d8ccd6..26a1877 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1135,7 +1135,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) { struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; int ret = 0; - struct list_head *cur; struct btrfs_device *device; struct backing_dev_info *bdi; #if 0 @@ -1143,8 +1142,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) btrfs_congested_async(info, 0)) return 1; #endif - list_for_each(cur, &info->fs_devices->devices) { - device = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(device, &info->fs_devices->devices, dev_list) { if (!device->bdev) continue; bdi = blk_get_backing_dev_info(device->bdev); @@ -1162,13 +1160,11 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) */ static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { - struct list_head *cur; struct btrfs_device *device; struct btrfs_fs_info *info; info = (struct btrfs_fs_info *)bdi->unplug_io_data; - list_for_each(cur, &info->fs_devices->devices) { - device = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(device, &info->fs_devices->devices, dev_list) { if (!device->bdev) continue; @@ -1994,7 +1990,6 @@ static int write_dev_supers(struct btrfs_device *device, int write_all_supers(struct btrfs_root *root, int max_mirrors) { - struct list_head *cur; struct list_head *head = &root->fs_info->fs_devices->devices; struct btrfs_device *dev; struct btrfs_super_block *sb; @@ -2010,8 +2005,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) sb = &root->fs_info->super_for_commit; dev_item = &sb->dev_item; - list_for_each(cur, head) { - dev = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) { total_errors++; continue; @@ -2044,8 +2038,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) } total_errors = 0; - list_for_each(cur, head) { - dev = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) continue; if (!dev->in_fs_metadata || !dev->writeable) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cdc961e..a4e36c3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -325,10 +325,8 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, u64 flags) { struct list_head *head = &info->space_info; - struct list_head *cur; struct btrfs_space_info *found; - list_for_each(cur, head) { - found = list_entry(cur, struct btrfs_space_info, list); + list_for_each_entry(found, head, list) { if (found->flags == flags) return found; } @@ -3013,7 +3011,6 @@ loop_check: static void dump_space_info(struct btrfs_space_info *info, u64 bytes) { struct btrfs_block_group_cache *cache; - struct list_head *l; printk(KERN_INFO "space_info has %llu free, is %sfull\n", (unsigned long long)(info->total_bytes - info->bytes_used - @@ -3021,8 +3018,7 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) (info->full) ? "" : "not "); down_read(&info->groups_sem); - list_for_each(l, &info->block_groups) { - cache = list_entry(l, struct btrfs_block_group_cache, list); + list_for_each_entry(cache, &info->block_groups, list) { spin_lock(&cache->lock); printk(KERN_INFO "block group %llu has %llu bytes, %llu used " "%llu pinned %llu reserved\n", diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 44dbd55..45cf03e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1323,12 +1323,11 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, struct inode *inode, u64 file_offset, struct list_head *list) { - struct list_head *cur; struct btrfs_ordered_sum *sum; btrfs_set_trans_block_group(trans, inode); - list_for_each(cur, list) { - sum = list_entry(cur, struct btrfs_ordered_sum, list); + + list_for_each_entry(sum, list, list) { btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root->fs_info->csum_root, sum); } diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a209401..77c2411 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -613,7 +613,6 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, struct btrfs_sector_sum *sector_sums; struct btrfs_ordered_extent *ordered; struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; - struct list_head *cur; unsigned long num_sectors; unsigned long i; u32 sectorsize = BTRFS_I(inode)->root->sectorsize; @@ -624,8 +623,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, return 1; mutex_lock(&tree->mutex); - list_for_each_prev(cur, &ordered->list) { - ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list); + list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { if (disk_bytenr >= ordered_sum->bytenr) { num_sectors = ordered_sum->len / sectorsize; sector_sums = ordered_sum->sums; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8a08f94..919172de 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -852,11 +852,9 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, { struct btrfs_pending_snapshot *pending; struct list_head *head = &trans->transaction->pending_snapshots; - struct list_head *cur; int ret; - list_for_each(cur, head) { - pending = list_entry(cur, struct btrfs_pending_snapshot, list); + list_for_each_entry(pending, head, list) { ret = create_pending_snapshot(trans, fs_info, pending); BUG_ON(ret); } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 397c8db..fd0bedb 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -103,10 +103,8 @@ static noinline struct btrfs_device *__find_device(struct list_head *head, u64 devid, u8 *uuid) { struct btrfs_device *dev; - struct list_head *cur; - list_for_each(cur, head) { - dev = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(dev, head, dev_list) { if (dev->devid == devid && (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) { return dev; @@ -117,11 +115,9 @@ static noinline struct btrfs_device *__find_device(struct list_head *head, static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) { - struct list_head *cur; struct btrfs_fs_devices *fs_devices; - list_for_each(cur, &fs_uuids) { - fs_devices = list_entry(cur, struct btrfs_fs_devices, list); + list_for_each_entry(fs_devices, &fs_uuids, list) { if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0) return fs_devices; } @@ -344,14 +340,11 @@ error: int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) { - struct list_head *tmp; - struct list_head *cur; - struct btrfs_device *device; + struct btrfs_device *device, *next; mutex_lock(&uuid_mutex); again: - list_for_each_safe(cur, tmp, &fs_devices->devices) { - device = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { if (device->in_fs_metadata) continue; @@ -382,14 +375,12 @@ again: static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) { - struct list_head *cur; struct btrfs_device *device; if (--fs_devices->opened > 0) return 0; - list_for_each(cur, &fs_devices->devices) { - device = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(device, &fs_devices->devices, dev_list) { if (device->bdev) { close_bdev_exclusive(device->bdev, device->mode); fs_devices->open_devices--; @@ -438,7 +429,6 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, { struct block_device *bdev; struct list_head *head = &fs_devices->devices; - struct list_head *cur; struct btrfs_device *device; struct block_device *latest_bdev = NULL; struct buffer_head *bh; @@ -449,8 +439,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int seeding = 1; int ret = 0; - list_for_each(cur, head) { - device = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(device, head, dev_list) { if (device->bdev) continue; if (!device->name) @@ -1016,14 +1005,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } if (strcmp(device_path, "missing") == 0) { - struct list_head *cur; struct list_head *devices; struct btrfs_device *tmp; device = NULL; devices = &root->fs_info->fs_devices->devices; - list_for_each(cur, devices) { - tmp = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(tmp, devices, dev_list) { if (tmp->in_fs_metadata && !tmp->bdev) { device = tmp; break; @@ -1279,7 +1266,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) struct btrfs_trans_handle *trans; struct btrfs_device *device; struct block_device *bdev; - struct list_head *cur; struct list_head *devices; struct super_block *sb = root->fs_info->sb; u64 total_bytes; @@ -1303,8 +1289,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) mutex_lock(&root->fs_info->volume_mutex); devices = &root->fs_info->fs_devices->devices; - list_for_each(cur, devices) { - device = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(device, devices, dev_list) { if (device->bdev == bdev) { ret = -EEXIST; goto error; @@ -1703,7 +1688,6 @@ static u64 div_factor(u64 num, int factor) int btrfs_balance(struct btrfs_root *dev_root) { int ret; - struct list_head *cur; struct list_head *devices = &dev_root->fs_info->fs_devices->devices; struct btrfs_device *device; u64 old_size; @@ -1722,8 +1706,7 @@ int btrfs_balance(struct btrfs_root *dev_root) dev_root = dev_root->fs_info->dev_root; /* step one make some room on all the devices */ - list_for_each(cur, devices) { - device = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(device, devices, dev_list) { old_size = device->total_bytes; size_to_free = div_factor(old_size, 1); size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); -- cgit v0.10.2 From 3dfdb9348ada18c74c39b9ae7b115e0594792281 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 21 Jan 2009 10:49:16 -0500 Subject: Btrfs: fix locking issue in btrfs_remove_block_group We should hold the block_group_cache_lock while modifying the block groups red-black tree. Thank you, Signed-off-by: Yan Zheng diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a4e36c3..3bed6a7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5952,9 +5952,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_remove_free_space_cache(block_group); + spin_lock(&root->fs_info->block_group_cache_lock); rb_erase(&block_group->cache_node, &root->fs_info->block_group_cache_tree); + spin_unlock(&root->fs_info->block_group_cache_lock); + btrfs_remove_free_space_cache(block_group); down_write(&block_group->space_info->groups_sem); list_del(&block_group->list); up_write(&block_group->space_info->groups_sem); -- cgit v0.10.2 From 5a7be515b1f4569aac601170fc681741434cca92 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 21 Jan 2009 10:49:16 -0500 Subject: Btrfs: Fix infinite loop in btrfs_extent_post_op btrfs_extent_post_op calls finish_current_insert and del_pending_extents. They both may enter infinite loops. finish_current_insert enters infinite loop if it only finds some backrefs to update. The fix is to check for pending backref updates before restarting the loop. The infinite loop in del_pending_extents is due to a the skipped variable not being properly reset before looping around. Signed-off-by: Yan Zheng diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3bed6a7..aeaec84 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2156,7 +2156,8 @@ again: ret = find_first_extent_bit(&info->extent_ins, search, &start, &end, EXTENT_WRITEBACK); if (ret) { - if (skipped && all && !num_inserts) { + if (skipped && all && !num_inserts && + list_empty(&update_list)) { skipped = 0; search = 0; continue; @@ -2544,6 +2545,7 @@ again: if (ret) { if (all && skipped && !nr) { search = 0; + skipped = 0; continue; } mutex_unlock(&info->extent_ins_mutex); -- cgit v0.10.2 From 653249ff9aea51e1ace6bd437389f06e2b84393f Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Wed, 21 Jan 2009 10:49:16 -0500 Subject: Btrfs: remove duplicated #include Removed duplicated #include "compat.h"in fs/btrfs/extent-tree.c Signed-off-by: Huang Weiyi Signed-off-by: Chris Mason diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index aeaec84..c643433 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -29,7 +29,6 @@ #include "volumes.h" #include "locking.h" #include "ref-cache.h" -#include "compat.h" #define PENDING_EXTENT_INSERT 0 #define PENDING_EXTENT_DELETE 1 -- cgit v0.10.2 From 95029d7d598babf62276d9006e575992b1333ba5 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 21 Jan 2009 10:49:16 -0500 Subject: Btrfs: change/remove typedef Change one typedef to a regular enum, and remove an unused one. Signed-off-by: Jan Engelhardt Signed-off-by: Chris Mason diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index eee060f..e1fec63 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -454,17 +454,11 @@ struct btrfs_timespec { __le32 nsec; } __attribute__ ((__packed__)); -typedef enum { +enum btrfs_compression_type { BTRFS_COMPRESS_NONE = 0, BTRFS_COMPRESS_ZLIB = 1, BTRFS_COMPRESS_LAST = 2, -} btrfs_compression_type; - -/* we don't understand any encryption methods right now */ -typedef enum { - BTRFS_ENCRYPTION_NONE = 0, - BTRFS_ENCRYPTION_LAST = 1, -} btrfs_encryption_type; +}; struct btrfs_inode_item { /* nfs style generation number */ -- cgit v0.10.2 From 86288a198d8e4e8411ff02f9ab848245e8f11257 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 21 Jan 2009 10:49:16 -0500 Subject: Btrfs: fix stop searching test in replace_one_extent replace_one_extent searches tree leaves for references to a given extent. It stops searching if it goes beyond the last possible position. The last possible position is computed by adding the starting offset of a found file extent to the full size of the extent. The code uses physical size of the extent as the full size. This is incorrect when compression is used. The fix is get the full size from ram_bytes field of file extent item. Signed-off-by: Yan Zheng diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c643433..1d7f043 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4440,7 +4440,7 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, u64 lock_end = 0; u64 num_bytes; u64 ext_offset; - u64 first_pos; + u64 search_end = (u64)-1; u32 nritems; int nr_scaned = 0; int extent_locked = 0; @@ -4448,7 +4448,6 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, int ret; memcpy(&key, leaf_key, sizeof(key)); - first_pos = INT_LIMIT(loff_t) - extent_key->offset; if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { if (key.objectid < ref_path->owner_objectid || (key.objectid == ref_path->owner_objectid && @@ -4497,7 +4496,7 @@ next: if ((key.objectid > ref_path->owner_objectid) || (key.objectid == ref_path->owner_objectid && key.type > BTRFS_EXTENT_DATA_KEY) || - (key.offset >= first_pos + extent_key->offset)) + key.offset >= search_end) break; } @@ -4530,8 +4529,10 @@ next: num_bytes = btrfs_file_extent_num_bytes(leaf, fi); ext_offset = btrfs_file_extent_offset(leaf, fi); - if (first_pos > key.offset - ext_offset) - first_pos = key.offset - ext_offset; + if (search_end == (u64)-1) { + search_end = key.offset - ext_offset + + btrfs_file_extent_ram_bytes(leaf, fi); + } if (!extent_locked) { lock_start = key.offset; @@ -4720,7 +4721,7 @@ next: } skip: if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && - key.offset >= first_pos + extent_key->offset) + key.offset >= search_end) break; cond_resched(); -- cgit v0.10.2 From 7e6628544abad773222d8b177f738ac2db1859de Mon Sep 17 00:00:00 2001 From: Qinghuang Feng Date: Wed, 21 Jan 2009 10:49:16 -0500 Subject: Btrfs: open_ctree() error handling can oops on fs_info a bug in open_ctree: struct btrfs_root *open_ctree(..) { .... if (!extent_root || !tree_root || !fs_info || !chunk_root || !dev_root || !csum_root) { err = -ENOMEM; goto fail; //When code flow goes to "fail", fs_info may be NULL or uninitialized. } .... fail: btrfs_close_devices(fs_info->fs_devices);// ! btrfs_mapping_tree_free(&fs_info->mapping_tree);// ! kfree(extent_root); kfree(tree_root); bdi_destroy(&fs_info->bdi);// ! ... ) Signed-off-by: Qinghuang Feng Signed-off-by: Chris Mason diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 26a1877..3cf1725 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1823,13 +1823,14 @@ fail_sb_buffer: fail_iput: invalidate_inode_pages2(fs_info->btree_inode->i_mapping); iput(fs_info->btree_inode); -fail: + btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); + bdi_destroy(&fs_info->bdi); +fail: kfree(extent_root); kfree(tree_root); - bdi_destroy(&fs_info->bdi); kfree(fs_info); kfree(chunk_root); kfree(dev_root); -- cgit v0.10.2 From 7237f1833601dcc435a64176c2c347ec4bd959f9 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 21 Jan 2009 12:54:03 -0500 Subject: Btrfs: fix tree logs parallel sync To improve performance, btrfs_sync_log merges tree log sync requests. But it wrongly merges sync requests for different tree logs. If multiple tree logs are synced at the same time, only one of them actually gets synced. This patch has following changes to fix the bug: Move most tree log related fields in btrfs_fs_info to btrfs_root. This allows merging sync requests separately for each tree log. Don't insert root item into the log root tree immediately after log tree is allocated. Root item for log tree is inserted when log tree get synced for the first time. This allows syncing the log root tree without first syncing all log trees. At tree-log sync, btrfs_sync_log first sync the log tree; then updates corresponding root item in the log root tree; sync the log root tree; then update the super block. Signed-off-by: Yan Zheng diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e1fec63..de103a8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -695,9 +695,7 @@ struct btrfs_fs_info { struct btrfs_transaction *running_transaction; wait_queue_head_t transaction_throttle; wait_queue_head_t transaction_wait; - wait_queue_head_t async_submit_wait; - wait_queue_head_t tree_log_wait; struct btrfs_super_block super_copy; struct btrfs_super_block super_for_commit; @@ -724,10 +722,6 @@ struct btrfs_fs_info { atomic_t async_submit_draining; atomic_t nr_async_bios; atomic_t async_delalloc_pages; - atomic_t tree_log_writers; - atomic_t tree_log_commit; - unsigned long tree_log_batch; - u64 tree_log_transid; /* * this is used by the balancing code to wait for all the pending @@ -827,7 +821,14 @@ struct btrfs_root { struct kobject root_kobj; struct completion kobj_unregister; struct mutex objectid_mutex; + struct mutex log_mutex; + wait_queue_head_t log_writer_wait; + wait_queue_head_t log_commit_wait[2]; + atomic_t log_writers; + atomic_t log_commit[2]; + unsigned long log_transid; + unsigned long log_batch; u64 objectid; u64 last_trans; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3cf1725..7feac5a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -849,6 +849,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->list_lock); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); + init_waitqueue_head(&root->log_writer_wait); + init_waitqueue_head(&root->log_commit_wait[0]); + init_waitqueue_head(&root->log_commit_wait[1]); + atomic_set(&root->log_commit[0], 0); + atomic_set(&root->log_commit[1], 0); + atomic_set(&root->log_writers, 0); + root->log_batch = 0; + root->log_transid = 0; extent_io_tree_init(&root->dirty_log_pages, fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -933,15 +941,16 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, return 0; } -int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; + struct extent_buffer *leaf; root = kzalloc(sizeof(*root), GFP_NOFS); if (!root) - return -ENOMEM; + return ERR_PTR(-ENOMEM); __setup_root(tree_root->nodesize, tree_root->leafsize, tree_root->sectorsize, tree_root->stripesize, @@ -950,12 +959,23 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; root->root_key.type = BTRFS_ROOT_ITEM_KEY; root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; + /* + * log trees do not get reference counted because they go away + * before a real commit is actually done. They do store pointers + * to file data extents, and those reference counts still get + * updated (along with back refs to the log tree). + */ root->ref_cows = 0; - root->node = btrfs_alloc_free_block(trans, root, root->leafsize, - 0, BTRFS_TREE_LOG_OBJECTID, - trans->transid, 0, 0, 0); + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + 0, BTRFS_TREE_LOG_OBJECTID, + trans->transid, 0, 0, 0); + if (IS_ERR(leaf)) { + kfree(root); + return ERR_CAST(leaf); + } + root->node = leaf; btrfs_set_header_nritems(root->node, 0); btrfs_set_header_level(root->node, 0); btrfs_set_header_bytenr(root->node, root->node->start); @@ -967,7 +987,48 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, BTRFS_FSID_SIZE); btrfs_mark_buffer_dirty(root->node); btrfs_tree_unlock(root->node); - fs_info->log_root_tree = root; + return root; +} + +int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *log_root; + + log_root = alloc_log_tree(trans, fs_info); + if (IS_ERR(log_root)) + return PTR_ERR(log_root); + WARN_ON(fs_info->log_root_tree); + fs_info->log_root_tree = log_root; + return 0; +} + +int btrfs_add_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_root *log_root; + struct btrfs_inode_item *inode_item; + + log_root = alloc_log_tree(trans, root->fs_info); + if (IS_ERR(log_root)) + return PTR_ERR(log_root); + + log_root->last_trans = trans->transid; + log_root->root_key.offset = root->root_key.objectid; + + inode_item = &log_root->root_item.inode; + inode_item->generation = cpu_to_le64(1); + inode_item->size = cpu_to_le64(3); + inode_item->nlink = cpu_to_le32(1); + inode_item->nbytes = cpu_to_le64(root->leafsize); + inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + + btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start); + btrfs_set_root_generation(&log_root->root_item, trans->transid); + + WARN_ON(root->log_root); + root->log_root = log_root; + root->log_transid = 0; return 0; } @@ -1530,10 +1591,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->async_submit_wait); - init_waitqueue_head(&fs_info->tree_log_wait); - atomic_set(&fs_info->tree_log_commit, 0); - atomic_set(&fs_info->tree_log_writers, 0); - fs_info->tree_log_transid = 0; __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c0ff404..494a56e 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -98,5 +98,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); +int btrfs_add_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root); int btree_lock_page_hook(struct page *page); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1d7f043..3b26f09 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2698,13 +2698,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, /* if metadata always pin */ if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { - struct btrfs_block_group_cache *cache; - - /* btrfs_free_reserved_extent */ - cache = btrfs_lookup_block_group(root->fs_info, bytenr); - BUG_ON(!cache); - btrfs_add_free_space(cache, bytenr, num_bytes); - put_block_group(cache); + mutex_lock(&root->fs_info->pinned_mutex); + btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); + mutex_unlock(&root->fs_info->pinned_mutex); update_reserved_extents(root, bytenr, num_bytes, 0); return 0; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fbcbf43..3e8023e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1214,10 +1214,10 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) } mutex_unlock(&root->fs_info->trans_mutex); - root->fs_info->tree_log_batch++; + root->log_batch++; filemap_fdatawrite(inode->i_mapping); btrfs_wait_ordered_range(inode, 0, (u64)-1); - root->fs_info->tree_log_batch++; + root->log_batch++; /* * ok we haven't committed the transaction yet, lets do a commit diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d81cda2..4f26f3e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -78,104 +78,6 @@ static int link_to_fixup_dir(struct btrfs_trans_handle *trans, */ /* - * btrfs_add_log_tree adds a new per-subvolume log tree into the - * tree of log tree roots. This must be called with a tree log transaction - * running (see start_log_trans). - */ -static int btrfs_add_log_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - struct btrfs_key key; - struct btrfs_root_item root_item; - struct btrfs_inode_item *inode_item; - struct extent_buffer *leaf; - struct btrfs_root *new_root = root; - int ret; - u64 objectid = root->root_key.objectid; - - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, - BTRFS_TREE_LOG_OBJECTID, - trans->transid, 0, 0, 0); - if (IS_ERR(leaf)) { - ret = PTR_ERR(leaf); - return ret; - } - - btrfs_set_header_nritems(leaf, 0); - btrfs_set_header_level(leaf, 0); - btrfs_set_header_bytenr(leaf, leaf->start); - btrfs_set_header_generation(leaf, trans->transid); - btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); - - write_extent_buffer(leaf, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(leaf), - BTRFS_FSID_SIZE); - btrfs_mark_buffer_dirty(leaf); - - inode_item = &root_item.inode; - memset(inode_item, 0, sizeof(*inode_item)); - inode_item->generation = cpu_to_le64(1); - inode_item->size = cpu_to_le64(3); - inode_item->nlink = cpu_to_le32(1); - inode_item->nbytes = cpu_to_le64(root->leafsize); - inode_item->mode = cpu_to_le32(S_IFDIR | 0755); - - btrfs_set_root_bytenr(&root_item, leaf->start); - btrfs_set_root_generation(&root_item, trans->transid); - btrfs_set_root_level(&root_item, 0); - btrfs_set_root_refs(&root_item, 0); - btrfs_set_root_used(&root_item, 0); - - memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); - root_item.drop_level = 0; - - btrfs_tree_unlock(leaf); - free_extent_buffer(leaf); - leaf = NULL; - - btrfs_set_root_dirid(&root_item, 0); - - key.objectid = BTRFS_TREE_LOG_OBJECTID; - key.offset = objectid; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - ret = btrfs_insert_root(trans, root->fs_info->log_root_tree, &key, - &root_item); - if (ret) - goto fail; - - new_root = btrfs_read_fs_root_no_radix(root->fs_info->log_root_tree, - &key); - BUG_ON(!new_root); - - WARN_ON(root->log_root); - root->log_root = new_root; - - /* - * log trees do not get reference counted because they go away - * before a real commit is actually done. They do store pointers - * to file data extents, and those reference counts still get - * updated (along with back refs to the log tree). - */ - new_root->ref_cows = 0; - new_root->last_trans = trans->transid; - - /* - * we need to make sure the root block for this new tree - * is marked as dirty in the dirty_log_pages tree. This - * is how it gets flushed down to disk at tree log commit time. - * - * the tree logging mutex keeps others from coming in and changing - * the new_root->node, so we can safely access it here - */ - set_extent_dirty(&new_root->dirty_log_pages, new_root->node->start, - new_root->node->start + new_root->node->len - 1, - GFP_NOFS); - -fail: - return ret; -} - -/* * start a sub transaction and setup the log tree * this increments the log tree writer count to make the people * syncing the tree wait for us to finish @@ -184,6 +86,14 @@ static int start_log_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret; + + mutex_lock(&root->log_mutex); + if (root->log_root) { + root->log_batch++; + atomic_inc(&root->log_writers); + mutex_unlock(&root->log_mutex); + return 0; + } mutex_lock(&root->fs_info->tree_log_mutex); if (!root->fs_info->log_root_tree) { ret = btrfs_init_log_root_tree(trans, root->fs_info); @@ -193,9 +103,10 @@ static int start_log_trans(struct btrfs_trans_handle *trans, ret = btrfs_add_log_tree(trans, root); BUG_ON(ret); } - atomic_inc(&root->fs_info->tree_log_writers); - root->fs_info->tree_log_batch++; mutex_unlock(&root->fs_info->tree_log_mutex); + root->log_batch++; + atomic_inc(&root->log_writers); + mutex_unlock(&root->log_mutex); return 0; } @@ -212,13 +123,12 @@ static int join_running_log_trans(struct btrfs_root *root) if (!root->log_root) return -ENOENT; - mutex_lock(&root->fs_info->tree_log_mutex); + mutex_lock(&root->log_mutex); if (root->log_root) { ret = 0; - atomic_inc(&root->fs_info->tree_log_writers); - root->fs_info->tree_log_batch++; + atomic_inc(&root->log_writers); } - mutex_unlock(&root->fs_info->tree_log_mutex); + mutex_unlock(&root->log_mutex); return ret; } @@ -228,10 +138,11 @@ static int join_running_log_trans(struct btrfs_root *root) */ static int end_log_trans(struct btrfs_root *root) { - atomic_dec(&root->fs_info->tree_log_writers); - smp_mb(); - if (waitqueue_active(&root->fs_info->tree_log_wait)) - wake_up(&root->fs_info->tree_log_wait); + if (atomic_dec_and_test(&root->log_writers)) { + smp_mb(); + if (waitqueue_active(&root->log_writer_wait)) + wake_up(&root->log_writer_wait); + } return 0; } @@ -1902,26 +1813,65 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, } } btrfs_free_path(path); - if (wc->free) - free_extent_buffer(log->node); return ret; } -static int wait_log_commit(struct btrfs_root *log) +/* + * helper function to update the item for a given subvolumes log root + * in the tree of log roots + */ +static int update_log_root(struct btrfs_trans_handle *trans, + struct btrfs_root *log) +{ + int ret; + + if (log->log_transid == 1) { + /* insert root item on the first sync */ + ret = btrfs_insert_root(trans, log->fs_info->log_root_tree, + &log->root_key, &log->root_item); + } else { + ret = btrfs_update_root(trans, log->fs_info->log_root_tree, + &log->root_key, &log->root_item); + } + return ret; +} + +static int wait_log_commit(struct btrfs_root *root, unsigned long transid) { DEFINE_WAIT(wait); - u64 transid = log->fs_info->tree_log_transid; + int index = transid % 2; + /* + * we only allow two pending log transactions at a time, + * so we know that if ours is more than 2 older than the + * current transaction, we're done + */ do { - prepare_to_wait(&log->fs_info->tree_log_wait, &wait, - TASK_UNINTERRUPTIBLE); - mutex_unlock(&log->fs_info->tree_log_mutex); - if (atomic_read(&log->fs_info->tree_log_commit)) + prepare_to_wait(&root->log_commit_wait[index], + &wait, TASK_UNINTERRUPTIBLE); + mutex_unlock(&root->log_mutex); + if (root->log_transid < transid + 2 && + atomic_read(&root->log_commit[index])) schedule(); - finish_wait(&log->fs_info->tree_log_wait, &wait); - mutex_lock(&log->fs_info->tree_log_mutex); - } while (transid == log->fs_info->tree_log_transid && - atomic_read(&log->fs_info->tree_log_commit)); + finish_wait(&root->log_commit_wait[index], &wait); + mutex_lock(&root->log_mutex); + } while (root->log_transid < transid + 2 && + atomic_read(&root->log_commit[index])); + return 0; +} + +static int wait_for_writer(struct btrfs_root *root) +{ + DEFINE_WAIT(wait); + while (atomic_read(&root->log_writers)) { + prepare_to_wait(&root->log_writer_wait, + &wait, TASK_UNINTERRUPTIBLE); + mutex_unlock(&root->log_mutex); + if (atomic_read(&root->log_writers)) + schedule(); + mutex_lock(&root->log_mutex); + finish_wait(&root->log_writer_wait, &wait); + } return 0; } @@ -1933,57 +1883,114 @@ static int wait_log_commit(struct btrfs_root *log) int btrfs_sync_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) { + int index1; + int index2; int ret; - unsigned long batch; struct btrfs_root *log = root->log_root; + struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; - mutex_lock(&log->fs_info->tree_log_mutex); - if (atomic_read(&log->fs_info->tree_log_commit)) { - wait_log_commit(log); - goto out; + mutex_lock(&root->log_mutex); + index1 = root->log_transid % 2; + if (atomic_read(&root->log_commit[index1])) { + wait_log_commit(root, root->log_transid); + mutex_unlock(&root->log_mutex); + return 0; } - atomic_set(&log->fs_info->tree_log_commit, 1); + atomic_set(&root->log_commit[index1], 1); + + /* wait for previous tree log sync to complete */ + if (atomic_read(&root->log_commit[(index1 + 1) % 2])) + wait_log_commit(root, root->log_transid - 1); while (1) { - batch = log->fs_info->tree_log_batch; - mutex_unlock(&log->fs_info->tree_log_mutex); + unsigned long batch = root->log_batch; + mutex_unlock(&root->log_mutex); schedule_timeout_uninterruptible(1); - mutex_lock(&log->fs_info->tree_log_mutex); - - while (atomic_read(&log->fs_info->tree_log_writers)) { - DEFINE_WAIT(wait); - prepare_to_wait(&log->fs_info->tree_log_wait, &wait, - TASK_UNINTERRUPTIBLE); - mutex_unlock(&log->fs_info->tree_log_mutex); - if (atomic_read(&log->fs_info->tree_log_writers)) - schedule(); - mutex_lock(&log->fs_info->tree_log_mutex); - finish_wait(&log->fs_info->tree_log_wait, &wait); - } - if (batch == log->fs_info->tree_log_batch) + mutex_lock(&root->log_mutex); + wait_for_writer(root); + if (batch == root->log_batch) break; } ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); BUG_ON(ret); - ret = btrfs_write_and_wait_marked_extents(root->fs_info->log_root_tree, - &root->fs_info->log_root_tree->dirty_log_pages); + + btrfs_set_root_bytenr(&log->root_item, log->node->start); + btrfs_set_root_generation(&log->root_item, trans->transid); + btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node)); + + root->log_batch = 0; + root->log_transid++; + log->log_transid = root->log_transid; + smp_mb(); + /* + * log tree has been flushed to disk, new modifications of + * the log will be written to new positions. so it's safe to + * allow log writers to go in. + */ + mutex_unlock(&root->log_mutex); + + mutex_lock(&log_root_tree->log_mutex); + log_root_tree->log_batch++; + atomic_inc(&log_root_tree->log_writers); + mutex_unlock(&log_root_tree->log_mutex); + + ret = update_log_root(trans, log); + BUG_ON(ret); + + mutex_lock(&log_root_tree->log_mutex); + if (atomic_dec_and_test(&log_root_tree->log_writers)) { + smp_mb(); + if (waitqueue_active(&log_root_tree->log_writer_wait)) + wake_up(&log_root_tree->log_writer_wait); + } + + index2 = log_root_tree->log_transid % 2; + if (atomic_read(&log_root_tree->log_commit[index2])) { + wait_log_commit(log_root_tree, log_root_tree->log_transid); + mutex_unlock(&log_root_tree->log_mutex); + goto out; + } + atomic_set(&log_root_tree->log_commit[index2], 1); + + if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) + wait_log_commit(log_root_tree, log_root_tree->log_transid - 1); + + wait_for_writer(log_root_tree); + + ret = btrfs_write_and_wait_marked_extents(log_root_tree, + &log_root_tree->dirty_log_pages); BUG_ON(ret); btrfs_set_super_log_root(&root->fs_info->super_for_commit, - log->fs_info->log_root_tree->node->start); + log_root_tree->node->start); btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, - btrfs_header_level(log->fs_info->log_root_tree->node)); + btrfs_header_level(log_root_tree->node)); + + log_root_tree->log_batch = 0; + log_root_tree->log_transid++; + smp_mb(); + + mutex_unlock(&log_root_tree->log_mutex); + + /* + * nobody else is going to jump in and write the the ctree + * super here because the log_commit atomic below is protecting + * us. We must be called with a transaction handle pinning + * the running transaction open, so a full commit can't hop + * in and cause problems either. + */ + write_ctree_super(trans, root->fs_info->tree_root, 2); - write_ctree_super(trans, log->fs_info->tree_root, 2); - log->fs_info->tree_log_transid++; - log->fs_info->tree_log_batch = 0; - atomic_set(&log->fs_info->tree_log_commit, 0); + atomic_set(&log_root_tree->log_commit[index2], 0); smp_mb(); - if (waitqueue_active(&log->fs_info->tree_log_wait)) - wake_up(&log->fs_info->tree_log_wait); + if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) + wake_up(&log_root_tree->log_commit_wait[index2]); out: - mutex_unlock(&log->fs_info->tree_log_mutex); + atomic_set(&root->log_commit[index1], 0); + smp_mb(); + if (waitqueue_active(&root->log_commit_wait[index1])) + wake_up(&root->log_commit_wait[index1]); return 0; } @@ -2019,38 +2026,18 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) start, end, GFP_NOFS); } - log = root->log_root; - ret = btrfs_del_root(trans, root->fs_info->log_root_tree, - &log->root_key); - BUG_ON(ret); + if (log->log_transid > 0) { + ret = btrfs_del_root(trans, root->fs_info->log_root_tree, + &log->root_key); + BUG_ON(ret); + } root->log_root = NULL; - kfree(root->log_root); + free_extent_buffer(log->node); + kfree(log); return 0; } /* - * helper function to update the item for a given subvolumes log root - * in the tree of log roots - */ -static int update_log_root(struct btrfs_trans_handle *trans, - struct btrfs_root *log) -{ - u64 bytenr = btrfs_root_bytenr(&log->root_item); - int ret; - - if (log->node->start == bytenr) - return 0; - - btrfs_set_root_bytenr(&log->root_item, log->node->start); - btrfs_set_root_generation(&log->root_item, trans->transid); - btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node)); - ret = btrfs_update_root(trans, log->fs_info->log_root_tree, - &log->root_key, &log->root_item); - BUG_ON(ret); - return ret; -} - -/* * If both a file and directory are logged, and unlinks or renames are * mixed in, we have a few interesting corners: * @@ -2711,11 +2698,6 @@ next_slot: btrfs_free_path(path); btrfs_free_path(dst_path); - - mutex_lock(&root->fs_info->tree_log_mutex); - ret = update_log_root(trans, log); - BUG_ON(ret); - mutex_unlock(&root->fs_info->tree_log_mutex); out: return 0; } -- cgit v0.10.2 From 35054394c4b3cecd52577c2662c84da1f3e73525 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 21 Jan 2009 13:11:13 -0500 Subject: Btrfs: stop providing a bmap operation to avoid swapfile corruptions Swapfiles use bmap to build a list of extents belonging to the file, and they assume these extents won't change over the life of the file. They also use resulting list to do IO directly to the block device. This causes problems for btrfs in a few ways: btrfs returns logical block numbers through bmap, and these are not suitable for IO. They might translate to different devices, raid etc. COW means that file block mappings are going to change frequently. Using swapfiles on btrfs will lead to corruption, so we're avoiding the problem for now by dropping bmap support entirely. A later commit will add fiemap support for people that really want to know how a file is laid out. Signed-off-by: Chris Mason diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 45cf03e..2e25d69 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4156,11 +4156,6 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, return -EINVAL; } -static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) -{ - return extent_bmap(mapping, iblock, btrfs_get_extent); -} - int btrfs_readpage(struct file *file, struct page *page) { struct extent_io_tree *tree; @@ -4985,13 +4980,24 @@ static struct extent_io_ops btrfs_extent_io_ops = { .clear_bit_hook = btrfs_clear_bit_hook, }; +/* + * btrfs doesn't support the bmap operation because swapfiles + * use bmap to make a mapping of extents in the file. They assume + * these extents won't change over the life of the file and they + * use the bmap result to do IO directly to the drive. + * + * the btrfs bmap call would return logical addresses that aren't + * suitable for IO and they also will change frequently as COW + * operations happen. So, swapfile + btrfs == corruption. + * + * For now we're avoiding this by dropping bmap. + */ static struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, .writepage = btrfs_writepage, .writepages = btrfs_writepages, .readpages = btrfs_readpages, .sync_page = block_sync_page, - .bmap = btrfs_bmap, .direct_IO = btrfs_direct_IO, .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, -- cgit v0.10.2 From 1506fcc8189cdd4b95e06df7845a09f18b4526a6 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Wed, 21 Jan 2009 14:39:14 -0500 Subject: Btrfs: fiemap support Now that bmap support is gone, this is the only way to get extent mappings for userland. These are still not valid for IO, but they can tell us if a file has holes or how much fragmentation there is. Signed-off-by: Yehuda Sadeh diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c9446d4..a3b0676 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2854,6 +2854,98 @@ out: return sector; } +int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len, get_extent_t *get_extent) +{ + int ret; + u64 off = start; + u64 max = start + len; + u32 flags = 0; + u64 disko = 0; + struct extent_map *em = NULL; + int end = 0; + u64 em_start = 0, em_len = 0; + unsigned long emflags; + ret = 0; + + if (len == 0) + return -EINVAL; + + lock_extent(&BTRFS_I(inode)->io_tree, start, start + len, + GFP_NOFS); + em = get_extent(inode, NULL, 0, off, max - off, 0); + if (!em) + goto out; + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto out; + } + while (!end) { + off = em->start + em->len; + if (off >= max) + end = 1; + + em_start = em->start; + em_len = em->len; + + disko = 0; + flags = 0; + + switch (em->block_start) { + case EXTENT_MAP_LAST_BYTE: + end = 1; + flags |= FIEMAP_EXTENT_LAST; + break; + case EXTENT_MAP_HOLE: + flags |= FIEMAP_EXTENT_UNWRITTEN; + break; + case EXTENT_MAP_INLINE: + flags |= (FIEMAP_EXTENT_DATA_INLINE | + FIEMAP_EXTENT_NOT_ALIGNED); + break; + case EXTENT_MAP_DELALLOC: + flags |= (FIEMAP_EXTENT_DELALLOC | + FIEMAP_EXTENT_UNKNOWN); + break; + default: + disko = em->block_start; + break; + } + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) + flags |= FIEMAP_EXTENT_ENCODED; + + emflags = em->flags; + free_extent_map(em); + em = NULL; + + if (!end) { + em = get_extent(inode, NULL, 0, off, max - off, 0); + if (!em) + goto out; + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto out; + } + emflags = em->flags; + } + if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { + flags |= FIEMAP_EXTENT_LAST; + end = 1; + } + + ret = fiemap_fill_next_extent(fieinfo, em_start, disko, + em_len, flags); + if (ret) + goto out_free; + } +out_free: + free_extent_map(em); +out: + unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len, + GFP_NOFS); + return ret; +} + static inline struct page *extent_buffer_page(struct extent_buffer *eb, unsigned long i) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index c5b483a..e80c6d9 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -193,6 +193,8 @@ int extent_commit_write(struct extent_io_tree *tree, unsigned from, unsigned to); sector_t extent_bmap(struct address_space *mapping, sector_t iblock, get_extent_t *get_extent); +int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len, get_extent_t *get_extent); int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end); int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2e25d69..288c2cd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4156,6 +4156,12 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, return -EINVAL; } +static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len) +{ + return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent); +} + int btrfs_readpage(struct file *file, struct page *page) { struct extent_io_tree *tree; @@ -5021,6 +5027,7 @@ static struct inode_operations btrfs_file_inode_operations = { .removexattr = btrfs_removexattr, .permission = btrfs_permission, .fallocate = btrfs_fallocate, + .fiemap = btrfs_fiemap, }; static struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, -- cgit v0.10.2 From 91a8d07d82cac3aae3ef2ea1aaba5c9c4a934e91 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 21 Jan 2009 18:45:57 -0500 Subject: ring-buffer: reset timestamps when ring buffer is reset Impact: fix bad times of recent resets The ring buffer needs to reset its timestamps when reseting of the buffer, otherwise the timestamps are stale and might be used to calculate times in the buffer causing funny timestamps to appear. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9c1e73d..bd38c5c 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2174,6 +2174,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->overrun = 0; cpu_buffer->entries = 0; + + cpu_buffer->write_stamp = 0; + cpu_buffer->read_stamp = 0; } /** -- cgit v0.10.2 From ba2607fe9c1f2d4ad5a3d4c4ae9117c5bfdca826 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Mon, 19 Jan 2009 10:38:35 +0100 Subject: x86, ds, bts: cleanup/fix DS configuration Cleanup the cpuid check for DS configuration. This also fixes a Corei7 CPUID enumeration bug. Signed-off-by: Markus Metzger Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index da91701..169a120 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -15,8 +15,8 @@ * - buffer allocation (memory accounting) * * - * Copyright (C) 2007-2008 Intel Corporation. - * Markus Metzger , 2007-2008 + * Copyright (C) 2007-2009 Intel Corporation. + * Markus Metzger , 2007-2009 */ @@ -890,7 +890,7 @@ int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) } static const struct ds_configuration ds_cfg_netburst = { - .name = "netburst", + .name = "Netburst", .ctl[dsf_bts] = (1 << 2) | (1 << 3), .ctl[dsf_bts_kernel] = (1 << 5), .ctl[dsf_bts_user] = (1 << 6), @@ -904,7 +904,7 @@ static const struct ds_configuration ds_cfg_netburst = { #endif }; static const struct ds_configuration ds_cfg_pentium_m = { - .name = "pentium m", + .name = "Pentium M", .ctl[dsf_bts] = (1 << 6) | (1 << 7), .sizeof_field = sizeof(long), @@ -915,8 +915,8 @@ static const struct ds_configuration ds_cfg_pentium_m = { .sizeof_rec[ds_pebs] = sizeof(long) * 18, #endif }; -static const struct ds_configuration ds_cfg_core2 = { - .name = "core 2", +static const struct ds_configuration ds_cfg_core2_atom = { + .name = "Core 2/Atom", .ctl[dsf_bts] = (1 << 6) | (1 << 7), .ctl[dsf_bts_kernel] = (1 << 9), .ctl[dsf_bts_user] = (1 << 10), @@ -949,19 +949,22 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) switch (c->x86) { case 0x6: switch (c->x86_model) { - case 0 ... 0xC: - /* sorry, don't know about them */ - break; - case 0xD: - case 0xE: /* Pentium M */ + case 0x9: + case 0xd: /* Pentium M */ ds_configure(&ds_cfg_pentium_m); break; - default: /* Core2, Atom, ... */ - ds_configure(&ds_cfg_core2); + case 0xf: + case 0x17: /* Core2 */ + case 0x1c: /* Atom */ + ds_configure(&ds_cfg_core2_atom); + break; + case 0x1a: /* i7 */ + default: + /* sorry, don't know about them */ break; } break; - case 0xF: + case 0xf: switch (c->x86_model) { case 0x0: case 0x1: -- cgit v0.10.2 From a717531942f488209dded30f6bc648167bcefa72 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 22 Jan 2009 09:23:10 -0500 Subject: Btrfs: do less aggressive btree readahead Just before reading a leaf, btrfs scans the node for blocks that are close by and reads them too. It tries to build up a large window of IO looking for blocks that are within a max distance from the top and bottom of the IO window. This patch changes things to just look for blocks within 64k of the target block. It will trigger less IO and make for lower latencies on the read size. Signed-off-by: Chris Mason diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9e46c07..2603ee5 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1210,8 +1210,7 @@ static noinline void reada_for_search(struct btrfs_root *root, struct btrfs_disk_key disk_key; u32 nritems; u64 search; - u64 lowest_read; - u64 highest_read; + u64 target; u64 nread = 0; int direction = path->reada; struct extent_buffer *eb; @@ -1235,8 +1234,7 @@ static noinline void reada_for_search(struct btrfs_root *root, return; } - highest_read = search; - lowest_read = search; + target = search; nritems = btrfs_header_nritems(node); nr = slot; @@ -1256,24 +1254,15 @@ static noinline void reada_for_search(struct btrfs_root *root, break; } search = btrfs_node_blockptr(node, nr); - if ((search >= lowest_read && search <= highest_read) || - (search < lowest_read && lowest_read - search <= 16384) || - (search > highest_read && search - highest_read <= 16384)) { + if ((search <= target && target - search <= 65536) || + (search > target && search - target <= 65536)) { readahead_tree_block(root, search, blocksize, btrfs_node_ptr_generation(node, nr)); nread += blocksize; } nscan++; - if (path->reada < 2 && (nread > (64 * 1024) || nscan > 32)) + if ((nread > 65536 || nscan > 32)) break; - - if (nread > (256 * 1024) || nscan > 128) - break; - - if (search < lowest_read) - lowest_read = search; - if (search > highest_read) - highest_read = search; } } -- cgit v0.10.2 From 8f5140a6a0b1a9aa79585b0008e88c5d266c5c1d Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 23 Jan 2009 12:57:20 +0000 Subject: uwb: lock rc->rsvs_lock with spin_lock_bh() rc->rsvs_lock may be taken in a timer so lock it with spin_lock_bh(). Signed-off-by: David Vrabel diff --git a/drivers/uwb/drp.c b/drivers/uwb/drp.c index 2b4f940..4f5ca99 100644 --- a/drivers/uwb/drp.c +++ b/drivers/uwb/drp.c @@ -66,14 +66,14 @@ static void uwb_rc_set_drp_cmd_done(struct uwb_rc *rc, void *arg, } else dev_err(&rc->uwb_dev.dev, "SET-DRP-IE: timeout\n"); - spin_lock(&rc->rsvs_lock); + spin_lock_bh(&rc->rsvs_lock); if (rc->set_drp_ie_pending > 1) { rc->set_drp_ie_pending = 0; uwb_rsv_queue_update(rc); } else { rc->set_drp_ie_pending = 0; } - spin_unlock(&rc->rsvs_lock); + spin_unlock_bh(&rc->rsvs_lock); } /** diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c index 886f977..6b76f4b 100644 --- a/drivers/uwb/rsv.c +++ b/drivers/uwb/rsv.c @@ -870,7 +870,7 @@ void uwb_rsv_queue_update(struct uwb_rc *rc) */ void uwb_rsv_sched_update(struct uwb_rc *rc) { - spin_lock(&rc->rsvs_lock); + spin_lock_bh(&rc->rsvs_lock); if (!delayed_work_pending(&rc->rsv_update_work)) { if (rc->set_drp_ie_pending > 0) { rc->set_drp_ie_pending++; @@ -879,7 +879,7 @@ void uwb_rsv_sched_update(struct uwb_rc *rc) uwb_rsv_queue_update(rc); } unlock: - spin_unlock(&rc->rsvs_lock); + spin_unlock_bh(&rc->rsvs_lock); } /* -- cgit v0.10.2 From 82c1593cad3dfc97661764c8bc62aa1a416e9ea8 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 20 Jan 2009 16:46:02 +0200 Subject: UBIFS: simplify locking This patch simplifies lock_[23]_inodes functions. We do not have to care about locking order, because UBIFS does this for @i_mutex and this is enough. Thanks to Al Viro for suggesting this. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index d29b771..f55d523 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -482,30 +482,29 @@ static int ubifs_dir_release(struct inode *dir, struct file *file) } /** - * lock_2_inodes - lock two UBIFS inodes. + * lock_2_inodes - a wrapper for locking two UBIFS inodes. * @inode1: first inode * @inode2: second inode + * + * We do not implement any tricks to guarantee strict lock ordering, because + * VFS has already done it for us on the @i_mutex. So this is just a simple + * wrapper function. */ static void lock_2_inodes(struct inode *inode1, struct inode *inode2) { - if (inode1->i_ino < inode2->i_ino) { - mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_2); - mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_3); - } else { - mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); - mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_3); - } + mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); + mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); } /** - * unlock_2_inodes - unlock two UBIFS inodes inodes. + * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. * @inode1: first inode * @inode2: second inode */ static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) { - mutex_unlock(&ubifs_inode(inode1)->ui_mutex); mutex_unlock(&ubifs_inode(inode2)->ui_mutex); + mutex_unlock(&ubifs_inode(inode1)->ui_mutex); } static int ubifs_link(struct dentry *old_dentry, struct inode *dir, @@ -527,6 +526,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu", dentry->d_name.len, dentry->d_name.name, inode->i_ino, inode->i_nlink, dir->i_ino); + ubifs_assert(mutex_is_locked(&dir->i_mutex)); + ubifs_assert(mutex_is_locked(&inode->i_mutex)); err = dbg_check_synced_i_size(inode); if (err) return err; @@ -580,6 +581,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu", dentry->d_name.len, dentry->d_name.name, inode->i_ino, inode->i_nlink, dir->i_ino); + ubifs_assert(mutex_is_locked(&dir->i_mutex)); + ubifs_assert(mutex_is_locked(&inode->i_mutex)); err = dbg_check_synced_i_size(inode); if (err) return err; @@ -667,7 +670,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len, dentry->d_name.name, inode->i_ino, dir->i_ino); - + ubifs_assert(mutex_is_locked(&dir->i_mutex)); + ubifs_assert(mutex_is_locked(&inode->i_mutex)); err = check_dir_empty(c, dentry->d_inode); if (err) return err; @@ -922,59 +926,30 @@ out_budg: } /** - * lock_3_inodes - lock three UBIFS inodes for rename. + * lock_3_inodes - a wrapper for locking three UBIFS inodes. * @inode1: first inode * @inode2: second inode * @inode3: third inode * - * For 'ubifs_rename()', @inode1 may be the same as @inode2 whereas @inode3 may - * be null. + * This function is used for 'ubifs_rename()' and @inode1 may be the same as + * @inode2 whereas @inode3 may be %NULL. + * + * We do not implement any tricks to guarantee strict lock ordering, because + * VFS has already done it for us on the @i_mutex. So this is just a simple + * wrapper function. */ static void lock_3_inodes(struct inode *inode1, struct inode *inode2, struct inode *inode3) { - struct inode *i1, *i2, *i3; - - if (!inode3) { - if (inode1 != inode2) { - lock_2_inodes(inode1, inode2); - return; - } - mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); - return; - } - - if (inode1 == inode2) { - lock_2_inodes(inode1, inode3); - return; - } - - /* 3 different inodes */ - if (inode1 < inode2) { - i3 = inode2; - if (inode1 < inode3) { - i1 = inode1; - i2 = inode3; - } else { - i1 = inode3; - i2 = inode1; - } - } else { - i3 = inode1; - if (inode2 < inode3) { - i1 = inode2; - i2 = inode3; - } else { - i1 = inode3; - i2 = inode2; - } - } - mutex_lock_nested(&ubifs_inode(i1)->ui_mutex, WB_MUTEX_1); - lock_2_inodes(i2, i3); + mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); + if (inode2 != inode1) + mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); + if (inode3) + mutex_lock_nested(&ubifs_inode(inode3)->ui_mutex, WB_MUTEX_3); } /** - * unlock_3_inodes - unlock three UBIFS inodes for rename. + * unlock_3_inodes - a wrapper for unlocking three UBIFS inodes for rename. * @inode1: first inode * @inode2: second inode * @inode3: third inode @@ -982,11 +957,11 @@ static void lock_3_inodes(struct inode *inode1, struct inode *inode2, static void unlock_3_inodes(struct inode *inode1, struct inode *inode2, struct inode *inode3) { - mutex_unlock(&ubifs_inode(inode1)->ui_mutex); - if (inode1 != inode2) - mutex_unlock(&ubifs_inode(inode2)->ui_mutex); if (inode3) mutex_unlock(&ubifs_inode(inode3)->ui_mutex); + if (inode1 != inode2) + mutex_unlock(&ubifs_inode(inode2)->ui_mutex); + mutex_unlock(&ubifs_inode(inode1)->ui_mutex); } static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, @@ -1020,6 +995,11 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name, old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, new_dentry->d_name.name, new_dir->i_ino); + ubifs_assert(mutex_is_locked(&old_dir->i_mutex)); + ubifs_assert(mutex_is_locked(&new_dir->i_mutex)); + if (unlink) + ubifs_assert(mutex_is_locked(&new_inode->i_mutex)); + if (unlink && is_dir) { err = check_dir_empty(c, new_inode); -- cgit v0.10.2 From e4d9b6cbfc98d696a28d2c24a3d49768695811ee Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Jan 2009 14:17:36 +0200 Subject: UBIFS: fix LEB list freeing When freeing the c->idx_lebs list, we have to release the LEBs as well, because we might be called from mount to read-only mode code. Otherwise the LEBs stay taken forever, which may cause problems when we re-mount back ro RW mode. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index b2e5f11..9760154 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -830,21 +830,29 @@ out: * ubifs_destroy_idx_gc - destroy idx_gc list. * @c: UBIFS file-system description object * - * This function destroys the idx_gc list. It is called when unmounting or - * remounting read-only so locks are not needed. + * This function destroys the @c->idx_gc list. It is called when unmounting or + * remounting read-only so locks are not needed. Returns zero in case of + * success and a negative error code in case of failure. */ -void ubifs_destroy_idx_gc(struct ubifs_info *c) +int ubifs_destroy_idx_gc(struct ubifs_info *c) { + int ret = 0; + while (!list_empty(&c->idx_gc)) { + int err; struct ubifs_gced_idx_leb *idx_gc; idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, list); - c->idx_gc_cnt -= 1; + err = ubifs_change_one_lp(c, idx_gc->lnum, LPROPS_NC, + LPROPS_NC, 0, LPROPS_TAKEN, -1); + if (err && !ret) + ret = err; list_del(&idx_gc->list); kfree(idx_gc); } + return ret; } /** diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index dfd2bce..68328c5 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -678,6 +678,9 @@ int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, out: ubifs_release_lprops(c); + if (err) + ubifs_err("cannot change properties of LEB %d, error %d", + lnum, err); return err; } @@ -714,6 +717,9 @@ int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, out: ubifs_release_lprops(c); + if (err) + ubifs_err("cannot update properties of LEB %d, error %d", + lnum, err); return err; } @@ -737,6 +743,8 @@ int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp) lpp = ubifs_lpt_lookup(c, lnum); if (IS_ERR(lpp)) { err = PTR_ERR(lpp); + ubifs_err("cannot read properties of LEB %d, error %d", + lnum, err); goto out; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index da99da0..807bbd3 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1469,9 +1469,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) { int err, lnum; - if (c->ro_media) - return -EINVAL; - mutex_lock(&c->umount_mutex); c->remounting_rw = 1; c->always_chk_crc = 1; @@ -1605,9 +1602,13 @@ out: */ static void commit_on_unmount(struct ubifs_info *c) { - struct super_block *sb = c->vfs_sb; long long bud_bytes; + if (!c->fast_unmount) { + dbg_gen("skip committing - fast unmount enabled"); + return; + } + /* * This function is called before the background thread is stopped, so * we may race with ongoing commit, which means we have to take @@ -1617,8 +1618,11 @@ static void commit_on_unmount(struct ubifs_info *c) bud_bytes = c->bud_bytes; spin_unlock(&c->buds_lock); - if (!c->fast_unmount && !(sb->s_flags & MS_RDONLY) && bud_bytes) + if (bud_bytes) { + dbg_gen("run commit"); ubifs_run_commit(c); + } else + dbg_gen("journal is empty, do not run commit"); } /** @@ -1633,6 +1637,8 @@ static void ubifs_remount_ro(struct ubifs_info *c) int i, err; ubifs_assert(!c->need_recovery); + ubifs_assert(!c->ro_media); + commit_on_unmount(c); mutex_lock(&c->umount_mutex); @@ -1646,16 +1652,17 @@ static void ubifs_remount_ro(struct ubifs_info *c) del_timer_sync(&c->jheads[i].wbuf.timer); } - if (!c->ro_media) { - c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); - c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); - c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); - err = ubifs_write_master(c); - if (err) - ubifs_ro_mode(c, err); - } + c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); + c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); + err = ubifs_write_master(c); + if (err) + ubifs_ro_mode(c, err); + + err = ubifs_destroy_idx_gc(c); + if (err) + ubifs_ro_mode(c, err); - ubifs_destroy_idx_gc(c); free_wbufs(c); vfree(c->orph_buf); c->orph_buf = NULL; @@ -1754,6 +1761,11 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) } if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { + if (c->ro_media) { + ubifs_msg("cannot re-mount R/W, UBIFS is working in " + "R/O mode"); + return -EINVAL; + } err = ubifs_remount_rw(c); if (err) return err; @@ -2044,7 +2056,7 @@ static void ubifs_kill_sb(struct super_block *sb) * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()' * in order to be outside BKL. */ - if (sb->s_root) + if (sb->s_root && !(sb->s_flags & MS_RDONLY)) commit_on_unmount(c); /* The un-mount routine is actually done in put_super() */ generic_shutdown_super(sb); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 2e78d6a..ee9517a 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1593,7 +1593,7 @@ int ubifs_replay_journal(struct ubifs_info *c); int ubifs_garbage_collect(struct ubifs_info *c, int anyway); int ubifs_gc_start_commit(struct ubifs_info *c); int ubifs_gc_end_commit(struct ubifs_info *c); -void ubifs_destroy_idx_gc(struct ubifs_info *c); +int ubifs_destroy_idx_gc(struct ubifs_info *c); int ubifs_get_idx_gc_leb(struct ubifs_info *c); int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp); -- cgit v0.10.2 From 84abf972ccff5c13d10b672972949eba431a6e0e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Jan 2009 14:54:59 +0200 Subject: UBIFS: add re-mount debugging checks We observe space corrupted accounting when re-mounting. So add some debbugging checks to catch problems like this. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 175f9c5..f393620 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -689,7 +689,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free) } /** - * ubifs_get_free_space - return amount of free space. + * ubifs_get_free_space_nolock - return amount of free space. * @c: UBIFS file-system description object * * This function calculates amount of free space to report to user-space. @@ -704,16 +704,14 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free) * traditional file-systems, because they have way less overhead than UBIFS. * So, to keep users happy, UBIFS tries to take the overhead into account. */ -long long ubifs_get_free_space(struct ubifs_info *c) +long long ubifs_get_free_space_nolock(struct ubifs_info *c) { - int min_idx_lebs, rsvd_idx_lebs, lebs; + int rsvd_idx_lebs, lebs; long long available, outstanding, free; - spin_lock(&c->space_lock); - min_idx_lebs = c->min_idx_lebs; - ubifs_assert(min_idx_lebs == ubifs_calc_min_idx_lebs(c)); + ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); outstanding = c->budg_data_growth + c->budg_dd_growth; - available = ubifs_calc_available(c, min_idx_lebs); + available = ubifs_calc_available(c, c->min_idx_lebs); /* * When reporting free space to user-space, UBIFS guarantees that it is @@ -726,15 +724,14 @@ long long ubifs_get_free_space(struct ubifs_info *c) * Note, the calculations below are similar to what we have in * 'do_budget_space()', so refer there for comments. */ - if (min_idx_lebs > c->lst.idx_lebs) - rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; + if (c->min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; else rsvd_idx_lebs = 0; lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - c->lst.taken_empty_lebs; lebs -= rsvd_idx_lebs; available += lebs * (c->dark_wm - c->leb_overhead); - spin_unlock(&c->space_lock); if (available > outstanding) free = ubifs_reported_space(c, available - outstanding); @@ -742,3 +739,21 @@ long long ubifs_get_free_space(struct ubifs_info *c) free = 0; return free; } + +/** + * ubifs_get_free_space - return amount of free space. + * @c: UBIFS file-system description object + * + * This function calculates and retuns amount of free space to report to + * user-space. + */ +long long ubifs_get_free_space(struct ubifs_info *c) +{ + long long free; + + spin_lock(&c->space_lock); + free = ubifs_get_free_space_nolock(c); + spin_unlock(&c->space_lock); + + return free; +} diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 792c5a1..9a41f6f 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -620,9 +620,11 @@ void dbg_dump_budg(struct ubifs_info *c) c->dark_wm, c->dead_wm, c->max_idx_node_sz); printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", c->gc_lnum, c->ihead_lnum); - for (i = 0; i < c->jhead_cnt; i++) - printk(KERN_DEBUG "\tjhead %d\t LEB %d\n", - c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum); + /* If we are in R/O mode, journal heads do not exist */ + if (c->jheads) + for (i = 0; i < c->jhead_cnt; i++) + printk(KERN_DEBUG "\tjhead %d\t LEB %d\n", + c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum); for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { bud = rb_entry(rb, struct ubifs_bud, rb); printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); @@ -637,10 +639,7 @@ void dbg_dump_budg(struct ubifs_info *c) /* Print budgeting predictions */ available = ubifs_calc_available(c, c->min_idx_lebs); outstanding = c->budg_data_growth + c->budg_dd_growth; - if (available > outstanding) - free = ubifs_reported_space(c, available - outstanding); - else - free = 0; + free = ubifs_get_free_space_nolock(c); printk(KERN_DEBUG "Budgeting predictions:\n"); printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", available, outstanding, free); @@ -861,6 +860,65 @@ void dbg_dump_index(struct ubifs_info *c) } /** + * dbg_save_space_info - save information about flash space. + * @c: UBIFS file-system description object + * + * This function saves information about UBIFS free space, dirty space, etc, in + * order to check it later. + */ +void dbg_save_space_info(struct ubifs_info *c) +{ + struct ubifs_debug_info *d = c->dbg; + + ubifs_get_lp_stats(c, &d->saved_lst); + + spin_lock(&c->space_lock); + d->saved_free = ubifs_get_free_space_nolock(c); + spin_unlock(&c->space_lock); +} + +/** + * dbg_check_space_info - check flash space information. + * @c: UBIFS file-system description object + * + * This function compares current flash space information with the information + * which was saved when the 'dbg_save_space_info()' function was called. + * Returns zero if the information has not changed, and %-EINVAL it it has + * changed. + */ +int dbg_check_space_info(struct ubifs_info *c) +{ + struct ubifs_debug_info *d = c->dbg; + struct ubifs_lp_stats lst; + long long avail, free; + + spin_lock(&c->space_lock); + avail = ubifs_calc_available(c, c->min_idx_lebs); + spin_unlock(&c->space_lock); + free = ubifs_get_free_space(c); + + if (free != d->saved_free) { + ubifs_err("free space changed from %lld to %lld", + d->saved_free, free); + goto out; + } + + return 0; + +out: + ubifs_msg("saved lprops statistics dump"); + dbg_dump_lstats(&d->saved_lst); + ubifs_get_lp_stats(c, &lst); + ubifs_msg("current lprops statistics dump"); + dbg_dump_lstats(&d->saved_lst); + spin_lock(&c->space_lock); + dbg_dump_budg(c); + spin_unlock(&c->space_lock); + dump_stack(); + return -EINVAL; +} + +/** * dbg_check_synced_i_size - check synchronized inode size. * @inode: inode to check * @@ -2409,7 +2467,7 @@ void ubifs_debugging_exit(struct ubifs_info *c) * Root directory for UBIFS stuff in debugfs. Contains sub-directories which * contain the stuff specific to particular file-system mounts. */ -static struct dentry *debugfs_rootdir; +static struct dentry *dfs_rootdir; /** * dbg_debugfs_init - initialize debugfs file-system. @@ -2421,9 +2479,9 @@ static struct dentry *debugfs_rootdir; */ int dbg_debugfs_init(void) { - debugfs_rootdir = debugfs_create_dir("ubifs", NULL); - if (IS_ERR(debugfs_rootdir)) { - int err = PTR_ERR(debugfs_rootdir); + dfs_rootdir = debugfs_create_dir("ubifs", NULL); + if (IS_ERR(dfs_rootdir)) { + int err = PTR_ERR(dfs_rootdir); ubifs_err("cannot create \"ubifs\" debugfs directory, " "error %d\n", err); return err; @@ -2437,7 +2495,7 @@ int dbg_debugfs_init(void) */ void dbg_debugfs_exit(void) { - debugfs_remove(debugfs_rootdir); + debugfs_remove(dfs_rootdir); } static int open_debugfs_file(struct inode *inode, struct file *file) @@ -2452,13 +2510,13 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf, struct ubifs_info *c = file->private_data; struct ubifs_debug_info *d = c->dbg; - if (file->f_path.dentry == d->dump_lprops) + if (file->f_path.dentry == d->dfs_dump_lprops) dbg_dump_lprops(c); - else if (file->f_path.dentry == d->dump_budg) { + else if (file->f_path.dentry == d->dfs_dump_budg) { spin_lock(&c->space_lock); dbg_dump_budg(c); spin_unlock(&c->space_lock); - } else if (file->f_path.dentry == d->dump_tnc) { + } else if (file->f_path.dentry == d->dfs_dump_tnc) { mutex_lock(&c->tnc_mutex); dbg_dump_tnc(c); mutex_unlock(&c->tnc_mutex); @@ -2469,7 +2527,7 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf, return count; } -static const struct file_operations debugfs_fops = { +static const struct file_operations dfs_fops = { .open = open_debugfs_file, .write = write_debugfs_file, .owner = THIS_MODULE, @@ -2494,36 +2552,32 @@ int dbg_debugfs_init_fs(struct ubifs_info *c) struct dentry *dent; struct ubifs_debug_info *d = c->dbg; - sprintf(d->debugfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); - d->debugfs_dir = debugfs_create_dir(d->debugfs_dir_name, - debugfs_rootdir); - if (IS_ERR(d->debugfs_dir)) { - err = PTR_ERR(d->debugfs_dir); + sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); + d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir); + if (IS_ERR(d->dfs_dir)) { + err = PTR_ERR(d->dfs_dir); ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", - d->debugfs_dir_name, err); + d->dfs_dir_name, err); goto out; } fname = "dump_lprops"; - dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, - &debugfs_fops); + dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); if (IS_ERR(dent)) goto out_remove; - d->dump_lprops = dent; + d->dfs_dump_lprops = dent; fname = "dump_budg"; - dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, - &debugfs_fops); + dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); if (IS_ERR(dent)) goto out_remove; - d->dump_budg = dent; + d->dfs_dump_budg = dent; fname = "dump_tnc"; - dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, - &debugfs_fops); + dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); if (IS_ERR(dent)) goto out_remove; - d->dump_tnc = dent; + d->dfs_dump_tnc = dent; return 0; @@ -2531,7 +2585,7 @@ out_remove: err = PTR_ERR(dent); ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", fname, err); - debugfs_remove_recursive(d->debugfs_dir); + debugfs_remove_recursive(d->dfs_dir); out: return err; } @@ -2542,7 +2596,7 @@ out: */ void dbg_debugfs_exit_fs(struct ubifs_info *c) { - debugfs_remove_recursive(c->dbg->debugfs_dir); + debugfs_remove_recursive(c->dbg->dfs_dir); } #endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 9820d69..c1cd73b 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -41,15 +41,17 @@ * @chk_lpt_wastage: used by LPT tree size checker * @chk_lpt_lebs: used by LPT tree size checker * @new_nhead_offs: used by LPT tree size checker - * @new_ihead_lnum: used by debugging to check ihead_lnum - * @new_ihead_offs: used by debugging to check ihead_offs + * @new_ihead_lnum: used by debugging to check @c->ihead_lnum + * @new_ihead_offs: used by debugging to check @c->ihead_offs * - * debugfs_dir_name: name of debugfs directory containing this file-system's - * files - * debugfs_dir: direntry object of the file-system debugfs directory - * dump_lprops: "dump lprops" debugfs knob - * dump_budg: "dump budgeting information" debugfs knob - * dump_tnc: "dump TNC" debugfs knob + * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') + * @saved_free: saved free space (used by 'dbg_save_space_info()') + * + * dfs_dir_name: name of debugfs directory containing this file-system's files + * dfs_dir: direntry object of the file-system debugfs directory + * dfs_dump_lprops: "dump lprops" debugfs knob + * dfs_dump_budg: "dump budgeting information" debugfs knob + * dfs_dump_tnc: "dump TNC" debugfs knob */ struct ubifs_debug_info { void *buf; @@ -69,11 +71,14 @@ struct ubifs_debug_info { int new_ihead_lnum; int new_ihead_offs; - char debugfs_dir_name[100]; - struct dentry *debugfs_dir; - struct dentry *dump_lprops; - struct dentry *dump_budg; - struct dentry *dump_tnc; + struct ubifs_lp_stats saved_lst; + long long saved_free; + + char dfs_dir_name[100]; + struct dentry *dfs_dir; + struct dentry *dfs_dump_lprops; + struct dentry *dfs_dump_budg; + struct dentry *dfs_dump_tnc; }; #define ubifs_assert(expr) do { \ @@ -297,7 +302,8 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, dbg_znode_callback znode_cb, void *priv); /* Checking functions */ - +void dbg_save_space_info(struct ubifs_info *c); +int dbg_check_space_info(struct ubifs_info *c); int dbg_check_lprops(struct ubifs_info *c); int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); @@ -439,6 +445,8 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); #define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 #define dbg_old_index_check_init(c, zroot) 0 +#define dbg_save_space_info(c) ({}) +#define dbg_check_space_info(c) 0 #define dbg_check_old_index(c, zroot) 0 #define dbg_check_cats(c) 0 #define dbg_check_ltab(c) 0 diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 17443d9..93b6de5 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -432,7 +432,6 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, int uninitialized_var(err), appending = !!(pos + len > inode->i_size); struct page *page; - ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); if (unlikely(c->ro_media)) diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 68328c5..4cdd284 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -635,10 +635,10 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, * @c: UBIFS file-system description object * @st: return statistics */ -void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *st) +void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst) { spin_lock(&c->space_lock); - memcpy(st, &c->lst, sizeof(struct ubifs_lp_stats)); + memcpy(lst, &c->lst, sizeof(struct ubifs_lp_stats)); spin_unlock(&c->space_lock); } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 807bbd3..5c814a7 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1470,6 +1470,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) int err, lnum; mutex_lock(&c->umount_mutex); + dbg_save_space_info(c); c->remounting_rw = 1; c->always_chk_crc = 1; @@ -1573,8 +1574,9 @@ static int ubifs_remount_rw(struct ubifs_info *c) c->vfs_sb->s_flags &= ~MS_RDONLY; c->remounting_rw = 0; c->always_chk_crc = 0; + err = dbg_check_space_info(c); mutex_unlock(&c->umount_mutex); - return 0; + return err; out: vfree(c->orph_buf); @@ -1629,8 +1631,8 @@ static void commit_on_unmount(struct ubifs_info *c) * ubifs_remount_ro - re-mount in read-only mode. * @c: UBIFS file-system description object * - * We rely on VFS to have stopped writing. Possibly the background thread could - * be running a commit, however kthread_stop will wait in that case. + * We assume VFS has stopped writing. Possibly the background thread could be + * running a commit, however kthread_stop will wait in that case. */ static void ubifs_remount_ro(struct ubifs_info *c) { @@ -1640,13 +1642,14 @@ static void ubifs_remount_ro(struct ubifs_info *c) ubifs_assert(!c->ro_media); commit_on_unmount(c); - mutex_lock(&c->umount_mutex); if (c->bgt) { kthread_stop(c->bgt); c->bgt = NULL; } + dbg_save_space_info(c); + for (i = 0; i < c->jhead_cnt; i++) { ubifs_wbuf_sync(&c->jheads[i].wbuf); del_timer_sync(&c->jheads[i].wbuf.timer); @@ -1669,6 +1672,9 @@ static void ubifs_remount_ro(struct ubifs_info *c) vfree(c->ileb_buf); c->ileb_buf = NULL; ubifs_lpt_free(c, 1); + err = dbg_check_space_info(c); + if (err) + ubifs_ro_mode(c, err); mutex_unlock(&c->umount_mutex); } diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index ee9517a..f175435 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1495,6 +1495,7 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, struct ubifs_budget_req *req); long long ubifs_get_free_space(struct ubifs_info *c); +long long ubifs_get_free_space_nolock(struct ubifs_info *c); int ubifs_calc_min_idx_lebs(struct ubifs_info *c); void ubifs_convert_page_budget(struct ubifs_info *c); long long ubifs_reported_space(const struct ubifs_info *c, long long free); @@ -1646,7 +1647,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, const struct ubifs_lprops *lp, int free, int dirty, int flags, int idx_gc_cnt); -void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats); +void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst); void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, int cat); void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, -- cgit v0.10.2 From b4978e949104844224ecf786170c9263efa601f3 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 23 Jan 2009 18:23:03 +0200 Subject: UBIFS: always clean up GC LEB space When we mount UBIFS, GC LEB may contain out-of-date information, and UBIFS should update lprops and set free space for thei LEB. Currently UBIFS does this only if mounted R/W. But for R/O mount we have to do the same, because otherwise we will have incorrect FS free space reported to user-space. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 5c814a7..336073e 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -397,6 +397,7 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = UBIFS_MAX_NLEN; buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]); buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]); + ubifs_assert(buf->f_bfree <= c->block_cnt); return 0; } @@ -735,12 +736,12 @@ static void init_constants_master(struct ubifs_info *c) * take_gc_lnum - reserve GC LEB. * @c: UBIFS file-system description object * - * This function ensures that the LEB reserved for garbage collection is - * unmapped and is marked as "taken" in lprops. We also have to set free space - * to LEB size and dirty space to zero, because lprops may contain out-of-date - * information if the file-system was un-mounted before it has been committed. - * This function returns zero in case of success and a negative error code in - * case of failure. + * This function ensures that the LEB reserved for garbage collection is marked + * as "taken" in lprops. We also have to set free space to LEB size and dirty + * space to zero, because lprops may contain out-of-date information if the + * file-system was un-mounted before it has been committed. This function + * returns zero in case of success and a negative error code in case of + * failure. */ static int take_gc_lnum(struct ubifs_info *c) { @@ -751,10 +752,6 @@ static int take_gc_lnum(struct ubifs_info *c) return -EINVAL; } - err = ubifs_leb_unmap(c, c->gc_lnum); - if (err) - return err; - /* And we have to tell lprops that this LEB is taken */ err = ubifs_change_one_lp(c, c->gc_lnum, c->leb_size, 0, LPROPS_TAKEN, 0, 0); @@ -1280,10 +1277,19 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_orphans; err = ubifs_rcvry_gc_commit(c); - } else + } else { err = take_gc_lnum(c); - if (err) - goto out_orphans; + if (err) + goto out_orphans; + + /* + * GC LEB may contain garbage if there was an unclean + * reboot, and it should be un-mapped. + */ + err = ubifs_leb_unmap(c, c->gc_lnum); + if (err) + return err; + } err = dbg_check_lprops(c); if (err) @@ -1292,6 +1298,16 @@ static int mount_ubifs(struct ubifs_info *c) err = ubifs_recover_size(c); if (err) goto out_orphans; + } else { + /* + * Even if we mount read-only, we have to set space in GC LEB + * to proper value because this affects UBIFS free space + * reporting. We do not want to have a situation when + * re-mounting from R/O to R/W changes amount of free space. + */ + err = take_gc_lnum(c); + if (err) + goto out_orphans; } spin_lock(&ubifs_infos_lock); @@ -1316,6 +1332,8 @@ static int mount_ubifs(struct ubifs_info *c) goto out_infos; c->always_chk_crc = 0; + /* GC LEB has to be empty and taken at this point */ + ubifs_assert(c->lst.taken_empty_lebs == 1); ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", c->vi.ubi_num, c->vi.vol_id, c->vi.name); @@ -1561,7 +1579,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) if (c->need_recovery) err = ubifs_rcvry_gc_commit(c); else - err = take_gc_lnum(c); + err = ubifs_leb_unmap(c, c->gc_lnum); if (err) goto out; @@ -1786,6 +1804,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) c->bu.buf = NULL; } + ubifs_assert(c->lst.taken_empty_lebs == 1); return 0; } -- cgit v0.10.2 From 49d128aa60751a010640f4763d11577e2f508853 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 26 Jan 2009 10:55:40 +0200 Subject: UBIFS: ensure orphan area head is initialized When mounting read-only the orphan area head is not initialized. It must be initialized when remounting read/write, but it was not. This patch fixes that. [Artem: sorry, added comment tweaking noise] Signed-off-by: Adrian Hunter Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 9e6f403..152a7b3 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -46,7 +46,7 @@ * Orphans are accumulated in a rb-tree. When an inode's link count drops to * zero, the inode number is added to the rb-tree. It is removed from the tree * when the inode is deleted. Any new orphans that are in the orphan tree when - * the commit is run, are written to the orphan area in 1 or more orph nodes. + * the commit is run, are written to the orphan area in 1 or more orphan nodes. * If the orphan area is full, it is consolidated to make space. There is * always enough space because validation prevents the user from creating more * than the maximum number of orphans allowed. @@ -231,7 +231,7 @@ static int tot_avail_orphs(struct ubifs_info *c) } /** - * do_write_orph_node - write a node + * do_write_orph_node - write a node to the orphan head. * @c: UBIFS file-system description object * @len: length of node * @atomic: write atomically @@ -264,11 +264,11 @@ static int do_write_orph_node(struct ubifs_info *c, int len, int atomic) } /** - * write_orph_node - write an orph node + * write_orph_node - write an orphan node. * @c: UBIFS file-system description object * @atomic: write atomically * - * This function builds an orph node from the cnext list and writes it to the + * This function builds an orphan node from the cnext list and writes it to the * orphan head. On success, %0 is returned, otherwise a negative error code * is returned. */ @@ -326,11 +326,11 @@ static int write_orph_node(struct ubifs_info *c, int atomic) } /** - * write_orph_nodes - write orph nodes until there are no more to commit + * write_orph_nodes - write orphan nodes until there are no more to commit. * @c: UBIFS file-system description object * @atomic: write atomically * - * This function writes orph nodes for all the orphans to commit. On success, + * This function writes orphan nodes for all the orphans to commit. On success, * %0 is returned, otherwise a negative error code is returned. */ static int write_orph_nodes(struct ubifs_info *c, int atomic) @@ -478,14 +478,14 @@ int ubifs_orphan_end_commit(struct ubifs_info *c) } /** - * clear_orphans - erase all LEBs used for orphans. + * ubifs_clear_orphans - erase all LEBs used for orphans. * @c: UBIFS file-system description object * * If recovery is not required, then the orphans from the previous session * are not needed. This function locates the LEBs used to record * orphans, and un-maps them. */ -static int clear_orphans(struct ubifs_info *c) +int ubifs_clear_orphans(struct ubifs_info *c) { int lnum, err; @@ -547,9 +547,9 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) * do_kill_orphans - remove orphan inodes from the index. * @c: UBIFS file-system description object * @sleb: scanned LEB - * @last_cmt_no: cmt_no of last orph node read is passed and returned here + * @last_cmt_no: cmt_no of last orphan node read is passed and returned here * @outofdate: whether the LEB is out of date is returned here - * @last_flagged: whether the end orph node is encountered + * @last_flagged: whether the end orphan node is encountered * * This function is a helper to the 'kill_orphans()' function. It goes through * every orphan node in a LEB and for every inode number recorded, removes @@ -580,8 +580,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, /* * The commit number on the master node may be less, because * of a failed commit. If there are several failed commits in a - * row, the commit number written on orph nodes will continue to - * increase (because the commit number is adjusted here) even + * row, the commit number written on orphan nodes will continue + * to increase (because the commit number is adjusted here) even * though the commit number on the master node stays the same * because the master node has not been re-written. */ @@ -589,9 +589,9 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, c->cmt_no = cmt_no; if (cmt_no < *last_cmt_no && *last_flagged) { /* - * The last orph node had a higher commit number and was - * flagged as the last written for that commit number. - * That makes this orph node, out of date. + * The last orphan node had a higher commit number and + * was flagged as the last written for that commit + * number. That makes this orphan node, out of date. */ if (!first) { ubifs_err("out of order commit number %llu in " @@ -658,10 +658,10 @@ static int kill_orphans(struct ubifs_info *c) /* * Orph nodes always start at c->orph_first and are written to each * successive LEB in turn. Generally unused LEBs will have been unmapped - * but may contain out of date orph nodes if the unmap didn't go - * through. In addition, the last orph node written for each commit is + * but may contain out of date orphan nodes if the unmap didn't go + * through. In addition, the last orphan node written for each commit is * marked (top bit of orph->cmt_no is set to 1). It is possible that - * there are orph nodes from the next commit (i.e. the commit did not + * there are orphan nodes from the next commit (i.e. the commit did not * complete successfully). In that case, no orphans will have been lost * due to the way that orphans are written, and any orphans added will * be valid orphans anyway and so can be deleted. @@ -718,7 +718,7 @@ int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only) if (unclean) err = kill_orphans(c); else if (!read_only) - err = clear_orphans(c); + err = ubifs_clear_orphans(c); return err; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 336073e..fd7fc7f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1524,6 +1524,12 @@ static int ubifs_remount_rw(struct ubifs_info *c) err = ubifs_recover_inl_heads(c, c->sbuf); if (err) goto out; + } else { + /* A readonly mount is not allowed to have orphans */ + ubifs_assert(c->tot_orphans == 0); + err = ubifs_clear_orphans(c); + if (err) + goto out; } if (!(c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY))) { diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index f175435..9999ff0 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1604,6 +1604,7 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum); int ubifs_orphan_start_commit(struct ubifs_info *c); int ubifs_orphan_end_commit(struct ubifs_info *c); int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only); +int ubifs_clear_orphans(struct ubifs_info *c); /* lpt.c */ int ubifs_calc_lpt_geom(struct ubifs_info *c); -- cgit v0.10.2 From 6ba87c9b920bea8c2703308d31eb7de925242c30 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 26 Jan 2009 16:12:20 +0200 Subject: UBIFS: fix assertions I introduce wrong assertions in one of the previous commits, this patch fixes them. Also, initialize debugfs after the debugging check. This is a little nicer because we want the FS data to be accessible to external users after everything has been initialized. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index fd7fc7f..dbfc8871 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1320,20 +1320,21 @@ static int mount_ubifs(struct ubifs_info *c) else { c->need_recovery = 0; ubifs_msg("recovery completed"); + /* GC LEB has to be empty and taken at this point */ + ubifs_assert(c->lst.taken_empty_lebs == 1); } - } + } else + ubifs_assert(c->lst.taken_empty_lebs == 1); - err = dbg_debugfs_init_fs(c); + err = dbg_check_filesystem(c); if (err) goto out_infos; - err = dbg_check_filesystem(c); + err = dbg_debugfs_init_fs(c); if (err) goto out_infos; c->always_chk_crc = 0; - /* GC LEB has to be empty and taken at this point */ - ubifs_assert(c->lst.taken_empty_lebs == 1); ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", c->vi.ubi_num, c->vi.vol_id, c->vi.name); @@ -1663,7 +1664,7 @@ static void ubifs_remount_ro(struct ubifs_info *c) int i, err; ubifs_assert(!c->need_recovery); - ubifs_assert(!c->ro_media); + ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); commit_on_unmount(c); mutex_lock(&c->umount_mutex); -- cgit v0.10.2 From 30a0fb947a68ad3ab8a7184e3b3d79dce10e3688 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 26 Jan 2009 09:40:58 -0800 Subject: x86: correct the CPUID pattern for MSR_IA32_MISC_ENABLE availability Impact: re-enable CPUID unmasking on affected processors As far as I am capable of discerning from the documentation, MSR_IA32_MISC_ENABLE should be available for all family 0xf CPUs, as well as family 6 for model >= 0xd (newer Pentium M). The documentation on this isn't ideal, so we need to be on the lookout for errors, still. Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 549f2ad..430e5c3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -30,7 +30,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { /* Unmask CPUID levels if masked: */ - if (c->x86 == 6 && c->x86_model >= 15) { + if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { u64 misc_enable; rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); -- cgit v0.10.2 From 3a9f84d354ce1e19956083c8e691727dea33bd5a Mon Sep 17 00:00:00 2001 From: Ed Swierk Date: Mon, 26 Jan 2009 15:33:31 -0800 Subject: signals, debug: fix BUG: using smp_processor_id() in preemptible code in print_fatal_signal() With print-fatal-signals=1 on a kernel with CONFIG_PREEMPT=y, sending an unexpected signal to a process causes a BUG: using smp_processor_id() in preemptible code. get_signal_to_deliver() releases the siglock before calling print_fatal_signal(), which calls show_regs(), which calls smp_processor_id(), which is not supposed to be called from a preemptible thread. Make sure show_regs() runs with preemption disabled. Signed-off-by: Ed Swierk Signed-off-by: Ingo Molnar diff --git a/kernel/signal.c b/kernel/signal.c index e737597..b6b3676 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -909,7 +909,9 @@ static void print_fatal_signal(struct pt_regs *regs, int signr) } #endif printk("\n"); + preempt_disable(); show_regs(regs); + preempt_enable(); } static int __init setup_print_fatal_signals(char *str) -- cgit v0.10.2 From fdff73f094e7220602cc3f8959c7230517976412 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 26 Jan 2009 19:06:41 -0500 Subject: ext4: Initialize the new group descriptor when resizing the filesystem Make sure all of the fields of the group descriptor are properly initialized. Previously, we allowed bg_flags field to be contain random garbage, which could trigger non-deterministic behavior, including a kernel OOPS. http://bugzilla.kernel.org/show_bug.cgi?id=12433 Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index c328be5..c06886a 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -861,12 +861,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) gdp = (struct ext4_group_desc *)((char *)primary->b_data + gdb_off * EXT4_DESC_SIZE(sb)); + memset(gdp, 0, EXT4_DESC_SIZE(sb)); ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ ext4_free_blks_set(sb, gdp, input->free_blocks_count); ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); - gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); + gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); /* -- cgit v0.10.2 From 9fd9784c91db79e953ea3fe3741f885bdc390a72 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 26 Jan 2009 19:26:26 -0500 Subject: ext4: Fix building with EXT4FS_DEBUG When bg_free_blocks_count was renamed to bg_free_blocks_count_lo in 560671a0, its uses under EXT4FS_DEBUG were not changed to the helper ext4_free_blks_count. Another commit, 498e5f24, also did not change everything needed under EXT4FS_DEBUG, thus making it spill some warnings related to printing format. This commit fixes both issues and makes ext4 build again when EXT4FS_DEBUG is enabled. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: "Theodore Ts'o" diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 6bba06b..9a50b80 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -684,15 +684,15 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; - desc_count += le16_to_cpu(gdp->bg_free_blocks_count); + desc_count += ext4_free_blks_count(sb, gdp); brelse(bitmap_bh); bitmap_bh = ext4_read_block_bitmap(sb, i); if (bitmap_bh == NULL) continue; x = ext4_count_free(bitmap_bh, sb->s_blocksize); - printk(KERN_DEBUG "group %lu: stored = %d, counted = %u\n", - i, le16_to_cpu(gdp->bg_free_blocks_count), x); + printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n", + i, ext4_free_blks_count(sb, gdp), x); bitmap_count += x; } brelse(bitmap_bh); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 54bf062..e2eab19 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3048,7 +3048,7 @@ retry: WARN_ON(ret <= 0); printk(KERN_ERR "%s: ext4_ext_get_blocks " "returned error inode#%lu, block=%u, " - "max_blocks=%lu", __func__, + "max_blocks=%u", __func__, inode->i_ino, block, max_blocks); #endif ext4_mark_inode_dirty(handle, inode); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 918aec0..deba54f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3025,7 +3025,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, goto out_err; ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, - gdp->bg_free_blocks_count); + ext4_free_blks_count(sb, gdp)); err = ext4_journal_get_write_access(handle, gdp_bh); if (err) -- cgit v0.10.2 From 8527bec548e01a29c6d1928d20d6d3be71861482 Mon Sep 17 00:00:00 2001 From: "Ira W. Snyder" Date: Mon, 26 Jan 2009 21:00:33 -0800 Subject: virtio_net: use correct accessors for scatterlists Without this fix, virtio_net makes incorrect usage of scatterlists. It sets the end of the scatterlist chain after the first element, despite the fact that more entries come after it. If you try to run dma_map_sg() on one of the scatterlists given to you by add_buf(), you will get a null pointer oops. Signed-off-by: Ira W. Snyder Signed-off-by: Rusty Russell Signed-off-by: David S. Miller diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 63ef2a8..c688083 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -287,7 +287,7 @@ static void try_fill_recv_maxbufs(struct virtnet_info *vi) skb_put(skb, MAX_PACKET_LEN); hdr = skb_vnet_hdr(skb); - sg_init_one(sg, hdr, sizeof(*hdr)); + sg_set_buf(sg, hdr, sizeof(*hdr)); if (vi->big_packets) { for (i = 0; i < MAX_SKB_FRAGS; i++) { @@ -488,9 +488,9 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) /* Encode metadata header at front. */ if (vi->mergeable_rx_bufs) - sg_init_one(sg, mhdr, sizeof(*mhdr)); + sg_set_buf(sg, mhdr, sizeof(*mhdr)); else - sg_init_one(sg, hdr, sizeof(*hdr)); + sg_set_buf(sg, hdr, sizeof(*hdr)); num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; -- cgit v0.10.2 From 98322f22eca889478045cf896b572250d03dc45f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 26 Jan 2009 21:35:35 -0800 Subject: udp: optimize bind(0) if many ports are in use commit 9088c5609584684149f3fb5b065aa7f18dcb03ff (udp: Improve port randomization) introduced a regression for UDP bind() syscall to null port (getting a random port) in case lot of ports are already in use. This is because we do about 28000 scans of very long chains (220 sockets per chain), with many spin_lock_bh()/spin_unlock_bh() calls. Fix this using a bitmap (64 bytes for current value of UDP_HTABLE_SIZE) so that we scan chains at most once. Instead of 250 ms per bind() call, we get after patch a time of 2.9 ms Based on a report from Vitaly Mayatskikh Reported-by: Vitaly Mayatskikh Signed-off-by: Eric Dumazet Tested-by: Vitaly Mayatskikh Signed-off-by: David S. Miller diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf5ab05..b7faffe 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -120,8 +120,11 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); atomic_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); +#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE) + static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, + unsigned long *bitmap, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2)) @@ -132,12 +135,17 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, sk_nulls_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && sk2 != sk && - sk2->sk_hash == num && + (bitmap || sk2->sk_hash == num) && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (*saddr_comp)(sk, sk2)) - return 1; + (*saddr_comp)(sk, sk2)) { + if (bitmap) + __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE, + bitmap); + else + return 1; + } return 0; } @@ -160,32 +168,47 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, if (!snum) { int low, high, remaining; unsigned rand; - unsigned short first; + unsigned short first, last; + DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); inet_get_local_port_range(&low, &high); remaining = (high - low) + 1; rand = net_random(); - snum = first = rand % remaining + low; - rand |= 1; - for (;;) { - hslot = &udptable->hash[udp_hashfn(net, snum)]; + first = (((u64)rand * remaining) >> 32) + low; + /* + * force rand to be an odd multiple of UDP_HTABLE_SIZE + */ + rand = (rand | 1) * UDP_HTABLE_SIZE; + for (last = first + UDP_HTABLE_SIZE; first != last; first++) { + hslot = &udptable->hash[udp_hashfn(net, first)]; + bitmap_zero(bitmap, PORTS_PER_CHAIN); spin_lock_bh(&hslot->lock); - if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) - break; - spin_unlock_bh(&hslot->lock); + udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, + saddr_comp); + + snum = first; + /* + * Iterate on all possible values of snum for this hash. + * Using steps of an odd multiple of UDP_HTABLE_SIZE + * give us randomization and full range coverage. + */ do { - snum = snum + rand; - } while (snum < low || snum > high); - if (snum == first) - goto fail; + if (low <= snum && snum <= high && + !test_bit(snum / UDP_HTABLE_SIZE, bitmap)) + goto found; + snum += rand; + } while (snum != first); + spin_unlock_bh(&hslot->lock); } + goto fail; } else { hslot = &udptable->hash[udp_hashfn(net, snum)]; spin_lock_bh(&hslot->lock); - if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) + if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp)) goto fail_unlock; } +found: inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { -- cgit v0.10.2 From a7a41acf99d9150b424839b0d7b4f5ad9d211e2d Mon Sep 17 00:00:00 2001 From: Manish Katiyar Date: Mon, 26 Jan 2009 21:54:21 -0800 Subject: r6040: Remove unused variable pdev from drivers/net/r6040.c drivers/net/r6040.c:441: warning: unused variable 'pdev' Signed-off-by: Manish Katiyar Signed-off-by: David S. Miller diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c index 72fd9e9..b2dcdb5 100644 --- a/drivers/net/r6040.c +++ b/drivers/net/r6040.c @@ -438,7 +438,6 @@ static void r6040_down(struct net_device *dev) { struct r6040_private *lp = netdev_priv(dev); void __iomem *ioaddr = lp->base; - struct pci_dev *pdev = lp->pdev; int limit = 2048; u16 *adrp; u16 cmd; -- cgit v0.10.2 From 9fa5fdf291c9b58b1cb8b4bb2a0ee57efa21d635 Mon Sep 17 00:00:00 2001 From: Dimitris Michailidis Date: Mon, 26 Jan 2009 22:15:31 -0800 Subject: tcp: Fix length tcp_splice_data_recv passes to skb_splice_bits. tcp_splice_data_recv has two lengths to consider: the len parameter it gets from tcp_read_sock, which specifies the amount of data in the skb, and rd_desc->count, which is the amount of data the splice caller still wants. Currently it passes just the latter to skb_splice_bits, which then splices min(rd_desc->count, skb->len - offset) bytes. Most of the time this is fine, except when the skb contains urgent data. In that case len goes only up to the urgent byte and is less than skb->len - offset. By ignoring len tcp_splice_data_recv may a) splice data tcp_read_sock told it not to, b) return to tcp_read_sock a value > len. Now, tcp_read_sock doesn't handle used > len and leaves the socket in a bad state (both sk_receive_queue and copied_seq are bad at that point) resulting in duplicated data and corruption. Fix by passing min(rd_desc->count, len) to skb_splice_bits. Signed-off-by: Dimitris Michailidis Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0cd71b8..76b148b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -524,7 +524,8 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, struct tcp_splice_state *tss = rd_desc->arg.data; int ret; - ret = skb_splice_bits(skb, offset, tss->pipe, rd_desc->count, tss->flags); + ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len), + tss->flags); if (ret > 0) rd_desc->count -= ret; return ret; -- cgit v0.10.2 From 6f7ab6d458bbfc2f55d295fa3e6b9e69cdb1d517 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 27 Jan 2009 16:12:31 +0200 Subject: UBIFS: fix no_chk_data_crc When data CRC checking is disabled, UBIFS returns incorrect return code from the 'try_read_node()' function (0 instead of 1, which means CRC error), which make the caller re-read the data node again, but using a different code patch, so the second read is fine. Thus, we read the same node twice. And the result of this is that UBIFS is slower with no_chk_data_crc option than it is with chk_data_crc option. This patches fixes the problem. Reported-by: Reuben Dowle Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 0168271..e8e632a 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -29,7 +29,7 @@ * would have been wasted for padding to the nearest minimal I/O unit boundary. * Instead, data first goes to the write-buffer and is flushed when the * buffer is full or when it is not used for some time (by timer). This is - * similarto the mechanism is used by JFFS2. + * similar to the mechanism is used by JFFS2. * * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by * mutexes defined inside these objects. Since sometimes upper-level code @@ -75,7 +75,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) * @lnum: logical eraseblock number * @offs: offset within the logical eraseblock * @quiet: print no messages - * @chk_crc: indicates whether to always check the CRC + * @must_chk_crc: indicates whether to always check the CRC * * This function checks node magic number and CRC checksum. This function also * validates node length to prevent UBIFS from becoming crazy when an attacker @@ -83,11 +83,17 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) * node length in the common header could cause UBIFS to read memory outside of * allocated buffer when checking the CRC checksum. * - * This function returns zero in case of success %-EUCLEAN in case of bad CRC - * or magic. + * This function may skip data nodes CRC checking if @c->no_chk_data_crc is + * true, which is controlled by corresponding UBIFS mount option. However, if + * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is + * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is + * ignored and CRC is checked. + * + * This function returns zero in case of success and %-EUCLEAN in case of bad + * CRC or magic. */ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, - int offs, int quiet, int chk_crc) + int offs, int quiet, int must_chk_crc) { int err = -EINVAL, type, node_len; uint32_t crc, node_crc, magic; @@ -123,9 +129,9 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, node_len > c->ranges[type].max_len) goto out_len; - if (!chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc) - if (c->no_chk_data_crc) - return 0; + if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && + c->no_chk_data_crc) + return 0; crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); node_crc = le32_to_cpu(ch->crc); diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index f7e36f5..fa28a84 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -443,6 +443,11 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, * This function performs that same function as ubifs_read_node except that * it does not require that there is actually a node present and instead * the return code indicates if a node was read. + * + * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc + * is true (it is controlled by corresponding mount option). However, if + * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always + * checked. */ static int try_read_node(const struct ubifs_info *c, void *buf, int type, int len, int lnum, int offs) @@ -470,9 +475,8 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, if (node_len != len) return 0; - if (type == UBIFS_DATA_NODE && !c->always_chk_crc) - if (c->no_chk_data_crc) - return 0; + if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) + return 1; crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); node_crc = le32_to_cpu(ch->crc); @@ -1506,7 +1510,7 @@ out: * * Note, if the bulk-read buffer length (@bu->buf_len) is known, this function * makes sure bulk-read nodes fit the buffer. Otherwise, this function prepares - * maxumum possible amount of nodes for bulk-read. + * maximum possible amount of nodes for bulk-read. */ int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu) { diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 9999ff0..29dfa81 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1428,7 +1428,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, int offs, int dtype); int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, - int offs, int quiet, int chk_crc); + int offs, int quiet, int must_chk_crc); void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); int ubifs_io_init(struct ubifs_info *c); -- cgit v0.10.2 From 418e4da33f45fd7bdcce48778b149b780ff730bc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 26 Jan 2009 21:43:08 +0100 Subject: PCI PM: Fix suspend error paths and testing facility breakage If one of device drivers refuses to suspend by returning error code from its ->suspend() callback, the devices that have already been suspended are resumed by executing their drivers' ->resume() callbacks. Some of these callbacks expect the device's configuration space to be restored if the device has been put into D3 before they are called. Unfortunately, this mechanism has been broken by recent changes moving the restoration of config spaces of some devices (most importantly, USB controllers and HDA Intel) into the resume callbacks executed with interrupts off. Obviously, these callbacks are not invoked in the suspend error path and, as a result, the system cannot be successfully brought back into the working state in case of a suspend error. The same thing happens in the hibernation error path right before putting the system into S4. Similarly, the suspend testing facility associated with the /sys/power/pm_test file is broken, because it uses the very same mechanism that is used in the suspend and hibernation error paths. Fix the breakage by making the PCI core restore the configuration spaces of PCI devices that haven't been restored already before pci_pm_resume() is called for those devices by the PM core. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 9de07b7..4884c48 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -370,6 +370,7 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) } pci_save_state(pci_dev); + pci_dev->state_saved = true; /* * This is for compatibility with existing code with legacy PM support. */ @@ -419,6 +420,7 @@ static int pci_legacy_resume(struct device *dev) static void pci_pm_default_resume_noirq(struct pci_dev *pci_dev) { pci_restore_standard_config(pci_dev); + pci_dev->state_saved = false; pci_fixup_device(pci_fixup_resume_early, pci_dev); } @@ -555,6 +557,13 @@ static int pci_pm_resume(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + /* + * This is necessary for the suspend error path in which resume is + * called without restoring the standard config registers of the device. + */ + if (pci_dev->state_saved) + pci_restore_standard_config(pci_dev); + if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume(dev); @@ -710,6 +719,13 @@ static int pci_pm_restore(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + /* + * This is necessary for the hibernation error path in which restore is + * called without restoring the standard config registers of the device. + */ + if (pci_dev->state_saved) + pci_restore_standard_config(pci_dev); + if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume(dev); -- cgit v0.10.2 From 57064d213d2e44654d4f13c66df135b5e7389a26 Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Fri, 23 Jan 2009 12:43:38 -0800 Subject: PCI: irq and pci_ids patch for Intel Tigerpoint DeviceIDs This patch adds the Intel Tigerpoint LPC Controller DeviceIDs. Signed-off-by: Seth Heasley Signed-off-by: Jesse Barnes diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 4064345..fecbce6 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -572,6 +572,7 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route case PCI_DEVICE_ID_INTEL_ICH7_1: case PCI_DEVICE_ID_INTEL_ICH7_30: case PCI_DEVICE_ID_INTEL_ICH7_31: + case PCI_DEVICE_ID_INTEL_TGP_LPC: case PCI_DEVICE_ID_INTEL_ESB2_0: case PCI_DEVICE_ID_INTEL_ICH8_0: case PCI_DEVICE_ID_INTEL_ICH8_1: diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d56ad9c..6f260b5 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2421,6 +2421,7 @@ #define PCI_DEVICE_ID_INTEL_ICH7_0 0x27b8 #define PCI_DEVICE_ID_INTEL_ICH7_1 0x27b9 #define PCI_DEVICE_ID_INTEL_ICH7_30 0x27b0 +#define PCI_DEVICE_ID_INTEL_TGP_LPC 0x27bc #define PCI_DEVICE_ID_INTEL_ICH7_31 0x27bd #define PCI_DEVICE_ID_INTEL_ICH7_17 0x27da #define PCI_DEVICE_ID_INTEL_ICH7_19 0x27dd -- cgit v0.10.2 From 545ffd58adc86b8d33449dab44fe81b503a6f81b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 22 Jan 2009 23:36:56 +0100 Subject: PCI PM: Fix hibernation breakage on EeePC 701 Hibernation breaks on EeePC 701 as a result of attempting to put one of its (driverless) devices into a low power state. Avoid that by not attepmting to power manage driverless devices during hibernation. Signed-off-by: Rafael J. Wysocki Reported-and-tested-by: Alan Jenkins Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 4884c48..ab1d615 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -669,7 +669,10 @@ static int pci_pm_poweroff(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_HIBERNATE); - if (drv && drv->pm && drv->pm->poweroff) { + if (!drv || !drv->pm) + return 0; + + if (drv->pm->poweroff) { error = drv->pm->poweroff(dev); suspend_report_result(drv->pm->poweroff, error); } -- cgit v0.10.2 From 48f67f54a53bb68619a63c3f38cf7f502ed74b1d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 22 Jan 2009 23:38:31 +0100 Subject: PCI PM: Power up devices before restoring their state Devices that have MSI-X enabled before suspend to RAM or hibernation and that are in a low power state during resume will not be handled correctly by pci_restore_standard_config(). Namely, it first calls pci_restore_state() which calls pci_restore_msi_state(), which in turn executes __pci_restore_msix_state() that accesses the device's memory space to restore the contents of the MSI-X table. However, if the device is in a low power state at this point, it's memory space is not accessible. The easiest way to fix this potential problem is to make pci_restore_standard_config() call pci_restore_state() after it has put the device into the full power state, D0. Fortunately, all of this is done with interrupts off, so the change of ordering should not cause any trouble. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 17bd932..f0aa3d5 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1393,12 +1393,11 @@ int pci_restore_standard_config(struct pci_dev *dev) pci_power_t prev_state; int error; - pci_restore_state(dev); pci_update_current_state(dev, PCI_D0); prev_state = dev->current_state; if (prev_state == PCI_D0) - return 0; + goto Restore; error = pci_raw_set_power_state(dev, PCI_D0, false); if (error) @@ -1421,7 +1420,8 @@ int pci_restore_standard_config(struct pci_dev *dev) dev->current_state = PCI_D0; - return 0; + Restore: + return pci_restore_state(dev); } /** -- cgit v0.10.2 From 476e7faefc43f106a90b5c96166c59b75de19d30 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 22 Jan 2009 23:39:57 +0100 Subject: PCI PM: Do not wait for buses in B2 or B3 during resume pci_restore_standard_config() adds extra delay for PCI buses in low power states (B2 or B3), but this is only correct for buses in B2, because the buses in B3 are reset when they are put back into B0. Thus we should wait for such buses to settle after the reset, but it's not a good idea to wait that long (1.1 s) with interrupts off. On the other hand, we have never waited for buses in B2 and B3 during resume and it seems reasonable to go back to this well tested behaviour. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index f0aa3d5..4880755 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1403,19 +1403,19 @@ int pci_restore_standard_config(struct pci_dev *dev) if (error) return error; - if (pci_is_bridge(dev)) { - if (prev_state > PCI_D1) - mdelay(PCI_PM_BUS_WAIT); - } else { - switch(prev_state) { - case PCI_D3cold: - case PCI_D3hot: - mdelay(pci_pm_d3_delay); - break; - case PCI_D2: - udelay(PCI_PM_D2_DELAY); - break; - } + /* + * This assumes that we won't get a bus in B2 or B3 from the BIOS, but + * we've made this assumption forever and it appears to be universally + * satisfied. + */ + switch(prev_state) { + case PCI_D3cold: + case PCI_D3hot: + mdelay(pci_pm_d3_delay); + break; + case PCI_D2: + udelay(PCI_PM_D2_DELAY); + break; } dev->current_state = PCI_D0; -- cgit v0.10.2 From bffac3c593eba1f9da3efd0199e49ea6558a40ce Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 21 Jan 2009 19:19:19 -0500 Subject: PCI MSI: Fix undefined shift by 32 Add an msi_mask() function which returns the correct bitmask for the number of MSI interrupts you have. This fixes an undefined bug in msi_capability_init(). Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 896a15d..44f15ff 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -103,6 +103,16 @@ static void msix_set_enable(struct pci_dev *dev, int enable) } } +/* + * Essentially, this is ((1 << (1 << x)) - 1), but without the + * undefinedness of a << 32. + */ +static inline __attribute_const__ u32 msi_mask(unsigned x) +{ + static const u32 mask[] = { 1, 2, 4, 0xf, 0xff, 0xffff, 0xffffffff }; + return mask[x]; +} + static void msix_flush_writes(struct irq_desc *desc) { struct msi_desc *entry; @@ -407,8 +417,7 @@ static int msi_capability_init(struct pci_dev *dev) /* All MSIs are unmasked by default, Mask them all */ pci_read_config_dword(dev, base, &maskbits); - temp = (1 << multi_msi_capable(control)); - temp = ((temp - 1) & ~temp); + temp = msi_mask((control & PCI_MSI_FLAGS_QMASK) >> 1); maskbits |= temp; pci_write_config_dword(dev, base, maskbits); entry->msi_attrib.maskbits_mask = temp; -- cgit v0.10.2 From bf4162bcf82ebc3258d6bc0ddd6453132abde72d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 25 Nov 2008 13:51:44 -0800 Subject: PCI hotplug: fakephp: Allocate PCI resources before adding the device For PCI devices, pci_bus_assign_resources() must be called to set up the pci_device->resource array before pci_bus_add_devices() can be called, else attempts to load drivers results in BAR collision errors where there are none. This is not done in fakephp, so devices can be "unplugged" but scanning the parent bus won't bring the devices back due to resource unallocation. Move the pci_bus_add_device-calling logic into pci_rescan_bus and preface it with a call to pci_bus_assign_resources so that we only have to (re)allocate resources once per bus where a new device is found. Signed-off-by: Darrick J. Wong Acked-by: Alex Chiang Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c index b0e7de9..d8649e1 100644 --- a/drivers/pci/hotplug/fakephp.c +++ b/drivers/pci/hotplug/fakephp.c @@ -195,13 +195,13 @@ static void remove_slot_worker(struct work_struct *work) * Tries hard not to re-enable already existing devices; * also handles scanning of subfunctions. */ -static void pci_rescan_slot(struct pci_dev *temp) +static int pci_rescan_slot(struct pci_dev *temp) { struct pci_bus *bus = temp->bus; struct pci_dev *dev; int func; - int retval; u8 hdr_type; + int count = 0; if (!pci_read_config_byte(temp, PCI_HEADER_TYPE, &hdr_type)) { temp->hdr_type = hdr_type & 0x7f; @@ -213,17 +213,12 @@ static void pci_rescan_slot(struct pci_dev *temp) dbg("New device on %s function %x:%x\n", bus->name, temp->devfn >> 3, temp->devfn & 7); - retval = pci_bus_add_device(dev); - if (retval) - dev_err(&dev->dev, "error adding " - "device, continuing.\n"); - else - add_slot(dev); + count++; } } /* multifunction device? */ if (!(hdr_type & 0x80)) - return; + return count; /* continue scanning for other functions */ for (func = 1, temp->devfn++; func < 8; func++, temp->devfn++) { @@ -239,16 +234,13 @@ static void pci_rescan_slot(struct pci_dev *temp) dbg("New device on %s function %x:%x\n", bus->name, temp->devfn >> 3, temp->devfn & 7); - retval = pci_bus_add_device(dev); - if (retval) - dev_err(&dev->dev, "error adding " - "device, continuing.\n"); - else - add_slot(dev); + count++; } } } } + + return count; } @@ -262,6 +254,8 @@ static void pci_rescan_bus(const struct pci_bus *bus) { unsigned int devfn; struct pci_dev *dev; + int retval; + int found = 0; dev = alloc_pci_dev(); if (!dev) return; @@ -270,7 +264,23 @@ static void pci_rescan_bus(const struct pci_bus *bus) dev->sysdata = bus->sysdata; for (devfn = 0; devfn < 0x100; devfn += 8) { dev->devfn = devfn; - pci_rescan_slot(dev); + found += pci_rescan_slot(dev); + } + + if (found) { + pci_bus_assign_resources(bus); + list_for_each_entry(dev, &bus->devices, bus_list) { + /* Skip already-added devices */ + if (dev->is_added) + continue; + retval = pci_bus_add_device(dev); + if (retval) + dev_err(&dev->dev, + "Error adding device, continuing\n"); + else + add_slot(dev); + } + pci_bus_add_devices(bus); } kfree(dev); } -- cgit v0.10.2 From f0e0059b9c18426cffdcc04161062251a8f9741e Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Sun, 25 Jan 2009 20:53:00 -0600 Subject: don't reallocate sxp variable passed into xfs_swapext fixes kernel.org bugzilla 12538, xfs_fsr fails on 2.6.29-rc kernels Regression caused by 743bb4650da9e2595d6cedd01c680b5b9398c74a This was an embarrasing mistake, reallocating the sxp pointer passed in from the main ioctl switch. Signed-off-by: Eric Sandeen Tested-by: Paul Martin Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index b4c1ee7..f8278cf 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -55,17 +55,11 @@ xfs_swapext( struct file *file, *target_file; int error = 0; - sxp = kmem_alloc(sizeof(xfs_swapext_t), KM_MAYFAIL); - if (!sxp) { - error = XFS_ERROR(ENOMEM); - goto out; - } - /* Pull information for the target fd */ file = fget((int)sxp->sx_fdtarget); if (!file) { error = XFS_ERROR(EINVAL); - goto out_free_sxp; + goto out; } if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) { @@ -109,8 +103,6 @@ xfs_swapext( fput(target_file); out_put_file: fput(file); - out_free_sxp: - kmem_free(sxp); out: return error; } -- cgit v0.10.2 From 1cf3eb2ff6b0844c678f2f48d0053b9d12b7da67 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 27 Jan 2009 23:48:59 +0200 Subject: kmalloc: return NULL instead of link failure The SLAB kmalloc with a constant value isn't consistent with the other implementations because it bails out with __you_cannot_kmalloc_that_much rather than returning NULL and properly allowing the caller to fall back to vmalloc or take other action. This doesn't happen with a non-constant value or with SLOB or SLUB. Starting with 2.6.28, I've been seeing build failures on s390x. This is due to init_section_page_cgroup trying to allocate 2.5MB when the max size for a kmalloc on s390x is 2MB. It's failing because the value is constant. The workarounds at the call size are ugly and the caller shouldn't have to change behavior depending on what the backend of the API is. So, this patch eliminates the link failure and returns NULL like the other implementations. Signed-off-by: Jeff Mahoney Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Christoph Lameter Cc: Pekka Enberg Cc: Matt Mackall Cc: Nick Piggin Cc: [2.6.28.x] Signed-off-by: Andrew Morton Signed-off-by: Pekka Enberg diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 39c3a5e..6ca6a7b 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -43,10 +43,7 @@ static inline void *kmalloc(size_t size, gfp_t flags) i++; #include #undef CACHE - { - extern void __you_cannot_kmalloc_that_much(void); - __you_cannot_kmalloc_that_much(); - } + return NULL; found: #ifdef CONFIG_ZONE_DMA if (flags & GFP_DMA) @@ -77,10 +74,7 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) i++; #include #undef CACHE - { - extern void __you_cannot_kmalloc_that_much(void); - __you_cannot_kmalloc_that_much(); - } + return NULL; found: #ifdef CONFIG_ZONE_DMA if (flags & GFP_DMA) -- cgit v0.10.2 From 71a082efc9fdc12068a3cee6cebb1330b00ebeee Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 27 Jan 2009 01:03:35 +0000 Subject: PCI hotplug: Change link order of pciehp & acpiphp Some hardware exposes PCIE slots in such a way that they can be claimed by either the acpiphp or pciehp driver. pciehp is the preferred driver if the firmware allows the OS to claim control via the _OSC method so should be loaded first - if it fails to bind (either due to a missing _OSC method or the firmware refusing to hand off control) then we can fall back to acpiphp or a vendor-specific driver. This patch simply changes the link order to ensure that pciehp will be initialised before acpiphp if both are statically built into the kernel. Signed-off-by: Matthew Garrett Acked-by: Randy Dunlap Signed-off-by: Jesse Barnes diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile index e31fb91..2aa117c 100644 --- a/drivers/pci/hotplug/Makefile +++ b/drivers/pci/hotplug/Makefile @@ -5,11 +5,15 @@ obj-$(CONFIG_HOTPLUG_PCI) += pci_hotplug.o obj-$(CONFIG_HOTPLUG_PCI_COMPAQ) += cpqphp.o obj-$(CONFIG_HOTPLUG_PCI_IBM) += ibmphp.o + +# pciehp should be linked before acpiphp in order to allow the native driver +# to attempt to bind first. We can then fall back to generic support. + +obj-$(CONFIG_HOTPLUG_PCI_PCIE) += pciehp.o obj-$(CONFIG_HOTPLUG_PCI_ACPI) += acpiphp.o obj-$(CONFIG_HOTPLUG_PCI_ACPI_IBM) += acpiphp_ibm.o obj-$(CONFIG_HOTPLUG_PCI_CPCI_ZT5550) += cpcihp_zt5550.o obj-$(CONFIG_HOTPLUG_PCI_CPCI_GENERIC) += cpcihp_generic.o -obj-$(CONFIG_HOTPLUG_PCI_PCIE) += pciehp.o obj-$(CONFIG_HOTPLUG_PCI_SHPC) += shpchp.o obj-$(CONFIG_HOTPLUG_PCI_RPA) += rpaphp.o obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR) += rpadlpar_io.o -- cgit v0.10.2 From 15b2bee22a0390d951301b53e83df88d0350c499 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 27 Jan 2009 16:41:58 -0800 Subject: e1000: fix bug with shared interrupt during reset A nasty bug was found where an MTU change (or anything else that caused a reset) could race with the interrupt code. The interrupt code was entered by a shared interrupt during the MTU change. This change prevents the interrupt code from running while the driver is in the middle of its reset path. Signed-off-by: Jesse Brandeburg Signed-off-by: David S. Miller diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 26474c9..c986978 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -31,7 +31,7 @@ char e1000_driver_name[] = "e1000"; static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver"; -#define DRV_VERSION "7.3.20-k3-NAPI" +#define DRV_VERSION "7.3.21-k3-NAPI" const char e1000_driver_version[] = DRV_VERSION; static const char e1000_copyright[] = "Copyright (c) 1999-2006 Intel Corporation."; @@ -3712,7 +3712,7 @@ static irqreturn_t e1000_intr(int irq, void *data) struct e1000_hw *hw = &adapter->hw; u32 rctl, icr = er32(ICR); - if (unlikely(!icr)) + if (unlikely((!icr) || test_bit(__E1000_RESETTING, &adapter->flags))) return IRQ_NONE; /* Not our interrupt */ /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is -- cgit v0.10.2 From 94cd3e6cbebf85903b4d53ed2147bdb4c6e08625 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Jan 2009 17:45:10 -0800 Subject: net: wrong test in inet_ehash_locks_alloc() In commit 9db66bdcc83749affe61c61eb8ff3cf08f42afec (net: convert TCP/DCCP ehash rwlocks to spinlocks), I forgot to change one occurrence of rwlock_t to spinlock_t I believe sizeof(raw_spinlock_t) might be > 0 on !CONFIG_SMP if CONFIG_DEBUG_SPINLOCK while sizeof(raw_rwlock_t) should be 0 in this case. Fortunatly, CONFIG_DEBUG_SPINLOCK adds fields to both spinlock_t and rwlock_t, but at this might change in the future (being able to debug spinlocks but not rwlocks for example), better to be safe. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f44bb5c..d0a0431 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -182,7 +182,7 @@ static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) size = 2048; if (nr_pcpus >= 32) size = 4096; - if (sizeof(rwlock_t) != 0) { + if (sizeof(spinlock_t) != 0) { #ifdef CONFIG_NUMA if (size * sizeof(spinlock_t) > PAGE_SIZE) hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t)); -- cgit v0.10.2 From b8e15992b420d09dae831125a623c474c8637cee Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 28 Jan 2009 14:09:59 +1100 Subject: crypto: api - Fix algorithm test race that broke aead initialisation When we complete a test we'll notify everyone waiting on it, drop the mutex, and then remove the test larval (after reacquiring the mutex). If one of the notified parties tries to register another algorithm with the same driver name prior to the removal of the test larval, they will fail with EEXIST as only one algorithm of a given name can be tested at any time. This broke the initialisation of aead and givcipher algorithms as they will register two algorithms with the same driver name, in sequence. This patch fixes the problem by marking the larval as dead before we drop the mutex, and also ignoring all dead or dying algorithms on the registration path. Tested-by: Andreas Steffen Signed-off-by: Herbert Xu diff --git a/crypto/algapi.c b/crypto/algapi.c index 7c41e74..56c62e2 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -149,6 +149,9 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) if (q == alg) goto err; + if (crypto_is_moribund(q)) + continue; + if (crypto_is_larval(q)) { if (!strcmp(alg->cra_driver_name, q->cra_driver_name)) goto err; @@ -197,7 +200,7 @@ void crypto_alg_tested(const char *name, int err) down_write(&crypto_alg_sem); list_for_each_entry(q, &crypto_alg_list, cra_list) { - if (!crypto_is_larval(q)) + if (crypto_is_moribund(q) || !crypto_is_larval(q)) continue; test = (struct crypto_larval *)q; @@ -210,6 +213,7 @@ void crypto_alg_tested(const char *name, int err) goto unlock; found: + q->cra_flags |= CRYPTO_ALG_DEAD; alg = test->adult; if (err || list_empty(&alg->cra_list)) goto complete; -- cgit v0.10.2 From 6c06a478c9e59d1584a5dc1b2b3519bae5d6546a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 27 Jan 2009 22:30:19 -0800 Subject: net: fix xfrm reverse flow lookup for icmp6 This patch fixes the xfrm reverse flow lookup for icmp6 so that icmp6 packets don't get lost over ipsec tunnels. Similar patch is in RHEL5 kernel for a quite long time and I do not see why it isn't in mainline. Signed-off-by: Jiri Pirko Acked-by: Herbert Xu Signed-off-by: David S. Miller diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4f43384..36dff88 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -443,10 +443,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6)) goto relookup_failed; - if (ip6_dst_lookup(sk, &dst2, &fl)) + if (ip6_dst_lookup(sk, &dst2, &fl2)) goto relookup_failed; - err = xfrm_lookup(net, &dst2, &fl, sk, XFRM_LOOKUP_ICMP); + err = xfrm_lookup(net, &dst2, &fl2, sk, XFRM_LOOKUP_ICMP); switch (err) { case 0: dst_release(dst); -- cgit v0.10.2 From 1d6e55f195128813f96458203a9fa14204f9251e Mon Sep 17 00:00:00 2001 From: Thomas Goff Date: Tue, 27 Jan 2009 22:39:59 -0800 Subject: IPv6: Fix multicast routing bugs. This patch addresses the IPv6 multicast routing issues described below. It was tested with XORP 1.4/1.5 as the IPv6 PIM-SM routing daemon against FreeBSD peers. net/ipv6/ip6_input.c: - Don't try to forward link-local multicast packets. - Don't reset skb2->dev before calling ip6_mr_input() so packets can be identified as coming from the PIM register vif properly. net/ipv6/ip6mr.c: - Fix incoming PIM register messages processing: * The IPv6 pseudo-header should be included when checksumming PIM messages (RFC 4601 section 4.9; RFC 3973 section 4.7.1). * Packets decapsulated from PIM register messages should have skb->protocol ETH_P_IPV6. - Enable/disable IPv6 multicast forwarding on the corresponding interface when a routing daemon adds/removes a multicast virtual interface. - Remove incorrect skb_pull() to fix userspace signaling. - Enable/disable global IPv6 multicast forwarding when an IPv6 multicast routing socket is opened/closed. net/ipv6/route.c: - Don't use strict routing logic for packets decapsulated from PIM register messages (similar to disabling rp_filter for the IPv4 case). Signed-off-by: Thomas Goff Reviewed-by: Fred Templin Signed-off-by: David S. Miller diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 936f489..f171e8d 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -255,6 +255,7 @@ int ip6_mc_input(struct sk_buff *skb) * IPv6 multicast router mode is now supported ;) */ if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && + !(ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { /* * Okay, we try to forward - split and duplicate @@ -316,7 +317,6 @@ int ip6_mc_input(struct sk_buff *skb) } if (skb2) { - skb2->dev = skb2->dst->dev; ip6_mr_input(skb2); } } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 3c51b2d..d19a84b 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -365,7 +365,9 @@ static int pim6_rcv(struct sk_buff *skb) pim = (struct pimreghdr *)skb_transport_header(skb); if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) || (pim->flags & PIM_NULL_REGISTER) || - (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && + (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, + sizeof(*pim), IPPROTO_PIM, + csum_partial((void *)pim, sizeof(*pim), 0)) && csum_fold(skb_checksum(skb, 0, skb->len, 0)))) goto drop; @@ -392,7 +394,7 @@ static int pim6_rcv(struct sk_buff *skb) skb_pull(skb, (u8 *)encap - skb->data); skb_reset_network_header(skb); skb->dev = reg_dev; - skb->protocol = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IPV6); skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; dst_release(skb->dst); @@ -481,6 +483,7 @@ static int mif6_delete(struct net *net, int vifi) { struct mif_device *v; struct net_device *dev; + struct inet6_dev *in6_dev; if (vifi < 0 || vifi >= net->ipv6.maxvif) return -EADDRNOTAVAIL; @@ -513,6 +516,10 @@ static int mif6_delete(struct net *net, int vifi) dev_set_allmulti(dev, -1); + in6_dev = __in6_dev_get(dev); + if (in6_dev) + in6_dev->cnf.mc_forwarding--; + if (v->flags & MIFF_REGISTER) unregister_netdevice(dev); @@ -622,6 +629,7 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock) int vifi = vifc->mif6c_mifi; struct mif_device *v = &net->ipv6.vif6_table[vifi]; struct net_device *dev; + struct inet6_dev *in6_dev; int err; /* Is vif busy ? */ @@ -662,6 +670,10 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock) return -EINVAL; } + in6_dev = __in6_dev_get(dev); + if (in6_dev) + in6_dev->cnf.mc_forwarding++; + /* * Fill in the VIF structures */ @@ -838,8 +850,6 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi, skb->dst = dst_clone(pkt->dst); skb->ip_summed = CHECKSUM_UNNECESSARY; - - skb_pull(skb, sizeof(struct ipv6hdr)); } if (net->ipv6.mroute6_sk == NULL) { @@ -1222,8 +1232,10 @@ static int ip6mr_sk_init(struct sock *sk) rtnl_lock(); write_lock_bh(&mrt_lock); - if (likely(net->ipv6.mroute6_sk == NULL)) + if (likely(net->ipv6.mroute6_sk == NULL)) { net->ipv6.mroute6_sk = sk; + net->ipv6.devconf_all->mc_forwarding++; + } else err = -EADDRINUSE; write_unlock_bh(&mrt_lock); @@ -1242,6 +1254,7 @@ int ip6mr_sk_done(struct sock *sk) if (sk == net->ipv6.mroute6_sk) { write_lock_bh(&mrt_lock); net->ipv6.mroute6_sk = NULL; + net->ipv6.devconf_all->mc_forwarding--; write_unlock_bh(&mrt_lock); mroute_clean_tables(net); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c4a5982..9c57423 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -794,7 +794,7 @@ void ip6_route_input(struct sk_buff *skb) .proto = iph->nexthdr, }; - if (rt6_need_strict(&iph->daddr)) + if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input); -- cgit v0.10.2 From a4e6db07984529847c6ad8bc616485e721dcb809 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Jan 2009 22:41:03 -0800 Subject: ipv6: Make mc_forwarding sysctl read-only. The kernel manages this value internally, as necessary, as VIFs are added/removed and as multicast routers are registered and deregistered. Signed-off-by: David S. Miller diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e92ad84..f9afb45 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4250,7 +4250,7 @@ static struct addrconf_sysctl_table .procname = "mc_forwarding", .data = &ipv6_devconf.mc_forwarding, .maxlen = sizeof(int), - .mode = 0644, + .mode = 0444, .proc_handler = proc_dointvec, }, #endif -- cgit v0.10.2 From 3718909448116bf4411445468c58acc946379f92 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 27 Jan 2009 18:59:46 -0800 Subject: slub: fix per cpu kmem_cache_cpu array memory leak The per cpu array of kmem_cache_cpu structures accomodates NR_KMEM_CACHE_CPU such structs. When this array overflows and a struct is allocated by kmalloc(), it may have an address at the upper bound of this array. If this happens, it does not get freed and the per cpu kmem_cache_cpu_free pointer will be out of bounds after kmem_cache_destroy() or cpu offlining. Cc: Christoph Lameter Signed-off-by: David Rientjes Signed-off-by: Pekka Enberg diff --git a/mm/slub.c b/mm/slub.c index 6392ae5..bdc9abb 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1996,7 +1996,7 @@ static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s, static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu) { if (c < per_cpu(kmem_cache_cpu, cpu) || - c > per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { + c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { kfree(c); return; } -- cgit v0.10.2 From a71558d0eca1bbb23737f832297926666f9b36db Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 27 Jan 2009 22:32:29 +0000 Subject: [ARM] etherh: continue fixing build failure Further to 483a2b3a3182abcb7fcea986d7ea13e793bb00b1, also fix: drivers/net/arm/etherh.c:649: error: 'eth_set_mac_addr' undeclared here (not in a function) Signed-off-by: Russell King diff --git a/drivers/net/arm/etherh.c b/drivers/net/arm/etherh.c index d15d8b7..54b52e5 100644 --- a/drivers/net/arm/etherh.c +++ b/drivers/net/arm/etherh.c @@ -646,7 +646,7 @@ static const struct net_device_ops etherh_netdev_ops = { .ndo_get_stats = ei_get_stats, .ndo_set_multicast_list = ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_set_mac_addr, + .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ei_poll, -- cgit v0.10.2 From 9ce8bb55e32d11bb82e19e76bdd2cf1c2ed32fd1 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 27 Jan 2009 22:44:12 +0000 Subject: [ARM] msm: fix build errors arch/arm/mach-msm/board-halibut.c:45: error: implicit declaration of function 'MSM_GPIO_TO_INT' arch/arm/mach-msm/board-halibut.c:45: error: initializer element is not constant arch/arm/mach-msm/board-halibut.c:45: error: (near initialization for 'smc91x_resources[1].start') arch/arm/mach-msm/board-halibut.c:46: error: initializer element is not constant arch/arm/mach-msm/board-halibut.c:46: error: (near initialization for 'smc91x_resources[1].end') Signed-off-by: Russell King diff --git a/arch/arm/mach-msm/board-halibut.c b/arch/arm/mach-msm/board-halibut.c index c2a96e3..e61967d 100644 --- a/arch/arm/mach-msm/board-halibut.c +++ b/arch/arm/mach-msm/board-halibut.c @@ -27,6 +27,7 @@ #include #include +#include #include #include -- cgit v0.10.2 From ecbab71c521819716e204659dfe72fc39d00630a Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 27 Jan 2009 23:20:00 +0000 Subject: [ARM] call undefined instruction exception handler with irqs enabled Aaro says: > With spinlock debugs enabled I get might_sleep() warnings when using > ptrace. tracked down to a missing enable_irq before calling do_undefinstr(). Reported-by: Aaro Koskinen Tested-by: Aaro Koskinen Signed-off-by: Russell King diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 77b0474..85040cf 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -650,6 +650,7 @@ ENTRY(fp_enter) no_fp: mov pc, lr __und_usr_unknown: + enable_irq mov r0, sp adr lr, ret_from_exception b do_undefinstr -- cgit v0.10.2 From 4a29d2005b0f28d018d36d209c47f3973a725df5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 27 Jan 2009 15:22:54 +0200 Subject: UBIFS: fix LPT out-of-space bug (again) The function to traverse and dirty the LPT was still not dirtying all nodes, with the result that the LPT could run out of space. Signed-off-by: Adrian Hunter Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 96ca957..3216a1f 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -556,23 +556,23 @@ no_space: } /** - * next_pnode - find next pnode. + * next_pnode_to_dirty - find next pnode to dirty. * @c: UBIFS file-system description object * @pnode: pnode * - * This function returns the next pnode or %NULL if there are no more pnodes. + * This function returns the next pnode to dirty or %NULL if there are no more + * pnodes. Note that pnodes that have never been written (lnum == 0) are + * skipped. */ -static struct ubifs_pnode *next_pnode(struct ubifs_info *c, - struct ubifs_pnode *pnode) +static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c, + struct ubifs_pnode *pnode) { struct ubifs_nnode *nnode; int iip; /* Try to go right */ nnode = pnode->parent; - iip = pnode->iip + 1; - if (iip < UBIFS_LPT_FANOUT) { - /* We assume here that LEB zero is never an LPT LEB */ + for (iip = pnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) { if (nnode->nbranch[iip].lnum) return ubifs_get_pnode(c, nnode, iip); } @@ -583,8 +583,11 @@ static struct ubifs_pnode *next_pnode(struct ubifs_info *c, nnode = nnode->parent; if (!nnode) return NULL; - /* We assume here that LEB zero is never an LPT LEB */ - } while (iip >= UBIFS_LPT_FANOUT || !nnode->nbranch[iip].lnum); + for (; iip < UBIFS_LPT_FANOUT; iip++) { + if (nnode->nbranch[iip].lnum) + break; + } + } while (iip >= UBIFS_LPT_FANOUT); /* Go right */ nnode = ubifs_get_nnode(c, nnode, iip); @@ -593,12 +596,29 @@ static struct ubifs_pnode *next_pnode(struct ubifs_info *c, /* Go down to level 1 */ while (nnode->level > 1) { - nnode = ubifs_get_nnode(c, nnode, 0); + for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) { + if (nnode->nbranch[iip].lnum) + break; + } + if (iip >= UBIFS_LPT_FANOUT) { + /* + * Should not happen, but we need to keep going + * if it does. + */ + iip = 0; + } + nnode = ubifs_get_nnode(c, nnode, iip); if (IS_ERR(nnode)) return (void *)nnode; } - return ubifs_get_pnode(c, nnode, 0); + for (iip = 0; iip < UBIFS_LPT_FANOUT; iip++) + if (nnode->nbranch[iip].lnum) + break; + if (iip >= UBIFS_LPT_FANOUT) + /* Should not happen, but we need to keep going if it does */ + iip = 0; + return ubifs_get_pnode(c, nnode, iip); } /** @@ -688,7 +708,7 @@ static int make_tree_dirty(struct ubifs_info *c) pnode = pnode_lookup(c, 0); while (pnode) { do_make_pnode_dirty(c, pnode); - pnode = next_pnode(c, pnode); + pnode = next_pnode_to_dirty(c, pnode); if (IS_ERR(pnode)) return PTR_ERR(pnode); } -- cgit v0.10.2 From 08e445bd6a98fa09befe0cf6d67705324f913fc6 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 16 Jan 2009 23:02:54 +0100 Subject: [ARM] 5366/1: fix shared memory coherency with VIVT L1 + L2 caches When there are multiple L1-aliasing userland mappings of the same physical page, we currently remap each of them uncached, to prevent VIVT cache aliasing issues. (E.g. writes to one of the mappings not being immediately visible via another mapping.) However, when we do this remapping, there could still be stale data in the L2 cache, and an uncached mapping might bypass L2 and go straight to RAM. This would cause reads from such mappings to see old data (until the dirty L2 line is eventually evicted.) This issue is solved by forcing a L2 cache flush whenever the shared page is made L1 uncacheable. Ideally, we would make L1 uncacheable and L2 cacheable as L2 is PIPT. But Feroceon does not support that combination, and the TEX=5 C=0 B=0 encoding for XSc3 doesn't appear to work in practice. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 81d0b87..bc0099d 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -66,7 +66,10 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address) * fault (ie, is old), we can safely ignore any issues. */ if (ret && (pte_val(entry) & L_PTE_MT_MASK) != shared_pte_mask) { - flush_cache_page(vma, address, pte_pfn(entry)); + unsigned long pfn = pte_pfn(entry); + flush_cache_page(vma, address, pfn); + outer_flush_range((pfn << PAGE_SHIFT), + (pfn << PAGE_SHIFT) + PAGE_SIZE); pte_val(entry) &= ~L_PTE_MT_MASK; pte_val(entry) |= shared_pte_mask; set_pte_at(vma->vm_mm, address, pte, entry); -- cgit v0.10.2 From a2b7b01c072435b7832ab392167545a1b38cabc3 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 28 Jan 2009 12:47:15 -0500 Subject: ACPI: remove locking from PM1x_STS register reads PM1a_STS and PM1b_STS are twins that get OR'd together on reads, and all writes are repeated to both. The fields in PM1x_STS are single bits only, there are no multi-bit fields. So it is not necessary to lock PM1x_STS reads against writes because it is impossible to read an intermediate value of a single bit. It will either be 0 or 1, even if a write is in progress during the read. Reads are asynchronous to writes no matter if a lock is used or not. Signed-off-by: Len Brown diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 66a9d81..ae00108 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -447,7 +447,7 @@ static void acpi_processor_idle(void) pr->power.bm_activity <<= diff; - acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status); + acpi_get_register_unlocked(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status); if (bm_status) { pr->power.bm_activity |= 0x1; acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1); @@ -1383,7 +1383,7 @@ static int acpi_idle_bm_check(void) { u32 bm_status = 0; - acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status); + acpi_get_register_unlocked(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status); if (bm_status) acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1); /* -- cgit v0.10.2 From 89f135d8b53bcccafd91a075366d2704ba257cf3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 28 Jan 2009 15:34:27 -0500 Subject: Btrfs: fix readdir on 32 bit machines After btrfs_readdir has gone through all the directory items, it sets the directory f_pos to the largest possible int. This way applications that mix readdir with creating new files don't end up in an endless loop finding the new directory items as they go. It was a workaround for a bug in git, but the assumption was that if git could make this looping mistake than it would be a common problem. The largest possible int chosen was INT_LIMIT(typeof(file->f_pos), and it is possible for that to be a larger number than 32 bit glibc expects to come out of readdir. This patches switches that to INT_LIMIT(off_t), which should keep applications happy on 32 and 64 bit machines. Signed-off-by: Chris Mason diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 288c2cd..2bb65e9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3263,7 +3263,7 @@ skip: /* Reached end of directory/root. Bump pos past the last item. */ if (key_type == BTRFS_DIR_INDEX_KEY) - filp->f_pos = INT_LIMIT(typeof(filp->f_pos)); + filp->f_pos = INT_LIMIT(off_t); else filp->f_pos++; nopos: -- cgit v0.10.2 From 31878dd86b7df9a147f5e6cc6e07092b4308782b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 28 Jan 2009 18:28:09 -0500 Subject: ACPI: remove BM_RLD access from idle entry path It is true that BM_RLD needs to be set to enable bus master activity to wake an older chipset (eg PIIX4) from C3. This is contrary to the erroneous wording the ACPI 2.0, 3.0 specifications that suggests that BM_RLD is an indicator rather than a control bit. ACPI 1.0's correct wording should be restored in ACPI 4.0: http://www.acpica.org/bugzilla/show_bug.cgi?id=689 But the kernel should not have to clear BM_RLD when entering a non C3-type state just to set it again when entering a C3-type C-state. We should be able to set BM_RLD at boot time and leave it alone -- removing the overhead of accessing this IO register from the idle entry path. Signed-off-by: Len Brown diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index ae00108..7eab733 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -241,26 +241,6 @@ acpi_processor_power_activate(struct acpi_processor *pr, old->promotion.count = 0; new->demotion.count = 0; - /* Cleanup from old state. */ - if (old) { - switch (old->type) { - case ACPI_STATE_C3: - /* Disable bus master reload */ - if (new->type != ACPI_STATE_C3 && pr->flags.bm_check) - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); - break; - } - } - - /* Prepare to use new state. */ - switch (new->type) { - case ACPI_STATE_C3: - /* Enable bus master reload */ - if (old->type != ACPI_STATE_C3 && pr->flags.bm_check) - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1); - break; - } - pr->power.state = new; return; @@ -1121,7 +1101,6 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr, " for C3 to be enabled on SMP systems\n")); return; } - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); } /* @@ -1137,6 +1116,15 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr, #else cx->latency_ticks = cx->latency; #endif + /* + * On older chipsets, BM_RLD needs to be set + * in order for Bus Master activity to wake the + * system from C3. Newer chipsets handle DMA + * during C3 automatically and BM_RLD is a NOP. + * In either case, the proper way to + * handle BM_RLD is to set it and leave it set. + */ + acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1); return; } @@ -1400,25 +1388,6 @@ static int acpi_idle_bm_check(void) } /** - * acpi_idle_update_bm_rld - updates the BM_RLD bit depending on target state - * @pr: the processor - * @target: the new target state - */ -static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr, - struct acpi_processor_cx *target) -{ - if (pr->flags.bm_rld_set && target->type != ACPI_STATE_C3) { - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); - pr->flags.bm_rld_set = 0; - } - - if (!pr->flags.bm_rld_set && target->type == ACPI_STATE_C3) { - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1); - pr->flags.bm_rld_set = 1; - } -} - -/** * acpi_idle_do_entry - a helper function that does C2 and C3 type entry * @cx: cstate data * @@ -1473,9 +1442,6 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, return 0; } - if (pr->flags.bm_check) - acpi_idle_update_bm_rld(pr, cx); - t1 = inl(acpi_gbl_FADT.xpm_timer_block.address); acpi_idle_do_entry(cx); t2 = inl(acpi_gbl_FADT.xpm_timer_block.address); @@ -1527,9 +1493,6 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, */ acpi_state_timer_broadcast(pr, cx, 1); - if (pr->flags.bm_check) - acpi_idle_update_bm_rld(pr, cx); - if (cx->type == ACPI_STATE_C3) ACPI_FLUSH_CPU_CACHE(); @@ -1621,8 +1584,6 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, */ acpi_state_timer_broadcast(pr, cx, 1); - acpi_idle_update_bm_rld(pr, cx); - /* * disable bus master * bm_check implies we need ARB_DIS -- cgit v0.10.2 From 3eb8057bbafc64dbf09d5c18513aa80c1b7f2fcb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 21 Jan 2009 21:30:23 -0800 Subject: sparc64: Move generic PCR support code to seperate file. It all lives in the oprofile support code currently and we will need to share this stuff with NMI watchdog and perf_counter support. Signed-off-by: David S. Miller diff --git a/arch/sparc/include/asm/pcr.h b/arch/sparc/include/asm/pcr.h new file mode 100644 index 0000000..4249bb5 --- /dev/null +++ b/arch/sparc/include/asm/pcr.h @@ -0,0 +1,30 @@ +#ifndef __PCR_H +#define __PCR_H + +struct pcr_ops { + u64 (*read)(void); + void (*write)(u64); +}; +extern const struct pcr_ops *pcr_ops; + +extern void deferred_pcr_work_irq(int irq, struct pt_regs *regs); +extern void schedule_deferred_pcr_work(void); + +#define PCR_PIC_PRIV 0x00000001 /* PIC access is privileged */ +#define PCR_STRACE 0x00000002 /* Trace supervisor events */ +#define PCR_UTRACE 0x00000004 /* Trace user events */ +#define PCR_N2_HTRACE 0x00000008 /* Trace hypervisor events */ +#define PCR_N2_TOE_OV0 0x00000010 /* Trap if PIC 0 overflows */ +#define PCR_N2_TOE_OV1 0x00000020 /* Trap if PIC 1 overflows */ +#define PCR_N2_MASK0 0x00003fc0 +#define PCR_N2_MASK0_SHIFT 6 +#define PCR_N2_SL0 0x0003c000 +#define PCR_N2_SL0_SHIFT 14 +#define PCR_N2_OV0 0x00040000 +#define PCR_N2_MASK1 0x07f80000 +#define PCR_N2_MASK1_SHIFT 19 +#define PCR_N2_SL1 0x78000000 +#define PCR_N2_SL1_SHIFT 27 +#define PCR_N2_OV1 0x80000000 + +#endif /* __PCR_H */ diff --git a/arch/sparc/include/asm/pil.h b/arch/sparc/include/asm/pil.h index d573820..32a7efe 100644 --- a/arch/sparc/include/asm/pil.h +++ b/arch/sparc/include/asm/pil.h @@ -23,6 +23,7 @@ #define PIL_SMP_CTX_NEW_VERSION 4 #define PIL_DEVICE_IRQ 5 #define PIL_SMP_CALL_FUNC_SNGL 6 +#define PIL_DEFERRED_PCR_WORK 7 #define PIL_NORMAL_MAX 14 #define PIL_NMI 15 diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 53adcaa..cb182d9 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_SPARC64) += visemul.o obj-$(CONFIG_SPARC64) += hvapi.o obj-$(CONFIG_SPARC64) += sstate.o obj-$(CONFIG_SPARC64) += mdesc.o +obj-$(CONFIG_SPARC64) += pcr.o # sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation obj-$(CONFIG_SPARC32) += devres.o diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c new file mode 100644 index 0000000..c4f2470 --- /dev/null +++ b/arch/sparc/kernel/pcr.c @@ -0,0 +1,140 @@ +/* pcr.c: Generic sparc64 performance counter infrastructure. + * + * Copyright (C) 2009 David S. Miller (davem@davemloft.net) + */ +#include +#include +#include +#include + +#include +#include + +/* This code is shared between various users of the performance + * counters. Users will be oprofile, pseudo-NMI watchdog, and the + * perf_counter support layer. + */ + +/* Performance counter interrupts run unmasked at PIL level 15. + * Therefore we can't do things like wakeups and other work + * that expects IRQ disabling to be adhered to in locking etc. + * + * Therefore in such situations we defer the work by signalling + * a lower level cpu IRQ. + */ +void deferred_pcr_work_irq(int irq, struct pt_regs *regs) +{ + clear_softint(1 << PIL_DEFERRED_PCR_WORK); +} + +void schedule_deferred_pcr_work(void) +{ + set_softint(1 << PIL_DEFERRED_PCR_WORK); +} + +const struct pcr_ops *pcr_ops; +EXPORT_SYMBOL_GPL(pcr_ops); + +static u64 direct_pcr_read(void) +{ + u64 val; + + read_pcr(val); + return val; +} + +static void direct_pcr_write(u64 val) +{ + write_pcr(val); +} + +static const struct pcr_ops direct_pcr_ops = { + .read = direct_pcr_read, + .write = direct_pcr_write, +}; + +static void n2_pcr_write(u64 val) +{ + unsigned long ret; + + ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); + if (val != HV_EOK) + write_pcr(val); +} + +static const struct pcr_ops n2_pcr_ops = { + .read = direct_pcr_read, + .write = n2_pcr_write, +}; + +static unsigned long perf_hsvc_group; +static unsigned long perf_hsvc_major; +static unsigned long perf_hsvc_minor; + +static int __init register_perf_hsvc(void) +{ + if (tlb_type == hypervisor) { + switch (sun4v_chip_type) { + case SUN4V_CHIP_NIAGARA1: + perf_hsvc_group = HV_GRP_NIAG_PERF; + break; + + case SUN4V_CHIP_NIAGARA2: + perf_hsvc_group = HV_GRP_N2_CPU; + break; + + default: + return -ENODEV; + } + + + perf_hsvc_major = 1; + perf_hsvc_minor = 0; + if (sun4v_hvapi_register(perf_hsvc_group, + perf_hsvc_major, + &perf_hsvc_minor)) { + printk("perfmon: Could not register hvapi.\n"); + return -ENODEV; + } + } + return 0; +} + +static void __init unregister_perf_hsvc(void) +{ + if (tlb_type != hypervisor) + return; + sun4v_hvapi_unregister(perf_hsvc_group); +} + +int __init pcr_arch_init(void) +{ + int err = register_perf_hsvc(); + + if (err) + return err; + + switch (tlb_type) { + case hypervisor: + pcr_ops = &n2_pcr_ops; + break; + + case spitfire: + case cheetah: + case cheetah_plus: + pcr_ops = &direct_pcr_ops; + break; + + default: + err = -ENODEV; + goto out_unregister; + } + + return 0; + +out_unregister: + unregister_perf_hsvc(); + return err; +} + +arch_initcall(pcr_arch_init); diff --git a/arch/sparc/kernel/ttable.S b/arch/sparc/kernel/ttable.S index ea92550..d9bdfb9 100644 --- a/arch/sparc/kernel/ttable.S +++ b/arch/sparc/kernel/ttable.S @@ -63,7 +63,8 @@ tl0_irq6: TRAP_IRQ(smp_call_function_single_client, 6) #else tl0_irq6: BTRAP(0x46) #endif -tl0_irq7: BTRAP(0x47) BTRAP(0x48) BTRAP(0x49) +tl0_irq7: TRAP_IRQ(deferred_pcr_work_irq, 7) +tl0_irq8: BTRAP(0x48) BTRAP(0x49) tl0_irq10: BTRAP(0x4a) BTRAP(0x4b) BTRAP(0x4c) BTRAP(0x4d) tl0_irq14: TRAP_IRQ(timer_interrupt, 14) tl0_irq15: TRAP_NMI_IRQ(perfctr_irq, 15) diff --git a/arch/sparc/oprofile/init.c b/arch/sparc/oprofile/init.c index d6e170c..c8877a5 100644 --- a/arch/sparc/oprofile/init.c +++ b/arch/sparc/oprofile/init.c @@ -17,47 +17,10 @@ #include #include #include +#include static int nmi_enabled; -struct pcr_ops { - u64 (*read)(void); - void (*write)(u64); -}; -static const struct pcr_ops *pcr_ops; - -static u64 direct_pcr_read(void) -{ - u64 val; - - read_pcr(val); - return val; -} - -static void direct_pcr_write(u64 val) -{ - write_pcr(val); -} - -static const struct pcr_ops direct_pcr_ops = { - .read = direct_pcr_read, - .write = direct_pcr_write, -}; - -static void n2_pcr_write(u64 val) -{ - unsigned long ret; - - ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); - if (val != HV_EOK) - write_pcr(val); -} - -static const struct pcr_ops n2_pcr_ops = { - .read = direct_pcr_read, - .write = n2_pcr_write, -}; - /* In order to commonize as much of the implementation as * possible, we use PICH as our counter. Mostly this is * to accomodate Niagara-1 which can only count insn cycles @@ -70,30 +33,13 @@ static u64 picl_value(void) return ((u64)((0 - delta) & 0xffffffff)) << 32; } -#define PCR_PIC_PRIV 0x00000001 /* PIC access is privileged */ -#define PCR_STRACE 0x00000002 /* Trace supervisor events */ -#define PCR_UTRACE 0x00000004 /* Trace user events */ -#define PCR_N2_HTRACE 0x00000008 /* Trace hypervisor events */ -#define PCR_N2_TOE_OV0 0x00000010 /* Trap if PIC 0 overflows */ -#define PCR_N2_TOE_OV1 0x00000020 /* Trap if PIC 1 overflows */ -#define PCR_N2_MASK0 0x00003fc0 -#define PCR_N2_MASK0_SHIFT 6 -#define PCR_N2_SL0 0x0003c000 -#define PCR_N2_SL0_SHIFT 14 -#define PCR_N2_OV0 0x00040000 -#define PCR_N2_MASK1 0x07f80000 -#define PCR_N2_MASK1_SHIFT 19 -#define PCR_N2_SL1 0x78000000 -#define PCR_N2_SL1_SHIFT 27 -#define PCR_N2_OV1 0x80000000 - #define PCR_SUN4U_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE) #define PCR_N2_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | \ PCR_N2_TOE_OV1 | \ (2 << PCR_N2_SL1_SHIFT) | \ (0xff << PCR_N2_MASK1_SHIFT)) -static u64 pcr_enable = PCR_SUN4U_ENABLE; +static u64 pcr_enable; static void nmi_handler(struct pt_regs *regs) { @@ -153,62 +99,16 @@ static void nmi_stop(void) synchronize_sched(); } -static unsigned long perf_hsvc_group; -static unsigned long perf_hsvc_major; -static unsigned long perf_hsvc_minor; - -static int __init register_perf_hsvc(void) -{ - if (tlb_type == hypervisor) { - switch (sun4v_chip_type) { - case SUN4V_CHIP_NIAGARA1: - perf_hsvc_group = HV_GRP_NIAG_PERF; - break; - - case SUN4V_CHIP_NIAGARA2: - perf_hsvc_group = HV_GRP_N2_CPU; - break; - - default: - return -ENODEV; - } - - - perf_hsvc_major = 1; - perf_hsvc_minor = 0; - if (sun4v_hvapi_register(perf_hsvc_group, - perf_hsvc_major, - &perf_hsvc_minor)) { - printk("perfmon: Could not register N2 hvapi.\n"); - return -ENODEV; - } - } - return 0; -} - -static void unregister_perf_hsvc(void) -{ - if (tlb_type != hypervisor) - return; - sun4v_hvapi_unregister(perf_hsvc_group); -} - static int oprofile_nmi_init(struct oprofile_operations *ops) { - int err = register_perf_hsvc(); - - if (err) - return err; - switch (tlb_type) { case hypervisor: - pcr_ops = &n2_pcr_ops; pcr_enable = PCR_N2_ENABLE; break; case cheetah: case cheetah_plus: - pcr_ops = &direct_pcr_ops; + pcr_enable = PCR_SUN4U_ENABLE; break; default: @@ -241,10 +141,6 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) return ret; } - void oprofile_arch_exit(void) { -#ifdef CONFIG_SPARC64 - unregister_perf_hsvc(); -#endif } -- cgit v0.10.2 From c3cf5e8cc56d272f828a66610bb78bbb727b2ce1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 21 Jan 2009 23:16:40 -0800 Subject: sparc: Probe PMU type and record in sparc_pmu_type. Signed-off-by: David S. Miller diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 32d32b4..d85c3dc 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -26,6 +26,7 @@ EXPORT_PER_CPU_SYMBOL(__cpu_data); struct cpu_info { int psr_vers; const char *name; + const char *pmu_name; }; struct fpu_info { @@ -45,6 +46,9 @@ struct manufacturer_info { #define CPU(ver, _name) \ { .psr_vers = ver, .name = _name } +#define CPU_PMU(ver, _name, _pmu_name) \ +{ .psr_vers = ver, .name = _name, .pmu_name = _pmu_name } + #define FPU(ver, _name) \ { .fp_vers = ver, .name = _name } @@ -183,10 +187,10 @@ static const struct manufacturer_info __initconst manufacturer_info[] = { },{ 0x17, .cpu_info = { - CPU(0x10, "TI UltraSparc I (SpitFire)"), - CPU(0x11, "TI UltraSparc II (BlackBird)"), - CPU(0x12, "TI UltraSparc IIi (Sabre)"), - CPU(0x13, "TI UltraSparc IIe (Hummingbird)"), + CPU_PMU(0x10, "TI UltraSparc I (SpitFire)", "ultra12"), + CPU_PMU(0x11, "TI UltraSparc II (BlackBird)", "ultra12"), + CPU_PMU(0x12, "TI UltraSparc IIi (Sabre)", "ultra12"), + CPU_PMU(0x13, "TI UltraSparc IIe (Hummingbird)", "ultra12"), CPU(-1, NULL) }, .fpu_info = { @@ -199,7 +203,7 @@ static const struct manufacturer_info __initconst manufacturer_info[] = { },{ 0x22, .cpu_info = { - CPU(0x10, "TI UltraSparc I (SpitFire)"), + CPU_PMU(0x10, "TI UltraSparc I (SpitFire)", "ultra12"), CPU(-1, NULL) }, .fpu_info = { @@ -209,12 +213,12 @@ static const struct manufacturer_info __initconst manufacturer_info[] = { },{ 0x3e, .cpu_info = { - CPU(0x14, "TI UltraSparc III (Cheetah)"), - CPU(0x15, "TI UltraSparc III+ (Cheetah+)"), - CPU(0x16, "TI UltraSparc IIIi (Jalapeno)"), - CPU(0x18, "TI UltraSparc IV (Jaguar)"), - CPU(0x19, "TI UltraSparc IV+ (Panther)"), - CPU(0x22, "TI UltraSparc IIIi+ (Serrano)"), + CPU_PMU(0x14, "TI UltraSparc III (Cheetah)", "ultra3"), + CPU_PMU(0x15, "TI UltraSparc III+ (Cheetah+)", "ultra3+"), + CPU_PMU(0x16, "TI UltraSparc IIIi (Jalapeno)", "ultra3i"), + CPU_PMU(0x18, "TI UltraSparc IV (Jaguar)", "ultra3+"), + CPU_PMU(0x19, "TI UltraSparc IV+ (Panther)", "ultra4+"), + CPU_PMU(0x22, "TI UltraSparc IIIi+ (Serrano)", "ultra3i"), CPU(-1, NULL) }, .fpu_info = { @@ -234,6 +238,7 @@ static const struct manufacturer_info __initconst manufacturer_info[] = { const char *sparc_cpu_type; const char *sparc_fpu_type; +const char *sparc_pmu_type; unsigned int fsr_storage; @@ -244,6 +249,7 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) sparc_cpu_type = NULL; sparc_fpu_type = NULL; + sparc_pmu_type = NULL; manuf = NULL; for (i = 0; i < ARRAY_SIZE(manufacturer_info); i++) @@ -263,6 +269,7 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) { if (cpu->psr_vers == psr_vers) { sparc_cpu_type = cpu->name; + sparc_pmu_type = cpu->pmu_name; sparc_fpu_type = "No FPU"; break; } @@ -290,6 +297,8 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers) psr_impl, fpu_vers); sparc_fpu_type = "Unknown FPU"; } + if (sparc_pmu_type == NULL) + sparc_pmu_type = "Unknown PMU"; } #ifdef CONFIG_SPARC32 @@ -315,11 +324,13 @@ static void __init sun4v_cpu_probe(void) case SUN4V_CHIP_NIAGARA1: sparc_cpu_type = "UltraSparc T1 (Niagara)"; sparc_fpu_type = "UltraSparc T1 integrated FPU"; + sparc_pmu_type = "niagara"; break; case SUN4V_CHIP_NIAGARA2: sparc_cpu_type = "UltraSparc T2 (Niagara2)"; sparc_fpu_type = "UltraSparc T2 integrated FPU"; + sparc_pmu_type = "niagara2"; break; default: diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index 81a972e..15d8a3f 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -5,6 +5,7 @@ /* cpu.c */ extern const char *sparc_cpu_type; +extern const char *sparc_pmu_type; extern const char *sparc_fpu_type; extern unsigned int fsr_storage; diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 49d061f..f2bcfd2 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -354,6 +354,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) seq_printf(m, "cpu\t\t: %s\n" "fpu\t\t: %s\n" + "pmu\t\t: %s\n" "prom\t\t: %s\n" "type\t\t: %s\n" "ncpus probed\t: %d\n" @@ -366,6 +367,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) , sparc_cpu_type, sparc_fpu_type, + sparc_pmu_type, prom_version, ((tlb_type == hypervisor) ? "sun4v" : -- cgit v0.10.2 From 334d99072d2b740fa3e0d14e1ac1126805dda2ef Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 22 Jan 2009 00:18:05 +0000 Subject: sh: mach-migor: Enable ov772x and tw9910 in defconfig. This patch updates the Migo-R defconfig to include ov772x camera and tw9910 video driver Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig index 7758263..6785767 100644 --- a/arch/sh/configs/migor_defconfig +++ b/arch/sh/configs/migor_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.28 -# Fri Jan 9 17:09:35 2009 +# Linux kernel version: 2.6.29-rc1 +# Thu Jan 22 09:16:16 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y @@ -45,8 +45,12 @@ CONFIG_SYSVIPC_SYSCTL=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CGROUPS is not set # CONFIG_GROUP_SCHED is not set + +# +# Control Group support +# +# CONFIG_CGROUPS is not set CONFIG_SYSFS_DEPRECATED=y CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_RELAY is not set @@ -389,6 +393,7 @@ CONFIG_WIRELESS_EXT=y CONFIG_WIRELESS_EXT_SYSFS=y # CONFIG_LIB80211 is not set # CONFIG_MAC80211 is not set +# CONFIG_WIMAX is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -411,6 +416,7 @@ CONFIG_MTD=y # CONFIG_MTD_DEBUG is not set CONFIG_MTD_CONCAT=y CONFIG_MTD_PARTITIONS=y +# CONFIG_MTD_TESTS is not set # CONFIG_MTD_REDBOOT_PARTS is not set CONFIG_MTD_CMDLINE_PARTS=y # CONFIG_MTD_AR7_PARTS is not set @@ -458,9 +464,7 @@ CONFIG_MTD_CFI_UTIL=y # # CONFIG_MTD_COMPLEX_MAPPINGS is not set CONFIG_MTD_PHYSMAP=y -CONFIG_MTD_PHYSMAP_START=0xffffffff -CONFIG_MTD_PHYSMAP_LEN=0 -CONFIG_MTD_PHYSMAP_BANKWIDTH=0 +# CONFIG_MTD_PHYSMAP_COMPAT is not set # CONFIG_MTD_PLATRAM is not set # @@ -488,6 +492,12 @@ CONFIG_MTD_NAND_PLATFORM=y # CONFIG_MTD_ONENAND is not set # +# LPDDR flash memory drivers +# +# CONFIG_MTD_LPDDR is not set +# CONFIG_MTD_QINFO_PROBE is not set + +# # UBI - Unsorted block images # # CONFIG_MTD_UBI is not set @@ -587,6 +597,10 @@ CONFIG_SMC91X=y # CONFIG_WLAN_PRE80211 is not set # CONFIG_WLAN_80211 is not set # CONFIG_IWLWIFI_LEDS is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# # CONFIG_WAN is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set @@ -761,6 +775,7 @@ CONFIG_SSB_POSSIBLE=y # CONFIG_PMIC_DA903X is not set # CONFIG_MFD_WM8400 is not set # CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_PCF50633 is not set # CONFIG_REGULATOR is not set # @@ -806,9 +821,9 @@ CONFIG_SOC_CAMERA=y # CONFIG_SOC_CAMERA_MT9M111 is not set # CONFIG_SOC_CAMERA_MT9T031 is not set # CONFIG_SOC_CAMERA_MT9V022 is not set -# CONFIG_SOC_CAMERA_TW9910 is not set -CONFIG_SOC_CAMERA_PLATFORM=y -# CONFIG_SOC_CAMERA_OV772X is not set +CONFIG_SOC_CAMERA_TW9910=y +# CONFIG_SOC_CAMERA_PLATFORM is not set +CONFIG_SOC_CAMERA_OV772X=y CONFIG_VIDEO_SH_MOBILE_CEU=y # CONFIG_RADIO_ADAPTERS is not set # CONFIG_DAB is not set @@ -866,11 +881,13 @@ CONFIG_USB_GADGET_SELECTED=y # CONFIG_USB_GADGET_PXA25X is not set # CONFIG_USB_GADGET_PXA27X is not set # CONFIG_USB_GADGET_S3C2410 is not set +# CONFIG_USB_GADGET_IMX is not set CONFIG_USB_GADGET_M66592=y CONFIG_USB_M66592=y CONFIG_SUPERH_BUILT_IN_M66592=y # CONFIG_USB_GADGET_AMD5536UDC is not set # CONFIG_USB_GADGET_FSL_QE is not set +# CONFIG_USB_GADGET_CI13XXX is not set # CONFIG_USB_GADGET_NET2280 is not set # CONFIG_USB_GADGET_GOKU is not set # CONFIG_USB_GADGET_DUMMY_HCD is not set @@ -883,6 +900,11 @@ CONFIG_USB_G_SERIAL=y # CONFIG_USB_MIDI_GADGET is not set # CONFIG_USB_G_PRINTER is not set # CONFIG_USB_CDC_COMPOSITE is not set + +# +# OTG and related infrastructure +# +# CONFIG_USB_GPIO_VBUS is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set # CONFIG_NEW_LEDS is not set @@ -961,6 +983,7 @@ CONFIG_UIO_PDRV_GENIRQ=y CONFIG_FILE_LOCKING=y # CONFIG_XFS_FS is not set # CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY is not set # CONFIG_QUOTA is not set @@ -1004,6 +1027,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_EFS_FS is not set # CONFIG_JFFS2_FS is not set # CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set -- cgit v0.10.2 From 86746284e20411c66e25bddac391ba821560522b Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 22 Jan 2009 00:33:21 +0000 Subject: sh: ap325rxa: control camera power toggling. Signed-off-by: Kuninori Morimoto Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c index caf4c33..72da416 100644 --- a/arch/sh/boards/board-ap325rxa.c +++ b/arch/sh/boards/board-ap325rxa.c @@ -216,6 +216,12 @@ static struct platform_device lcdc_device = { }, }; +static void camera_power(int val) +{ + gpio_set_value(GPIO_PTZ5, val); /* RST_CAM/RSTB */ + mdelay(10); +} + #ifdef CONFIG_I2C static unsigned char camera_ncm03j_magic[] = { @@ -245,9 +251,11 @@ static int camera_set_capture(struct soc_camera_platform_info *info, int ret = 0; int i; + camera_power(0); if (!enable) return 0; /* no disable for now */ + camera_power(1); for (i = 0; i < ARRAY_SIZE(camera_ncm03j_magic); i += 2) { u_int8_t buf[8]; @@ -426,7 +434,7 @@ static int __init ap325rxa_devices_setup(void) gpio_request(GPIO_PTZ6, NULL); gpio_direction_output(GPIO_PTZ6, 0); /* STBY_CAM */ gpio_request(GPIO_PTZ5, NULL); - gpio_direction_output(GPIO_PTZ5, 1); /* RST_CAM */ + gpio_direction_output(GPIO_PTZ5, 0); /* RST_CAM */ gpio_request(GPIO_PTZ4, NULL); gpio_direction_output(GPIO_PTZ4, 0); /* SADDR */ -- cgit v0.10.2 From 7a65d245fa32d937409e2fc30bb24a85488090de Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 22 Jan 2009 00:38:31 +0000 Subject: sh: ap325rxa: Add ov772x support. This patch add ov772x camera settings to ap325, Old camera is still supported. And it will be 2nd camera if you select ov772x and soc_camera_platform in same time. Signed-off-by: Kuninori Morimoto Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c index 72da416..7c35787 100644 --- a/arch/sh/boards/board-ap325rxa.c +++ b/arch/sh/boards/board-ap325rxa.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include