From 9fd097b14918875bd6f125ed699d7bbbba5893ee Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 21 Apr 2011 21:32:55 +0200 Subject: block: unexport DISK_EVENT_MEDIA_CHANGE for legacy/fringe drivers In-kernel disk event polling doesn't matter for legacy/fringe drivers and may lead to infinite event loop if ->check_events() implementation generates events on level condition instead of edge. Now that block layer supports suppressing exporting unlisted events, simply leaving disk->events cleared allows these drivers to keep the internal revalidation behavior intact while avoiding weird interactions with userland event handler. Signed-off-by: Tejun Heo Cc: Kay Sievers Signed-off-by: Jens Axboe diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 8066d08..e086fbb 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -2547,7 +2547,6 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller) disk->major = MajorNumber; disk->first_minor = n << DAC960_MaxPartitionsBits; disk->fops = &DAC960_BlockDeviceOperations; - disk->events = DISK_EVENT_MEDIA_CHANGE; } /* Indicate the Block Device Registration completed successfully, diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 456c0cc..8eba86b 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1736,7 +1736,6 @@ static int __init fd_probe_drives(void) disk->major = FLOPPY_MAJOR; disk->first_minor = drive; disk->fops = &floppy_fops; - disk->events = DISK_EVENT_MEDIA_CHANGE; sprintf(disk->disk_name, "fd%d", drive); disk->private_data = &unit[drive]; set_capacity(disk, 880*2); diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index c871eae..ede16c6 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -1964,7 +1964,6 @@ static int __init atari_floppy_init (void) unit[i].disk->first_minor = i; sprintf(unit[i].disk->disk_name, "fd%d", i); unit[i].disk->fops = &floppy_fops; - unit[i].disk->events = DISK_EVENT_MEDIA_CHANGE; 
unit[i].disk->private_data = &unit[i]; unit[i].disk->queue = blk_init_queue(do_fd_request, &ataflop_lock); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 301d7a9..db8f885 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4205,7 +4205,6 @@ static int __init floppy_init(void) disks[dr]->major = FLOPPY_MAJOR; disks[dr]->first_minor = TOMINOR(dr); disks[dr]->fops = &floppy_fops; - disks[dr]->events = DISK_EVENT_MEDIA_CHANGE; sprintf(disks[dr]->disk_name, "fd%d", dr); init_timer(&motor_off_timer[dr]); diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 2f2ccf6..8690e31 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -320,7 +320,6 @@ static void pcd_init_units(void) disk->first_minor = unit; strcpy(disk->disk_name, cd->name); /* umm... */ disk->fops = &pcd_bdops; - disk->events = DISK_EVENT_MEDIA_CHANGE; } } diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 21dfdb7..869e767 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -837,7 +837,6 @@ static void pd_probe_drive(struct pd_unit *disk) p->fops = &pd_fops; p->major = major; p->first_minor = (disk - pd) << PD_BITS; - p->events = DISK_EVENT_MEDIA_CHANGE; disk->gd = p; p->private_data = disk; p->queue = pd_queue; diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 7adeb1e..f21b520 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -294,7 +294,6 @@ static void __init pf_init_units(void) disk->first_minor = unit; strcpy(disk->disk_name, pf->name); disk->fops = &pf_fops; - disk->events = DISK_EVENT_MEDIA_CHANGE; if (!(*drives[unit])[D_PRT]) pf_drive_count++; } diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 24a482f..fd5adcd 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -858,7 +858,6 @@ static int __devinit swim_floppy_init(struct swim_priv *swd) swd->unit[drive].disk->first_minor = drive; 
sprintf(swd->unit[drive].disk->disk_name, "fd%d", drive); swd->unit[drive].disk->fops = &floppy_fops; - swd->unit[drive].disk->events = DISK_EVENT_MEDIA_CHANGE; swd->unit[drive].disk->private_data = &swd->unit[drive]; swd->unit[drive].disk->queue = swd->queue; set_capacity(swd->unit[drive].disk, 2880); diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 4c10f56..773bfa7 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1163,7 +1163,6 @@ static int __devinit swim3_attach(struct macio_dev *mdev, const struct of_device disk->major = FLOPPY_MAJOR; disk->first_minor = i; disk->fops = &floppy_fops; - disk->events = DISK_EVENT_MEDIA_CHANGE; disk->private_data = &floppy_states[i]; disk->queue = swim3_queue; disk->flags |= GENHD_FL_REMOVABLE; diff --git a/drivers/block/ub.c b/drivers/block/ub.c index 68b9430..0e376d4 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -2334,7 +2334,6 @@ static int ub_probe_lun(struct ub_dev *sc, int lnum) disk->major = UB_MAJOR; disk->first_minor = lun->id * UB_PARTS_PER_LUN; disk->fops = &ub_bd_fops; - disk->events = DISK_EVENT_MEDIA_CHANGE; disk->private_data = lun; disk->driverfs_dev = &sc->intf->dev; diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 645ff76..6c7fd7d 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -1005,7 +1005,6 @@ static int __devinit ace_setup(struct ace_device *ace) ace->gd->major = ace_major; ace->gd->first_minor = ace->id * ACE_NUM_MINORS; ace->gd->fops = &ace_fops; - ace->gd->events = DISK_EVENT_MEDIA_CHANGE; ace->gd->queue = ace->queue; ace->gd->private_data = ace; snprintf(ace->gd->disk_name, 32, "xs%c", ace->id + 'a'); diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index b2b034f..3ceaf00 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -803,7 +803,6 @@ static int __devinit probe_gdrom(struct platform_device *devptr) goto probe_fail_cdrom_register; } gd.disk->fops = &gdrom_bdops; - gd.disk->events = 
DISK_EVENT_MEDIA_CHANGE; /* latch on to the interrupt */ err = gdrom_set_interrupt_handlers(); if (err) diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index 4e874c5..e427fbe 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -626,7 +626,6 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id) gendisk->queue = q; gendisk->fops = &viocd_fops; gendisk->flags = GENHD_FL_CD|GENHD_FL_REMOVABLE; - gendisk->events = DISK_EVENT_MEDIA_CHANGE; set_capacity(gendisk, 0); gendisk->private_data = d; d->viocd_disk = gendisk; diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 643ad52..4796bbf 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -1000,7 +1000,6 @@ static struct i2o_block_device *i2o_block_device_alloc(void) gd->major = I2O_MAJOR; gd->queue = queue; gd->fops = &i2o_block_fops; - gd->events = DISK_EVENT_MEDIA_CHANGE; gd->private_data = dev; dev->gd = gd; diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c index 83cea9a..1b3924c 100644 --- a/drivers/s390/char/tape_block.c +++ b/drivers/s390/char/tape_block.c @@ -236,7 +236,6 @@ tapeblock_setup_device(struct tape_device * device) disk->major = tapeblock_major; disk->first_minor = device->first_minor; disk->fops = &tapeblock_fops; - disk->events = DISK_EVENT_MEDIA_CHANGE; disk->private_data = tape_get_device(device); disk->queue = blkdat->request_queue; set_capacity(disk, 0); -- cgit v0.10.2 From 505d9147a72d4e14323af9581dde066bd5fc439c Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Thu, 21 Apr 2011 15:37:20 -0700 Subject: sparc32: fix section mismatch warnings in apc, pmc and time_32 In all cases there were a struct of_device_id variable defined __initdata. But it was referenced from struct platform_driver.of_match_table which is not guaranteed to be used during init only. So drop the __initdata annotation. 
This fixes following warnings: WARNING: arch/sparc/kernel/built-in.o(.data+0x810): Section mismatch in reference from the variable clock_driver to the variable .init.data:clock_match The variable clock_driver references the variable __initdata clock_match If the reference is valid then annotate the variable with __init* or __refdata (see linux/init.h) or name the variable: *_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console WARNING: arch/sparc/kernel/built-in.o(.data+0xcec): Section mismatch in reference from the variable apc_driver to the variable .init.data:apc_match The variable apc_driver references the variable __initdata apc_match If the reference is valid then annotate the variable with __init* or __refdata (see linux/init.h) or name the variable: *_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console WARNING: arch/sparc/kernel/built-in.o(.data+0xd60): Section mismatch in reference from the variable pmc_driver to the variable .init.data:pmc_match The variable pmc_driver references the variable __initdata pmc_match If the reference is valid then annotate the variable with __init* or __refdata (see linux/init.h) or name the variable: *_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console Signed-off-by: Sam Ravnborg Signed-off-by: David S. 
Miller diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c index f679c57..1e34f29 100644 --- a/arch/sparc/kernel/apc.c +++ b/arch/sparc/kernel/apc.c @@ -165,7 +165,7 @@ static int __devinit apc_probe(struct platform_device *op) return 0; } -static struct of_device_id __initdata apc_match[] = { +static struct of_device_id apc_match[] = { { .name = APC_OBPNAME, }, diff --git a/arch/sparc/kernel/pmc.c b/arch/sparc/kernel/pmc.c index 93d7b44..6a585d3 100644 --- a/arch/sparc/kernel/pmc.c +++ b/arch/sparc/kernel/pmc.c @@ -69,7 +69,7 @@ static int __devinit pmc_probe(struct platform_device *op) return 0; } -static struct of_device_id __initdata pmc_match[] = { +static struct of_device_id pmc_match[] = { { .name = PMC_OBPNAME, }, diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 4e23639..96046a4 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -168,7 +168,7 @@ static int __devinit clock_probe(struct platform_device *op) return 0; } -static struct of_device_id __initdata clock_match[] = { +static struct of_device_id clock_match[] = { { .name = "eeprom", }, -- cgit v0.10.2 From f486b3dc2d048e7309a733f97eb9f9f83d586df2 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Thu, 21 Apr 2011 16:35:46 -0700 Subject: sparc32: fix sparcstation 5 boot The sparcstation 5 I have available has no MID property for the CPU. This resulted in a panic when booting a SMP kernel on this box. The assigned field in cpu_data is never used, so if we fail to read the MID property then inform user and continue booting. Signed-off-by: Sam Ravnborg Signed-off-by: David S. 
Miller diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 91c10fb..850a136 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -53,6 +53,7 @@ cpumask_t smp_commenced_mask = CPU_MASK_NONE; void __cpuinit smp_store_cpu_info(int id) { int cpu_node; + int mid; cpu_data(id).udelay_val = loops_per_jiffy; @@ -60,10 +61,13 @@ void __cpuinit smp_store_cpu_info(int id) cpu_data(id).clock_tick = prom_getintdefault(cpu_node, "clock-frequency", 0); cpu_data(id).prom_node = cpu_node; - cpu_data(id).mid = cpu_get_hwmid(cpu_node); + mid = cpu_get_hwmid(cpu_node); - if (cpu_data(id).mid < 0) - panic("No MID found for CPU%d at node 0x%08d", id, cpu_node); + if (mid < 0) { + printk(KERN_NOTICE "No MID found for CPU%d at node 0x%08d", id, cpu_node); + mid = 0; + } + cpu_data(id).mid = mid; } void __init smp_cpus_done(unsigned int max_cpus) -- cgit v0.10.2 From bf2253a6f00e8fea5b026e471e9f0d0a1b3621f2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 29 Apr 2011 10:15:14 +0200 Subject: cdrom: always check_disk_change() on open cdrom_open() called check_disk_change() after the rest of open path succeeded which leads to the following bizarre behavior. * After media change, if the device is opened without O_NONBLOCK, open_for_data() naturally fails with -ENOMEDIUM and check_disk_change() is never called. The media is known to be gone and the open failure makes it obvious to the userland but device invalidation never happens. * But if the device is opened with O_NONBLOCK, all the checks are bypassed and cdrom_open() doesn't notice that the media is not there and check_disk_change() is called and invalidation happens. There's nothing to be gained by avoiding calling check_disk_change() on open failure. Common cases end up calling check_disk_change() anyway. All we get is inconsistent behavior. Fix it by moving check_disk_change() invocation to the top of cdrom_open() so that it always gets called regardless of how the rest of open proceeds.
Stable: 2.6.38 Signed-off-by: Tejun Heo Reported-by: Amit Shah Tested-by: Amit Shah Cc: stable@kernel.org Signed-off-by: Jens Axboe diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 514dd8e..75fb965 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -986,6 +986,9 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, fmode_t cdinfo(CD_OPEN, "entering cdrom_open\n"); + /* open is event synchronization point, check events first */ + check_disk_change(bdev); + /* if this was a O_NONBLOCK open and we should honor the flags, * do a quick open without drive/disc integrity checks. */ cdi->use_count++; @@ -1012,9 +1015,6 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, fmode_t cdinfo(CD_OPEN, "Use count for \"/dev/%s\" now %d\n", cdi->name, cdi->use_count); - /* Do this on open. Don't wait for mount, because they might - not be mounting, but opening with O_NONBLOCK */ - check_disk_change(bdev); return 0; err_release: if (CDROM_CAN(CDC_LOCK) && cdi->options & CDO_LOCK) { -- cgit v0.10.2 From 02e352287a40bd456eb78df705bf888bc3161d3f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 29 Apr 2011 10:15:20 +0200 Subject: block: rescan partitions on invalidated devices on -ENOMEDIA too __blkdev_get() doesn't rescan partitions if disk->fops->open() fails, which leads to ghost partition devices lingering after medium removal is known to both the kernel and userland. The behavior also creates a subtle inconsistency where O_NONBLOCK open, which doesn't fail even if there's no medium, clears the ghost partitions, which is exploited to work around the problem from userland. Fix it by updating __blkdev_get() to issue partition rescan after -ENOMEDIUM too. This was reported in the following bz.
https://bugzilla.kernel.org/show_bug.cgi?id=13029 Stable: 2.6.38 Signed-off-by: Tejun Heo Reported-by: David Zeuthen Reported-by: Martin Pitt Reported-by: Kay Sievers Tested-by: Kay Sievers Cc: Alan Cox Cc: stable@kernel.org Signed-off-by: Jens Axboe diff --git a/fs/block_dev.c b/fs/block_dev.c index 5147bdd..257b00e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1102,6 +1102,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (!bdev->bd_part) goto out_clear; + ret = 0; if (disk->fops->open) { ret = disk->fops->open(bdev, mode); if (ret == -ERESTARTSYS) { @@ -1118,9 +1119,18 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) put_disk(disk); goto restart; } - if (ret) - goto out_clear; } + /* + * If the device is invalidated, rescan partition + * if open succeeded or failed with -ENOMEDIUM. + * The latter is necessary to prevent ghost + * partitions on a removed medium. + */ + if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) + rescan_partitions(disk, bdev); + if (ret) + goto out_clear; + if (!bdev->bd_openers) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); bdi = blk_get_backing_dev_info(bdev); @@ -1128,8 +1138,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdi = &default_backing_dev_info; bdev_inode_switch_bdi(bdev->bd_inode, bdi); } - if (bdev->bd_invalidated) - rescan_partitions(disk, bdev); } else { struct block_device *whole; whole = bdget_disk(disk, 0); @@ -1153,13 +1161,14 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) } } else { if (bdev->bd_contains == bdev) { - if (bdev->bd_disk->fops->open) { + ret = 0; + if (bdev->bd_disk->fops->open) ret = bdev->bd_disk->fops->open(bdev, mode); - if (ret) - goto out_unlock_bdev; - } - if (bdev->bd_invalidated) + /* the same as first opener case, read comment there */ + if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) rescan_partitions(bdev->bd_disk, bdev); + 
if (ret) + goto out_unlock_bdev; } /* only one opener holds refs to the module and disk */ module_put(disk->fops->owner); -- cgit v0.10.2 From 005967a1df80980acb47c72d758ec05059105492 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sat, 30 Apr 2011 22:28:20 +0200 Subject: ASoC: JZ4740: Fix i2s shutdown The i2s shutdown callback has the check whether it should be disabled reversed. Currently it is disabled if another stream is still active, but kept enabled if the last stream is closed. This patch fixes it. Signed-off-by: Lars-Peter Clausen Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/jz4740/jz4740-i2s.c b/sound/soc/jz4740/jz4740-i2s.c index 419bf4f..cd22a54 100644 --- a/sound/soc/jz4740/jz4740-i2s.c +++ b/sound/soc/jz4740/jz4740-i2s.c @@ -133,7 +133,7 @@ static void jz4740_i2s_shutdown(struct snd_pcm_substream *substream, struct jz4740_i2s *i2s = snd_soc_dai_get_drvdata(dai); uint32_t conf; - if (!dai->active) + if (dai->active) return; conf = jz4740_i2s_read(i2s, JZ_REG_AIC_CONF); -- cgit v0.10.2 From ed77cc122a8402db8f9c3492649aa0c3fee7b385 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 3 May 2011 18:25:34 +0100 Subject: ASoC: Don't crash on PM operations The move over to exposing snd_soc_register_card() let the initialisation of the driver data we use to find the card in PM operations go AWOL. Fix this by setting the driver data when we register the card. 
Signed-off-by: Mark Brown Acked-by: Liam Girdwood diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index d8562ce..dd55d10 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3291,6 +3291,8 @@ int snd_soc_register_card(struct snd_soc_card *card) if (!card->name || !card->dev) return -EINVAL; + dev_set_drvdata(card->dev, card); + snd_soc_initialize_card_lists(card); soc_init_card_debugfs(card); -- cgit v0.10.2 From 9ab88434e8b5ffc5a638b5b1d3b9a67dceb28e5d Mon Sep 17 00:00:00 2001 From: xingchao Date: Wed, 27 Apr 2011 16:58:54 -0400 Subject: ASoC: sst_platform: add hw_free callback to fix resource leak Signed-off-by: xingchao Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/mid-x86/sst_platform.c b/sound/soc/mid-x86/sst_platform.c index d567c32..6b1f9d3 100644 --- a/sound/soc/mid-x86/sst_platform.c +++ b/sound/soc/mid-x86/sst_platform.c @@ -376,6 +376,11 @@ static int sst_platform_pcm_hw_params(struct snd_pcm_substream *substream, return 0; } +static int sst_platform_pcm_hw_free(struct snd_pcm_substream *substream) +{ + return snd_pcm_lib_free_pages(substream); +} + static struct snd_pcm_ops sst_platform_ops = { .open = sst_platform_open, .close = sst_platform_close, @@ -384,6 +389,7 @@ static struct snd_pcm_ops sst_platform_ops = { .trigger = sst_platform_pcm_trigger, .pointer = sst_platform_pcm_pointer, .hw_params = sst_platform_pcm_hw_params, + .hw_free = sst_platform_pcm_hw_free, }; static void sst_pcm_free(struct snd_pcm *pcm) -- cgit v0.10.2 From b7534f002d3c81d18abfbf57179d07d3ec763bb5 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sat, 30 Apr 2011 10:34:05 -0300 Subject: [media] v4l: Release module if subdev registration fails If v4l2_device_register_subdev() fails, the reference to the subdev module taken by the function isn't released. Fix this. 
Signed-off-by: Laurent Pinchart Cc: stable@kernel.org Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/video/v4l2-device.c b/drivers/media/video/v4l2-device.c index 5aeaf87..4aae501 100644 --- a/drivers/media/video/v4l2-device.c +++ b/drivers/media/video/v4l2-device.c @@ -155,8 +155,10 @@ int v4l2_device_register_subdev(struct v4l2_device *v4l2_dev, sd->v4l2_dev = v4l2_dev; if (sd->internal_ops && sd->internal_ops->registered) { err = sd->internal_ops->registered(sd); - if (err) + if (err) { + module_put(sd->owner); return err; + } } /* This just returns 0 if either of the two args is NULL */ @@ -164,6 +166,7 @@ int v4l2_device_register_subdev(struct v4l2_device *v4l2_dev, if (err) { if (sd->internal_ops && sd->internal_ops->unregistered) sd->internal_ops->unregistered(sd); + module_put(sd->owner); return err; } -- cgit v0.10.2 From 2a164d02dd34c6b49a3f0995900e0f8af102b804 Mon Sep 17 00:00:00 2001 From: Lawrence Rust Date: Fri, 8 Apr 2011 09:50:45 -0300 Subject: [media] Fix cx88 remote control input In the IR interrupt handler of cx88-input.c there's a 32-bit multiply overflow which causes IR pulse durations to be incorrectly calculated. This is a regression caused by commit 2997137be8eba. Cc: stable@kernel.org Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/video/cx88/cx88-input.c b/drivers/media/video/cx88/cx88-input.c index c820e2f..3f44200 100644 --- a/drivers/media/video/cx88/cx88-input.c +++ b/drivers/media/video/cx88/cx88-input.c @@ -524,7 +524,7 @@ void cx88_ir_irq(struct cx88_core *core) for (todo = 32; todo > 0; todo -= bits) { ev.pulse = samples & 0x80000000 ? false : true; bits = min(todo, 32U - fls(ev.pulse ? 
samples : ~samples)); - ev.duration = (bits * NSEC_PER_SEC) / (1000 * ir_samplerate); + ev.duration = (bits * (NSEC_PER_SEC / 1000)) / ir_samplerate; ir_raw_event_store_with_filter(ir->dev, &ev); samples <<= bits; } -- cgit v0.10.2 From 0b84834a5a9f5fe8f3760560ef8c5b1536d22bd1 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 2 May 2011 08:09:25 -0300 Subject: [media] v4l2-subdev: fix broken subdev control enumeration The v4l2_subdev_* functions are meant for older V4L2 drivers that do not use the control framework yet. These functions should not be used by subdev_do_ioctl. Most of those backwards compatibility functions are just stubs, but commit 87a0c94ce616b231f3c0bd09d7dbd39d43b0557a actually changed the behavior of v4l2_subdev_queryctrl, so calling that one from subdev_do_ioctl broke the control enumeration in subdev nodes. The fix is simply not to use those compatibility functions in v4l2-subdev.c. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/video/v4l2-subdev.c b/drivers/media/video/v4l2-subdev.c index 0b80644..812729e 100644 --- a/drivers/media/video/v4l2-subdev.c +++ b/drivers/media/video/v4l2-subdev.c @@ -155,25 +155,25 @@ static long subdev_do_ioctl(struct file *file, unsigned int cmd, void *arg) switch (cmd) { case VIDIOC_QUERYCTRL: - return v4l2_subdev_queryctrl(sd, arg); + return v4l2_queryctrl(sd->ctrl_handler, arg); case VIDIOC_QUERYMENU: - return v4l2_subdev_querymenu(sd, arg); + return v4l2_querymenu(sd->ctrl_handler, arg); case VIDIOC_G_CTRL: - return v4l2_subdev_g_ctrl(sd, arg); + return v4l2_g_ctrl(sd->ctrl_handler, arg); case VIDIOC_S_CTRL: - return v4l2_subdev_s_ctrl(sd, arg); + return v4l2_s_ctrl(sd->ctrl_handler, arg); case VIDIOC_G_EXT_CTRLS: - return v4l2_subdev_g_ext_ctrls(sd, arg); + return v4l2_g_ext_ctrls(sd->ctrl_handler, arg); case VIDIOC_S_EXT_CTRLS: - return v4l2_subdev_s_ext_ctrls(sd, arg); + return v4l2_s_ext_ctrls(sd->ctrl_handler, arg); case VIDIOC_TRY_EXT_CTRLS: - return 
v4l2_subdev_try_ext_ctrls(sd, arg); + return v4l2_try_ext_ctrls(sd->ctrl_handler, arg); case VIDIOC_DQEVENT: if (!(sd->flags & V4L2_SUBDEV_FL_HAS_EVENTS)) -- cgit v0.10.2 From dca6b6d18fa4428c4539e45f7a43040e388ab99e Mon Sep 17 00:00:00 2001 From: Sergio Aguirre Date: Mon, 11 Apr 2011 13:14:33 -0300 Subject: [media] V4L: soc-camera: regression fix: calculate .sizeimage in soc_camera.c A recent patch has given individual soc-camera host drivers a possibility to calculate .sizeimage and .bytesperline pixel format fields internally; however, some drivers relied on the core calculating these values for them, following a default algorithm. This patch restores the default calculation for such drivers. Based on initial patch by Guennadi Liakhovetski, found here: http://www.spinics.net/lists/linux-media/msg31282.html Except that this covers try_fmt as well. Signed-off-by: Sergio Aguirre Signed-off-by: Guennadi Liakhovetski Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/video/soc_camera.c b/drivers/media/video/soc_camera.c index 3973f9a..ddb4c09 100644 --- a/drivers/media/video/soc_camera.c +++ b/drivers/media/video/soc_camera.c @@ -136,11 +136,50 @@ unsigned long soc_camera_apply_sensor_flags(struct soc_camera_link *icl, } EXPORT_SYMBOL(soc_camera_apply_sensor_flags); +#define pixfmtstr(x) (x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, \ + ((x) >> 24) & 0xff + +static int soc_camera_try_fmt(struct soc_camera_device *icd, + struct v4l2_format *f) +{ + struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent); + struct v4l2_pix_format *pix = &f->fmt.pix; + int ret; + + dev_dbg(&icd->dev, "TRY_FMT(%c%c%c%c, %ux%u)\n", + pixfmtstr(pix->pixelformat), pix->width, pix->height); + + pix->bytesperline = 0; + pix->sizeimage = 0; + + ret = ici->ops->try_fmt(icd, f); + if (ret < 0) + return ret; + + if (!pix->sizeimage) { + if (!pix->bytesperline) { + const struct soc_camera_format_xlate *xlate; + + xlate = soc_camera_xlate_by_fourcc(icd, pix->pixelformat);
+ if (!xlate) + return -EINVAL; + + ret = soc_mbus_bytes_per_line(pix->width, + xlate->host_fmt); + if (ret > 0) + pix->bytesperline = ret; + } + if (pix->bytesperline) + pix->sizeimage = pix->bytesperline * pix->height; + } + + return 0; +} + static int soc_camera_try_fmt_vid_cap(struct file *file, void *priv, struct v4l2_format *f) { struct soc_camera_device *icd = file->private_data; - struct soc_camera_host *ici = to_soc_camera_host(icd->dev.parent); WARN_ON(priv != file->private_data); @@ -149,7 +188,7 @@ static int soc_camera_try_fmt_vid_cap(struct file *file, void *priv, return -EINVAL; /* limit format to hardware capabilities */ - return ici->ops->try_fmt(icd, f); + return soc_camera_try_fmt(icd, f); } static int soc_camera_enum_input(struct file *file, void *priv, @@ -362,9 +401,6 @@ static void soc_camera_free_user_formats(struct soc_camera_device *icd) icd->user_formats = NULL; } -#define pixfmtstr(x) (x) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, \ - ((x) >> 24) & 0xff - /* Called with .vb_lock held, or from the first open(2), see comment there */ static int soc_camera_set_fmt(struct soc_camera_device *icd, struct v4l2_format *f) @@ -377,7 +413,7 @@ static int soc_camera_set_fmt(struct soc_camera_device *icd, pixfmtstr(pix->pixelformat), pix->width, pix->height); /* We always call try_fmt() before set_fmt() or set_crop() */ - ret = ici->ops->try_fmt(icd, f); + ret = soc_camera_try_fmt(icd, f); if (ret < 0) return ret; -- cgit v0.10.2 From e05b2efb82596905ebfe88e8612ee81dec9b6592 Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 4 May 2011 18:16:50 -0700 Subject: clocksource: Install completely before selecting Christian Hoffmann reported that the command line clocksource override with acpi_pm timer fails: Kernel command line: clocksource=acpi_pm hpet clockevent registered Switching to clocksource hpet Override clocksource acpi_pm is not HRT compatible. Cannot switch while in HRT/NOHZ mode. 
The watchdog code is what enables CLOCK_SOURCE_VALID_FOR_HRES, but we actually end up selecting the clocksource before we enqueue it into the watchdog list, so that's why we see the warning and fail to switch to acpi_pm timer as requested. That's particularly bad when we want to debug timekeeping related problems in early boot. Put the selection call last. Reported-by: Christian Hoffmann Signed-off-by: John Stultz Cc: stable@kernel.org # 32... Link: http://lkml.kernel.org/r/%3C1304558210.2943.24.camel%40work-vm%3E Signed-off-by: Thomas Gleixner diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 6519cf6..0e17c10 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -685,8 +685,8 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) /* Add clocksource to the clcoksource list */ mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); - clocksource_select(); clocksource_enqueue_watchdog(cs); + clocksource_select(); mutex_unlock(&clocksource_mutex); return 0; } @@ -706,8 +706,8 @@ int clocksource_register(struct clocksource *cs) mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); - clocksource_select(); clocksource_enqueue_watchdog(cs); + clocksource_select(); mutex_unlock(&clocksource_mutex); return 0; } -- cgit v0.10.2 From 1dbe7dada2d03d1313183d439068f1f951a91244 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 5 May 2011 08:40:14 -0700 Subject: Input: ads7846 - make transfer buffers DMA safe req.sample needs its own cacheline otherwise accessing req.msg fetches it in again. Note: This effect doesn't occur if the underlying SPI driver doesn't use DMA at all. 
Signed-off-by: Alexander Stein Acked-by: Jonathan Cameron Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index c24946f..ab51a8d 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -281,17 +281,25 @@ struct ser_req { u8 command; u8 ref_off; u16 scratch; - __be16 sample; struct spi_message msg; struct spi_transfer xfer[6]; + /* + * DMA (thus cache coherency maintenance) requires the + * transfer buffers to live in their own cache lines. + */ + __be16 sample ____cacheline_aligned; }; struct ads7845_ser_req { u8 command[3]; u8 pwrdown[3]; - u8 sample[3]; struct spi_message msg; struct spi_transfer xfer[2]; + /* + * DMA (thus cache coherency maintenance) requires the + * transfer buffers to live in their own cache lines. + */ + u8 sample[3] ____cacheline_aligned; }; static int ads7846_read12_ser(struct device *dev, unsigned command) -- cgit v0.10.2 From 28350e330cfab46b60a1dbf763b678d859f9f3d9 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 5 May 2011 08:40:46 -0700 Subject: Input: ads7846 - remove unused variable from struct ads7845_ser_req Signed-off-by: Alexander Stein Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index ab51a8d..1de1c19 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -292,7 +292,6 @@ struct ser_req { struct ads7845_ser_req { u8 command[3]; - u8 pwrdown[3]; struct spi_message msg; struct spi_transfer xfer[2]; /* -- cgit v0.10.2 From 5f54c8a00af20e5cf38c3e5ef2f59b6848a17cd9 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 4 May 2011 17:31:27 +0200 Subject: rtc: mxc: Initialize drvdata before registering device Commit f44f7f96a20 ("RTC: Initialize kernel state from RTC") uncovered an issue in a number of RTC drivers, where the drivers call rtc_device_register before initializing the device or platform drvdata. 
This frequently results in null pointer dereferences when the rtc_device_register immediately makes use of the rtc device, calling rtc_read_alarm. The solution is to ensure the drvdata is initialized prior to registering the rtc device. CC: Alessandro Zummo CC: Thomas Gleixner CC: rtc-linux@googlegroups.com Signed-off-by: Wolfram Sang [fixed up commit log -jstultz] Signed-off-by: John Stultz diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index 826ab64..d814417 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -418,14 +418,6 @@ static int __init mxc_rtc_probe(struct platform_device *pdev) goto exit_put_clk; } - rtc = rtc_device_register(pdev->name, &pdev->dev, &mxc_rtc_ops, - THIS_MODULE); - if (IS_ERR(rtc)) { - ret = PTR_ERR(rtc); - goto exit_put_clk; - } - - pdata->rtc = rtc; platform_set_drvdata(pdev, pdata); /* Configure and enable the RTC */ @@ -438,8 +430,19 @@ static int __init mxc_rtc_probe(struct platform_device *pdev) pdata->irq = -1; } + rtc = rtc_device_register(pdev->name, &pdev->dev, &mxc_rtc_ops, + THIS_MODULE); + if (IS_ERR(rtc)) { + ret = PTR_ERR(rtc); + goto exit_clr_drvdata; + } + + pdata->rtc = rtc; + return 0; +exit_clr_drvdata: + platform_set_drvdata(pdev, NULL); exit_put_clk: clk_disable(pdata->clk); clk_put(pdata->clk); -- cgit v0.10.2 From f4e708ae8e5f3eb98f4c53036c0a470717bbc709 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 5 May 2011 11:46:14 +0200 Subject: rtc: davinci: Initialize drvdata before registering device Commit f44f7f96a20 ("RTC: Initialize kernel state from RTC") uncovered an issue in a number of RTC drivers, where the drivers call rtc_device_register before initializing the device or platform drvdata. This frequently results in null pointer dereferences when the rtc_device_register immediately makes use of the rtc device, calling rtc_read_alarm. The solution is to ensure the drvdata is initialized prior to registering the rtc device. 
CC: Alessandro Zummo CC: Thomas Gleixner CC: rtc-linux@googlegroups.com Signed-off-by: Wolfram Sang [fixed up commit log -jstultz] Signed-off-by: John Stultz diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c index 8d46838..755e1fe 100644 --- a/drivers/rtc/rtc-davinci.c +++ b/drivers/rtc/rtc-davinci.c @@ -524,6 +524,8 @@ static int __init davinci_rtc_probe(struct platform_device *pdev) goto fail2; } + platform_set_drvdata(pdev, davinci_rtc); + davinci_rtc->rtc = rtc_device_register(pdev->name, &pdev->dev, &davinci_rtc_ops, THIS_MODULE); if (IS_ERR(davinci_rtc->rtc)) { @@ -553,8 +555,6 @@ static int __init davinci_rtc_probe(struct platform_device *pdev) rtcss_write(davinci_rtc, PRTCSS_RTC_CCTRL_CAEN, PRTCSS_RTC_CCTRL); - platform_set_drvdata(pdev, davinci_rtc); - device_init_wakeup(&pdev->dev, 0); return 0; @@ -562,6 +562,7 @@ static int __init davinci_rtc_probe(struct platform_device *pdev) fail4: rtc_device_unregister(davinci_rtc->rtc); fail3: + platform_set_drvdata(pdev, NULL); iounmap(davinci_rtc->base); fail2: release_mem_region(davinci_rtc->pbase, davinci_rtc->base_size); -- cgit v0.10.2 From 92d921c5def1a7b1411bc54859c0771b2cf2c08d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 5 May 2011 11:46:15 +0200 Subject: rtc: ep93xx: Initialize drvdata before registering device Commit f44f7f96a20 ("RTC: Initialize kernel state from RTC") uncovered an issue in a number of RTC drivers, where the drivers call rtc_device_register before initializing the device or platform drvdata. This frequently results in null pointer dereferences when the rtc_device_register immediately makes use of the rtc device, calling rtc_read_alarm. The solution is to ensure the drvdata is initialized prior to registering the rtc device. 
CC: Alessandro Zummo CC: Thomas Gleixner CC: rtc-linux@googlegroups.com Signed-off-by: Wolfram Sang [Fixed up commit log -jstultz] Signed-off-by: John Stultz diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c index 11ae64d..335551d 100644 --- a/drivers/rtc/rtc-ep93xx.c +++ b/drivers/rtc/rtc-ep93xx.c @@ -151,6 +151,7 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev) return -ENXIO; pdev->dev.platform_data = ep93xx_rtc; + platform_set_drvdata(pdev, rtc); rtc = rtc_device_register(pdev->name, &pdev->dev, &ep93xx_rtc_ops, THIS_MODULE); @@ -159,8 +160,6 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev) goto exit; } - platform_set_drvdata(pdev, rtc); - err = sysfs_create_group(&pdev->dev.kobj, &ep93xx_rtc_sysfs_files); if (err) goto fail; @@ -168,9 +167,9 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev) return 0; fail: - platform_set_drvdata(pdev, NULL); rtc_device_unregister(rtc); exit: + platform_set_drvdata(pdev, NULL); pdev->dev.platform_data = NULL; return err; } -- cgit v0.10.2 From 9a281a677c1dbf25943b5bc3225de21fcb4945ae Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 6 May 2011 17:21:12 -0700 Subject: rtc: ds1286: Initialize drvdata before registering device Commit f44f7f96a20 ("RTC: Initialize kernel state from RTC") uncovered an issue in a number of RTC drivers, where the drivers call rtc_device_register before initializing the device or platform drvdata. This frequently results in null pointer dereferences when the rtc_device_register immediately makes use of the rtc device, calling rtc_read_alarm. The solution is to ensure the drvdata is initialized prior to registering the rtc device. 
CC: Wolfram Sang CC: Alessandro Zummo CC: Thomas Gleixner CC: rtc-linux@googlegroups.com Signed-off-by: John Stultz diff --git a/drivers/rtc/rtc-ds1286.c b/drivers/rtc/rtc-ds1286.c index 60ce696..47e681d 100644 --- a/drivers/rtc/rtc-ds1286.c +++ b/drivers/rtc/rtc-ds1286.c @@ -355,6 +355,7 @@ static int __devinit ds1286_probe(struct platform_device *pdev) goto out; } spin_lock_init(&priv->lock); + platform_set_drvdata(pdev, priv); rtc = rtc_device_register("ds1286", &pdev->dev, &ds1286_ops, THIS_MODULE); if (IS_ERR(rtc)) { @@ -362,7 +363,6 @@ static int __devinit ds1286_probe(struct platform_device *pdev) goto out; } priv->rtc = rtc; - platform_set_drvdata(pdev, priv); return 0; out: -- cgit v0.10.2 From a015dbc110a97ed3147546a9c914f18f71d798d0 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 6 May 2011 17:24:27 -0700 Subject: rtc: m41t80: Initialize clientdata before registering device Commit f44f7f96a20 ("RTC: Initialize kernel state from RTC") uncovered an issue in a number of RTC drivers, where the drivers call rtc_device_register before initializing the clientdata. This frequently results in null pointer dereferences when the rtc_device_register immediately makes use of the rtc device, calling rtc_read_alarm. The solution is to ensure the clientdata is initialized prior to registering the rtc device. 
CC: Wolfram Sang CC: Alessandro Zummo CC: Thomas Gleixner CC: rtc-linux@googlegroups.com Signed-off-by: John Stultz diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index 69fe664..eda128f 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -783,6 +783,9 @@ static int m41t80_probe(struct i2c_client *client, goto exit; } + clientdata->features = id->driver_data; + i2c_set_clientdata(client, clientdata); + rtc = rtc_device_register(client->name, &client->dev, &m41t80_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { @@ -792,8 +795,6 @@ static int m41t80_probe(struct i2c_client *client, } clientdata->rtc = rtc; - clientdata->features = id->driver_data; - i2c_set_clientdata(client, clientdata); /* Make sure HT (Halt Update) bit is cleared */ rc = i2c_smbus_read_byte_data(client, M41T80_REG_ALARM_HOUR); -- cgit v0.10.2 From 2f5c4fe8f9811152d69ef5cd020e095a1f84ca65 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 6 May 2011 17:26:25 -0700 Subject: rtc: max8925: Initialize drvdata before registering device Commit f44f7f96a20 ("RTC: Initialize kernel state from RTC") uncovered an issue in a number of RTC drivers, where the drivers call rtc_device_register before initializing the device or platform drvdata. This frequently results in null pointer dereferences when the rtc_device_register immediately makes use of the rtc device, calling rtc_read_alarm. The solution is to ensure the drvdata is initialized prior to registering the rtc device. CC: Wolfram Sang CC: Alessandro Zummo CC: Thomas Gleixner CC: rtc-linux@googlegroups.com Signed-off-by: John Stultz diff --git a/drivers/rtc/rtc-max8925.c b/drivers/rtc/rtc-max8925.c index 20494b5..3bc046f 100644 --- a/drivers/rtc/rtc-max8925.c +++ b/drivers/rtc/rtc-max8925.c @@ -258,6 +258,8 @@ static int __devinit max8925_rtc_probe(struct platform_device *pdev) } dev_set_drvdata(&pdev->dev, info); + /* XXX - isn't this redundant? 
*/ + platform_set_drvdata(pdev, info); info->rtc_dev = rtc_device_register("max8925-rtc", &pdev->dev, &max8925_rtc_ops, THIS_MODULE); @@ -267,10 +269,9 @@ static int __devinit max8925_rtc_probe(struct platform_device *pdev) goto out_rtc; } - platform_set_drvdata(pdev, info); - return 0; out_rtc: + platform_set_drvdata(pdev, NULL); free_irq(chip->irq_base + MAX8925_IRQ_RTC_ALARM0, info); out_irq: kfree(info); -- cgit v0.10.2 From 03cf7c477de8cb47658ba93f33dc93242985acff Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 6 May 2011 17:27:07 -0700 Subject: rtc: max8998: Initialize drvdata before registering device Commit f44f7f96a20 ("RTC: Initialize kernel state from RTC") uncovered an issue in a number of RTC drivers, where the drivers call rtc_device_register before initializing the device or platform drvdata. This frequently results in null pointer dereferences when the rtc_device_register immediately makes use of the rtc device, calling rtc_read_alarm. The solution is to ensure the drvdata is initialized prior to registering the rtc device. 
CC: Wolfram Sang CC: Alessandro Zummo CC: Thomas Gleixner CC: rtc-linux@googlegroups.com Signed-off-by: John Stultz diff --git a/drivers/rtc/rtc-max8998.c b/drivers/rtc/rtc-max8998.c index 3f7bc6b..2e48aa6 100644 --- a/drivers/rtc/rtc-max8998.c +++ b/drivers/rtc/rtc-max8998.c @@ -265,6 +265,8 @@ static int __devinit max8998_rtc_probe(struct platform_device *pdev) info->rtc = max8998->rtc; info->irq = max8998->irq_base + MAX8998_IRQ_ALARM0; + platform_set_drvdata(pdev, info); + info->rtc_dev = rtc_device_register("max8998-rtc", &pdev->dev, &max8998_rtc_ops, THIS_MODULE); @@ -274,8 +276,6 @@ static int __devinit max8998_rtc_probe(struct platform_device *pdev) goto out_rtc; } - platform_set_drvdata(pdev, info); - ret = request_threaded_irq(info->irq, NULL, max8998_rtc_alarm_irq, 0, "rtc-alarm0", info); @@ -293,6 +293,7 @@ static int __devinit max8998_rtc_probe(struct platform_device *pdev) return 0; out_rtc: + platform_set_drvdata(pdev, NULL); kfree(info); return ret; } -- cgit v0.10.2 From 93015236d92bf9ea746c0b10c3c1d9058cb11f82 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 6 May 2011 17:28:36 -0700 Subject: rtc: msm6242: Initialize drvdata before registering device Commit f44f7f96a20 ("RTC: Initialize kernel state from RTC") uncovered an issue in a number of RTC drivers, where the drivers call rtc_device_register before initializing the device or platform drvdata. This frequently results in null pointer dereferences when the rtc_device_register immediately makes use of the rtc device, calling rtc_read_alarm. The solution is to ensure the drvdata is initialized prior to registering the rtc device. 
CC: Wolfram Sang CC: Alessandro Zummo CC: Thomas Gleixner CC: rtc-linux@googlegroups.com Signed-off-by: John Stultz diff --git a/drivers/rtc/rtc-msm6242.c b/drivers/rtc/rtc-msm6242.c index 6782062..fcb113c 100644 --- a/drivers/rtc/rtc-msm6242.c +++ b/drivers/rtc/rtc-msm6242.c @@ -214,6 +214,7 @@ static int __init msm6242_rtc_probe(struct platform_device *dev) error = -ENOMEM; goto out_free_priv; } + platform_set_drvdata(dev, priv); rtc = rtc_device_register("rtc-msm6242", &dev->dev, &msm6242_rtc_ops, THIS_MODULE); @@ -223,10 +224,10 @@ static int __init msm6242_rtc_probe(struct platform_device *dev) } priv->rtc = rtc; - platform_set_drvdata(dev, priv); return 0; out_unmap: + platform_set_drvdata(dev, NULL); iounmap(priv->regs); out_free_priv: kfree(priv); -- cgit v0.10.2 From 4b3687f9c18156cdb71729fe4e0c3000f7e4d7de Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 6 May 2011 17:30:57 -0700 Subject: rtc: pcap: Initialize drvdata before registering device Commit f44f7f96a20 ("RTC: Initialize kernel state from RTC") uncovered an issue in a number of RTC drivers, where the drivers call rtc_device_register before initializing the device or platform drvdata. This frequently results in null pointer dereferences when the rtc_device_register immediately makes use of the rtc device, calling rtc_read_alarm. The solution is to ensure the drvdata is initialized prior to registering the rtc device. 
CC: Wolfram Sang CC: Alessandro Zummo CC: Thomas Gleixner CC: rtc-linux@googlegroups.com Signed-off-by: John Stultz diff --git a/drivers/rtc/rtc-pcap.c b/drivers/rtc/rtc-pcap.c index a633abc..cd4f198 100644 --- a/drivers/rtc/rtc-pcap.c +++ b/drivers/rtc/rtc-pcap.c @@ -151,6 +151,8 @@ static int __devinit pcap_rtc_probe(struct platform_device *pdev) pcap_rtc->pcap = dev_get_drvdata(pdev->dev.parent); + platform_set_drvdata(pdev, pcap_rtc); + pcap_rtc->rtc = rtc_device_register("pcap", &pdev->dev, &pcap_rtc_ops, THIS_MODULE); if (IS_ERR(pcap_rtc->rtc)) { @@ -158,7 +160,6 @@ static int __devinit pcap_rtc_probe(struct platform_device *pdev) goto fail_rtc; } - platform_set_drvdata(pdev, pcap_rtc); timer_irq = pcap_to_irq(pcap_rtc->pcap, PCAP_IRQ_1HZ); alarm_irq = pcap_to_irq(pcap_rtc->pcap, PCAP_IRQ_TODA); @@ -177,6 +178,7 @@ fail_alarm: fail_timer: rtc_device_unregister(pcap_rtc->rtc); fail_rtc: + platform_set_drvdata(pdev, NULL); kfree(pcap_rtc); return err; } -- cgit v0.10.2 From 130107b270f9a8ef1b50e02140a381c44a6abd68 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 6 May 2011 17:31:20 -0700 Subject: rtc: rp5c01: Initialize drvdata before registering device Commit f44f7f96a20 ("RTC: Initialize kernel state from RTC") uncovered an issue in a number of RTC drivers, where the drivers call rtc_device_register before initializing the device or platform drvdata. This frequently results in null pointer dereferences when the rtc_device_register immediately makes use of the rtc device, calling rtc_read_alarm. The solution is to ensure the drvdata is initialized prior to registering the rtc device. 
CC: Wolfram Sang CC: Alessandro Zummo CC: Thomas Gleixner CC: rtc-linux@googlegroups.com Signed-off-by: John Stultz diff --git a/drivers/rtc/rtc-rp5c01.c b/drivers/rtc/rtc-rp5c01.c index 694da39..359da6d 100644 --- a/drivers/rtc/rtc-rp5c01.c +++ b/drivers/rtc/rtc-rp5c01.c @@ -249,15 +249,15 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev) spin_lock_init(&priv->lock); + platform_set_drvdata(dev, priv); + rtc = rtc_device_register("rtc-rp5c01", &dev->dev, &rp5c01_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { error = PTR_ERR(rtc); goto out_unmap; } - priv->rtc = rtc; - platform_set_drvdata(dev, priv); error = sysfs_create_bin_file(&dev->dev.kobj, &priv->nvram_attr); if (error) @@ -268,6 +268,7 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev) out_unregister: rtc_device_unregister(rtc); out_unmap: + platform_set_drvdata(dev, NULL); iounmap(priv->regs); out_free_priv: kfree(priv); -- cgit v0.10.2 From 156229b352b999cafb86a21b50912975e39b7f44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 6 May 2011 11:57:47 +0200 Subject: rtc: mc13xxx: Don't call rtc_device_register while holding lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit f44f7f9 (RTC: Initialize kernel state from RTC) rtc_device_register reads the programmed alarm. As reading the alarm needs to take the mc13xxx lock, release it before calling rtc_device_register. This fixes a deadlock during boot: INFO: task swapper:1 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 
swapper D c02b175c 0 1 0 0x00000000 [] (schedule+0x304/0x4f4) from [] (__mutex_lock_slowpath+0x7c/0x110) [] (__mutex_lock_slowpath+0x7c/0x110) from [] (mc13xxx_rtc_read_time+0x1c/0x118) [] (mc13xxx_rtc_read_time+0x1c/0x118) from [] (__rtc_read_time+0x58/0x5c) [] (__rtc_read_time+0x58/0x5c) from [] (rtc_read_time+0x30/0x48) [] (rtc_read_time+0x30/0x48) from [] (__rtc_read_alarm+0x1c/0x290) [] (__rtc_read_alarm+0x1c/0x290) from [] (rtc_device_register+0x150/0x27c) [] (rtc_device_register+0x150/0x27c) from [] (mc13xxx_rtc_probe+0x128/0x17c) [] (mc13xxx_rtc_probe+0x128/0x17c) from [] (platform_drv_probe+0x1c/0x24) [] (platform_drv_probe+0x1c/0x24) from [] (driver_probe_device+0x80/0x1a8) [] (driver_probe_device+0x80/0x1a8) from [] (__driver_attach+0x8c/0x90) [] (__driver_attach+0x8c/0x90) from [] (bus_for_each_dev+0x60/0x8c) [] (bus_for_each_dev+0x60/0x8c) from [] (bus_add_driver+0x180/0x248) [] (bus_add_driver+0x180/0x248) from [] (driver_register+0x70/0x15c) [] (driver_register+0x70/0x15c) from [] (platform_driver_probe+0x18/0x98) [] (platform_driver_probe+0x18/0x98) from [] (do_one_initcall+0x2c/0x168) [] (do_one_initcall+0x2c/0x168) from [] (kernel_init+0xa0/0x150) [] (kernel_init+0xa0/0x150) from [] (kernel_thread_exit+0x0/0x8) Reported-by: Vagrant Cascadian Signed-off-by: Uwe Kleine-König Closes: http://bugs.debian.org/625804 [Tweaked commit log -jstultz] Signed-off-by: John Stultz diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c index c5ac037..a1a278b 100644 --- a/drivers/rtc/rtc-mc13xxx.c +++ b/drivers/rtc/rtc-mc13xxx.c @@ -349,11 +349,15 @@ static int __devinit mc13xxx_rtc_probe(struct platform_device *pdev) if (ret) goto err_alarm_irq_request; + mc13xxx_unlock(mc13xxx); + priv->rtc = rtc_device_register(pdev->name, &pdev->dev, &mc13xxx_rtc_ops, THIS_MODULE); if (IS_ERR(priv->rtc)) { ret = PTR_ERR(priv->rtc); + mc13xxx_lock(mc13xxx); + mc13xxx_irq_free(mc13xxx, MC13XXX_IRQ_TODA, priv); err_alarm_irq_request: @@ -365,12 +369,12 @@ 
err_reset_irq_status: mc13xxx_irq_free(mc13xxx, MC13XXX_IRQ_RTCRST, priv); err_reset_irq_request: + mc13xxx_unlock(mc13xxx); + platform_set_drvdata(pdev, NULL); kfree(priv); } - mc13xxx_unlock(mc13xxx); - return ret; } -- cgit v0.10.2 From 04b894553fd6e6fd7439e8440fd6bf5b6a17d9ae Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 5 May 2011 16:59:12 +0200 Subject: ASoC: SSM2602: Properly annotate i2c probe and remove functions Annotate the i2c probe and remove functions with __devinit and __devexit. Signed-off-by: Lars-Peter Clausen Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c index 2727bef..f7c1ce5 100644 --- a/sound/soc/codecs/ssm2602.c +++ b/sound/soc/codecs/ssm2602.c @@ -614,7 +614,7 @@ static struct snd_soc_codec_driver soc_codec_dev_ssm2602 = { * low = 0x1a * high = 0x1b */ -static int ssm2602_i2c_probe(struct i2c_client *i2c, +static int __devinit ssm2602_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { struct ssm2602_priv *ssm2602; @@ -635,7 +635,7 @@ static int ssm2602_i2c_probe(struct i2c_client *i2c, return ret; } -static int ssm2602_i2c_remove(struct i2c_client *client) +static int __devexit ssm2602_i2c_remove(struct i2c_client *client) { snd_soc_unregister_codec(&client->dev); kfree(i2c_get_clientdata(client)); @@ -655,7 +655,7 @@ static struct i2c_driver ssm2602_i2c_driver = { .owner = THIS_MODULE, }, .probe = ssm2602_i2c_probe, - .remove = ssm2602_i2c_remove, + .remove = __devexit_p(ssm2602_i2c_remove), .id_table = ssm2602_i2c_id, }; #endif -- cgit v0.10.2 From 36c90ab33feabbd63da775bd92ad356e5bd5cf56 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 5 May 2011 16:59:16 +0200 Subject: ASoC: SSM2602: Fix 'Mic Boost2' control The 'Mic Boost2' control's shift was off by one and thus was not working. 
Signed-off-by: Lars-Peter Clausen Acked-by: Liam Girdwood Signed-off-by: Mark Brown Cc: stable@kernel.org diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c index f7c1ce5..946797d 100644 --- a/sound/soc/codecs/ssm2602.c +++ b/sound/soc/codecs/ssm2602.c @@ -139,7 +139,7 @@ SOC_DOUBLE_R("Capture Volume", SSM2602_LINVOL, SSM2602_RINVOL, 0, 31, 0), SOC_DOUBLE_R("Capture Switch", SSM2602_LINVOL, SSM2602_RINVOL, 7, 1, 1), SOC_SINGLE("Mic Boost (+20dB)", SSM2602_APANA, 0, 1, 0), -SOC_SINGLE("Mic Boost2 (+20dB)", SSM2602_APANA, 7, 1, 0), +SOC_SINGLE("Mic Boost2 (+20dB)", SSM2602_APANA, 8, 1, 0), SOC_SINGLE("Mic Switch", SSM2602_APANA, 1, 1, 1), SOC_SINGLE("Sidetone Playback Volume", SSM2602_APANA, 6, 3, 1), -- cgit v0.10.2 From 8fc63fe9412634c72676db42649f357eaac04566 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 5 May 2011 16:59:14 +0200 Subject: ASoC: SSM2602: Fix reg_cache_size reg_cache_size is supposed to be the number of elements in the register cache, not the size in bytes. Signed-off-by: Lars-Peter Clausen Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c index 946797d..b04d280 100644 --- a/sound/soc/codecs/ssm2602.c +++ b/sound/soc/codecs/ssm2602.c @@ -602,7 +602,7 @@ static struct snd_soc_codec_driver soc_codec_dev_ssm2602 = { .read = ssm2602_read_reg_cache, .write = ssm2602_write, .set_bias_level = ssm2602_set_bias_level, - .reg_cache_size = sizeof(ssm2602_reg), + .reg_cache_size = ARRAY_SIZE(ssm2602_reg), .reg_word_size = sizeof(u16), .reg_cache_default = ssm2602_reg, }; -- cgit v0.10.2 From bf707de21fec7bb203dace2d0a2bbd124d1b36ca Mon Sep 17 00:00:00 2001 From: Marek Belisko Date: Tue, 3 May 2011 14:46:32 +0200 Subject: ASoC: UDA134x: Remove POWER_OFF_ON_STANDBY define. The POWER_OFF_ON_STANDBY define causes trouble when trying to get some sound from the codec because the code for bias setup was not compiled (the define wasn't defined).
This define was removed in commit cc3202f5 but reintroduced by commit f0fba2ad1, which completely breaks codec functionality, so remove it again. Signed-off-by: Marek Belisko Acked-by: Liam Girdwood Signed-off-by: Mark Brown Cc: stable@kernel.org diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c index 48ffd40..a7b8f30 100644 --- a/sound/soc/codecs/uda134x.c +++ b/sound/soc/codecs/uda134x.c @@ -601,9 +601,7 @@ static struct snd_soc_codec_driver soc_codec_dev_uda134x = { .reg_cache_step = 1, .read = uda134x_read_reg_cache, .write = uda134x_write, -#ifdef POWER_OFF_ON_STANDBY .set_bias_level = uda134x_set_bias_level, -#endif }; static int __devinit uda134x_codec_probe(struct platform_device *pdev) -- cgit v0.10.2 From 1d44e8288a0557c28c447d7e511f50d06ff93a34 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Mon, 9 May 2011 11:35:19 -0500 Subject: x86, UV: Fix NMI handler for UV platforms This fixes problems seen on UV systems handling NMIs from the node controller. I isolated the "dazed..." messages that I saw earlier to a bug in the BMC on our platform. It was sending NMIs w/o properly setting a register that indicated the source of NMI. So rather than _assuming_ any unhandled NMI came from the UV system maintenance console (SMC), add a check to verify that the SMC actually sent the NMI.
Signed-off-by: Jack Steiner Cc: gorcunov@gmail.com Cc: dzickus@redhat.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index a501741..4298002 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -398,6 +398,8 @@ struct uv_blade_info { unsigned short nr_online_cpus; unsigned short pnode; short memory_nid; + spinlock_t nmi_lock; + unsigned long nmi_count; }; extern struct uv_blade_info *uv_blade_info; extern short *uv_node_to_blade; diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 20cafea..f5bb64a 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -5,7 +5,7 @@ * * SGI UV MMR definitions * - * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_X86_UV_UV_MMRS_H @@ -1099,5 +1099,19 @@ union uvh_rtc1_int_config_u { } s; }; +/* ========================================================================= */ +/* UVH_SCRATCH5 */ +/* ========================================================================= */ +#define UVH_SCRATCH5 0x2d0200UL +#define UVH_SCRATCH5_32 0x00778 + +#define UVH_SCRATCH5_SCRATCH5_SHFT 0 +#define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL +union uvh_scratch5_u { + unsigned long v; + struct uvh_scratch5_s { + unsigned long scratch5 : 64; /* RW, W1CS */ + } s; +}; #endif /* __ASM_UV_MMRS_X86_H__ */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 33b10a0..7acd2d2 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -37,6 +37,13 @@ #include #include #include +#include + +/* BMC sets a bit this MMR non-zero before sending an NMI */ +#define UVH_NMI_MMR UVH_SCRATCH5 +#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8) +#define UV_NMI_PENDING_MASK (1UL << 63) +DEFINE_PER_CPU(unsigned long, 
cpu_last_nmi_count); DEFINE_PER_CPU(int, x2apic_extra_bits); @@ -642,18 +649,46 @@ void __cpuinit uv_cpu_init(void) */ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) { + unsigned long real_uv_nmi; + int bid; + if (reason != DIE_NMIUNKNOWN) return NOTIFY_OK; if (in_crash_kexec) /* do nothing if entering the crash kernel */ return NOTIFY_OK; + /* - * Use a lock so only one cpu prints at a time - * to prevent intermixed output. + * Each blade has an MMR that indicates when an NMI has been sent + * to cpus on the blade. If an NMI is detected, atomically + * clear the MMR and update a per-blade NMI count used to + * cause each cpu on the blade to notice a new NMI. + */ + bid = uv_numa_blade_id(); + real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); + + if (unlikely(real_uv_nmi)) { + spin_lock(&uv_blade_info[bid].nmi_lock); + real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK); + if (real_uv_nmi) { + uv_blade_info[bid].nmi_count++; + uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK); + } + spin_unlock(&uv_blade_info[bid].nmi_lock); + } + + if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) + return NOTIFY_DONE; + + __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; + + /* + * Use a lock so only one cpu prints at a time. + * This prevents intermixed output. 
*/ spin_lock(&uv_nmi_lock); - pr_info("NMI stack dump cpu %u:\n", smp_processor_id()); + pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id()); dump_stack(); spin_unlock(&uv_nmi_lock); @@ -661,7 +696,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) } static struct notifier_block uv_dump_stack_nmi_nb = { - .notifier_call = uv_handle_nmi + .notifier_call = uv_handle_nmi, + .priority = NMI_LOCAL_LOW_PRIOR - 1, }; void uv_register_nmi_notifier(void) @@ -720,8 +756,9 @@ void __init uv_system_init(void) printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); - uv_blade_info = kmalloc(bytes, GFP_KERNEL); + uv_blade_info = kzalloc(bytes, GFP_KERNEL); BUG_ON(!uv_blade_info); + for (blade = 0; blade < uv_num_possible_blades(); blade++) uv_blade_info[blade].memory_nid = -1; @@ -747,6 +784,7 @@ void __init uv_system_init(void) uv_blade_info[blade].pnode = pnode; uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0; + spin_lock_init(&uv_blade_info[blade].nmi_lock); max_pnode = max(pnode, max_pnode); blade++; } -- cgit v0.10.2 From 61bf35b9a3eab961ee1249467d9b2ac11d3c34c1 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Mon, 9 May 2011 16:32:03 -0600 Subject: ASoC: WM8903: Fix Digital Capture Volume range Increase the range of the Digital Capture Volume control to be 120 steps. Each step is 0.75dB, and the range starts at -72dB, giving a max setting of 18dB, which matches the latest datasheet, to the precision of the step size. 
Signed-off-by: Stephen Warren Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index f52b623..824d1c8 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -692,7 +692,7 @@ SOC_ENUM("DRC Smoothing Threshold", drc_smoothing), SOC_SINGLE_TLV("DRC Startup Volume", WM8903_DRC_0, 6, 18, 0, drc_tlv_startup), SOC_DOUBLE_R_TLV("Digital Capture Volume", WM8903_ADC_DIGITAL_VOLUME_LEFT, - WM8903_ADC_DIGITAL_VOLUME_RIGHT, 1, 96, 0, digital_tlv), + WM8903_ADC_DIGITAL_VOLUME_RIGHT, 1, 120, 0, digital_tlv), SOC_ENUM("ADC Companding Mode", adc_companding), SOC_SINGLE("ADC Companding Switch", WM8903_AUDIO_INTERFACE_0, 3, 1, 0), -- cgit v0.10.2 From d24339059d640f108c08ba99ef30e3bafa10f8e4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 10 May 2011 17:35:58 +0200 Subject: fuse: fix oops in revalidate when called with NULL nameidata Some cases (e.g. ecryptfs) can call ->dentry_revalidate with NULL nameidata. https://bugzilla.kernel.org/show_bug.cgi?id=34732 Tyler Hicks pointed out that this bug was introduced by commit e7c0a16786 "fuse: make fuse_dentry_revalidate() RCU aware" Reported-by: Witold Baryluk Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c6ba49b..b32eb29 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -174,7 +174,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) if (!inode) return 0; - if (nd->flags & LOOKUP_RCU) + if (nd && (nd->flags & LOOKUP_RCU)) return -ECHILD; fc = get_fuse_conn(inode); -- cgit v0.10.2 From 5db1c07ced19b2eec3a149a3c624d88e02e246ae Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 3 May 2011 21:40:08 +0300 Subject: mac80211: don't start the dynamic ps timer if not associated When we are disconnecting, we set PS off, but this happens before we send the deauth/disassoc request. 
When the deauth/disassoc frames are sent, we trigger the dynamic ps timer, which then times out and turns PS back on. Thus, PS remains on after disconnecting, causing problems when associating again. This can be fixed by preventing the timer from starting when we're no longer associated. Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ce4596e..bd1224f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -237,6 +237,10 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) &local->dynamic_ps_disable_work); } + /* Don't restart the timer if we're not associated */ + if (!ifmgd->associated) + return TX_CONTINUE; + mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); -- cgit v0.10.2 From 99aa55b66e3553e6f7212ec1104e0fac06cc558e Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Fri, 6 May 2011 20:43:11 +0530 Subject: ath9k: Fix a warning due to a queued work during S3 state During suspend/S3 state, drv_flush is called from mac80211 irrespective of the interface count. In ath9k we queue a work in ath9k_flush which we expect to be cancelled in the drv_stop callback. During the suspend process mac80211 calls drv_stop only when the interface count (local->count) is non-zero. Unfortunately, when the network manager is enabled, drv_flush is called while drv_stop is not called as local->count reaches '0'. So fix this by simply checking for the device presence in the drv_flush callback in the driver before queueing work or anything else. This patch fixes the following WARNING Call Trace: [] warn_slowpath_common+0x72/0xa0 [] ? ieee80211_can_queue_work+0x39/0x50 [mac80211] [] ? ieee80211_can_queue_work+0x39/0x50 [mac80211] [] warn_slowpath_fmt+0x2b/0x30 [] ieee80211_can_queue_work+0x39/0x50 [mac80211] [] ieee80211_queue_delayed_work+0x21/0x50 [mac80211] [] ath_tx_complete_poll_work+0xb2/0x100 [ath9k] [] run_workqueue+0x8e/0x150 [] ?
ath_tx_complete_poll_work+0x0/0x100 [ath9k] [] worker_thread+0x84/0xe0 [] ? autoremove_wake_function+0x0/0x50 [] ? worker_thread+0x0/0xe0 [] kthread+0x74/0x80 [] ? kthread+0x0/0x80 [] kernel_thread_helper+0x7/0x10 ---[ end trace 2aff81010df9215b ]--- Signed-off-by: Rajkumar Manoharan Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 17d04ff..1482fa6 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -2141,6 +2141,8 @@ static void ath9k_set_coverage_class(struct ieee80211_hw *hw, u8 coverage_class) static void ath9k_flush(struct ieee80211_hw *hw, bool drop) { struct ath_softc *sc = hw->priv; + struct ath_hw *ah = sc->sc_ah; + struct ath_common *common = ath9k_hw_common(ah); int timeout = 200; /* ms */ int i, j; @@ -2149,6 +2151,12 @@ static void ath9k_flush(struct ieee80211_hw *hw, bool drop) cancel_delayed_work_sync(&sc->tx_complete_work); + if (sc->sc_flags & SC_OP_INVALID) { + ath_dbg(common, ATH_DBG_ANY, "Device not present\n"); + mutex_unlock(&sc->mutex); + return; + } + if (drop) timeout = 1; -- cgit v0.10.2 From eb85de3f84868ca85703a23617b4079ce79a801e Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Sat, 7 May 2011 17:46:21 +0200 Subject: iwlegacy: fix IBSS mode crashes We should not switch to non-IBSS channels when working in IBSS mode, otherwise there are microcode errors, and after some time system crashes. 
This bug is only observable when software scan is used in IBSS mode, so it should be considered a regression after: commit 0263aa45293838b514b8af674a03faf040991a90 Author: Stanislaw Gruszka Date: Tue Mar 29 11:24:21 2011 +0200 iwl3945: disable hw scan by default However, the IBSS mode check, which this patch adds again, was removed by commit b2f30e8bdd8ef5f3b5a7ef9146509585a15347d3 Author: Johannes Berg Date: Thu Jan 21 07:32:20 2010 -0800 iwlwifi: remove IBSS channel sanity check That commit claims that mac80211 will not use non-IBSS channels in IBSS mode, which is definitely not true. The bug should probably be fixed in mac80211, but that will require more work, so it is better to apply this patch temporarily and provide a proper mac80211 fix later. Resolves: https://bugzilla.kernel.org/show_bug.cgi?id=34452 Reported-and-tested-by: Mikko Rapeli Cc: stable@kernel.org # 2.6.38.5+ Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/iwlegacy/iwl-core.c b/drivers/net/wireless/iwlegacy/iwl-core.c index 2b08efb..dcbb2ef 100644 --- a/drivers/net/wireless/iwlegacy/iwl-core.c +++ b/drivers/net/wireless/iwlegacy/iwl-core.c @@ -2155,6 +2155,13 @@ int iwl_legacy_mac_config(struct ieee80211_hw *hw, u32 changed) goto set_ch_out; } + if (priv->iw_mode == NL80211_IFTYPE_ADHOC && + !iwl_legacy_is_channel_ibss(ch_info)) { + IWL_DEBUG_MAC80211(priv, "leave - not IBSS channel\n"); + ret = -EINVAL; + goto set_ch_out; + } + spin_lock_irqsave(&priv->lock, flags); for_each_context(priv, ctx) { diff --git a/drivers/net/wireless/iwlegacy/iwl-dev.h b/drivers/net/wireless/iwlegacy/iwl-dev.h index 9ee849d..f43ac1e 100644 --- a/drivers/net/wireless/iwlegacy/iwl-dev.h +++ b/drivers/net/wireless/iwlegacy/iwl-dev.h @@ -1411,6 +1411,12 @@ iwl_legacy_is_channel_passive(const struct iwl_channel_info *ch) return (!(ch->flags & EEPROM_CHANNEL_ACTIVE)) ?
1 : 0; } +static inline int +iwl_legacy_is_channel_ibss(const struct iwl_channel_info *ch) +{ + return (ch->flags & EEPROM_CHANNEL_IBSS) ? 1 : 0; +} + static inline void __iwl_legacy_free_pages(struct iwl_priv *priv, struct page *page) { -- cgit v0.10.2 From 2ae1b8b35faba31a59b153cbad07f9c15de99740 Mon Sep 17 00:00:00 2001 From: Paul Fox Date: Mon, 9 May 2011 10:40:42 +0100 Subject: libertas: fix cmdpendingq locking We occasionally see list corruption using libertas. While we haven't been able to diagnose this precisely, we have spotted a possible cause: cmdpendingq is generally modified with driver_lock held. However, there are a couple of points where this is not the case. Fix up those operations to execute under the lock, it seems like the correct thing to do and will hopefully improve the situation. Signed-off-by: Paul Fox Signed-off-by: Daniel Drake Acked-by: Dan Williams Cc: stable@kernel.org Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c index 7e8a658..f3ac624 100644 --- a/drivers/net/wireless/libertas/cmd.c +++ b/drivers/net/wireless/libertas/cmd.c @@ -1339,8 +1339,8 @@ int lbs_execute_next_command(struct lbs_private *priv) cpu_to_le16(PS_MODE_ACTION_EXIT_PS)) { lbs_deb_host( "EXEC_NEXT_CMD: ignore ENTER_PS cmd\n"); - list_del(&cmdnode->list); spin_lock_irqsave(&priv->driver_lock, flags); + list_del(&cmdnode->list); lbs_complete_command(priv, cmdnode, 0); spin_unlock_irqrestore(&priv->driver_lock, flags); @@ -1352,8 +1352,8 @@ int lbs_execute_next_command(struct lbs_private *priv) (priv->psstate == PS_STATE_PRE_SLEEP)) { lbs_deb_host( "EXEC_NEXT_CMD: ignore EXIT_PS cmd in sleep\n"); - list_del(&cmdnode->list); spin_lock_irqsave(&priv->driver_lock, flags); + list_del(&cmdnode->list); lbs_complete_command(priv, cmdnode, 0); spin_unlock_irqrestore(&priv->driver_lock, flags); priv->needtowakeup = 1; @@ -1366,7 +1366,9 @@ int lbs_execute_next_command(struct lbs_private *priv) "EXEC_NEXT_CMD: 
sending EXIT_PS\n"); } } + spin_lock_irqsave(&priv->driver_lock, flags); list_del(&cmdnode->list); + spin_unlock_irqrestore(&priv->driver_lock, flags); lbs_deb_host("EXEC_NEXT_CMD: sending command 0x%04x\n", le16_to_cpu(cmd->command)); lbs_submit_command(priv, cmdnode); -- cgit v0.10.2 From 9bbeacf52f66d165739a4bbe9c018d17493a74b5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 May 2011 13:06:13 +0200 Subject: kprobes, x86: Disable irqs during optimized callback Disable irqs during optimized callback, so we dont miss any in-irq kprobes. The following commands: # cd /debug/tracing/ # echo "p mutex_unlock" >> kprobe_events # echo "p _raw_spin_lock" >> kprobe_events # echo "p smp_apic_timer_interrupt" >> ./kprobe_events # echo 1 > events/enable Cause the optimized kprobes to be missed. None is missed with the fix applied. Signed-off-by: Jiri Olsa Acked-by: Masami Hiramatsu Link: http://lkml.kernel.org/r/20110511110613.GB2390@jolsa.brq.redhat.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index c969fd9..f1a6244 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -1183,12 +1183,13 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long flags; /* This is possible if op is under delayed unoptimizing */ if (kprobe_disabled(&op->kp)) return; - preempt_disable(); + local_irq_save(flags); if (kprobe_running()) { kprobes_inc_nmissed_count(&op->kp); } else { @@ -1207,7 +1208,7 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op, opt_pre_handler(&op->kp, regs); __this_cpu_write(current_kprobe, NULL); } - preempt_enable_no_resched(); + local_irq_restore(flags); } static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) -- cgit v0.10.2 From a8a4ae3a899a6c0b4771cc57884800d8b76a6996 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 3 May 2011 13:43:03 -0400 
Subject: NFSv41: Resend on NFS4ERR_RETRY_UNCACHED_REP Free the slot and resend the RPC with new session . For nfs4_async_handle_error, return -EAGAIN and set the task->tk_status to 0 to restart the async rpc in the rpc_restart_call_prepare state which resets the slot. For nfs4_handle_exception, retrying a call that uses nfs4_call_sync will reset the slot via nfs41_call_sync_prepare. For open/close/lock/locku/delegreturn/layoutcommit/unlink/rename/write cachethis is true, so these operations will not trigger an NFS4ERR_RETRY_UNCACHED_REP. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 6f8192f..7841ea6 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -117,6 +117,8 @@ static int filelayout_async_handle_error(struct rpc_task *task, case -EKEYEXPIRED: rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX); break; + case -NFS4ERR_RETRY_UNCACHED_REP: + break; default: dprintk("%s DS error. Retry through MDS %d\n", __func__, task->tk_status); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 69c0f3c..cf1b339 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -300,6 +300,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc ret = nfs4_delay(server->client, &exception->timeout); if (ret != 0) break; + case -NFS4ERR_RETRY_UNCACHED_REP: case -NFS4ERR_OLD_STATEID: exception->retry = 1; break; @@ -3695,6 +3696,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, rpc_delay(task, NFS4_POLL_RETRY_MAX); task->tk_status = 0; return -EAGAIN; + case -NFS4ERR_RETRY_UNCACHED_REP: case -NFS4ERR_OLD_STATEID: task->tk_status = 0; return -EAGAIN; @@ -4844,6 +4846,8 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); rpc_delay(task, NFS4_POLL_RETRY_MIN); task->tk_status = 0; + /* fall through */ + case -NFS4ERR_RETRY_UNCACHED_REP: 
nfs_restart_rpc(task, data->clp); return; } @@ -5479,6 +5483,8 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf break; case -NFS4ERR_DELAY: rpc_delay(task, NFS4_POLL_RETRY_MAX); + /* fall through */ + case -NFS4ERR_RETRY_UNCACHED_REP: return -EAGAIN; default: nfs4_schedule_lease_recovery(clp); -- cgit v0.10.2 From 2887fe45522843149ccf72e01f43813be4fb36c5 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 11 May 2011 01:19:58 -0400 Subject: NFSv4.1: remove pnfs_layout_hdr from pnfs_destroy_all_layouts tmp_list Prevents an infinite loop as list was never emptied. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ff681ab..65455f5 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -383,6 +383,7 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) plh_layouts); dprintk("%s freeing layout for inode %lu\n", __func__, lo->plh_inode->i_ino); + list_del_init(&lo->plh_layouts); pnfs_destroy_layout(NFS_I(lo->plh_inode)); } } -- cgit v0.10.2 From a75b9df9d3bfc3cd1083974c045ae31ce5f3434f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 May 2011 18:00:51 -0400 Subject: NFSv4.1: Ensure that layoutget uses the correct gfp modes Currently, writebacks may end up recursing back into the filesystem due to GFP_KERNEL direct reclaims in the pnfs subsystem. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 7841ea6..be79dc9 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -418,7 +418,8 @@ static int filelayout_check_layout(struct pnfs_layout_hdr *lo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id) + struct nfs4_deviceid *id, + gfp_t gfp_flags) { struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; @@ -441,7 +442,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, /* find and reference the deviceid */ dsaddr = nfs4_fl_find_get_deviceid(id); if (dsaddr == NULL) { - dsaddr = get_device_info(lo->plh_inode, id); + dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); if (dsaddr == NULL) goto out; } @@ -502,7 +503,8 @@ static int filelayout_decode_layout(struct pnfs_layout_hdr *flo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id) + struct nfs4_deviceid *id, + gfp_t gfp_flags) { struct xdr_stream stream; struct xdr_buf buf = { @@ -518,7 +520,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, dprintk("%s: set_layout_map Begin\n", __func__); - scratch = alloc_page(GFP_KERNEL); + scratch = alloc_page(gfp_flags); if (!scratch) return -ENOMEM; @@ -556,13 +558,13 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, goto out_err; fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), - GFP_KERNEL); + gfp_flags); if (!fl->fh_array) goto out_err; for (i = 0; i < fl->num_fh; i++) { /* Do we want to use a mempool here? 
*/ - fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); + fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags); if (!fl->fh_array[i]) goto out_err_free; @@ -607,19 +609,20 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg) static struct pnfs_layout_segment * filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, - struct nfs4_layoutget_res *lgr) + struct nfs4_layoutget_res *lgr, + gfp_t gfp_flags) { struct nfs4_filelayout_segment *fl; int rc; struct nfs4_deviceid id; dprintk("--> %s\n", __func__); - fl = kzalloc(sizeof(*fl), GFP_KERNEL); + fl = kzalloc(sizeof(*fl), gfp_flags); if (!fl) return NULL; - rc = filelayout_decode_layout(layoutid, fl, lgr, &id); - if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) { + rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags); + if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) { _filelayout_free_lseg(fl); return NULL; } @@ -635,7 +638,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, int size = (fl->stripe_type == STRIPE_SPARSE) ? 
fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL); + fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags); if (!fl->commit_buckets) { filelayout_free_lseg(&fl->generic_hdr); return NULL; diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 7c44579..2b461d7 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -104,6 +104,6 @@ extern struct nfs4_file_layout_dsaddr * nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id); extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); struct nfs4_file_layout_dsaddr * -get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); +get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); #endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index de5350f..db07c7a 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -225,11 +225,11 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) } static struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) +nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags) { struct nfs4_pnfs_ds *tmp_ds, *ds; - ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL); + ds = kzalloc(sizeof(*tmp_ds), gfp_flags); if (!ds) goto out; @@ -261,7 +261,7 @@ out: * Currently only support ipv4, and one multi-path address. 
*/ static struct nfs4_pnfs_ds * -decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) +decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) { struct nfs4_pnfs_ds *ds = NULL; char *buf; @@ -303,7 +303,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) rlen); goto out_err; } - buf = kmalloc(rlen + 1, GFP_KERNEL); + buf = kmalloc(rlen + 1, gfp_flags); if (!buf) { dprintk("%s: Not enough memory\n", __func__); goto out_err; @@ -333,7 +333,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); port = htons((tmp[0] << 8) | (tmp[1])); - ds = nfs4_pnfs_ds_add(inode, ip_addr, port); + ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags); dprintk("%s: Decoded address and port %s\n", __func__, buf); out_free: kfree(buf); @@ -343,7 +343,7 @@ out_err: /* Decode opaque device data and return the result */ static struct nfs4_file_layout_dsaddr* -decode_device(struct inode *ino, struct pnfs_device *pdev) +decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) { int i; u32 cnt, num; @@ -362,7 +362,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) struct page *scratch; /* set up xdr stream */ - scratch = alloc_page(GFP_KERNEL); + scratch = alloc_page(gfp_flags); if (!scratch) goto out_err; @@ -384,7 +384,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) } /* read stripe indices */ - stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL); + stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags); if (!stripe_indices) goto out_err_free_scratch; @@ -423,7 +423,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) dsaddr = kzalloc(sizeof(*dsaddr) + (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), - GFP_KERNEL); + gfp_flags); if (!dsaddr) goto out_err_free_stripe_indices; @@ -452,7 +452,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) for (j = 0; j < mp_count; j++) { if (j == 0) { dsaddr->ds_list[i] 
= decode_and_add_ds(&stream, - ino); + ino, gfp_flags); if (dsaddr->ds_list[i] == NULL) goto out_err_free_deviceid; } else { @@ -503,12 +503,12 @@ out_err: * available devices. */ static struct nfs4_file_layout_dsaddr * -decode_and_add_device(struct inode *inode, struct pnfs_device *dev) +decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) { struct nfs4_file_layout_dsaddr *d, *new; long hash; - new = decode_device(inode, dev); + new = decode_device(inode, dev, gfp_flags); if (!new) { printk(KERN_WARNING "%s: Could not decode or add device\n", __func__); @@ -537,7 +537,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev) * of available devices, and return it. */ struct nfs4_file_layout_dsaddr * -get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) +get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags) { struct pnfs_device *pdev = NULL; u32 max_resp_sz; @@ -556,17 +556,17 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) dprintk("%s inode %p max_resp_sz %u max_pages %d\n", __func__, inode, max_resp_sz, max_pages); - pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL); + pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags); if (pdev == NULL) return NULL; - pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); + pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); if (pages == NULL) { kfree(pdev); return NULL; } for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(GFP_KERNEL); + pages[i] = alloc_page(gfp_flags); if (!pages[i]) goto out_free; } @@ -587,7 +587,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) * Found new device, need to decode it and then add it to the * list of known devices for this mountpoint. 
*/ - dsaddr = decode_and_add_device(inode, pdev); + dsaddr = decode_and_add_device(inode, pdev, gfp_flags); out_free: for (i = 0; i < max_pages; i++) __free_page(pages[i]); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 65455f5..f57f528 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -467,7 +467,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, static struct pnfs_layout_segment * send_layoutget(struct pnfs_layout_hdr *lo, struct nfs_open_context *ctx, - u32 iomode) + u32 iomode, + gfp_t gfp_flags) { struct inode *ino = lo->plh_inode; struct nfs_server *server = NFS_SERVER(ino); @@ -480,7 +481,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); BUG_ON(ctx == NULL); - lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); + lgp = kzalloc(sizeof(*lgp), gfp_flags); if (lgp == NULL) return NULL; @@ -488,12 +489,12 @@ send_layoutget(struct pnfs_layout_hdr *lo, max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; max_pages = max_resp_sz >> PAGE_SHIFT; - pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); + pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); if (!pages) goto out_err_free; for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(GFP_KERNEL); + pages[i] = alloc_page(gfp_flags); if (!pages[i]) goto out_err_free; } @@ -509,6 +510,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.layout.pages = pages; lgp->args.layout.pglen = max_pages * PAGE_SIZE; lgp->lsegpp = &lseg; + lgp->gfp_flags = gfp_flags; /* Synchronously retrieve layout information from server and * store in lseg. 
@@ -666,11 +668,11 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, } static struct pnfs_layout_hdr * -alloc_init_layout_hdr(struct inode *ino) +alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags) { struct pnfs_layout_hdr *lo; - lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL); + lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); if (!lo) return NULL; atomic_set(&lo->plh_refcount, 1); @@ -682,7 +684,7 @@ alloc_init_layout_hdr(struct inode *ino) } static struct pnfs_layout_hdr * -pnfs_find_alloc_layout(struct inode *ino) +pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) { struct nfs_inode *nfsi = NFS_I(ino); struct pnfs_layout_hdr *new = NULL; @@ -697,7 +699,7 @@ pnfs_find_alloc_layout(struct inode *ino) return nfsi->layout; } spin_unlock(&ino->i_lock); - new = alloc_init_layout_hdr(ino); + new = alloc_init_layout_hdr(ino, gfp_flags); spin_lock(&ino->i_lock); if (likely(nfsi->layout == NULL)) /* Won the race? */ @@ -757,7 +759,8 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode iomode) + enum pnfs_iomode iomode, + gfp_t gfp_flags) { struct nfs_inode *nfsi = NFS_I(ino); struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; @@ -768,7 +771,7 @@ pnfs_update_layout(struct inode *ino, if (!pnfs_enabled_sb(NFS_SERVER(ino))) return NULL; spin_lock(&ino->i_lock); - lo = pnfs_find_alloc_layout(ino); + lo = pnfs_find_alloc_layout(ino, gfp_flags); if (lo == NULL) { dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); goto out_unlock; @@ -808,7 +811,7 @@ pnfs_update_layout(struct inode *ino, spin_unlock(&clp->cl_lock); } - lseg = send_layoutget(lo, ctx, iomode); + lseg = send_layoutget(lo, ctx, iomode, gfp_flags); if (!lseg && first) { spin_lock(&clp->cl_lock); list_del_init(&lo->plh_layouts); @@ -847,7 +850,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out; } /* Inject layout blob into I/O device driver */ 
- lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res); + lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); if (!lseg || IS_ERR(lseg)) { if (!lseg) status = -ENOMEM; @@ -900,7 +903,8 @@ static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio, /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, - IOMODE_READ); + IOMODE_READ, + GFP_KERNEL); } return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } @@ -922,7 +926,8 @@ static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio, /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, - IOMODE_RW); + IOMODE_RW, + GFP_NOFS); } return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index bc48272..0c015ba 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -70,7 +70,7 @@ struct pnfs_layoutdriver_type { const u32 id; const char *name; struct module *owner; - struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); + struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); void (*free_lseg) (struct pnfs_layout_segment *lseg); /* test for nfs page cache coalescing */ @@ -126,7 +126,7 @@ void get_layout_hdr(struct pnfs_layout_hdr *lo); void put_lseg(struct pnfs_layout_segment *lseg); struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode access_type); + enum pnfs_iomode access_type, gfp_t gfp_flags); void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, @@ -245,7 +245,7 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) static inline 
struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode access_type) + enum pnfs_iomode access_type, gfp_t gfp_flags) { return NULL; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 7cded2b..2bcf0dc 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -288,7 +288,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) atomic_set(&req->wb_complete, requests); BUG_ON(desc->pg_lseg != NULL); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -351,7 +351,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) } req = nfs_list_entry(data->pages.next); if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 0, lseg); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3bd5d7e..49c715b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -939,7 +939,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) atomic_set(&req->wb_complete, requests); BUG_ON(desc->pg_lseg); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -1013,7 +1013,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) } req = nfs_list_entry(data->pages.next); if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || 
NFS_I(desc->pg_inode)->ncommit)) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 890dce2..7e371f7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -233,6 +233,7 @@ struct nfs4_layoutget { struct nfs4_layoutget_args args; struct nfs4_layoutget_res res; struct pnfs_layout_segment **lsegpp; + gfp_t gfp_flags; }; struct nfs4_getdeviceinfo_args { -- cgit v0.10.2 From 21ccc7936dac5ca9b3e2838bbc112a60f34e18b3 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 10 May 2011 16:17:10 +0000 Subject: ehea: Fix memory hotplug oops The ehea driver oopses during memory hotplug if the ports are not up. A simple testcase: # ifconfig ethX down # echo offline > /sys/devices/system/memory/memory32/state Oops: Kernel access of bad area, sig: 11 [#1] last sysfs file: /sys/devices/system/memory/memory32/state REGS: c000000709393110 TRAP: 0300 Not tainted (2.6.39-rc2-01385-g7ef73bc-dirty) DAR: 0000000000000000, DSISR: 40000000 ... NIP [c000000000067c98] .__wake_up_common+0x48/0xf0 LR [c00000000006d034] .__wake_up+0x54/0x90 Call Trace: [c00000000006d034] .__wake_up+0x54/0x90 [d000000006bb6270] .ehea_rereg_mrs+0x140/0x730 [ehea] [d000000006bb69c4] .ehea_mem_notifier+0x164/0x170 [ehea] [c0000000006fc8a8] .notifier_call_chain+0x78/0xf0 [c0000000000b3d70] .__blocking_notifier_call_chain+0x70/0xb0 [c000000000458d78] .memory_notify+0x28/0x40 [c0000000001871d8] .remove_memory+0x208/0x6d0 [c000000000458264] .memory_section_action+0x94/0x140 [c0000000004583ec] .memory_block_change_state+0xdc/0x1d0 [c0000000004585cc] .store_mem_state+0xec/0x160 [c00000000044768c] .sysdev_store+0x3c/0x50 [c00000000020b48c] .sysfs_write_file+0xec/0x1f0 [c00000000018f86c] .vfs_write+0xec/0x1e0 [c00000000018fa88] .SyS_write+0x58/0xd0 To fix this, initialise the waitqueues during port probe instead of port open. Signed-off-by: Anton Blanchard Cc: stable@kernel.org Acked-by: Breno Leitao Signed-off-by: David S. 
Miller diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index 53c0f04..cf79cf7 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c @@ -2688,9 +2688,6 @@ static int ehea_open(struct net_device *dev) netif_start_queue(dev); } - init_waitqueue_head(&port->swqe_avail_wq); - init_waitqueue_head(&port->restart_wq); - mutex_unlock(&port->port_lock); return ret; @@ -3276,6 +3273,9 @@ struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter, INIT_WORK(&port->reset_task, ehea_reset_port); + init_waitqueue_head(&port->swqe_avail_wq); + init_waitqueue_head(&port->restart_wq); + ret = register_netdev(dev); if (ret) { pr_err("register_netdev failed. ret=%d\n", ret); -- cgit v0.10.2 From b1054282d752c5a026e2c0450616ebf37fc0413e Mon Sep 17 00:00:00 2001 From: Tkhai Kirill Date: Tue, 10 May 2011 02:31:41 +0000 Subject: sparc32: Fixed unaligned memory copying in function __csum_partial_copy_sparc_generic When we are in the label cc_dword_align, registers %o0 and %o1 have the same last 2 bits, but it's not guaranteed one of them is zero. So we can get unaligned memory access in label ccte. Example of parameters which lead to this: %o0=0x7ff183e9, %o1=0x8e709e7d, %g1=3 With the parameters I had a memory corruption, when the additional 5 bytes were rewritten. This patch corrects the error. One comment to the patch. We don't care about the third bit in %o1, because cc_end_cruft stores word or less. Signed-off-by: Tkhai Kirill Signed-off-by: David S. Miller diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S index 3632cb3..0084c33 100644 --- a/arch/sparc/lib/checksum_32.S +++ b/arch/sparc/lib/checksum_32.S @@ -289,10 +289,16 @@ cc_end_cruft: /* Also, handle the alignment code out of band. */ cc_dword_align: - cmp %g1, 6 - bl,a ccte + cmp %g1, 16 + bge 1f + srl %g1, 1, %o3 +2: cmp %o3, 0 + be,a ccte andcc %g1, 0xf, %o3 - andcc %o0, 0x1, %g0 + andcc %o3, %o0, %g0 ! 
Check %o0 only (%o1 has the same last 2 bits) + be,a 2b + srl %o3, 1, %o3 +1: andcc %o0, 0x1, %g0 bne ccslow andcc %o0, 0x2, %g0 be 1f -- cgit v0.10.2 From 747df2258b1b9a2e25929ef496262c339c380009 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 11 May 2011 17:41:18 +0100 Subject: sfc: Always map MCDI shared memory as uncacheable We enabled write-combining for memory-mapped registers in commit 65f0b417dee94f779ce9b77102b7d73c93723b39, but inhibited it for the MCDI shared memory where this is not supported. However, write-combining mappings also allow read-reordering, which may also be a problem. I found that when an SFC9000-family controller is connected to an Intel 3000 chipset, and write-combining is enabled, the controller stops responding to PCIe read requests during driver initialisation while the driver is polling for completion of an MCDI command. This results in an NMI and system hang. Adding read memory barriers between all reads to the shared memory area appears to reduce but not eliminate the probability of this. We have not yet established whether this is a bug in our BIU or in the PCIe bridge. For now, work around by mapping the shared memory area separately. 
Signed-off-by: Ben Hutchings diff --git a/drivers/net/sfc/mcdi.c b/drivers/net/sfc/mcdi.c index d984790..3dd45ed 100644 --- a/drivers/net/sfc/mcdi.c +++ b/drivers/net/sfc/mcdi.c @@ -50,6 +50,20 @@ static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx) return &nic_data->mcdi; } +static inline void +efx_mcdi_readd(struct efx_nic *efx, efx_dword_t *value, unsigned reg) +{ + struct siena_nic_data *nic_data = efx->nic_data; + value->u32[0] = (__force __le32)__raw_readl(nic_data->mcdi_smem + reg); +} + +static inline void +efx_mcdi_writed(struct efx_nic *efx, const efx_dword_t *value, unsigned reg) +{ + struct siena_nic_data *nic_data = efx->nic_data; + __raw_writel((__force u32)value->u32[0], nic_data->mcdi_smem + reg); +} + void efx_mcdi_init(struct efx_nic *efx) { struct efx_mcdi_iface *mcdi; @@ -70,8 +84,8 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd, const u8 *inbuf, size_t inlen) { struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - unsigned pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx); - unsigned doorbell = FR_CZ_MC_TREG_SMEM + MCDI_DOORBELL(efx); + unsigned pdu = MCDI_PDU(efx); + unsigned doorbell = MCDI_DOORBELL(efx); unsigned int i; efx_dword_t hdr; u32 xflags, seqno; @@ -92,30 +106,28 @@ static void efx_mcdi_copyin(struct efx_nic *efx, unsigned cmd, MCDI_HEADER_SEQ, seqno, MCDI_HEADER_XFLAGS, xflags); - efx_writed(efx, &hdr, pdu); + efx_mcdi_writed(efx, &hdr, pdu); - for (i = 0; i < inlen; i += 4) { - _efx_writed(efx, *((__le32 *)(inbuf + i)), pdu + 4 + i); - /* use wmb() within loop to inhibit write combining */ - wmb(); - } + for (i = 0; i < inlen; i += 4) + efx_mcdi_writed(efx, (const efx_dword_t *)(inbuf + i), + pdu + 4 + i); /* ring the doorbell with a distinctive value */ - _efx_writed(efx, (__force __le32) 0x45789abc, doorbell); - wmb(); + EFX_POPULATE_DWORD_1(hdr, EFX_DWORD_0, 0x45789abc); + efx_mcdi_writed(efx, &hdr, doorbell); } static void efx_mcdi_copyout(struct efx_nic *efx, u8 *outbuf, size_t outlen) { struct efx_mcdi_iface 
*mcdi = efx_mcdi(efx); - unsigned int pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx); + unsigned int pdu = MCDI_PDU(efx); int i; BUG_ON(atomic_read(&mcdi->state) == MCDI_STATE_QUIESCENT); BUG_ON(outlen & 3 || outlen >= 0x100); for (i = 0; i < outlen; i += 4) - *((__le32 *)(outbuf + i)) = _efx_readd(efx, pdu + 4 + i); + efx_mcdi_readd(efx, (efx_dword_t *)(outbuf + i), pdu + 4 + i); } static int efx_mcdi_poll(struct efx_nic *efx) @@ -123,7 +135,7 @@ static int efx_mcdi_poll(struct efx_nic *efx) struct efx_mcdi_iface *mcdi = efx_mcdi(efx); unsigned int time, finish; unsigned int respseq, respcmd, error; - unsigned int pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx); + unsigned int pdu = MCDI_PDU(efx); unsigned int rc, spins; efx_dword_t reg; @@ -149,8 +161,7 @@ static int efx_mcdi_poll(struct efx_nic *efx) time = get_seconds(); - rmb(); - efx_readd(efx, &reg, pdu); + efx_mcdi_readd(efx, &reg, pdu); /* All 1's indicates that shared memory is in reset (and is * not a valid header). Wait for it to come out reset before @@ -177,7 +188,7 @@ static int efx_mcdi_poll(struct efx_nic *efx) respseq, mcdi->seqno); rc = EIO; } else if (error) { - efx_readd(efx, &reg, pdu + 4); + efx_mcdi_readd(efx, &reg, pdu + 4); switch (EFX_DWORD_FIELD(reg, EFX_DWORD_0)) { #define TRANSLATE_ERROR(name) \ case MC_CMD_ERR_ ## name: \ @@ -211,21 +222,21 @@ out: /* Test and clear MC-rebooted flag for this port/function */ int efx_mcdi_poll_reboot(struct efx_nic *efx) { - unsigned int addr = FR_CZ_MC_TREG_SMEM + MCDI_REBOOT_FLAG(efx); + unsigned int addr = MCDI_REBOOT_FLAG(efx); efx_dword_t reg; uint32_t value; if (efx_nic_rev(efx) < EFX_REV_SIENA_A0) return false; - efx_readd(efx, &reg, addr); + efx_mcdi_readd(efx, &reg, addr); value = EFX_DWORD_FIELD(reg, EFX_DWORD_0); if (value == 0) return 0; EFX_ZERO_DWORD(reg); - efx_writed(efx, &reg, addr); + efx_mcdi_writed(efx, &reg, addr); if (value == MC_STATUS_DWORD_ASSERT) return -EINTR; diff --git a/drivers/net/sfc/nic.h b/drivers/net/sfc/nic.h index a42db6e..d91701a 100644 --- 
a/drivers/net/sfc/nic.h +++ b/drivers/net/sfc/nic.h @@ -143,10 +143,12 @@ static inline struct falcon_board *falcon_board(struct efx_nic *efx) /** * struct siena_nic_data - Siena NIC state * @mcdi: Management-Controller-to-Driver Interface + * @mcdi_smem: MCDI shared memory mapping. The mapping is always uncacheable. * @wol_filter_id: Wake-on-LAN packet filter id */ struct siena_nic_data { struct efx_mcdi_iface mcdi; + void __iomem *mcdi_smem; int wol_filter_id; }; diff --git a/drivers/net/sfc/siena.c b/drivers/net/sfc/siena.c index e4dd898..837869b 100644 --- a/drivers/net/sfc/siena.c +++ b/drivers/net/sfc/siena.c @@ -220,12 +220,26 @@ static int siena_probe_nic(struct efx_nic *efx) efx_reado(efx, &reg, FR_AZ_CS_DEBUG); efx->net_dev->dev_id = EFX_OWORD_FIELD(reg, FRF_CZ_CS_PORT_NUM) - 1; + /* Initialise MCDI */ + nic_data->mcdi_smem = ioremap_nocache(efx->membase_phys + + FR_CZ_MC_TREG_SMEM, + FR_CZ_MC_TREG_SMEM_STEP * + FR_CZ_MC_TREG_SMEM_ROWS); + if (!nic_data->mcdi_smem) { + netif_err(efx, probe, efx->net_dev, + "could not map MCDI at %llx+%x\n", + (unsigned long long)efx->membase_phys + + FR_CZ_MC_TREG_SMEM, + FR_CZ_MC_TREG_SMEM_STEP * FR_CZ_MC_TREG_SMEM_ROWS); + rc = -ENOMEM; + goto fail1; + } efx_mcdi_init(efx); /* Recover from a failed assertion before probing */ rc = efx_mcdi_handle_assertion(efx); if (rc) - goto fail1; + goto fail2; /* Let the BMC know that the driver is now in charge of link and * filter settings. 
We must do this before we reset the NIC */ @@ -280,6 +294,7 @@ fail4: fail3: efx_mcdi_drv_attach(efx, false, NULL); fail2: + iounmap(nic_data->mcdi_smem); fail1: kfree(efx->nic_data); return rc; @@ -359,6 +374,8 @@ static int siena_init_nic(struct efx_nic *efx) static void siena_remove_nic(struct efx_nic *efx) { + struct siena_nic_data *nic_data = efx->nic_data; + efx_nic_free_buffer(efx, &efx->irq_status); siena_reset_hw(efx, RESET_TYPE_ALL); @@ -368,7 +385,8 @@ static void siena_remove_nic(struct efx_nic *efx) efx_mcdi_drv_attach(efx, false, NULL); /* Tear down the private nic state */ - kfree(efx->nic_data); + iounmap(nic_data->mcdi_smem); + kfree(nic_data); efx->nic_data = NULL; } @@ -606,8 +624,7 @@ struct efx_nic_type siena_a0_nic_type = { .default_mac_ops = &efx_mcdi_mac_operations, .revision = EFX_REV_SIENA_A0, - .mem_map_size = (FR_CZ_MC_TREG_SMEM + - FR_CZ_MC_TREG_SMEM_STEP * FR_CZ_MC_TREG_SMEM_ROWS), + .mem_map_size = FR_CZ_MC_TREG_SMEM, /* MC_TREG_SMEM mapped separately */ .txd_ptr_tbl_base = FR_BZ_TX_DESC_PTR_TBL, .rxd_ptr_tbl_base = FR_BZ_RX_DESC_PTR_TBL, .buf_tbl_base = FR_BZ_BUF_FULL_TBL, -- cgit v0.10.2 From 698b368275c3fa98261159253cfc79653f9dffc6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 11 May 2011 14:49:36 -0700 Subject: fbcon: add lifetime refcount to opened frame buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This just adds the refcount and the new registration lock logic. It does not (for example) actually change the read/write/ioctl routines to actually use the frame buffer that was opened: those functions still end up always using whatever the current frame buffer is at the time of the call. Without this, if something holds the frame buffer open over a framebuffer switch, the close() operation after the switch will access a fb_info that has been free'd by the unregistering of the old frame buffer.
(The read/write/ioctl operations will normally not cause problems, because they will - illogically - pick up the new fbcon instead. But a switch that happens just as one of those is going on might see problems too, the window is just much smaller: one individual op rather than the whole open-close sequence.) This use-after-free is apparently fairly easily triggered by the Ubuntu 11.04 boot sequence. Acked-by: Tim Gardner Tested-by: Daniel J Blueman Tested-by: Anca Emanuel Cc: Bruno Prémont Cc: Alan Cox Cc: Paul Mundt Cc: Dave Airlie Cc: Andy Whitcroft Signed-off-by: Linus Torvalds diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index e0c2284..eec14d2 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -42,9 +42,34 @@ #define FBPIXMAPSIZE (1024 * 8) +static DEFINE_MUTEX(registration_lock); struct fb_info *registered_fb[FB_MAX] __read_mostly; int num_registered_fb __read_mostly; +static struct fb_info *get_fb_info(unsigned int idx) +{ + struct fb_info *fb_info; + + if (idx >= FB_MAX) + return ERR_PTR(-ENODEV); + + mutex_lock(&registration_lock); + fb_info = registered_fb[idx]; + if (fb_info) + atomic_inc(&fb_info->count); + mutex_unlock(&registration_lock); + + return fb_info; +} + +static void put_fb_info(struct fb_info *fb_info) +{ + if (!atomic_dec_and_test(&fb_info->count)) + return; + if (fb_info->fbops->fb_destroy) + fb_info->fbops->fb_destroy(fb_info); +} + int lock_fb_info(struct fb_info *info) { mutex_lock(&info->lock); @@ -647,6 +672,7 @@ int fb_show_logo(struct fb_info *info, int rotate) { return 0; } static void *fb_seq_start(struct seq_file *m, loff_t *pos) { + mutex_lock(&registration_lock); return (*pos < FB_MAX) ?
pos : NULL; } @@ -658,6 +684,7 @@ static void *fb_seq_next(struct seq_file *m, void *v, loff_t *pos) static void fb_seq_stop(struct seq_file *m, void *v) { + mutex_unlock(&registration_lock); } static int fb_seq_show(struct seq_file *m, void *v) @@ -1361,14 +1388,16 @@ __releases(&info->lock) struct fb_info *info; int res = 0; - if (fbidx >= FB_MAX) - return -ENODEV; - info = registered_fb[fbidx]; - if (!info) + info = get_fb_info(fbidx); + if (!info) { request_module("fb%d", fbidx); - info = registered_fb[fbidx]; - if (!info) - return -ENODEV; + info = get_fb_info(fbidx); + if (!info) + return -ENODEV; + } + if (IS_ERR(info)) + return PTR_ERR(info); + mutex_lock(&info->lock); if (!try_module_get(info->fbops->owner)) { res = -ENODEV; @@ -1386,6 +1415,8 @@ __releases(&info->lock) #endif out: mutex_unlock(&info->lock); + if (res) + put_fb_info(info); return res; } @@ -1401,6 +1432,7 @@ __releases(&info->lock) info->fbops->fb_release(info,1); module_put(info->fbops->owner); mutex_unlock(&info->lock); + put_fb_info(info); return 0; } @@ -1542,11 +1574,13 @@ register_framebuffer(struct fb_info *fb_info) remove_conflicting_framebuffers(fb_info->apertures, fb_info->fix.id, fb_is_primary_device(fb_info)); + mutex_lock(&registration_lock); num_registered_fb++; for (i = 0 ; i < FB_MAX; i++) if (!registered_fb[i]) break; fb_info->node = i; + atomic_set(&fb_info->count, 1); mutex_init(&fb_info->lock); mutex_init(&fb_info->mm_lock); @@ -1583,6 +1617,7 @@ register_framebuffer(struct fb_info *fb_info) fb_var_to_videomode(&mode, &fb_info->var); fb_add_videomode(&mode, &fb_info->modelist); registered_fb[i] = fb_info; + mutex_unlock(&registration_lock); event.info = fb_info; if (!lock_fb_info(fb_info)) @@ -1616,6 +1651,7 @@ unregister_framebuffer(struct fb_info *fb_info) struct fb_event event; int i, ret = 0; + mutex_lock(&registration_lock); i = fb_info->node; if (!registered_fb[i]) { ret = -EINVAL; @@ -1638,7 +1674,7 @@ unregister_framebuffer(struct fb_info *fb_info) (fb_info->pixmap.flags &
FB_PIXMAP_DEFAULT)) kfree(fb_info->pixmap.addr); fb_destroy_modelist(&fb_info->modelist); - registered_fb[i]=NULL; + registered_fb[i] = NULL; num_registered_fb--; fb_cleanup_device(fb_info); device_destroy(fb_class, MKDEV(FB_MAJOR, i)); @@ -1646,9 +1682,9 @@ unregister_framebuffer(struct fb_info *fb_info) fb_notifier_call_chain(FB_EVENT_FB_UNREGISTERED, &event); /* this may free fb info */ - if (fb_info->fbops->fb_destroy) - fb_info->fbops->fb_destroy(fb_info); + put_fb_info(fb_info); done: + mutex_unlock(&registration_lock); return ret; } diff --git a/include/linux/fb.h b/include/linux/fb.h index df728c1..6a82748 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -832,6 +832,7 @@ struct fb_tile_ops { #define FBINFO_CAN_FORCE_OUTPUT 0x200000 struct fb_info { + atomic_t count; int node; int flags; struct mutex lock; /* Lock for open/release/ioctl funcs */ -- cgit v0.10.2 From c47747fde931c02455683bd00ea43eaa62f35b0e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 11 May 2011 14:58:34 -0700 Subject: fbmem: make read/write/ioctl use the frame buffer at open time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit read/write/ioctl on a fbcon file descriptor has traditionally used the fbcon not when it was opened, but as it was at the time of the call. That makes no sense, but the lack of sense is much more obvious now that we properly ref-count the usage - it means that the ref-counting doesn't actually protect operations we do on the frame buffer. This changes it to look at the fb_info that we got at open time, but in order to avoid using a frame buffer long after it has been unregistered, we do verify that it is still current, and return -ENODEV if not.
Acked-by: Tim Gardner Tested-by: Daniel J Blueman Tested-by: Anca Emanuel Cc: Bruno Prémont Cc: Alan Cox Cc: Paul Mundt Cc: Dave Airlie Cc: Andy Whitcroft Signed-off-by: Linus Torvalds diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index eec14d2..ea16e65 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -717,13 +717,30 @@ static const struct file_operations fb_proc_fops = { .release = seq_release, }; -static ssize_t -fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +/* + * We hold a reference to the fb_info in file->private_data, + * but if the current registered fb has changed, we don't + * actually want to use it. + * + * So look up the fb_info using the inode minor number, + * and just verify it against the reference we have. + */ +static struct fb_info *file_fb_info(struct file *file) { - unsigned long p = *ppos; struct inode *inode = file->f_path.dentry->d_inode; int fbidx = iminor(inode); struct fb_info *info = registered_fb[fbidx]; + + if (info != file->private_data) + info = NULL; + return info; +} + +static ssize_t +fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + unsigned long p = *ppos; + struct fb_info *info = file_fb_info(file); u8 *buffer, *dst; u8 __iomem *src; int c, cnt = 0, err = 0; @@ -788,9 +805,7 @@ static ssize_t fb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { unsigned long p = *ppos; - struct inode *inode = file->f_path.dentry->d_inode; - int fbidx = iminor(inode); - struct fb_info *info = registered_fb[fbidx]; + struct fb_info *info = file_fb_info(file); u8 *buffer, *src; u8 __iomem *dst; int c, cnt = 0, err = 0; @@ -1168,10 +1183,10 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, static long fb_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; - int fbidx = iminor(inode); - struct fb_info *info = registered_fb[fbidx]; + struct fb_info *info = 
file_fb_info(file); + if (!info) + return -ENODEV; return do_fb_ioctl(info, cmd, arg); } @@ -1292,12 +1307,13 @@ static int fb_get_fscreeninfo(struct fb_info *info, unsigned int cmd, static long fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; - int fbidx = iminor(inode); - struct fb_info *info = registered_fb[fbidx]; - struct fb_ops *fb = info->fbops; + struct fb_info *info = file_fb_info(file); + struct fb_ops *fb; long ret = -ENOIOCTLCMD; + if (!info) + return -ENODEV; + fb = info->fbops; switch(cmd) { case FBIOGET_VSCREENINFO: case FBIOPUT_VSCREENINFO: @@ -1330,16 +1346,18 @@ static long fb_compat_ioctl(struct file *file, unsigned int cmd, static int fb_mmap(struct file *file, struct vm_area_struct * vma) { - int fbidx = iminor(file->f_path.dentry->d_inode); - struct fb_info *info = registered_fb[fbidx]; - struct fb_ops *fb = info->fbops; + struct fb_info *info = file_fb_info(file); + struct fb_ops *fb; unsigned long off; unsigned long start; u32 len; + if (!info) + return -ENODEV; if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) return -EINVAL; off = vma->vm_pgoff << PAGE_SHIFT; + fb = info->fbops; if (!fb) return -ENODEV; mutex_lock(&info->mm_lock); -- cgit v0.10.2 From 92bdaef7b2c5d3cb8abc902faa1f7670a183dcdc Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 5 May 2011 13:50:43 -0400 Subject: Revert "xen/mmu: Add workaround "x86-64, mm: Put early page table high"" This reverts commit a38647837a411f7df79623128421eef2118b5884. It does not work with certain AMD machines. 
last_pfn = 0x100000 max_arch_pfn = 0x400000000 initial memory mapped : 0 - 02c3a000 Base memory trampoline at [ffff88000009b000] 9b000 size 20480 init_memory_mapping: 0000000000000000-0000000100000000 0000000000 - 0100000000 page 4k kernel direct mapping tables up to 100000000 @ ff7fb000-100000000 init_memory_mapping: 0000000100000000-00000001e0800000 0100000000 - 01e0800000 page 4k kernel direct mapping tables up to 1e0800000 @ 1df0f3000-1e0000000 xen: setting RW the range fffdc000 - 100000000 RAMDISK: 0203b000 - 02c3a000 No NUMA configuration found Faking a node at 0000000000000000-00000001e0800000 NUMA: Using 63 for the hash shift. Initmem setup node 0 0000000000000000-00000001e0800000 NODE_DATA [00000001dfffb000 - 00000001dfffffff] BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] setup_node_bootmem+0x18a/0x1ea PGD 0 Oops: 0003 [#1] SMP last sysfs file: CPU 0 Modules linked in: Pid: 0, comm: swapper Not tainted 2.6.39-0-virtual #6~smb1 RIP: e030:[] [] setup_node_bootmem+0x18a/0x1ea RSP: e02b:ffffffff81c01e38 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 00000001e0800000 RCX: 0000000000001040 RDX: 0000000000004100 RSI: 0000000000000000 RDI: ffff8801dfffb000 RBP: ffffffff81c01e58 R08: 0000000000000020 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000bfe400 FS: 0000000000000000(0000) GS:ffffffff81cca000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000001c03000 CR4: 0000000000000660 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper (pid: 0, threadinfo ffffffff81c00000, task ffffffff81c0b020) Stack: 0000000000000040 0000000000000001 0000000000000000 ffffffffffffffff ffffffff81c01e88 ffffffff81cf6c25 0000000000000000 0000000000000000 ffffffff81cf687f 0000000000000000 ffffffff81c01ea8 
ffffffff81cf6e45 Call Trace: [] numa_register_memblks.constprop.3+0x150/0x181 [] ? numa_add_memblk+0x7c/0x7c [] numa_init.part.2+0x1c/0x7c [] ? numa_add_memblk+0x7c/0x7c [] numa_init+0x6c/0x70 [] initmem_init+0x39/0x3b [] setup_arch+0x64e/0x769 [] ? printk+0x51/0x53 [] start_kernel+0xd4/0x3f3 [] x86_64_start_reservations+0x132/0x136 [] xen_start_kernel+0x588/0x58f Code: 41 00 00 48 8b 3c c5 a0 24 cc 81 31 c0 40 f6 c7 01 74 05 aa 66 ba ff 40 40 f6 c7 02 74 05 66 ab 83 ea 02 89 d1 c1 e9 02 f6 c2 02 ab 74 02 66 ab 80 e2 01 74 01 aa 49 63 c4 48 c1 eb 0c 44 89 RIP [] setup_node_bootmem+0x18a/0x1ea RSP CR2: 0000000000000000 ---[ end trace a7919e7f17c0a725 ]--- Kernel panic - not syncing: Attempted to kill the idle task! Pid: 0, comm: swapper Tainted: G D 2.6.39-0-virtual #6~smb1 Reported-by: Stefan Bader Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 55c965b..cf4ef61 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1463,119 +1463,6 @@ static int xen_pgd_alloc(struct mm_struct *mm) return ret; } -#ifdef CONFIG_X86_64 -static __initdata u64 __last_pgt_set_rw = 0; -static __initdata u64 __pgt_buf_start = 0; -static __initdata u64 __pgt_buf_end = 0; -static __initdata u64 __pgt_buf_top = 0; -/* - * As a consequence of the commit: - * - * commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e - * Author: Yinghai Lu - * Date: Fri Dec 17 16:58:28 2010 -0800 - * - * x86-64, mm: Put early page table high - * - * at some point init_memory_mapping is going to reach the pagetable pages - * area and map those pages too (mapping them as normal memory that falls - * in the range of addresses passed to init_memory_mapping as argument). - * Some of those pages are already pagetable pages (they are in the range - * pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and - * everything is fine. 
- * Some of these pages are not pagetable pages yet (they fall in the range - * pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they - * are going to be mapped RW. When these pages become pagetable pages and - * are hooked into the pagetable, xen will find that the guest has already - * a RW mapping of them somewhere and fail the operation. - * The reason Xen requires pagetables to be RO is that the hypervisor needs - * to verify that the pagetables are valid before using them. The validation - * operations are called "pinning". - * - * In order to fix the issue we mark all the pages in the entire range - * pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation - * is completed only the range pgt_buf_start-pgt_buf_end is reserved by - * init_memory_mapping. Hence the kernel is going to crash as soon as one - * of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those - * ranges are RO). - * - * For this reason, 'mark_rw_past_pgt' is introduced which is called _after_ - * the init_memory_mapping has completed (in a perfect world we would - * call this function from init_memory_mapping, but lets ignore that). - * - * Because we are called _after_ init_memory_mapping the pgt_buf_[start, - * end,top] have all changed to new values (b/c init_memory_mapping - * is called and setting up another new page-table). Hence, the first time - * we enter this function, we save away the pgt_buf_start value and update - * the pgt_buf_[end,top]. - * - * When we detect that the "old" pgt_buf_start through pgt_buf_end - * PFNs have been reserved (so memblock_x86_reserve_range has been called), - * we immediately set out to RW the "old" pgt_buf_end through pgt_buf_top. - * - * And then we update those "old" pgt_buf_[end|top] with the new ones - * so that we can redo this on the next pagetable. - */ -static __init void mark_rw_past_pgt(void) { - - if (pgt_buf_end > pgt_buf_start) { - u64 addr, size; - - /* Save it away. 
*/ - if (!__pgt_buf_start) { - __pgt_buf_start = pgt_buf_start; - __pgt_buf_end = pgt_buf_end; - __pgt_buf_top = pgt_buf_top; - return; - } - /* If we get the range that starts at __pgt_buf_end that means - * the range is reserved, and that in 'init_memory_mapping' - * the 'memblock_x86_reserve_range' has been called with the - * outdated __pgt_buf_start, __pgt_buf_end (the "new" - * pgt_buf_[start|end|top] refer now to a new pagetable. - * Note: we are called _after_ the pgt_buf_[..] have been - * updated.*/ - - addr = memblock_x86_find_in_range_size(PFN_PHYS(__pgt_buf_start), - &size, PAGE_SIZE); - - /* Still not reserved, meaning 'memblock_x86_reserve_range' - * hasn't been called yet. Update the _end and _top.*/ - if (addr == PFN_PHYS(__pgt_buf_start)) { - __pgt_buf_end = pgt_buf_end; - __pgt_buf_top = pgt_buf_top; - return; - } - - /* OK, the area is reserved, meaning it is time for us to - * set RW for the old end->top PFNs. */ - - /* ..unless we had already done this. */ - if (__pgt_buf_end == __last_pgt_set_rw) - return; - - addr = PFN_PHYS(__pgt_buf_end); - - /* set as RW the rest */ - printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", - PFN_PHYS(__pgt_buf_end), PFN_PHYS(__pgt_buf_top)); - - while (addr < PFN_PHYS(__pgt_buf_top)) { - make_lowmem_page_readwrite(__va(addr)); - addr += PAGE_SIZE; - } - /* And update everything so that we are ready for the next - * pagetable (the one created for regions past 4GB) */ - __last_pgt_set_rw = __pgt_buf_end; - __pgt_buf_start = pgt_buf_start; - __pgt_buf_end = pgt_buf_end; - __pgt_buf_top = pgt_buf_top; - } - return; -} -#else -static __init void mark_rw_past_pgt(void) { } -#endif static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) { #ifdef CONFIG_X86_64 @@ -1602,14 +1489,6 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) unsigned long pfn = pte_pfn(pte); /* - * A bit of optimization. We do not need to call the workaround - * when xen_set_pte_init is called with a PTE with 0 as PFN. 
- * That is b/c the pagetable at that point are just being populated - * with empty values and we can save some cycles by not calling - * the 'memblock' code.*/ - if (pfn) - mark_rw_past_pgt(); - /* * If the new pfn is within the range of the newly allocated * kernel pagetable, and it isn't being mapped into an * early_ioremap fixmap slot as a freshly allocated page, make sure @@ -2118,8 +1997,6 @@ __init void xen_ident_map_ISA(void) static __init void xen_post_allocator_init(void) { - mark_rw_past_pgt(); - #ifdef CONFIG_XEN_DEBUG pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); #endif -- cgit v0.10.2 From 279b706bf800b5967037f492dbe4fc5081ad5d0f Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 14 Apr 2011 15:49:41 +0100 Subject: x86,xen: introduce x86_init.mapping.pagetable_reserve Introduce a new x86_init hook called pagetable_reserve that at the end of init_memory_mapping is used to reserve a range of memory addresses for the kernel pagetable pages we used and free the other ones. On native it just calls memblock_x86_reserve_range while on xen it also takes care of setting the spare memory previously allocated for kernel pagetable pages from RO to RW, so that it can be used for other purposes. A detailed explanation of the reason why this hook is needed follows. As a consequence of the commit: commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e Author: Yinghai Lu Date: Fri Dec 17 16:58:28 2010 -0800 x86-64, mm: Put early page table high at some point init_memory_mapping is going to reach the pagetable pages area and map those pages too (mapping them as normal memory that falls in the range of addresses passed to init_memory_mapping as argument). Some of those pages are already pagetable pages (they are in the range pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and everything is fine. 
Some of these pages are not pagetable pages yet (they fall in the range pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they are going to be mapped RW. When these pages become pagetable pages and are hooked into the pagetable, xen will find that the guest has already a RW mapping of them somewhere and fail the operation. The reason Xen requires pagetables to be RO is that the hypervisor needs to verify that the pagetables are valid before using them. The validation operations are called "pinning" (more details in arch/x86/xen/mmu.c). In order to fix the issue we mark all the pages in the entire range pgt_buf_start-pgt_buf_top as RO, however when the pagetable allocation is completed only the range pgt_buf_start-pgt_buf_end is reserved by init_memory_mapping. Hence the kernel is going to crash as soon as one of the pages in the range pgt_buf_end-pgt_buf_top is reused (b/c those ranges are RO). For this reason we need a hook to reserve the kernel pagetable pages we used and free the other ones so that they can be reused for other purposes. On native it just means calling memblock_x86_reserve_range, on Xen it also means marking RW the pagetable pages that we allocated before but that haven't been used before. Another way to fix this without using the hook is by adding an 'if (xen_pv_domain)' in the 'init_memory_mapping' code and calling the Xen counterpart, but that is just nasty. Signed-off-by: Stefano Stabellini Acked-by: Yinghai Lu Acked-by: H. Peter Anvin Cc: Ingo Molnar Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 7db7723..d56187c 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -299,6 +299,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, /* Install a pte for a particular vaddr in kernel space.
*/ void set_pte_vaddr(unsigned long vaddr, pte_t pte); +extern void native_pagetable_reserve(u64 start, u64 end); #ifdef CONFIG_X86_32 extern void native_pagetable_setup_start(pgd_t *base); extern void native_pagetable_setup_done(pgd_t *base); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 643ebf2..d3d8590 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -68,6 +68,17 @@ struct x86_init_oem { }; /** + * struct x86_init_mapping - platform specific initial kernel pagetable setup + * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage + * + * For more details on the purpose of this hook, look in + * init_memory_mapping and the commit that added it. + */ +struct x86_init_mapping { + void (*pagetable_reserve)(u64 start, u64 end); +}; + +/** * struct x86_init_paging - platform specific paging functions * @pagetable_setup_start: platform specific pre paging_init() call * @pagetable_setup_done: platform specific post paging_init() call @@ -123,6 +134,7 @@ struct x86_init_ops { struct x86_init_mpparse mpparse; struct x86_init_irqs irqs; struct x86_init_oem oem; + struct x86_init_mapping mapping; struct x86_init_paging paging; struct x86_init_timers timers; struct x86_init_iommu iommu; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index c11514e..75ef4b1 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -61,6 +61,10 @@ struct x86_init_ops x86_init __initdata = { .banner = default_banner, }, + .mapping = { + .pagetable_reserve = native_pagetable_reserve, + }, + .paging = { .pagetable_setup_start = native_pagetable_setup_start, .pagetable_setup_done = native_pagetable_setup_done, diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 286d289..722a4c3 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -81,6 +81,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse, end, pgt_buf_start << 
PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); } +void native_pagetable_reserve(u64 start, u64 end) +{ + memblock_x86_reserve_range(start, end, "PGTABLE"); +} + struct map_range { unsigned long start; unsigned long end; @@ -272,9 +277,24 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, __flush_tlb_all(); + /* + * Reserve the kernel pagetable pages we used (pgt_buf_start - + * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) + * so that they can be reused for other purposes. + * + * On native it just means calling memblock_x86_reserve_range, on Xen it + * also means marking RW the pagetable pages that we allocated before + * but that haven't been used. + * + * In fact on xen we mark RO the whole range pgt_buf_start - + * pgt_buf_top, because we have to make sure that when + * init_memory_mapping reaches the pagetable pages area, it maps + * RO all the pagetable pages, including the ones that are beyond + * pgt_buf_end at that time. + */ if (!after_bootmem && pgt_buf_end > pgt_buf_start) - memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT, - pgt_buf_end << PAGE_SHIFT, "PGTABLE"); + x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start), + PFN_PHYS(pgt_buf_end)); if (!after_bootmem) early_memtest(start, end); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index cf4ef61..0684f3c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base) { } +static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) +{ + /* reserve the range used */ + native_pagetable_reserve(start, end); + + /* set as RW the rest */ + printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end, + PFN_PHYS(pgt_buf_top)); + while (end < PFN_PHYS(pgt_buf_top)) { + make_lowmem_page_readwrite(__va(end)); + end += PAGE_SIZE; + } +} + static void xen_post_allocator_init(void); static __init void xen_pagetable_setup_done(pgd_t *base) @@ -2105,6 +2119,7 @@ static const 
struct pv_mmu_ops xen_mmu_ops __initdata = { void __init xen_init_mmu_ops(void) { + x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; pv_mmu_ops = xen_mmu_ops; -- cgit v0.10.2 From 53f8023febf9b3e18d8fb0d99c55010e473ce53d Mon Sep 17 00:00:00 2001 From: Sedat Dilek Date: Sun, 17 Apr 2011 16:17:34 +0200 Subject: x86/mm: Fix section mismatch derived from native_pagetable_reserve() With CONFIG_DEBUG_SECTION_MISMATCH=y I see these warnings in next-20110415: LD vmlinux.o MODPOST vmlinux.o WARNING: vmlinux.o(.text+0x1ba48): Section mismatch in reference from the function native_pagetable_reserve() to the function .init.text:memblock_x86_reserve_range() The function native_pagetable_reserve() references the function __init memblock_x86_reserve_range(). This is often because native_pagetable_reserve lacks a __init annotation or the annotation of memblock_x86_reserve_range is wrong. This patch fixes the issue. Thanks to pipacs from PaX project for help on IRC. Acked-by: "H. Peter Anvin" Signed-off-by: Sedat Dilek Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 722a4c3..37b8b0f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -81,7 +81,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse, end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); } -void native_pagetable_reserve(u64 start, u64 end) +void __init native_pagetable_reserve(u64 start, u64 end) { memblock_x86_reserve_range(start, end, "PGTABLE"); } -- cgit v0.10.2 From 982b2035d9d7033f63db187bac55e9d8998b0266 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 12 May 2011 12:19:43 -0700 Subject: Revert "drm/i915: Only enable the plane after setting the fb base (pre-ILK)" This reverts commit 49183b2818de6899383bb82bc032f9344d6791ff. 
Quoth Franz Melchior: "This patch introduces a bug on my infamous "Acer Travelmate 5735Z-452G32Mnss": when KMS takes over, the frame buffer contents get completely garbled up on screen, with colored stripes and unreadable text (photo on request). Only when X11 is started, the screen gets restored again. Closing and re-opening the lid partly cures the mess, too: it makes the font readable, though horizontally stretched." Acked-by: Keith Packard Cc: Chris Wilson Cc: Daniel Vetter Cc: Jesse Barnes Signed-off-by: Linus Torvalds diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 373c2a0..2166ee0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5154,6 +5154,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, I915_WRITE(DSPCNTR(plane), dspcntr); POSTING_READ(DSPCNTR(plane)); + if (!HAS_PCH_SPLIT(dev)) + intel_enable_plane(dev_priv, plane, pipe); ret = intel_pipe_set_base(crtc, x, y, old_fb); -- cgit v0.10.2 From 93826c092c385549c04af184fbebd43f36995c69 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 7 Apr 2011 14:46:59 -0400 Subject: SELinux: delete debugging printks from filename_trans rule processing The filename_trans rule processing has some printk(KERN_ERR ) messages which were intended as debug aids in creating the code but weren't removed before it was submitted. Remove them. 
Reported-by: Paul Bolle Signed-off-by: Eric Paris diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index e6e7ce0..7102457 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -1819,8 +1819,6 @@ static int filename_trans_read(struct policydb *p, void *fp) goto out; nel = le32_to_cpu(buf[0]); - printk(KERN_ERR "%s: nel=%d\n", __func__, nel); - last = p->filename_trans; while (last && last->next) last = last->next; @@ -1857,8 +1855,6 @@ static int filename_trans_read(struct policydb *p, void *fp) goto out; name[len] = 0; - printk(KERN_ERR "%s: ft=%p ft->name=%p ft->name=%s\n", __func__, ft, ft->name, ft->name); - rc = next_entry(buf, fp, sizeof(u32) * 4); if (rc) goto out; -- cgit v0.10.2 From cf7e032fc87d59c475df26c4d40bf45d401b2adb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 12 May 2011 09:11:38 +0000 Subject: zorro8390: Fix regression caused during net_device_ops conversion Changeset b6114794a1c394534659f4a17420e48cf23aa922 ("zorro8390: convert to net_device_ops") broke zorro8390 by adding 8390.o to the link. That meant that lib8390.c was included twice, once in zorro8390.c and once in 8390.c, subject to different macros. This patch reverts that by avoiding the wrappers in 8390.c. Fix based on commits 217cbfa856dc1cbc2890781626c4032d9e3ec59f ("mac8390: fix regression caused during net_device_ops conversion") and 4e0168fa4842e27795a75b205a510f25b62181d9 ("mac8390: fix build with NET_POLL_CONTROLLER"). Reported-by: Christian T. Steigies Suggested-by: Finn Thain Signed-off-by: Geert Uytterhoeven Tested-by: Christian T. Steigies Cc: stable@kernel.org Signed-off-by: David S. 
Miller diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 01b604a..c64675f 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -219,7 +219,7 @@ obj-$(CONFIG_SC92031) += sc92031.o obj-$(CONFIG_LP486E) += lp486e.o obj-$(CONFIG_ETH16I) += eth16i.o -obj-$(CONFIG_ZORRO8390) += zorro8390.o 8390.o +obj-$(CONFIG_ZORRO8390) += zorro8390.o obj-$(CONFIG_HPLANCE) += hplance.o 7990.o obj-$(CONFIG_MVME147_NET) += mvme147.o 7990.o obj-$(CONFIG_EQUALIZER) += eql.o diff --git a/drivers/net/zorro8390.c b/drivers/net/zorro8390.c index b78a38d..8c7c522 100644 --- a/drivers/net/zorro8390.c +++ b/drivers/net/zorro8390.c @@ -126,7 +126,7 @@ static int __devinit zorro8390_init_one(struct zorro_dev *z, board = z->resource.start; ioaddr = board+cards[i].offset; - dev = alloc_ei_netdev(); + dev = ____alloc_ei_netdev(0); if (!dev) return -ENOMEM; if (!request_mem_region(ioaddr, NE_IO_EXTENT*2, DRV_NAME)) { @@ -146,15 +146,15 @@ static int __devinit zorro8390_init_one(struct zorro_dev *z, static const struct net_device_ops zorro8390_netdev_ops = { .ndo_open = zorro8390_open, .ndo_stop = zorro8390_close, - .ndo_start_xmit = ei_start_xmit, - .ndo_tx_timeout = ei_tx_timeout, - .ndo_get_stats = ei_get_stats, - .ndo_set_multicast_list = ei_set_multicast_list, + .ndo_start_xmit = __ei_start_xmit, + .ndo_tx_timeout = __ei_tx_timeout, + .ndo_get_stats = __ei_get_stats, + .ndo_set_multicast_list = __ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ei_poll, + .ndo_poll_controller = __ei_poll, #endif }; -- cgit v0.10.2 From 0b25e0157dfa236a0629c16c8ad6f222f633f682 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 12 May 2011 09:11:39 +0000 Subject: hydra: Fix regression caused during net_device_ops conversion Changeset 5618f0d1193d6b051da9b59b0e32ad24397f06a4 ("hydra: convert to net_device_ops") broke hydra by adding 8390.o to 
the link. That meant that lib8390.c was included twice, once in hydra.c and once in 8390.c, subject to different macros. This patch reverts that by avoiding the wrappers in 8390.c. Fix based on commits 217cbfa856dc1cbc2890781626c4032d9e3ec59f ("mac8390: fix regression caused during net_device_ops conversion") and 4e0168fa4842e27795a75b205a510f25b62181d9 ("mac8390: fix build with NET_POLL_CONTROLLER"). Signed-off-by: Geert Uytterhoeven Cc: stable@kernel.org Signed-off-by: David S. Miller diff --git a/drivers/net/Makefile b/drivers/net/Makefile index c64675f..4d2f094 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -231,7 +231,7 @@ obj-$(CONFIG_SGI_IOC3_ETH) += ioc3-eth.o obj-$(CONFIG_DECLANCE) += declance.o obj-$(CONFIG_ATARILANCE) += atarilance.o obj-$(CONFIG_A2065) += a2065.o -obj-$(CONFIG_HYDRA) += hydra.o 8390.o +obj-$(CONFIG_HYDRA) += hydra.o obj-$(CONFIG_ARIADNE) += ariadne.o obj-$(CONFIG_CS89x0) += cs89x0.o obj-$(CONFIG_MACSONIC) += macsonic.o diff --git a/drivers/net/hydra.c b/drivers/net/hydra.c index c5ef62c..1cd481c 100644 --- a/drivers/net/hydra.c +++ b/drivers/net/hydra.c @@ -98,15 +98,15 @@ static const struct net_device_ops hydra_netdev_ops = { .ndo_open = hydra_open, .ndo_stop = hydra_close, - .ndo_start_xmit = ei_start_xmit, - .ndo_tx_timeout = ei_tx_timeout, - .ndo_get_stats = ei_get_stats, - .ndo_set_multicast_list = ei_set_multicast_list, + .ndo_start_xmit = __ei_start_xmit, + .ndo_tx_timeout = __ei_tx_timeout, + .ndo_get_stats = __ei_get_stats, + .ndo_set_multicast_list = __ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ei_poll, + .ndo_poll_controller = __ei_poll, #endif }; @@ -125,7 +125,7 @@ static int __devinit hydra_init(struct zorro_dev *z) 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, }; - dev = alloc_ei_netdev(); + dev = 
____alloc_ei_netdev(0); if (!dev) return -ENOMEM; -- cgit v0.10.2 From 2592a7354092afd304a8c067319b15ab1e441e35 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 12 May 2011 09:11:40 +0000 Subject: ne-h8300: Fix regression caused during net_device_ops conversion Changeset dcd39c90290297f6e6ed8a04bb20da7ac2b043c5 ("ne-h8300: convert to net_device_ops") broke ne-h8300 by adding 8390.o to the link. That meant that lib8390.c was included twice, once in ne-h8300.c and once in 8390.c, subject to different macros. This patch reverts that by avoiding the wrappers in 8390.c. Fix based on commits 217cbfa856dc1cbc2890781626c4032d9e3ec59f ("mac8390: fix regression caused during net_device_ops conversion") and 4e0168fa4842e27795a75b205a510f25b62181d9 ("mac8390: fix build with NET_POLL_CONTROLLER"). Signed-off-by: Geert Uytterhoeven Cc: stable@kernel.org Signed-off-by: David S. Miller diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 4d2f094..e5a7375 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -144,7 +144,7 @@ obj-$(CONFIG_NE3210) += ne3210.o 8390.o obj-$(CONFIG_SB1250_MAC) += sb1250-mac.o obj-$(CONFIG_B44) += b44.o obj-$(CONFIG_FORCEDETH) += forcedeth.o -obj-$(CONFIG_NE_H8300) += ne-h8300.o 8390.o +obj-$(CONFIG_NE_H8300) += ne-h8300.o obj-$(CONFIG_AX88796) += ax88796.o obj-$(CONFIG_BCM63XX_ENET) += bcm63xx_enet.o obj-$(CONFIG_FTMAC100) += ftmac100.o diff --git a/drivers/net/ne-h8300.c b/drivers/net/ne-h8300.c index 30be8c6..7298a34 100644 --- a/drivers/net/ne-h8300.c +++ b/drivers/net/ne-h8300.c @@ -167,7 +167,7 @@ static void cleanup_card(struct net_device *dev) #ifndef MODULE struct net_device * __init ne_probe(int unit) { - struct net_device *dev = alloc_ei_netdev(); + struct net_device *dev = ____alloc_ei_netdev(0); int err; if (!dev) @@ -197,15 +197,15 @@ static const struct net_device_ops ne_netdev_ops = { .ndo_open = ne_open, .ndo_stop = ne_close, - .ndo_start_xmit = ei_start_xmit, - .ndo_tx_timeout = ei_tx_timeout, - 
.ndo_get_stats = ei_get_stats, - .ndo_set_multicast_list = ei_set_multicast_list, + .ndo_start_xmit = __ei_start_xmit, + .ndo_tx_timeout = __ei_tx_timeout, + .ndo_get_stats = __ei_get_stats, + .ndo_set_multicast_list = __ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ei_poll, + .ndo_poll_controller = __ei_poll, #endif }; @@ -637,7 +637,7 @@ int init_module(void) int err; for (this_dev = 0; this_dev < MAX_NE_CARDS; this_dev++) { - struct net_device *dev = alloc_ei_netdev(); + struct net_device *dev = ____alloc_ei_netdev(0); if (!dev) break; if (io[this_dev]) { -- cgit v0.10.2 From 77ed23f8d995a01cd8101d84351b567bf5177a30 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Tue, 10 May 2011 08:26:43 -0500 Subject: x86: Fix UV BAU for non-consecutive nasids This is a fix for the SGI Altix-UV Broadcast Assist Unit code, which is used for TLB flushing. Certain hardware configurations (that customers are ordering) cause nasids (numa address space id's) to be non-consecutive. Specifically, once you have more than 4 blades in a IRU (Individual Rack Unit - or 1/2 rack) but less than the maximum of 16, the nasid numbering becomes non-consecutive. This currently results in a 'catastrophic error' (CATERR) detected by the firmware during OS boot. The BAU is generating an 'INTD' request that is targeting a non-existent nasid value. Such configurations may also occur when a blade is configured off because of hardware errors. (There is one UV hub per blade.) This patch is required to support such configurations. The problem with the tlb_uv.c code is that is using the consecutive hub numbers as indices to the BAU distribution bit map. These are simply the ordinal position of the hub or blade within its partition. 
It should be using physical node numbers (pnodes), which correspond to the physical nasid values. Use of the hub number only works as long as the nasids in the partition are consecutive and increase with a stride of 1. This patch changes the index to be the pnode number, thus allowing nasids to be non-consecutive. It also provides a table in local memory for each cpu to translate target cpu number to target pnode and nasid. And it improves naming to properly reflect 'node' and 'uvhub' versus 'nasid'. Signed-off-by: Cliff Wickman Cc: Link: http://lkml.kernel.org/r/E1QJmxX-0002Mz-Fk@eag09.americas.sgi.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 3e094af..130f1ee 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -94,6 +94,8 @@ /* after this # consecutive successes, bump up the throttle if it was lowered */ #define COMPLETE_THRESHOLD 5 +#define UV_LB_SUBNODEID 0x10 + /* * number of entries in the destination side payload queue */ @@ -124,7 +126,7 @@ * The distribution specification (32 bytes) is interpreted as a 256-bit * distribution vector. Adjacent bits correspond to consecutive even numbered * nodeIDs. The result of adding the index of a given bit to the 15-bit - * 'base_dest_nodeid' field of the header corresponds to the + * 'base_dest_nasid' field of the header corresponds to the * destination nodeID associated with that specified bit. 
*/ struct bau_target_uvhubmask { @@ -176,7 +178,7 @@ struct bau_msg_payload { struct bau_msg_header { unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ /* bits 5:0 */ - unsigned int base_dest_nodeid:15; /* nasid of the */ + unsigned int base_dest_nasid:15; /* nasid of the */ /* bits 20:6 */ /* first bit in uvhub map */ unsigned int command:8; /* message type */ /* bits 28:21 */ @@ -378,6 +380,10 @@ struct ptc_stats { unsigned long d_rcanceled; /* number of messages canceled by resets */ }; +struct hub_and_pnode { + short uvhub; + short pnode; +}; /* * one per-cpu; to locate the software tables */ @@ -399,10 +405,12 @@ struct bau_control { int baudisabled; int set_bau_off; short cpu; + short osnode; short uvhub_cpu; short uvhub; short cpus_in_socket; short cpus_in_uvhub; + short partition_base_pnode; unsigned short message_number; unsigned short uvhub_quiesce; short socket_acknowledge_count[DEST_Q_SIZE]; @@ -422,15 +430,16 @@ struct bau_control { int congested_period; cycles_t period_time; long period_requests; + struct hub_and_pnode *target_hub_and_pnode; }; static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) { return constant_test_bit(uvhub, &dstp->bits[0]); } -static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp) +static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp) { - __set_bit(uvhub, &dstp->bits[0]); + __set_bit(pnode, &dstp->bits[0]); } static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, int nbits) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 7cb6424..c58e0ea 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -699,16 +699,17 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long va, unsigned int cpu) { - int tcpu; - int uvhub; int locals = 0; int remotes = 0; int hubs = 0; + int tcpu; + int tpnode; struct bau_desc *bau_desc; 
struct cpumask *flush_mask; struct ptc_stats *stat; struct bau_control *bcp; struct bau_control *tbcp; + struct hub_and_pnode *hpp; /* kernel was booted 'nobau' */ if (nobau) @@ -750,11 +751,18 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); - /* cpu statistics */ for_each_cpu(tcpu, flush_mask) { - uvhub = uv_cpu_to_blade_id(tcpu); - bau_uvhub_set(uvhub, &bau_desc->distribution); - if (uvhub == bcp->uvhub) + /* + * The distribution vector is a bit map of pnodes, relative + * to the partition base pnode (and the partition base nasid + * in the header). + * Translate cpu to pnode and hub using an array stored + * in local memory. + */ + hpp = &bcp->socket_master->target_hub_and_pnode[tcpu]; + tpnode = hpp->pnode - bcp->partition_base_pnode; + bau_uvhub_set(tpnode, &bau_desc->distribution); + if (hpp->uvhub == bcp->uvhub) locals++; else remotes++; @@ -855,7 +863,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs) * an interrupt, but causes an error message to be returned to * the sender. */ -static void uv_enable_timeouts(void) +static void __init uv_enable_timeouts(void) { int uvhub; int nuvhubs; @@ -1326,10 +1334,10 @@ static int __init uv_ptc_init(void) } /* - * initialize the sending side's sending buffers + * Initialize the sending side's sending buffers. 
*/ static void -uv_activation_descriptor_init(int node, int pnode) +uv_activation_descriptor_init(int node, int pnode, int base_pnode) { int i; int cpu; @@ -1352,11 +1360,11 @@ uv_activation_descriptor_init(int node, int pnode) n = pa >> uv_nshift; m = pa & uv_mmask; + /* the 14-bit pnode */ uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, (n << UV_DESC_BASE_PNODE_SHIFT | m)); - /* - * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each + * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each * cpu even though we only use the first one; one descriptor can * describe a broadcast to 256 uv hubs. */ @@ -1365,12 +1373,13 @@ uv_activation_descriptor_init(int node, int pnode) memset(bd2, 0, sizeof(struct bau_desc)); bd2->header.sw_ack_flag = 1; /* - * base_dest_nodeid is the nasid of the first uvhub - * in the partition. The bit map will indicate uvhub numbers, - * which are 0-N in a partition. Pnodes are unique system-wide. + * The base_dest_nasid set in the message header is the nasid + * of the first uvhub in the partition. The bit map will + * indicate destination pnode numbers relative to that base. + * They may not be consecutive if nasid striding is being used. 
*/ - bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode); - bd2->header.dest_subnodeid = 0x10; /* the LB */ + bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); + bd2->header.dest_subnodeid = UV_LB_SUBNODEID; bd2->header.command = UV_NET_ENDPOINT_INTD; bd2->header.int_both = 1; /* @@ -1442,7 +1451,7 @@ uv_payload_queue_init(int node, int pnode) /* * Initialization of each UV hub's structures */ -static void __init uv_init_uvhub(int uvhub, int vector) +static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode) { int node; int pnode; @@ -1450,11 +1459,11 @@ static void __init uv_init_uvhub(int uvhub, int vector) node = uvhub_to_first_node(uvhub); pnode = uv_blade_to_pnode(uvhub); - uv_activation_descriptor_init(node, pnode); + uv_activation_descriptor_init(node, pnode, base_pnode); uv_payload_queue_init(node, pnode); /* - * the below initialization can't be in firmware because the - * messaging IRQ will be determined by the OS + * The below initialization can't be in firmware because the + * messaging IRQ will be determined by the OS. 
*/ apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, @@ -1491,10 +1500,11 @@ calculate_destination_timeout(void) /* * initialize the bau_control structure for each cpu */ -static int __init uv_init_per_cpu(int nuvhubs) +static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode) { int i; int cpu; + int tcpu; int pnode; int uvhub; int have_hmaster; @@ -1528,6 +1538,15 @@ static int __init uv_init_per_cpu(int nuvhubs) bcp = &per_cpu(bau_control, cpu); memset(bcp, 0, sizeof(struct bau_control)); pnode = uv_cpu_hub_info(cpu)->pnode; + if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) { + printk(KERN_EMERG + "cpu %d pnode %d-%d beyond %d; BAU disabled\n", + cpu, pnode, base_part_pnode, + UV_DISTRIBUTION_SIZE); + return 1; + } + bcp->osnode = cpu_to_node(cpu); + bcp->partition_base_pnode = uv_partition_base_pnode; uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); bdp = &uvhub_descs[uvhub]; @@ -1536,7 +1555,7 @@ static int __init uv_init_per_cpu(int nuvhubs) bdp->pnode = pnode; /* kludge: 'assuming' one node per socket, and assuming that disabling a socket just leaves a gap in node numbers */ - socket = (cpu_to_node(cpu) & 1); + socket = bcp->osnode & 1; bdp->socket_mask |= (1 << socket); sdp = &bdp->socket[socket]; sdp->cpu_number[sdp->num_cpus] = cpu; @@ -1585,6 +1604,20 @@ static int __init uv_init_per_cpu(int nuvhubs) nextsocket: socket++; socket_mask = (socket_mask >> 1); + /* each socket gets a local array of pnodes/hubs */ + bcp = smaster; + bcp->target_hub_and_pnode = kmalloc_node( + sizeof(struct hub_and_pnode) * + num_possible_cpus(), GFP_KERNEL, bcp->osnode); + memset(bcp->target_hub_and_pnode, 0, + sizeof(struct hub_and_pnode) * + num_possible_cpus()); + for_each_present_cpu(tcpu) { + bcp->target_hub_and_pnode[tcpu].pnode = + uv_cpu_hub_info(tcpu)->pnode; + bcp->target_hub_and_pnode[tcpu].uvhub = + uv_cpu_hub_info(tcpu)->numa_blade_id; + } } } 
kfree(uvhub_descs); @@ -1637,21 +1670,22 @@ static int __init uv_bau_init(void) spin_lock_init(&disable_lock); congested_cycles = microsec_2_cycles(congested_response_us); - if (uv_init_per_cpu(nuvhubs)) { - nobau = 1; - return 0; - } - uv_partition_base_pnode = 0x7fffffff; - for (uvhub = 0; uvhub < nuvhubs; uvhub++) + for (uvhub = 0; uvhub < nuvhubs; uvhub++) { if (uv_blade_nr_possible_cpus(uvhub) && (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) uv_partition_base_pnode = uv_blade_to_pnode(uvhub); + } + + if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) { + nobau = 1; + return 0; + } vector = UV_BAU_MESSAGE; for_each_possible_blade(uvhub) if (uv_blade_nr_possible_cpus(uvhub)) - uv_init_uvhub(uvhub, vector); + uv_init_uvhub(uvhub, vector, uv_partition_base_pnode); uv_enable_timeouts(); alloc_intr_gate(vector, uv_bau_message_intr1); -- cgit v0.10.2 From 1b0bcbcf62884959fa7214eb16c44cff445691c6 Mon Sep 17 00:00:00 2001 From: Pedro Scarapicchia Junior Date: Mon, 9 May 2011 14:10:49 +0000 Subject: net/9p/protocol.c: Fix a memory leak When p9pdu_readf() is called with "s" attribute, it allocates a pointer that will store a string. In p9dirent_read(), this pointer is not being released, leading to out of memory errors. This patch releases this pointer after string is copied to dirent->d_name. 
Signed-off-by: Pedro Scarapicchia Junior Signed-off-by: Eric Van Hensbergen diff --git a/net/9p/protocol.c b/net/9p/protocol.c index b58a501..a873277 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -674,6 +674,7 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, } strcpy(dirent->d_name, nameptr); + kfree(nameptr); out: return fake_pdu.offset; -- cgit v0.10.2 From 411f05f123cbd7f8aa1edcae86970755a6e2a9d9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 12 May 2011 23:00:28 +0200 Subject: vsprintf: Turn kptr_restrict off by default kptr_restrict has been triggering bugs in apps such as perf, and it also makes the system less useful by default, so turn it off by default. This is how we generally handle security features that remove functionality, such as firewall code or SELinux - they have to be configured and activated from user-space. Distributions can turn kptr_restrict on again via this line in /etc/sysctl.conf: kernel.kptr_restrict = 1 ( Also mark the variable __read_mostly while at it, as it's typically modified only once per bootup, or not at all. ) Signed-off-by: Ingo Molnar Acked-by: David S. Miller Signed-off-by: Linus Torvalds diff --git a/lib/vsprintf.c b/lib/vsprintf.c index bc0ac6b..dfd6019 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -797,7 +797,7 @@ char *uuid_string(char *buf, char *end, const u8 *addr, return string(buf, end, uuid, spec); } -int kptr_restrict = 1; +int kptr_restrict __read_mostly; /* * Show a '%p' thing. A kernel extension is that the '%p' is followed -- cgit v0.10.2 From ca06707022d6ba4744198a8ebbe4994786b0c613 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Fri, 6 May 2011 23:44:46 +0000 Subject: ipv6: restore correct ECN handling on TCP xmit Since commit e9df2e8fd8fbc9 (Use appropriate sock tclass setting for routing lookup) we lost ability to properly add ECN codemarks to ipv6 TCP frames. 
It seems like TCP_ECN_send() calls INET_ECN_xmit(), which only sets the ECN bit in the IPv4 ToS field (inet_sk(sk)->tos), but after the patch, what's checked is inet6_sk(sk)->tclass, which is a completely different field. Close bug https://bugzilla.kernel.org/show_bug.cgi?id=34322 [Eric Dumazet] : added the INET_ECN_dontxmit() fix and replace macros by inline functions for clarity. Signed-off-by: Steinar H. Gunderson Signed-off-by: Eric Dumazet Cc: YOSHIFUJI Hideaki Cc: Andrew Morton Signed-off-by: David S. Miller diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 88bdd01..2fa8d13 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -38,9 +38,19 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) return outer; } -#define INET_ECN_xmit(sk) do { inet_sk(sk)->tos |= INET_ECN_ECT_0; } while (0) -#define INET_ECN_dontxmit(sk) \ - do { inet_sk(sk)->tos &= ~INET_ECN_MASK; } while (0) +static inline void INET_ECN_xmit(struct sock *sk) +{ + inet_sk(sk)->tos |= INET_ECN_ECT_0; + if (inet6_sk(sk) != NULL) + inet6_sk(sk)->tclass |= INET_ECN_ECT_0; +} + +static inline void INET_ECN_dontxmit(struct sock *sk) +{ + inet_sk(sk)->tos &= ~INET_ECN_MASK; + if (inet6_sk(sk) != NULL) + inet6_sk(sk)->tclass &= ~INET_ECN_MASK; +} #define IP6_ECN_flow_init(label) do { \ (label) &= ~htonl(INET_ECN_MASK << 20); \ -- cgit v0.10.2 From 9ddabb055d73c63037878bb9346e52c7f2e07e96 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 29 Apr 2011 15:30:02 +0200 Subject: i2c: pnx: Fix crash due to wrong init of timer->data alg_data is already a pointer which must be passed directly. 
Reported-by: Dieter Ripp Signed-off-by: Wolfram Sang Cc: Russell King Cc: Ben Dooks Signed-off-by: Ben Dooks diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index a97e3fe..04be9f8 100644 --- a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -65,7 +65,7 @@ static inline void i2c_pnx_arm_timer(struct i2c_pnx_algo_data *alg_data) jiffies, expires); timer->expires = jiffies + expires; - timer->data = (unsigned long)&alg_data; + timer->data = (unsigned long)alg_data; add_timer(timer); } -- cgit v0.10.2 From 11f770027b5c0de16544f3ec82b5c6f9f8d5a644 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 12 May 2011 16:13:54 -0700 Subject: rbd: fix leak of ops struct The ops vector must be freed by the rbd_do_request caller. Signed-off-by: Sage Weil diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3e90471..2146cab 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -996,6 +996,8 @@ static int rbd_do_op(struct request *rq, ops, num_reply, rbd_req_cb, 0, NULL); + + rbd_destroy_ops(ops); done: kfree(seg_name); return ret; @@ -1063,7 +1065,9 @@ static int rbd_req_sync_notify_ack(struct rbd_device *dev, { struct ceph_osd_req_op *ops; struct page **pages = NULL; - int ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0); + int ret; + + ret = rbd_create_rw_ops(&ops, 1, CEPH_OSD_OP_NOTIFY_ACK, 0); if (ret < 0) return ret; -- cgit v0.10.2 From d9282fca8a763be574a2fc20b2edcc6e132cbf90 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 11 May 2011 03:15:24 -0400 Subject: drm/radeon/kms: fix tiling reg on fusion The location of MC_ARB_RAMCFG changed on fusion. I've diffed all the other regs in evergreend.h and this is the only other reg that changed. 
Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index c20eac3..9073e3b 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -1780,7 +1780,10 @@ static void evergreen_gpu_init(struct radeon_device *rdev) mc_shared_chmap = RREG32(MC_SHARED_CHMAP); - mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); + if (rdev->flags & RADEON_IS_IGP) + mc_arb_ramcfg = RREG32(FUS_MC_ARB_RAMCFG); + else + mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); switch (rdev->config.evergreen.max_tile_pipes) { case 1: diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 9453384..fc40e0c 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -200,6 +200,7 @@ #define BURSTLENGTH_SHIFT 9 #define BURSTLENGTH_MASK 0x00000200 #define CHANSIZE_OVERRIDE (1 << 11) +#define FUS_MC_ARB_RAMCFG 0x2768 #define MC_VM_AGP_TOP 0x2028 #define MC_VM_AGP_BOT 0x202C #define MC_VM_AGP_BASE 0x2030 -- cgit v0.10.2 From 05fa7ea7d23980de0014417a0e0af2048a0f9fc1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 11 May 2011 14:02:07 -0400 Subject: drm/radeon/kms: fix extended lvds info parsing On rev <= 1.1 tables, the offset is absolute, on newer tables, it's relative. 
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=700326 Signed-off-by: Alex Deucher Reviewed-by: Jerome Glisse Cc: stable@kernel.org Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index dd881d0..90dfb2b 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -1574,9 +1574,17 @@ struct radeon_encoder_atom_dig *radeon_atombios_get_lvds_info(struct ATOM_FAKE_EDID_PATCH_RECORD *fake_edid_record; ATOM_PANEL_RESOLUTION_PATCH_RECORD *panel_res_record; bool bad_record = false; - u8 *record = (u8 *)(mode_info->atom_context->bios + - data_offset + - le16_to_cpu(lvds_info->info.usModePatchTableOffset)); + u8 *record; + + if ((frev == 1) && (crev < 2)) + /* absolute */ + record = (u8 *)(mode_info->atom_context->bios + + le16_to_cpu(lvds_info->info.usModePatchTableOffset)); + else + /* relative */ + record = (u8 *)(mode_info->atom_context->bios + + data_offset + + le16_to_cpu(lvds_info->info.usModePatchTableOffset)); while (*record != ATOM_RECORD_END_TYPE) { switch (*record) { case LCD_MODE_PATCH_RECORD_MODE_TYPE: -- cgit v0.10.2 From 3a8ab79eae4500e6ac618a92a90cee63d6e804a8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 12 May 2011 21:15:15 -0400 Subject: drm/radeon/kms: add some evergreen/ni safe regs need to be programmed from the userspace drivers. 
Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/radeon/reg_srcs/cayman b/drivers/gpu/drm/radeon/reg_srcs/cayman index 6334f8a..0aa8e85 100644 --- a/drivers/gpu/drm/radeon/reg_srcs/cayman +++ b/drivers/gpu/drm/radeon/reg_srcs/cayman @@ -33,6 +33,7 @@ cayman 0x9400 0x00008E48 SQ_EX_ALLOC_TABLE_SLOTS 0x00009100 SPI_CONFIG_CNTL 0x0000913C SPI_CONFIG_CNTL_1 +0x00009508 TA_CNTL_AUX 0x00009830 DB_DEBUG 0x00009834 DB_DEBUG2 0x00009838 DB_DEBUG3 diff --git a/drivers/gpu/drm/radeon/reg_srcs/evergreen b/drivers/gpu/drm/radeon/reg_srcs/evergreen index 7e16371..0e28cae 100644 --- a/drivers/gpu/drm/radeon/reg_srcs/evergreen +++ b/drivers/gpu/drm/radeon/reg_srcs/evergreen @@ -46,6 +46,7 @@ evergreen 0x9400 0x00008E48 SQ_EX_ALLOC_TABLE_SLOTS 0x00009100 SPI_CONFIG_CNTL 0x0000913C SPI_CONFIG_CNTL_1 +0x00009508 TA_CNTL_AUX 0x00009700 VC_CNTL 0x00009714 VC_ENHANCE 0x00009830 DB_DEBUG -- cgit v0.10.2 From d9a5ac9ef306eb5cc874f285185a15c303c50009 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 13 May 2011 15:52:09 +0200 Subject: x86, mce, AMD: Fix leaving freed data in a list b may be added to a list, but is not removed before being freed in the case of an error. This is done in the corresponding deallocation function, so the code here has been changed to follow that. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression E,E1,E2; identifier l; @@ *list_add(&E->l,E1); ... 
when != E1 when != list_del(&E->l) when != list_del_init(&E->l) when != E = E2 *kfree(E);// Signed-off-by: Julia Lawall Cc: Borislav Petkov Cc: Robert Richter Cc: Yinghai Lu Cc: Andreas Herrmann Cc: Link: http://lkml.kernel.org/r/1305294731-12127-1-git-send-email-julia@diku.dk Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 167f97b..bb0adad 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -509,6 +509,7 @@ recurse: out_free: if (b) { kobject_put(&b->kobj); + list_del(&b->miscj); kfree(b); } return err; -- cgit v0.10.2 From 5d44670facd3205212f8fe89eb422e3b5f309612 Mon Sep 17 00:00:00 2001 From: Marcus Meissner Date: Thu, 5 May 2011 10:44:11 -0700 Subject: ocfs2: Initialize data_ac (might be used uninitialized) CLANG found that there is a path that has data_ac uninitialized, this place 2917 /* This gets us the dx_root */ 2918 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); 2919 if (ret) { 3 Taking true branch 2920 mlog_errno(ret); 2921 goto out; 4 Control jumps to line 3168 2922 } Goes to the out: label without data_ac being initialized. 
Ciao, Marcus Signed-Off-By: Marcus Meissner Signed-off-by: Mark Fasheh Signed-off-by: Joel Becker diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 9fe5b8fd..8582e3f 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2868,7 +2868,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, bytes = blocks_wanted << sb->s_blocksize_bits; struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); struct ocfs2_inode_info *oi = OCFS2_I(dir); - struct ocfs2_alloc_context *data_ac; + struct ocfs2_alloc_context *data_ac = NULL; struct ocfs2_alloc_context *meta_ac = NULL; struct buffer_head *dirdata_bh = NULL; struct buffer_head *dx_root_bh = NULL; -- cgit v0.10.2 From 9a790ba1ec02bbae0933e7ebd576c0bc329e9796 Mon Sep 17 00:00:00 2001 From: Tristan Ye Date: Thu, 12 May 2011 20:47:07 +0800 Subject: ocfs2: skip existing hole when removing the last extent_rec in punching-hole codes. In the case of removing a partial extent record which covers a hole, current punching-hole logic will try to remove more than the length of whole extent record, which leads to the failure of following assert(fs/ocfs2/alloc.c): 5507 BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range); This patch tries to skip existing hole at the last attempt of removing a partial extent record, what's more, it also adds some necessary comments for better understanding of punching-hole codes. Signed-off-by: Tristan Ye Signed-off-by: Joel Becker diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 41565ae..89659d6 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1607,6 +1607,9 @@ static void ocfs2_calc_trunc_pos(struct inode *inode, range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); if (le32_to_cpu(rec->e_cpos) >= trunc_start) { + /* + * remove an entire extent record. + */ *trunc_cpos = le32_to_cpu(rec->e_cpos); /* * Skip holes if any. 
@@ -1617,7 +1620,16 @@ static void ocfs2_calc_trunc_pos(struct inode *inode, *blkno = le64_to_cpu(rec->e_blkno); *trunc_end = le32_to_cpu(rec->e_cpos); } else if (range > trunc_start) { + /* + * remove a partial extent record, which means we're + * removing the last extent record. + */ *trunc_cpos = trunc_start; + /* + * skip hole if any. + */ + if (range < *trunc_end) + *trunc_end = range; *trunc_len = *trunc_end - trunc_start; coff = trunc_start - le32_to_cpu(rec->e_cpos); *blkno = le64_to_cpu(rec->e_blkno) + -- cgit v0.10.2 From 4da6dc293604f55d156148b8f60b94053e3195fc Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 4 May 2011 10:27:10 -0700 Subject: ocfs2/dlm: Use negotiated o2dlm protocol version Patch fixes a bug in the o2dlm protocol negotiation in that it is using the builtin version rather than the negotiated version during the domain join. This causes join errors when a node having kernel >= 2.6.37 joins a cluster with nodes having kernels < 2.6.37. This only affects the o2cb cluster stack. 
Signed-off-by: Sunil Mushran Reported-by: Jacek Stepniewski Acked-by: Mark Fasheh Signed-off-by: Joel Becker diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 7540a49..3b179d6 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1614,7 +1614,8 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) spin_unlock(&dlm->spinlock); /* Support for global heartbeat and node info was added in 1.1 */ - if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) { + if (dlm->dlm_locking_proto.pv_major > 1 || + dlm->dlm_locking_proto.pv_minor > 0) { status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map); if (status) { mlog_errno(status); -- cgit v0.10.2 From 76d9fc2954d057b19bf5d7b854df2b621b00fdec Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 4 May 2011 10:28:00 -0700 Subject: ocfs2/cluster: Increase the live threshold for global heartbeat We have seen isolated cases (very few, I might add) of o2hb not detecting all live nodes on startup. One plausible reasoning for it is that other node had a hb io delay at the same time. The live threshold set at 2 (as low as it can be) could be increased to ameliorate the situation. But increasing the threshold directly affects mount time. Currently it takes around 5 secs to mount a volume in o2cb cluster with local heartbeat. Increasing the threshold will make mounts even slower. As the issue itself is rare, we have left things as they are for the local heartbeat mode. However we can improve the situation for global heartbeat mode as in that mode, we start the heartbeat much before the mount. This patch doubles the live threshold for the start of the first region in global heartbeat mode. Addresses internal Oracle bug#10635585. 
Signed-off-by: Sunil Mushran Acked-by: Mark Fasheh Signed-off-by: Joel Becker diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 6437202..1d28505 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1690,6 +1690,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, struct file *filp = NULL; struct inode *inode = NULL; ssize_t ret = -EINVAL; + int live_threshold; if (reg->hr_bdev) goto out; @@ -1766,8 +1767,18 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, * A node is considered live after it has beat LIVE_THRESHOLD * times. We're not steady until we've given them a chance * _after_ our first read. + * The default threshold is bare minimum so as to limit the delay + * during mounts. For global heartbeat, the threshold doubled for the + * first region. */ - atomic_set(&reg->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1); + live_threshold = O2HB_LIVE_THRESHOLD; + if (o2hb_global_heartbeat_active()) { + spin_lock(&o2hb_live_lock); + if (o2hb_pop_count(&o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1) + live_threshold <<= 1; + spin_unlock(&o2hb_live_lock); + } + atomic_set(&reg->hr_steady_iterations, live_threshold + 1); hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", reg->hr_item.ci_name); -- cgit v0.10.2 From 33c12a5436464f8d4f56d68e5e79e24a3a1f11aa Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 4 May 2011 10:28:01 -0700 Subject: ocfs2/cluster: Heartbeat mismatch message improved If o2hb finds unexpected values in the heartbeat slot, it prints a message "ERROR: Device "dm-6": another node is heartbeating in our slot!" This message could be misleading. This patch adds two more messages to help users better diagnose the problem.
Signed-off-by: Sunil Mushran Acked-by: Mark Fasheh Signed-off-by: Joel Becker diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 1d28505..9a3e6bb 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -539,25 +539,41 @@ static int o2hb_verify_crc(struct o2hb_region *reg, /* We want to make sure that nobody is heartbeating on top of us -- * this will help detect an invalid configuration. */ -static int o2hb_check_last_timestamp(struct o2hb_region *reg) +static void o2hb_check_last_timestamp(struct o2hb_region *reg) { - int node_num, ret; struct o2hb_disk_slot *slot; struct o2hb_disk_heartbeat_block *hb_block; + char *errstr; - node_num = o2nm_this_node(); - - ret = 1; - slot = &reg->hr_slots[node_num]; + slot = &reg->hr_slots[o2nm_this_node()]; /* Don't check on our 1st timestamp */ - if (slot->ds_last_time) { - hb_block = slot->ds_raw_block; + if (!slot->ds_last_time) + return; - if (le64_to_cpu(hb_block->hb_seq) != slot->ds_last_time) - ret = 0; - } + hb_block = slot->ds_raw_block; + if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time && + le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation && + hb_block->hb_node == slot->ds_node_num) + return; - return ret; +#define ERRSTR1 "Another node is heartbeating on device" +#define ERRSTR2 "Heartbeat generation mismatch on device" +#define ERRSTR3 "Heartbeat sequence mismatch on device" + + if (hb_block->hb_node != slot->ds_node_num) + errstr = ERRSTR1; + else if (le64_to_cpu(hb_block->hb_generation) != + slot->ds_last_generation) + errstr = ERRSTR2; + else + errstr = ERRSTR3; + + mlog(ML_ERROR, "%s (%s): expected(%u:0x%llx, 0x%llx), " + "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_dev_name, + slot->ds_node_num, (unsigned long long)slot->ds_last_generation, + (unsigned long long)slot->ds_last_time, hb_block->hb_node, + (unsigned long long)le64_to_cpu(hb_block->hb_generation), + (unsigned long long)le64_to_cpu(hb_block->hb_seq)); } static inline void
o2hb_prepare_block(struct o2hb_region *reg, @@ -983,9 +999,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) /* With an up to date view of the slots, we can check that no * other node has been improperly configured to heartbeat in * our slot. */ - if (!o2hb_check_last_timestamp(reg)) - mlog(ML_ERROR, "Device \"%s\": another node is heartbeating " - "in our slot!\n", reg->hr_dev_name); + o2hb_check_last_timestamp(reg); /* fill in the proper info for our next heartbeat */ o2hb_prepare_block(reg, reg->hr_generation); @@ -999,8 +1013,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) } i = -1; - while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { - + while((i = find_next_bit(configured_nodes, + O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { change |= o2hb_check_slot(reg, &reg->hr_slots[i]); } -- cgit v0.10.2 From 10b3dd76117a327557b8cb898b41c18afd08dc86 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 4 May 2011 10:28:02 -0700 Subject: ocfs2: Skip mount recovery for hard-ro mounts Patch skips mount recovery for hard-ro mounts which otherwise leads to an oops.
Signed-off-by: Sunil Mushran Acked-by: Mark Fasheh Signed-off-by: Joel Becker diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index b141a44..295d564 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1260,6 +1260,9 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) { struct ocfs2_journal *journal = osb->journal; + if (ocfs2_is_hard_readonly(osb)) + return; + /* No need to queue up our truncate_log as regular cleanup will catch * that */ ocfs2_queue_recovery_completion(journal, osb->slot_num, -- cgit v0.10.2 From df016c665b10ae80d8db67ec8103b50c5c234e5c Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 4 May 2011 10:28:07 -0700 Subject: ocfs2/dlm: Target node death during resource migration leads to thread spin During resource migration, if the target node were to die, the thread doing the migration spins until the target node is not removed from the domain map. This patch slows the spin by making the thread wait for the recovery to kick in. Signed-off-by: Sunil Mushran Signed-off-by: Joel Becker diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index fede57e..84d1663 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2574,6 +2574,9 @@ fail: res->state &= ~DLM_LOCK_RES_MIGRATING; wake = 1; spin_unlock(&res->spinlock); + if (dlm_is_host_down(ret)) + dlm_wait_for_node_death(dlm, target, + DLM_NODE_DEATH_WAIT_MAX); goto leave; } -- cgit v0.10.2 From 47a150edc2ae734c0f4bf50aa19499e23b9a46f8 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Fri, 13 May 2011 04:27:54 +0100 Subject: Cache user_ns in struct cred If !CONFIG_USERNS, have current_user_ns() defined to (&init_user_ns). Get rid of _current_user_ns. This requires nsown_capable() to be defined in capability.c rather than as static inline in capability.h, so do that. Request_key needs init_user_ns defined at current_user_ns if !CONFIG_USERNS, so forward-declare that in cred.h if !CONFIG_USERNS at current_user_ns() define. 
Compile-tested with and without CONFIG_USERNS. Signed-off-by: Serge E. Hallyn [ This makes a huge performance difference for acl_permission_check(), up to 30%. And that is one of the hottest kernel functions for loads that are pathname-lookup heavy. ] Signed-off-by: Linus Torvalds diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b4..d4675af 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -546,18 +546,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap); extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); - -/** - * nsown_capable - Check superior capability to one's own user_ns - * @cap: The capability in question - * - * Return true if the current task has the given superior capability - * targeted at its own user namespace. - */ -static inline bool nsown_capable(int cap) -{ - return ns_capable(current_user_ns(), cap); -} +extern bool nsown_capable(int cap); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/include/linux/cred.h b/include/linux/cred.h index 9aeeb0b..be16b61 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -146,6 +146,7 @@ struct cred { void *security; /* subjective LSM security */ #endif struct user_struct *user; /* real user ID subscription */ + struct user_namespace *user_ns; /* cached user->user_ns */ struct group_info *group_info; /* supplementary groups for euid/fsgid */ struct rcu_head rcu; /* RCU deletion hook */ }; @@ -354,10 +355,15 @@ static inline void put_cred(const struct cred *_cred) #define current_fsgid() (current_cred_xxx(fsgid)) #define current_cap() (current_cred_xxx(cap_effective)) #define current_user() (current_cred_xxx(user)) -#define _current_user_ns() (current_cred_xxx(user)->user_ns) #define current_security() 
(current_cred_xxx(security)) -extern struct user_namespace *current_user_ns(void); +#ifdef CONFIG_USER_NS +#define current_user_ns() (current_cred_xxx(user_ns)) +#else +extern struct user_namespace init_user_ns; +#define current_user_ns() (&init_user_ns) +#endif + #define current_uid_gid(_uid, _gid) \ do { \ diff --git a/kernel/capability.c b/kernel/capability.c index bf0c734..32a80e0 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -399,3 +399,15 @@ bool task_ns_capable(struct task_struct *t, int cap) return ns_capable(task_cred_xxx(t, user)->user_ns, cap); } EXPORT_SYMBOL(task_ns_capable); + +/** + * nsown_capable - Check superior capability to one's own user_ns + * @cap: The capability in question + * + * Return true if the current task has the given superior capability + * targeted at its own user namespace. + */ +bool nsown_capable(int cap) +{ + return ns_capable(current_user_ns(), cap); +} diff --git a/kernel/cred.c b/kernel/cred.c index 5557b55..8093c16 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -54,6 +54,7 @@ struct cred init_cred = { .cap_effective = CAP_INIT_EFF_SET, .cap_bset = CAP_INIT_BSET, .user = INIT_USER, + .user_ns = &init_user_ns, .group_info = &init_groups, #ifdef CONFIG_KEYS .tgcred = &init_tgcred, @@ -410,6 +411,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) goto error_put; } + /* cache user_ns in cred. 
Doesn't need a refcount because it will + * stay pinned by cred->user + */ + new->user_ns = new->user->user_ns; + #ifdef CONFIG_KEYS /* new threads get their own thread keyrings if their parent already * had one */ @@ -741,12 +747,6 @@ int set_create_files_as(struct cred *new, struct inode *inode) } EXPORT_SYMBOL(set_create_files_as); -struct user_namespace *current_user_ns(void) -{ - return _current_user_ns(); -} -EXPORT_SYMBOL(current_user_ns); - #ifdef CONFIG_DEBUG_CREDENTIALS bool creds_are_invalid(const struct cred *cred) -- cgit v0.10.2 From 26cf46be954a2dd391d32eeaf7d07c3a953dcc5a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 13 May 2011 11:51:01 -0700 Subject: vfs: micro-optimize acl_permission_check() It's a hot function, and we're better off not mixing types in the mask calculations. The compiler just ends up mixing 16-bit and 32-bit operations, for no good reason. So do everything in 'unsigned int' rather than mixing 'unsigned int' masking with a 'umode_t' (16-bit) mode variable. This, together with the parent commit (47a150edc2ae: "Cache user_ns in struct cred") makes acl_permission_check() much nicer. Signed-off-by: Linus Torvalds diff --git a/fs/namei.c b/fs/namei.c index 54fc993..e3c4f11 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -179,7 +179,7 @@ EXPORT_SYMBOL(putname); static int acl_permission_check(struct inode *inode, int mask, unsigned int flags, int (*check_acl)(struct inode *inode, int mask, unsigned int flags)) { - umode_t mode = inode->i_mode; + unsigned int mode = inode->i_mode; mask &= MAY_READ | MAY_WRITE | MAY_EXEC; -- cgit v0.10.2 From a10e14667635dde504ed9e7ee851494c2cf2ae8e Mon Sep 17 00:00:00 2001 From: Vitalii Demianets Date: Thu, 12 May 2011 23:04:29 +0000 Subject: bonding,llc: Fix structure sizeof incompatibility for some PDUs With some combinations of arch/compiler (e.g. arm-linux-gcc) the sizeof operator on structure returns value greater than expected. 
In cases when the structure is used for mapping PDU fields it may lead to unexpected results (such as holes and alignment problems in skb data). __packed prevents this undesired behavior. Signed-off-by: Vitalii Demianets Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h index b28baff..01b8a6a 100644 --- a/drivers/net/bonding/bond_3ad.h +++ b/drivers/net/bonding/bond_3ad.h @@ -39,7 +39,7 @@ typedef struct mac_addr { u8 mac_addr_value[ETH_ALEN]; -} mac_addr_t; +} __packed mac_addr_t; enum { BOND_AD_STABLE = 0, @@ -134,12 +134,12 @@ typedef struct lacpdu { u8 tlv_type_terminator; // = terminator u8 terminator_length; // = 0 u8 reserved_50[50]; // = 0 -} lacpdu_t; +} __packed lacpdu_t; typedef struct lacpdu_header { struct ethhdr hdr; struct lacpdu lacpdu; -} lacpdu_header_t; +} __packed lacpdu_header_t; // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) typedef struct bond_marker { @@ -155,12 +155,12 @@ typedef struct bond_marker { u8 tlv_type_terminator; // = 0x00 u8 terminator_length; // = 0x00 u8 reserved_90[90]; // = 0 -} bond_marker_t; +} __packed bond_marker_t; typedef struct bond_marker_header { struct ethhdr hdr; struct bond_marker marker; -} bond_marker_header_t; +} __packed bond_marker_header_t; #pragma pack() diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 75b8e29..f57e7d4 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -199,7 +199,7 @@ struct llc_pdu_sn { u8 ssap; u8 ctrl_1; u8 ctrl_2; -}; +} __packed; static inline struct llc_pdu_sn *llc_pdu_sn_hdr(struct sk_buff *skb) { @@ -211,7 +211,7 @@ struct llc_pdu_un { u8 dsap; u8 ssap; u8 ctrl_1; -}; +} __packed; static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb) { @@ -359,7 +359,7 @@ struct llc_xid_info { u8 fmt_id; /* always 0x81 for LLC */ u8 type; /* different if NULL/non-NULL LSAP */ u8 rw; /* sender receive window */ -}; +} __packed; /** * llc_pdu_init_as_xid_cmd - sets 
bytes 3, 4 & 5 of LLC header as XID @@ -415,7 +415,7 @@ struct llc_frmr_info { u8 curr_ssv; /* current send state variable val */ u8 curr_rsv; /* current receive state variable */ u8 ind_bits; /* indicator bits set with macro */ -}; +} __packed; extern void llc_pdu_set_cmd_rsp(struct sk_buff *skb, u8 type); extern void llc_pdu_set_pf_bit(struct sk_buff *skb, u8 bit_value); -- cgit v0.10.2 From 087fbc9962e10a65fb0b542ecfc116ebf6cf1735 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 13 May 2011 12:14:54 -0400 Subject: drm/i915: Revert i915.semaphore=1 default from i915 merge My Q67 / i7-2600 box has rev09 Sandy Bridge graphics. It hangs instantly when GNOME loads and it hangs so hard the reset button doesn't work. Setting i915.semaphore=0 fixes it. Semaphores were disabled in a1656b9090f7 ("drm/i915: Disable GPU semaphores by default") in 2.6.38 but were then re-enabled (by mistake?) by the merge 47ae63e0c2e5 ("Merge branch 'drm-intel-fixes' into drm-intel-next"). (It's worth noting that the offending change is i915_drv.c, which was not marked as a conflict - although a 'git show --cc' on the merge does show that neither parent had it set to 1) Signed-off-by: Andy Lutomirski Signed-off-by: Linus Torvalds diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c34a8dd..32d1b3e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -49,7 +49,7 @@ module_param_named(panel_ignore_lid, i915_panel_ignore_lid, int, 0600); unsigned int i915_powersave = 1; module_param_named(powersave, i915_powersave, int, 0600); -unsigned int i915_semaphores = 1; +unsigned int i915_semaphores = 0; module_param_named(semaphores, i915_semaphores, int, 0600); unsigned int i915_enable_rc6 = 0; -- cgit v0.10.2 From cb68552858c64db302771469b1202ea09e696329 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 13 May 2011 16:03:24 -0400 Subject: bridge: fix forwarding of IPv6 The commit 
6b1e960fdbd75dcd9bcc3ba5ff8898ff1ad30b6e bridge: Reset IPCB when entering IP stack on NF_FORWARD broke forwarding of IPV6 packets in bridge because it would call br_parse_ip_options with an IPV6 packet. Reported-by: Noah Meyerhans Signed-off-by: Stephen Hemminger Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index f3bc322..74ef4d4 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -737,7 +737,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, nf_bridge->mask |= BRNF_PKT_TYPE; } - if (br_parse_ip_options(skb)) + if (pf == PF_INET && br_parse_ip_options(skb)) return NF_DROP; /* The physdev module checks on this */ -- cgit v0.10.2 From 1fec70932d867416ffe620dd17005f168cc84eb5 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Fri, 13 May 2011 13:52:56 -0700 Subject: rbd: fix split bio handling The rbd driver currently splits bios when they span an object boundary. However, the blk_end_request expects the completions to roll up the results in block device order, and the split rbd/ceph ops can complete in any order. This patch adds a struct rbd_req_coll to track completion of split requests and ensures that the results are passed back up to the block layer in order. This fixes errors where the file system gets completion of a read operation that spans an object boundary before the data has actually arrived. The bug is easily reproduced with iozone with a working set larger than available RAM.
Reported-by: Fyodor Ustinov Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 2146cab..9712fad 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -92,6 +92,8 @@ struct rbd_client { struct list_head node; }; +struct rbd_req_coll; + /* * a single io request */ @@ -100,6 +102,24 @@ struct rbd_request { struct bio *bio; /* cloned bio */ struct page **pages; /* list of used pages */ u64 len; + int coll_index; + struct rbd_req_coll *coll; +}; + +struct rbd_req_status { + int done; + int rc; + u64 bytes; +}; + +/* + * a collection of requests + */ +struct rbd_req_coll { + int total; + int num_done; + struct kref kref; + struct rbd_req_status status[0]; }; struct rbd_snap { @@ -416,6 +436,17 @@ static void rbd_put_client(struct rbd_device *rbd_dev) rbd_dev->client = NULL; } +/* + * Destroy requests collection + */ +static void rbd_coll_release(struct kref *kref) +{ + struct rbd_req_coll *coll = + container_of(kref, struct rbd_req_coll, kref); + + dout("rbd_coll_release %p\n", coll); + kfree(coll); +} /* * Create a new header structure, translate header format from the on-disk @@ -590,6 +621,14 @@ static u64 rbd_get_segment(struct rbd_image_header *header, return len; } +static int rbd_get_num_segments(struct rbd_image_header *header, + u64 ofs, u64 len) +{ + u64 start_seg = ofs >> header->obj_order; + u64 end_seg = (ofs + len - 1) >> header->obj_order; + return end_seg - start_seg + 1; +} + /* * bio helpers */ @@ -735,6 +774,50 @@ static void rbd_destroy_ops(struct ceph_osd_req_op *ops) kfree(ops); } +static void rbd_coll_end_req_index(struct request *rq, + struct rbd_req_coll *coll, + int index, + int ret, u64 len) +{ + struct request_queue *q; + int min, max, i; + + dout("rbd_coll_end_req_index %p index %d ret %d len %lld\n", + coll, index, ret, len); + + if (!rq) + return; + + if (!coll) { + blk_end_request(rq, ret, len); + return; + } + + q = rq->q; + + spin_lock_irq(q->queue_lock); + 
coll->status[index].done = 1; + coll->status[index].rc = ret; + coll->status[index].bytes = len; + max = min = coll->num_done; + while (max < coll->total && coll->status[max].done) + max++; + + for (i = min; i<max; i++) { + __blk_end_request(rq, coll->status[i].rc, + coll->status[i].bytes); + coll->num_done++; + kref_put(&coll->kref, rbd_coll_release); + } + spin_unlock_irq(q->queue_lock); +} + +static void rbd_coll_end_req(struct rbd_request *req, + int ret, u64 len) +{ + rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len); +} + /* * Send ceph osd request */ @@ -749,6 +832,8 @@ static int rbd_do_request(struct request *rq, int flags, struct ceph_osd_req_op *ops, int num_reply, + struct rbd_req_coll *coll, + int coll_index, void (*rbd_cb)(struct ceph_osd_request *req, struct ceph_msg *msg), struct ceph_osd_request **linger_req, @@ -763,12 +848,20 @@ static int rbd_do_request(struct request *rq, struct ceph_osd_request_head *reqhead; struct rbd_image_header *header = &dev->header; - ret = -ENOMEM; req_data = kzalloc(sizeof(*req_data), GFP_NOIO); - if (!req_data) - goto done; + if (!req_data) { + if (coll) + rbd_coll_end_req_index(rq, coll, coll_index, + -ENOMEM, len); + return -ENOMEM; + } + + if (coll) { + req_data->coll = coll; + req_data->coll_index = coll_index; + } - dout("rbd_do_request len=%lld ofs=%lld\n", len, ofs); + dout("rbd_do_request obj=%s ofs=%lld len=%lld\n", obj, len, ofs); down_read(&header->snap_rwsem); @@ -828,7 +921,8 @@ static int rbd_do_request(struct request *rq, ret = ceph_osdc_wait_request(&dev->client->osdc, req); if (ver) *ver = le64_to_cpu(req->r_reassert_version.version); - dout("reassert_ver=%lld\n", le64_to_cpu(req->r_reassert_version.version)); + dout("reassert_ver=%lld\n", + le64_to_cpu(req->r_reassert_version.version)); ceph_osdc_put_request(req); } return ret; @@ -837,10 +931,8 @@ done_err: bio_chain_put(req_data->bio); ceph_osdc_put_request(req); done_pages: + rbd_coll_end_req(req_data, ret, len); kfree(req_data); -done: - if (rq) - blk_end_request(rq,
ret, len); return ret; } @@ -874,7 +966,7 @@ static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) bytes = req_data->len; } - blk_end_request(req_data->rq, rc, bytes); + rbd_coll_end_req(req_data, rc, bytes); if (req_data->bio) bio_chain_put(req_data->bio); @@ -934,6 +1026,7 @@ static int rbd_req_sync_op(struct rbd_device *dev, flags, ops, 2, + NULL, 0, NULL, linger_req, ver); if (ret < 0) @@ -959,7 +1052,9 @@ static int rbd_do_op(struct request *rq, u64 snapid, int opcode, int flags, int num_reply, u64 ofs, u64 len, - struct bio *bio) + struct bio *bio, + struct rbd_req_coll *coll, + int coll_index) { char *seg_name; u64 seg_ofs; @@ -995,6 +1090,7 @@ static int rbd_do_op(struct request *rq, flags, ops, num_reply, + coll, coll_index, rbd_req_cb, 0, NULL); rbd_destroy_ops(ops); @@ -1010,13 +1106,15 @@ static int rbd_req_write(struct request *rq, struct rbd_device *rbd_dev, struct ceph_snap_context *snapc, u64 ofs, u64 len, - struct bio *bio) + struct bio *bio, + struct rbd_req_coll *coll, + int coll_index) { return rbd_do_op(rq, rbd_dev, snapc, CEPH_NOSNAP, CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, 2, - ofs, len, bio); + ofs, len, bio, coll, coll_index); } /* @@ -1026,14 +1124,16 @@ static int rbd_req_read(struct request *rq, struct rbd_device *rbd_dev, u64 snapid, u64 ofs, u64 len, - struct bio *bio) + struct bio *bio, + struct rbd_req_coll *coll, + int coll_index) { return rbd_do_op(rq, rbd_dev, NULL, (snapid ? 
snapid : CEPH_NOSNAP), CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, 2, - ofs, len, bio); + ofs, len, bio, coll, coll_index); } /* @@ -1081,6 +1181,7 @@ static int rbd_req_sync_notify_ack(struct rbd_device *dev, CEPH_OSD_FLAG_READ, ops, 1, + NULL, 0, rbd_simple_req_cb, 0, NULL); rbd_destroy_ops(ops); @@ -1278,6 +1379,20 @@ static int rbd_req_sync_exec(struct rbd_device *dev, return ret; } +static struct rbd_req_coll *rbd_alloc_coll(int num_reqs) +{ + struct rbd_req_coll *coll = + kzalloc(sizeof(struct rbd_req_coll) + + sizeof(struct rbd_req_status) * num_reqs, + GFP_ATOMIC); + + if (!coll) + return NULL; + coll->total = num_reqs; + kref_init(&coll->kref); + return coll; +} + /* * block device queue callback */ @@ -1295,6 +1410,8 @@ static void rbd_rq_fn(struct request_queue *q) bool do_write; int size, op_size = 0; u64 ofs; + int num_segs, cur_seg = 0; + struct rbd_req_coll *coll; /* peek at request from block layer */ if (!rq) @@ -1325,6 +1442,14 @@ static void rbd_rq_fn(struct request_queue *q) do_write ? 
"write" : "read", size, blk_rq_pos(rq) * 512ULL); + num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); + coll = rbd_alloc_coll(num_segs); + if (!coll) { + spin_lock_irq(q->queue_lock); + __blk_end_request_all(rq, -ENOMEM); + goto next; + } + do { /* a bio clone to be passed down to OSD req */ dout("rq->bio->bi_vcnt=%d\n", rq->bio->bi_vcnt); @@ -1332,35 +1457,41 @@ static void rbd_rq_fn(struct request_queue *q) rbd_dev->header.block_name, ofs, size, NULL, NULL); + kref_get(&coll->kref); bio = bio_chain_clone(&rq_bio, &next_bio, &bp, op_size, GFP_ATOMIC); if (!bio) { - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, -ENOMEM); - goto next; + rbd_coll_end_req_index(rq, coll, cur_seg, + -ENOMEM, op_size); + goto next_seg; } + /* init OSD command: write or read */ if (do_write) rbd_req_write(rq, rbd_dev, rbd_dev->header.snapc, ofs, - op_size, bio); + op_size, bio, + coll, cur_seg); else rbd_req_read(rq, rbd_dev, cur_snap_id(rbd_dev), ofs, - op_size, bio); + op_size, bio, + coll, cur_seg); +next_seg: size -= op_size; ofs += op_size; + cur_seg++; rq_bio = next_bio; } while (size > 0); + kref_put(&coll->kref, rbd_coll_release); if (bp) bio_pair_release(bp); - spin_lock_irq(q->queue_lock); next: rq = blk_fetch_request(q); -- cgit v0.10.2 From f550806a7fbca06b487238442546aceb7ecbb0c9 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 15 Feb 2011 22:34:49 -0800 Subject: alpha: convert to clocksource_register_hz Converts alpha to use clocksource_register_hz. 
Signed-off-by: John Stultz CC: Richard Henderson CC: Ivan Kokshaysky CC: Thomas Gleixner Signed-off-by: Matt Turner diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index 918e8e0..818e74e 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -375,8 +375,7 @@ static struct clocksource clocksource_rpcc = { static inline void register_rpcc_clocksource(long cycle_freq) { - clocksource_calc_mult_shift(&clocksource_rpcc, cycle_freq, 4); - clocksource_register(&clocksource_rpcc); + clocksource_register_hz(&clocksource_rpcc, cycle_freq); } #else /* !CONFIG_SMP */ static inline void register_rpcc_clocksource(long cycle_freq) -- cgit v0.10.2 From 90b57f35164aa715dcc7d939a88780a23231f84e Mon Sep 17 00:00:00 2001 From: Michael Cree Date: Wed, 4 May 2011 08:14:50 +0000 Subject: alpha: Wire up syscalls new to 2.6.39 Wire up the syscalls: name_to_handle_at open_by_handle_at clock_adjtime syncfs and adjust some whitespace in the neighbourhood to align comments. Signed-off-by: Michael Cree Signed-off-by: Matt Turner diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 058937b..b183416 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -452,10 +452,14 @@ #define __NR_fanotify_init 494 #define __NR_fanotify_mark 495 #define __NR_prlimit64 496 +#define __NR_name_to_handle_at 497 +#define __NR_open_by_handle_at 498 +#define __NR_clock_adjtime 499 +#define __NR_syncfs 500 #ifdef __KERNEL__ -#define NR_SYSCALLS 497 +#define NR_SYSCALLS 501 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index a6a1de9..15f999d 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -498,23 +498,27 @@ sys_call_table: .quad sys_ni_syscall /* sys_timerfd */ .quad sys_eventfd .quad sys_recvmmsg - .quad sys_fallocate /* 480 */ + .quad sys_fallocate /* 480 */ .quad sys_timerfd_create .quad
sys_timerfd_settime .quad sys_timerfd_gettime .quad sys_signalfd4 - .quad sys_eventfd2 /* 485 */ + .quad sys_eventfd2 /* 485 */ .quad sys_epoll_create1 .quad sys_dup3 .quad sys_pipe2 .quad sys_inotify_init1 - .quad sys_preadv /* 490 */ + .quad sys_preadv /* 490 */ .quad sys_pwritev .quad sys_rt_tgsigqueueinfo .quad sys_perf_event_open .quad sys_fanotify_init - .quad sys_fanotify_mark /* 495 */ + .quad sys_fanotify_mark /* 495 */ .quad sys_prlimit64 + .quad sys_name_to_handle_at + .quad sys_open_by_handle_at + .quad sys_clock_adjtime + .quad sys_syncfs /* 500 */ .size sys_call_table, . - sys_call_table .type sys_call_table, @object -- cgit v0.10.2 From 712f3147aee0fbbbbed2da20b21b272c5505125e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 13 May 2011 16:16:41 -0700 Subject: fbmem: fix remove_conflicting_framebuffers races MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a register_framebuffer() call results in us removing old conflicting framebuffers, the new registration_lock doesn't protect that situation. And we can't just add the same locking to the function, because these functions call each other: register_framebuffer() calls remove_conflicting_framebuffers, which in turn calls unregister_framebuffer for any conflicting entry. In order to fix it, this just creates wrapper functions around all three functions and makes the versions that actually do the work be called "do_xxx()", leaving just the wrapper that gets the lock and calls the worker function. So the rule becomes simply that "do_xxxx()" has to be called with the lock held, and now do_register_framebuffer() can just call do_remove_conflicting_framebuffers(), and that in turn can call _do_unregister_framebuffer(), and there is no deadlock, and we can hold the registration lock over the whole sequence, fixing the races. 
It also makes error cases simpler, and fixes one situation where we would return from unregister_framebuffer() without releasing the lock, pointed out by Bruno Prémont. Tested-by: Bruno Prémont Tested-by: Anca Emanuel Signed-off-by: Linus Torvalds diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index ea16e65..46ee5e5 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -1537,8 +1537,10 @@ static bool fb_do_apertures_overlap(struct apertures_struct *gena, return false; } +static int do_unregister_framebuffer(struct fb_info *fb_info); + #define VGA_FB_PHYS 0xA0000 -void remove_conflicting_framebuffers(struct apertures_struct *a, +static void do_remove_conflicting_framebuffers(struct apertures_struct *a, const char *name, bool primary) { int i; @@ -1560,24 +1562,12 @@ void remove_conflicting_framebuffers(struct apertures_struct *a, printk(KERN_INFO "fb: conflicting fb hw usage " "%s vs %s - removing generic driver\n", name, registered_fb[i]->fix.id); - unregister_framebuffer(registered_fb[i]); + do_unregister_framebuffer(registered_fb[i]); } } } -EXPORT_SYMBOL(remove_conflicting_framebuffers); - -/** - * register_framebuffer - registers a frame buffer device - * @fb_info: frame buffer info structure - * - * Registers a frame buffer device @fb_info. - * - * Returns negative errno on error, or zero for success. 
- * - */ -int -register_framebuffer(struct fb_info *fb_info) +static int do_register_framebuffer(struct fb_info *fb_info) { int i; struct fb_event event; @@ -1589,10 +1579,9 @@ register_framebuffer(struct fb_info *fb_info) if (fb_check_foreignness(fb_info)) return -ENOSYS; - remove_conflicting_framebuffers(fb_info->apertures, fb_info->fix.id, + do_remove_conflicting_framebuffers(fb_info->apertures, fb_info->fix.id, fb_is_primary_device(fb_info)); - mutex_lock(&registration_lock); num_registered_fb++; for (i = 0 ; i < FB_MAX; i++) if (!registered_fb[i]) @@ -1635,7 +1624,6 @@ register_framebuffer(struct fb_info *fb_info) fb_var_to_videomode(&mode, &fb_info->var); fb_add_videomode(&mode, &fb_info->modelist); registered_fb[i] = fb_info; - mutex_unlock(&registration_lock); event.info = fb_info; if (!lock_fb_info(fb_info)) @@ -1645,37 +1633,14 @@ register_framebuffer(struct fb_info *fb_info) return 0; } - -/** - * unregister_framebuffer - releases a frame buffer device - * @fb_info: frame buffer info structure - * - * Unregisters a frame buffer device @fb_info. - * - * Returns negative errno on error, or zero for success. - * - * This function will also notify the framebuffer console - * to release the driver. - * - * This is meant to be called within a driver's module_exit() - * function. If this is called outside module_exit(), ensure - * that the driver implements fb_open() and fb_release() to - * check that no processes are using the device.
- */ - -int -unregister_framebuffer(struct fb_info *fb_info) +static int do_unregister_framebuffer(struct fb_info *fb_info) { struct fb_event event; int i, ret = 0; - mutex_lock(&registration_lock); i = fb_info->node; - if (!registered_fb[i]) { - ret = -EINVAL; - goto done; - } - + if (!registered_fb[i]) + return -EINVAL; if (!lock_fb_info(fb_info)) return -ENODEV; @@ -1683,10 +1648,8 @@ unregister_framebuffer(struct fb_info *fb_info) ret = fb_notifier_call_chain(FB_EVENT_FB_UNBIND, &event); unlock_fb_info(fb_info); - if (ret) { - ret = -EINVAL; - goto done; - } + if (ret) + return -EINVAL; if (fb_info->pixmap.addr && (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT)) @@ -1701,8 +1664,64 @@ unregister_framebuffer(struct fb_info *fb_info) /* this may free fb info */ put_fb_info(fb_info); -done: + return 0; +} + +void remove_conflicting_framebuffers(struct apertures_struct *a, + const char *name, bool primary) +{ + mutex_lock(&registration_lock); + do_remove_conflicting_framebuffers(a, name, primary); mutex_unlock(&registration_lock); +} +EXPORT_SYMBOL(remove_conflicting_framebuffers); + +/** + * register_framebuffer - registers a frame buffer device + * @fb_info: frame buffer info structure + * + * Registers a frame buffer device @fb_info. + * + * Returns negative errno on error, or zero for success. + * + */ +int +register_framebuffer(struct fb_info *fb_info) +{ + int ret; + + mutex_lock(&registration_lock); + ret = do_register_framebuffer(fb_info); + mutex_unlock(&registration_lock); + + return ret; +} + +/** + * unregister_framebuffer - releases a frame buffer device + * @fb_info: frame buffer info structure + * + * Unregisters a frame buffer device @fb_info. + * + * Returns negative errno on error, or zero for success. + * + * This function will also notify the framebuffer console + * to release the driver. + * + * This is meant to be called within a driver's module_exit() + * function.
If this is called outside module_exit(), ensure + * that the driver implements fb_open() and fb_release() to + * check that no processes are using the device. + */ +int +unregister_framebuffer(struct fb_info *fb_info) +{ + int ret; + + mutex_lock(&registration_lock); + ret = do_unregister_framebuffer(fb_info); + mutex_unlock(&registration_lock); + return ret; } -- cgit v0.10.2 From c590cece75728a85ea06801df3ebad2d7ad8612c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20Pr=C3=A9mont?= Date: Sat, 14 May 2011 12:24:15 +0200 Subject: Further fbcon sanity checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This moves the if (num_registered_fb == FB_MAX) return -ENXIO; check _AFTER_ the call to do_remove_conflicting_framebuffers() as this would (now in a safe way) allow a native driver to replace the conflicting one even if all slots in registered_fb[] are taken. This also prevents unregistering a framebuffer that is no longer registered (vga16fb will unregister at module unload time even if the frame buffer had been unregistered earlier due to being found conflicting).
Signed-off-by: Bruno Prémont Signed-off-by: Linus Torvalds diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 46ee5e5..5aac00e 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -1573,15 +1573,15 @@ static int do_register_framebuffer(struct fb_info *fb_info) struct fb_event event; struct fb_videomode mode; - if (num_registered_fb == FB_MAX) - return -ENXIO; - if (fb_check_foreignness(fb_info)) return -ENOSYS; do_remove_conflicting_framebuffers(fb_info->apertures, fb_info->fix.id, fb_is_primary_device(fb_info)); + if (num_registered_fb == FB_MAX) + return -ENXIO; + num_registered_fb++; for (i = 0 ; i < FB_MAX; i++) if (!registered_fb[i]) @@ -1639,7 +1639,7 @@ static int do_unregister_framebuffer(struct fb_info *fb_info) int i, ret = 0; i = fb_info->node; - if (!registered_fb[i]) + if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info) return -EINVAL; if (!lock_fb_info(fb_info)) -- cgit v0.10.2 From 22fe9446e82f1fe4b59900db4599061384efb0ad Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 14 May 2011 12:28:04 +0200 Subject: Revert "libata: ahci_start_engine compliant to AHCI spec" This reverts commit 270dac35c26433d06a89150c51e75ca0181ca7e4. The commits causes command timeouts on AC plug/unplug. It isn't yet clear why. As the commit was for a single rather obscure controller, revert the change for now. The problem was reported and bisected by Gu Rui in bug#34692. https://bugzilla.kernel.org/show_bug.cgi?id=34692 Also, reported by Rafael and Michael in the following thread. http://thread.gmane.org/gmane.linux.kernel/1138771 Signed-off-by: Tejun Heo Reported-by: Gu Rui Reported-by: Rafael J. 
Wysocki Reported-by: Michael Leun Cc: Jian Peng Cc: Jeff Garzik Signed-off-by: Linus Torvalds diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index ff9d832..d38c40f 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -561,27 +561,6 @@ void ahci_start_engine(struct ata_port *ap) { void __iomem *port_mmio = ahci_port_base(ap); u32 tmp; - u8 status; - - status = readl(port_mmio + PORT_TFDATA) & 0xFF; - - /* - * At end of section 10.1 of AHCI spec (rev 1.3), it states - * Software shall not set PxCMD.ST to 1 until it is determined - * that a functoinal device is present on the port as determined by - * PxTFD.STS.BSY=0, PxTFD.STS.DRQ=0 and PxSSTS.DET=3h - * - * Even though most AHCI host controllers work without this check, - * specific controller will fail under this condition - */ - if (status & (ATA_BUSY | ATA_DRQ)) - return; - else { - ahci_scr_read(&ap->link, SCR_STATUS, &tmp); - - if ((tmp & 0xf) != 0x3) - return; - } /* start DMA */ tmp = readl(port_mmio + PORT_CMD); -- cgit v0.10.2 From 5f6f12ccf3aa42cfc0c5bde9228df0c843dd63f7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 May 2011 16:04:11 +0200 Subject: libata: fix oops when LPM is used with PMP ae01b2493c (libata: Implement ATA_FLAG_NO_DIPM and apply it to mcp65) added ATA_FLAG_NO_DIPM and made ata_eh_set_lpm() check the flag. However, @ap is NULL if @link points to a PMP link and thus the unconditional @ap->flags dereference leads to the following oops. BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: [] ata_eh_recover+0x9a1/0x1510 ... Pid: 295, comm: scsi_eh_4 Tainted: P 2.6.38.5-core2 #1 System76, Inc. 
Serval Professional/Serval Professional RIP: 0010:[] [] ata_eh_recover+0x9a1/0x1510 RSP: 0018:ffff880132defbf0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff880132f40000 RCX: 0000000000000000 RDX: ffff88013377c000 RSI: ffff880132f40000 RDI: 0000000000000000 RBP: ffff880132defce0 R08: ffff88013377dc58 R09: ffff880132defd98 R10: 0000000000000000 R11: 00000000ffffffff R12: 0000000000000000 R13: 0000000000000000 R14: ffff88013377c000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8800bf700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000018 CR3: 0000000001a03000 CR4: 00000000000406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process scsi_eh_4 (pid: 295, threadinfo ffff880132dee000, task ffff880133b416c0) Stack: 0000000000000000 ffff880132defcc0 0000000000000000 ffff880132f42738 ffffffff813ee8f0 ffffffff813eefe0 ffff880132defd98 ffff88013377f190 ffffffffa00b3e30 ffffffff813ef030 0000000032defc60 ffff880100000000 Call Trace: [] sata_pmp_error_handler+0x607/0xc30 [] ahci_error_handler+0x1f/0x70 [libahci] [] ata_scsi_error+0x5be/0x900 [] scsi_error_handler+0x124/0x650 [] kthread+0x96/0xa0 [] kernel_thread_helper+0x4/0x10 Code: 8b 95 70 ff ff ff b8 00 00 00 00 48 3b 9a 10 2e 00 00 48 0f 44 c2 48 89 85 70 ff ff ff 48 8b 8d 70 ff ff ff f6 83 69 02 00 00 01 <48> 8b 41 18 0f 85 48 01 00 00 48 85 c9 74 12 48 8b 51 08 48 83 RIP [] ata_eh_recover+0x9a1/0x1510 RSP CR2: 0000000000000018 Fix it by testing @link->ap->flags instead. stable: ATA_FLAG_NO_DIPM was added during 2.6.39 cycle but was backported to 2.6.37 and 38. This is a fix for that and thus also applicable to 2.6.37 and 38. Signed-off-by: Tejun Heo Reported-by: "Nathan A. 
Mourey II" LKML-Reference: <1304555277.2059.2.camel@localhost.localdomain> Cc: Connor H Cc: stable@kernel.org Signed-off-by: Jeff Garzik diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index f26f2fe..dad9fd6 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -3316,7 +3316,7 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, struct ata_eh_context *ehc = &link->eh_context; struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; enum ata_lpm_policy old_policy = link->lpm_policy; - bool no_dipm = ap->flags & ATA_FLAG_NO_DIPM; + bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM; unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; unsigned int err_mask; int rc; -- cgit v0.10.2 From 05bf86b4ccfd0f197da61c67bd372111d15a6620 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 14 May 2011 12:06:42 -0700 Subject: tmpfs: fix race between swapoff and writepage Shame on me! Commit b1dea800ac39 "tmpfs: fix race between umount and writepage" fixed the advertized race, but introduced another: as even its comment makes clear, we cannot safely rely on a peek at list_empty() while holding no lock - until info->swapped is set, shmem_unuse_inode() may delete any formerly-swapped inode from the shmem_swaplist, which in this case would leave a swap area impossible to swapoff. Although I don't relish taking the mutex every time, I don't care much for the alternatives either; and at least the peek at list_empty() in shmem_evict_inode() (a hotter path since most inodes would never have been swapped) remains safe, because we already truncated the whole file. 
Signed-off-by: Hugh Dickins Cc: stable@kernel.org Signed-off-by: Linus Torvalds diff --git a/mm/shmem.c b/mm/shmem.c index 9e755c1..dfc7069 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1037,7 +1037,6 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) struct address_space *mapping; unsigned long index; struct inode *inode; - bool unlock_mutex = false; BUG_ON(!PageLocked(page)); mapping = page->mapping; @@ -1072,15 +1071,14 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) * we've taken the spinlock, because shmem_unuse_inode() will * prune a !swapped inode from the swaplist under both locks. */ - if (swap.val && list_empty(&info->swaplist)) { + if (swap.val) { mutex_lock(&shmem_swaplist_mutex); - /* move instead of add in case we're racing */ - list_move_tail(&info->swaplist, &shmem_swaplist); - unlock_mutex = true; + if (list_empty(&info->swaplist)) + list_add_tail(&info->swaplist, &shmem_swaplist); } spin_lock(&info->lock); - if (unlock_mutex) + if (swap.val) mutex_unlock(&shmem_swaplist_mutex); if (index >= info->next_index) { -- cgit v0.10.2 From f5de93914983bf04b92a786d1d205286fc53b49b Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Tue, 3 May 2011 16:44:13 +0000 Subject: Prevent oopsing in posix_acl_valid() If posix_acl_from_xattr() returns an error code, a negative address is dereferenced causing an oops; fix by checking for error code first. 
Signed-off-by: Daniel J Blueman Reviewed-by: Josef Bacik Signed-off-by: Chris Mason diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index a892bc2..827be9a 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -178,12 +178,13 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, if (value) { acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { ret = posix_acl_valid(acl); if (ret) goto out; - } else if (IS_ERR(acl)) { - return PTR_ERR(acl); } } -- cgit v0.10.2 From 1aba86d67f340a8001d67183ec32e8a62e3ec658 Mon Sep 17 00:00:00 2001 From: liubo Date: Fri, 8 Apr 2011 08:44:37 +0000 Subject: Btrfs: fix easily get into ENOSPC in mixed case When a btrfs disk is created with the mixed data & metadata option, it will have no pure data or pure metadata space info. In btrfs's for-linus branch, commit 78b1ea13838039cd88afdd62519b40b344d6c920 (Btrfs: fix OOPS of empty filesystem after balance) initializes space infos at the very beginning. The problem is this initialization does not take the mixed case into account, which causes btrfs to easily get into ENOSPC in the mixed case.
Signed-off-by: Liu Bo Signed-off-by: Chris Mason diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cd52f7f..9ee6bd5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8856,23 +8856,38 @@ out: int btrfs_init_space_info(struct btrfs_fs_info *fs_info) { struct btrfs_space_info *space_info; + struct btrfs_super_block *disk_super; + u64 features; + u64 flags; + int mixed = 0; int ret; - ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM, 0, 0, - &space_info); - if (ret) - return ret; + disk_super = &fs_info->super_copy; + if (!btrfs_super_root(disk_super)) + return 1; - ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA, 0, 0, - &space_info); - if (ret) - return ret; + features = btrfs_super_incompat_flags(disk_super); + if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) + mixed = 1; - ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA, 0, 0, - &space_info); + flags = BTRFS_BLOCK_GROUP_SYSTEM; + ret = update_space_info(fs_info, flags, 0, 0, &space_info); if (ret) - return ret; + goto out; + if (mixed) { + flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; + ret = update_space_info(fs_info, flags, 0, 0, &space_info); + } else { + flags = BTRFS_BLOCK_GROUP_METADATA; + ret = update_space_info(fs_info, flags, 0, 0, &space_info); + if (ret) + goto out; + + flags = BTRFS_BLOCK_GROUP_DATA; + ret = update_space_info(fs_info, flags, 0, 0, &space_info); + } +out: return ret; } -- cgit v0.10.2 From e1e8fb6a1ff3f9487e03a4cbf85b81d1316068ce Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 15 Apr 2011 03:02:49 +0000 Subject: fs: remove FS_COW_FL FS_COW_FL and FS_NOCOW_FL were newly introduced to control per file COW in btrfs, but FS_NOCOW_FL is sufficient. The fact is we don't have corresponding BTRFS_INODE_COW flag. COW is default, and FS_NOCOW_FL can be used to switch off COW for a single file. If we mount btrfs with nodatacow, a newly created file will be set with the FS_NOCOW_FL flag. 
So to turn on COW for it, we can just clear the FS_NOCOW_FL flag. Signed-off-by: Li Zefan Signed-off-by: Chris Mason diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f580a3a..3240dd9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -144,16 +144,13 @@ static int check_flags(unsigned int flags) if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ FS_NOATIME_FL | FS_NODUMP_FL | \ FS_SYNC_FL | FS_DIRSYNC_FL | \ - FS_NOCOMP_FL | FS_COMPR_FL | \ - FS_NOCOW_FL | FS_COW_FL)) + FS_NOCOMP_FL | FS_COMPR_FL | + FS_NOCOW_FL)) return -EOPNOTSUPP; if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) return -EINVAL; - if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL)) - return -EINVAL; - return 0; } @@ -218,6 +215,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) ip->flags |= BTRFS_INODE_DIRSYNC; else ip->flags &= ~BTRFS_INODE_DIRSYNC; + if (flags & FS_NOCOW_FL) + ip->flags |= BTRFS_INODE_NODATACOW; + else + ip->flags &= ~BTRFS_INODE_NODATACOW; /* * The COMPRESS flag can only be changed by users, while the NOCOMPRESS @@ -231,10 +232,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) ip->flags |= BTRFS_INODE_COMPRESS; ip->flags &= ~BTRFS_INODE_NOCOMPRESS; } - if (flags & FS_NOCOW_FL) - ip->flags |= BTRFS_INODE_NODATACOW; - else if (flags & FS_COW_FL) - ip->flags &= ~BTRFS_INODE_NODATACOW; trans = btrfs_join_transaction(root, 1); BUG_ON(IS_ERR(trans)); diff --git a/include/linux/fs.h b/include/linux/fs.h index de9dd81..56a4141 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -365,7 +365,6 @@ struct inodes_stat_t { #define FS_EXTENT_FL 0x00080000 /* Extents */ #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ -#define FS_COW_FL 0x02000000 /* Cow file */ #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -- cgit v0.10.2 From d0092bdda819914b8725da76a8c33eb06eb0bd21 Mon Sep 17 00:00:00 2001 
From: Li Zefan Date: Fri, 15 Apr 2011 03:03:06 +0000 Subject: Btrfs: fix FS_IOC_GETFLAGS ioctl As we've added per file compression/cow support. Signed-off-by: Li Zefan Signed-off-by: Chris Mason diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3240dd9..aeabf6b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -81,6 +81,13 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags) iflags |= FS_NOATIME_FL; if (flags & BTRFS_INODE_DIRSYNC) iflags |= FS_DIRSYNC_FL; + if (flags & BTRFS_INODE_NODATACOW) + iflags |= FS_NOCOW_FL; + + if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS)) + iflags |= FS_COMPR_FL; + else if (flags & BTRFS_INODE_NOCOMPRESS) + iflags |= FS_NOCOMP_FL; return iflags; } -- cgit v0.10.2 From ebcb904dfe31644857422e3bb62e50f76fe86255 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 15 Apr 2011 03:03:17 +0000 Subject: Btrfs: fix FS_IOC_SETFLAGS ioctl Steps to reproduce the bug: - Call FS_IOC_SETFLAGS ioctl with flags=FS_COMPR_FL - Call FS_IOC_SETFLAGS ioctl with flags=0 - Call FS_IOC_GETFLAGS ioctl, and you'll see FS_COMPR_FL is still set! Signed-off-by: Li Zefan Signed-off-by: Chris Mason diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index aeabf6b..3e7031d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -238,6 +238,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) } else if (flags & FS_COMPR_FL) { ip->flags |= BTRFS_INODE_COMPRESS; ip->flags &= ~BTRFS_INODE_NOCOMPRESS; + } else { + ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); } trans = btrfs_join_transaction(root, 1); -- cgit v0.10.2 From b90194181988063266f3da0b7bf3e57268c627c8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 25 Apr 2011 16:25:20 -0300 Subject: perf tools: Honour the cpu list parameter when also monitoring a thread list The perf_evlist__create_maps was discarding the --cpu parameter when a --pid or --tid was specified, fix that.
Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Link: http://lkml.kernel.org/r/20110426204401.GB1746@ghostprotocols.net Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 45da8d1..1884a7c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -348,7 +348,7 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, if (evlist->threads == NULL) return -1; - if (target_tid != -1) + if (cpu_list == NULL && target_tid != -1) evlist->cpus = cpu_map__dummy_new(); else evlist->cpus = cpu_map__new(cpu_list); -- cgit v0.10.2 From aece948f5ddd70d70df2f35855c706ef9a4f62e2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 15 May 2011 09:39:00 -0300 Subject: perf evlist: Fix per thread mmap setup The PERF_EVENT_IOC_SET_OUTPUT ioctl was returning -EINVAL when using --pid when monitoring multithreaded apps, as we can only share a ring buffer for events on the same thread if not doing per cpu. Fix it by using per thread ring buffers. 
Tested with: [root@felicio ~]# tuna -t 26131 -CP | nl 1 thread ctxt_switches 2 pid SCHED_ rtpri affinity voluntary nonvoluntary cmd 3 26131 OTHER 0 0,1 10814276 2397830 chromium-browse 4 642 OTHER 0 0,1 14688 0 chromium-browse 5 26148 OTHER 0 0,1 713602 115479 chromium-browse 6 26149 OTHER 0 0,1 801958 2262 chromium-browse 7 26150 OTHER 0 0,1 1271128 248 chromium-browse 8 26151 OTHER 0 0,1 3 0 chromium-browse 9 27049 OTHER 0 0,1 36796 9 chromium-browse 10 618 OTHER 0 0,1 14711 0 chromium-browse 11 661 OTHER 0 0,1 14593 0 chromium-browse 12 29048 OTHER 0 0,1 28125 0 chromium-browse 13 26143 OTHER 0 0,1 2202789 781 chromium-browse [root@felicio ~]# So 11 threads under pid 26131, then: [root@felicio ~]# perf record -F 50000 --pid 26131 [root@felicio ~]# grep perf_event /proc/`pidof perf`/maps | nl 1 7fa4a2538000-7fa4a25b9000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 2 7fa4a25b9000-7fa4a263a000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 3 7fa4a263a000-7fa4a26bb000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 4 7fa4a26bb000-7fa4a273c000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 5 7fa4a273c000-7fa4a27bd000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 6 7fa4a27bd000-7fa4a283e000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 7 7fa4a283e000-7fa4a28bf000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 8 7fa4a28bf000-7fa4a2940000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 9 7fa4a2940000-7fa4a29c1000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 10 7fa4a29c1000-7fa4a2a42000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 11 7fa4a2a42000-7fa4a2ac3000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] [root@felicio ~]# 11 mmaps, one per thread since we didn't specify any CPU list, so we need one mmap per thread and: [root@felicio ~]# perf record -F 50000 --pid 26131 ^M ^C[ perf record: Woken up 79 times to write data ] [ perf record: Captured and wrote 20.614 MB perf.data (~900639 samples) ] [root@felicio ~]# perf report -D | grep 
PERF_RECORD_SAMPLE | cut -d/ -f2 | cut -d: -f1 | sort -n | uniq -c | sort -nr | nl 1 371310 26131 2 96516 26148 3 95694 26149 4 95203 26150 5 7291 26143 6 87 27049 7 76 661 8 60 29048 9 47 618 10 43 642 [root@felicio ~]# Ok, one of the threads, 26151 was quiescent, so no samples there, but all the others are there. Then, if I specify one CPU: [root@felicio ~]# perf record -F 50000 --pid 26131 --cpu 1 ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.680 MB perf.data (~29730 samples) ] [root@felicio ~]# perf report -D | grep PERF_RECORD_SAMPLE | cut -d/ -f2 | cut -d: -f1 | sort -n | uniq -c | sort -nr | nl 1 8444 26131 2 2584 26149 3 2518 26148 4 2324 26150 5 123 26143 6 9 661 7 9 29048 [root@felicio ~]# This machine has two cores, so fewer threads appeared on the radar, and: [root@felicio ~]# grep perf_event /proc/`pidof perf`/maps | nl 1 7f484b922000-7f484b9a3000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] [root@felicio ~]# Just one mmap, as now we can use just one per-cpu buffer instead of the per-thread needed in the previous case. For global profiling: [root@felicio ~]# perf record -F 50000 -a ^C[ perf record: Woken up 26 times to write data ] [ perf record: Captured and wrote 7.128 MB perf.data (~311412 samples) ] [root@felicio ~]# grep perf_event /proc/`pidof perf`/maps | nl 1 7fb49b435000-7fb49b4b6000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 2 7fb49b4b6000-7fb49b537000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] [root@felicio ~]# It uses per-cpu buffers. 
For just one thread: [root@felicio ~]# perf record -F 50000 --tid 26148 ^C[ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.330 MB perf.data (~14426 samples) ] [root@felicio ~]# perf report -D | grep PERF_RECORD_SAMPLE | cut -d/ -f2 | cut -d: -f1 | sort -n | uniq -c | sort -nr | nl 1 9969 26148 [root@felicio ~]# [root@felicio ~]# grep perf_event /proc/`pidof perf`/maps | nl 1 7f286a51b000-7f286a59c000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] [root@felicio ~]# Tested-by: David Ahern Tested-by: Lin Ming Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Link: http://lkml.kernel.org/r/20110426204401.GB1746@ghostprotocols.net Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4165382..0974f95 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -427,7 +427,7 @@ static void mmap_read_all(void) { int i; - for (i = 0; i < evsel_list->cpus->nr; i++) { + for (i = 0; i < evsel_list->nr_mmaps; i++) { if (evsel_list->mmap[i].base) mmap_read(&evsel_list->mmap[i]); } diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 11e3c84..2f9a337 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -549,7 +549,7 @@ static int test__basic_mmap(void) ++foo; } - while ((event = perf_evlist__read_on_cpu(evlist, 0)) != NULL) { + while ((event = perf_evlist__mmap_read(evlist, 0)) != NULL) { struct perf_sample sample; if (event->header.type != PERF_RECORD_SAMPLE) { diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 7e3d6e3..ebfc7cf 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -801,12 +801,12 @@ static void perf_event__process_sample(const union perf_event *event, } } -static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu) +static void perf_session__mmap_read_idx(struct 
perf_session *self, int idx) { struct perf_sample sample; union perf_event *event; - while ((event = perf_evlist__read_on_cpu(top.evlist, cpu)) != NULL) { + while ((event = perf_evlist__mmap_read(top.evlist, idx)) != NULL) { perf_session__parse_sample(self, event, &sample); if (event->header.type == PERF_RECORD_SAMPLE) @@ -820,8 +820,8 @@ static void perf_session__mmap_read(struct perf_session *self) { int i; - for (i = 0; i < top.evlist->cpus->nr; i++) - perf_session__mmap_read_cpu(self, i); + for (i = 0; i < top.evlist->nr_mmaps; i++) + perf_session__mmap_read_idx(self, i); } static void start_counters(struct perf_evlist *evlist) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 1884a7c..23eb22b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -166,11 +166,11 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) return NULL; } -union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu) +union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) { /* XXX Move this to perf.c, making it generally available */ unsigned int page_size = sysconf(_SC_PAGE_SIZE); - struct perf_mmap *md = &evlist->mmap[cpu]; + struct perf_mmap *md = &evlist->mmap[idx]; unsigned int head = perf_mmap__read_head(md); unsigned int old = md->prev; unsigned char *data = md->base + page_size; @@ -235,31 +235,37 @@ union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu) void perf_evlist__munmap(struct perf_evlist *evlist) { - int cpu; + int i; - for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { - if (evlist->mmap[cpu].base != NULL) { - munmap(evlist->mmap[cpu].base, evlist->mmap_len); - evlist->mmap[cpu].base = NULL; + for (i = 0; i < evlist->nr_mmaps; i++) { + if (evlist->mmap[i].base != NULL) { + munmap(evlist->mmap[i].base, evlist->mmap_len); + evlist->mmap[i].base = NULL; } } + + free(evlist->mmap); + evlist->mmap = NULL; } int perf_evlist__alloc_mmap(struct 
perf_evlist *evlist) { - evlist->mmap = zalloc(evlist->cpus->nr * sizeof(struct perf_mmap)); + evlist->nr_mmaps = evlist->cpus->nr; + if (evlist->cpus->map[0] == -1) + evlist->nr_mmaps = evlist->threads->nr; + evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); return evlist->mmap != NULL ? 0 : -ENOMEM; } static int __perf_evlist__mmap(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int prot, int mask, int fd) + int idx, int prot, int mask, int fd) { - evlist->mmap[cpu].prev = 0; - evlist->mmap[cpu].mask = mask; - evlist->mmap[cpu].base = mmap(NULL, evlist->mmap_len, prot, + evlist->mmap[idx].prev = 0; + evlist->mmap[idx].mask = mask; + evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, prot, MAP_SHARED, fd, 0); - if (evlist->mmap[cpu].base == MAP_FAILED) { - if (evlist->cpus->map[cpu] == -1 && evsel->attr.inherit) + if (evlist->mmap[idx].base == MAP_FAILED) { + if (evlist->cpus->map[idx] == -1 && evsel->attr.inherit) ui__warning("Inherit is not allowed on per-task " "events using mmap.\n"); return -1; @@ -269,6 +275,86 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, struct perf_evsel *ev return 0; } +static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, int prot, int mask) +{ + struct perf_evsel *evsel; + int cpu, thread; + + for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { + int output = -1; + + for (thread = 0; thread < evlist->threads->nr; thread++) { + list_for_each_entry(evsel, &evlist->entries, node) { + int fd = FD(evsel, cpu, thread); + + if (output == -1) { + output = fd; + if (__perf_evlist__mmap(evlist, evsel, cpu, + prot, mask, output) < 0) + goto out_unmap; + } else { + if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0) + goto out_unmap; + } + + if ((evsel->attr.read_format & PERF_FORMAT_ID) && + perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0) + goto out_unmap; + } + } + } + + return 0; + +out_unmap: + for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { + if (evlist->mmap[cpu].base 
!= NULL) { + munmap(evlist->mmap[cpu].base, evlist->mmap_len); + evlist->mmap[cpu].base = NULL; + } + } + return -1; +} + +static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, int prot, int mask) +{ + struct perf_evsel *evsel; + int thread; + + for (thread = 0; thread < evlist->threads->nr; thread++) { + int output = -1; + + list_for_each_entry(evsel, &evlist->entries, node) { + int fd = FD(evsel, 0, thread); + + if (output == -1) { + output = fd; + if (__perf_evlist__mmap(evlist, evsel, thread, + prot, mask, output) < 0) + goto out_unmap; + } else { + if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, output) != 0) + goto out_unmap; + } + + if ((evsel->attr.read_format & PERF_FORMAT_ID) && + perf_evlist__id_add_fd(evlist, evsel, 0, thread, fd) < 0) + goto out_unmap; + } + } + + return 0; + +out_unmap: + for (thread = 0; thread < evlist->threads->nr; thread++) { + if (evlist->mmap[thread].base != NULL) { + munmap(evlist->mmap[thread].base, evlist->mmap_len); + evlist->mmap[thread].base = NULL; + } + } + return -1; +} + /** perf_evlist__mmap - Create per cpu maps to receive events * * @evlist - list of events @@ -287,11 +373,11 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, struct perf_evsel *ev int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite) { unsigned int page_size = sysconf(_SC_PAGE_SIZE); - int mask = pages * page_size - 1, cpu; - struct perf_evsel *first_evsel, *evsel; + int mask = pages * page_size - 1; + struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; const struct thread_map *threads = evlist->threads; - int thread, prot = PROT_READ | (overwrite ? 0 : PROT_WRITE); + int prot = PROT_READ | (overwrite ? 
0 : PROT_WRITE); if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0) return -ENOMEM; @@ -301,43 +387,18 @@ int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite) evlist->overwrite = overwrite; evlist->mmap_len = (pages + 1) * page_size; - first_evsel = list_entry(evlist->entries.next, struct perf_evsel, node); list_for_each_entry(evsel, &evlist->entries, node) { if ((evsel->attr.read_format & PERF_FORMAT_ID) && evsel->sample_id == NULL && perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0) return -ENOMEM; - - for (cpu = 0; cpu < cpus->nr; cpu++) { - for (thread = 0; thread < threads->nr; thread++) { - int fd = FD(evsel, cpu, thread); - - if (evsel->idx || thread) { - if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, - FD(first_evsel, cpu, 0)) != 0) - goto out_unmap; - } else if (__perf_evlist__mmap(evlist, evsel, cpu, - prot, mask, fd) < 0) - goto out_unmap; - - if ((evsel->attr.read_format & PERF_FORMAT_ID) && - perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0) - goto out_unmap; - } - } } - return 0; + if (evlist->cpus->map[0] == -1) + return perf_evlist__mmap_per_thread(evlist, prot, mask); -out_unmap: - for (cpu = 0; cpu < cpus->nr; cpu++) { - if (evlist->mmap[cpu].base != NULL) { - munmap(evlist->mmap[cpu].base, evlist->mmap_len); - evlist->mmap[cpu].base = NULL; - } - } - return -1; + return perf_evlist__mmap_per_cpu(evlist, prot, mask); } int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 8b1cb7a..7109d7a 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -17,6 +17,7 @@ struct perf_evlist { struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; int nr_entries; int nr_fds; + int nr_mmaps; int mmap_len; bool overwrite; union perf_event event_copy; @@ -46,7 +47,7 @@ void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); -union 
perf_event *perf_evlist__read_on_cpu(struct perf_evlist *self, int cpu); +union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx); int perf_evlist__alloc_mmap(struct perf_evlist *evlist); int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index f5e3845..99c7226 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -680,7 +680,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, &cpu, &sample_id_all)) return NULL; - event = perf_evlist__read_on_cpu(evlist, cpu); + event = perf_evlist__mmap_read(evlist, cpu); if (event != NULL) { struct perf_evsel *first; PyObject *pyevent = pyrf_event__new(event); -- cgit v0.10.2 From d8083deb4f1aa0977980dfb834fcc336ef38318f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 13 May 2011 16:03:24 -0400 Subject: bridge: fix forwarding of IPv6 The commit 6b1e960fdbd75dcd9bcc3ba5ff8898ff1ad30b6e bridge: Reset IPCB when entering IP stack on NF_FORWARD broke forwarding of IPV6 packets in bridge because it would call br_parse_ip_options with an IPV6 packet. Reported-by: Noah Meyerhans Signed-off-by: Stephen Hemminger Reviewed-by: Eric Dumazet Signed-off-by: David S.
Miller Signed-off-by: Pablo Neira Ayuso diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index f3bc322..74ef4d4 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -737,7 +737,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, nf_bridge->mask |= BRNF_PKT_TYPE; } - if (br_parse_ip_options(skb)) + if (pf == PF_INET && br_parse_ip_options(skb)) return NF_DROP; /* The physdev module checks on this */ -- cgit v0.10.2 From 0f08190fe8af3cdb6ba19690eb0fa253ecef4bde Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Sun, 15 May 2011 17:20:29 +0200 Subject: IPVS: fix netns if reading ip_vs_* procfs entries Without this patch every access to ip_vs in procfs will increase the netns count i.e. an unbalanced get_net()/put_net(). (ipvsadm commands also use procfs.) The result is you can't exit a netns if reading ip_vs_* procfs entries. Signed-off-by: Hans Schillstrom Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 51f3af7..059af31 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -572,7 +572,7 @@ static const struct file_operations ip_vs_app_fops = { .open = ip_vs_app_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; #endif diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index d3fd91b..bf28ac2 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1046,7 +1046,7 @@ static const struct file_operations ip_vs_conn_fops = { .open = ip_vs_conn_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; static const char *ip_vs_origin_name(unsigned flags) @@ -1114,7 +1114,7 @@ static const struct file_operations ip_vs_conn_sync_fops = { .open = ip_vs_conn_sync_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = 
seq_release_net, }; #endif diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ea72281..37890f2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2066,7 +2066,7 @@ static const struct file_operations ip_vs_info_fops = { .open = ip_vs_info_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; #endif @@ -2109,7 +2109,7 @@ static const struct file_operations ip_vs_stats_fops = { .open = ip_vs_stats_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = single_release_net, }; static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) @@ -2178,7 +2178,7 @@ static const struct file_operations ip_vs_stats_percpu_fops = { .open = ip_vs_stats_percpu_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = single_release_net, }; #endif -- cgit v0.10.2 From a67b8887ced9d54cab7759bdb19deafed37481eb Mon Sep 17 00:00:00 2001 From: Florian Mickler Date: Sun, 15 May 2011 16:32:50 +0200 Subject: vga_switcheroo: don't toggle-switch devices If the requested device is already active, ignore the request. This restores the original behaviour of the interface. The change was probably an unintended side effect of commit 66b37c6777c4 vga_switcheroo: split switching into two stages which did not take into account to duplicate the !active check in the split-off stage2. Fix this by factoring that check out of stage1 into the debugfs_write routine. 
References: https://bugzilla.kernel.org/show_bug.cgi?id=34252 Reported-by: Igor Murzov Tested-by: Igor Murzov Signed-off-by: Florian Mickler Signed-off-by: Dave Airlie diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index e01cacb..498b284 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -219,9 +219,6 @@ static int vga_switchto_stage1(struct vga_switcheroo_client *new_client) int i; struct vga_switcheroo_client *active = NULL; - if (new_client->active == true) - return 0; - for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) { if (vgasr_priv.clients[i].active == true) { active = &vgasr_priv.clients[i]; @@ -372,6 +369,9 @@ vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf, goto out; } + if (client->active == true) + goto out; + /* okay we want a switch - test if devices are willing to switch */ can_switch = true; for (i = 0; i < VGA_SWITCHEROO_MAX_CLIENTS; i++) { -- cgit v0.10.2 From 8eea1be174a1ea4b86323167bbadc8a6abdca613 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 13 May 2011 12:14:54 -0400 Subject: drm/i915: Revert i915.semaphore=1 default from 47ae63e0 My Q67 / i7-2600 box has rev09 Sandy Bridge graphics. It hangs instantly when GNOME loads and it hangs so hard the reset button doesn't work. Setting i915.semaphore=0 fixes it. Semaphores were disabled in a1656b9090f7008d2941c314f5a64724bea2ae37 in 2.6.38 and were re-enabled by commit 47ae63e0c2e5fdb582d471dc906eb29be94c732f Merge: c59a333 467cffb Author: Chris Wilson Date: Mon Mar 7 12:32:44 2011 +0000 Merge branch 'drm-intel-fixes' into drm-intel-next Apply the trivial conflicting regression fixes, but keep GPU semaphores enabled. Conflicts: drivers/gpu/drm/i915/i915_drv.h drivers/gpu/drm/i915/i915_gem_execbuffer.c (It's worth noting that the offending change is i915_drv.c, which is not a conflict.) 
Signed-off-by: Andy Lutomirski Acked-by: Keith Packard Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c34a8dd..32d1b3e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -49,7 +49,7 @@ module_param_named(panel_ignore_lid, i915_panel_ignore_lid, int, 0600); unsigned int i915_powersave = 1; module_param_named(powersave, i915_powersave, int, 0600); -unsigned int i915_semaphores = 1; +unsigned int i915_semaphores = 0; module_param_named(semaphores, i915_semaphores, int, 0600); unsigned int i915_enable_rc6 = 0; -- cgit v0.10.2 From 752d2635ebb12b6122ba05775f7d1ccfef14b275 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 22 Apr 2011 11:03:57 +0100 Subject: drm: Take lock around probes for drm_fb_helper_hotplug_event We need to hold the dev->mode_config.mutex whilst detecting the output status. But we also need to drop it for the call into drm_fb_helper_single_fb_probe(), which indirectly acquires the lock when attaching the fbcon. Failure to do so exposes a race with normal output probing. Detected by adding some warnings that the mutex is held to the backend detect routines: [ 17.772456] WARNING: at drivers/gpu/drm/i915/intel_crt.c:471 intel_crt_detect+0x3e/0x373 [i915]() [ 17.772458] Hardware name: Latitude E6400 [ 17.772460] Modules linked in: .... [ 17.772582] Pid: 11, comm: kworker/0:1 Tainted: G W 2.6.38.4-custom.2 #8 [ 17.772584] Call Trace: [ 17.772591] [] ? warn_slowpath_common+0x78/0x8c [ 17.772603] [] ? intel_crt_detect+0x3e/0x373 [i915] [ 17.772612] [] ? drm_helper_probe_single_connector_modes+0xbf/0x2af [drm_kms_helper] [ 17.772619] [] ? drm_fb_helper_probe_connector_modes+0x39/0x4d [drm_kms_helper] [ 17.772625] [] ? drm_fb_helper_hotplug_event+0xa5/0xc3 [drm_kms_helper] [ 17.772633] [] ? output_poll_execute+0x146/0x17c [drm_kms_helper] [ 17.772638] [] ? cfq_init_queue+0x247/0x345 [ 17.772644] [] ? 
output_poll_execute+0x0/0x17c [drm_kms_helper] [ 17.772648] [] ? process_one_work+0x193/0x28e [ 17.772652] [] ? worker_thread+0xef/0x172 [ 17.772655] [] ? worker_thread+0x0/0x172 [ 17.772658] [] ? worker_thread+0x0/0x172 [ 17.772663] [] ? kthread+0x7a/0x82 [ 17.772668] [] ? kernel_thread_helper+0x4/0x10 [ 17.772671] [] ? kthread+0x0/0x82 [ 17.772674] [] ? kernel_thread_helper+0x0/0x10 Reported-by: Frederik Himpe References: https://bugs.freedesktop.org/show_bug.cgi?id=36394 Signed-off-by: Chris Wilson Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 11d7a72..140b952 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1516,17 +1516,33 @@ bool drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper, int bpp_sel) } EXPORT_SYMBOL(drm_fb_helper_initial_config); -bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) +/** + * drm_fb_helper_hotplug_event - respond to a hotplug notification by + * probing all the outputs attached to the fb. + * @fb_helper: the drm_fb_helper + * + * LOCKING: + * Called at runtime, must take mode config lock. + * + * Scan the connectors attached to the fb_helper and try to put together a + * setup after *notification of a change in output configuration. + * + * RETURNS: + * 0 on success and a non-zero error code otherwise. 
+ */ +int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) { + struct drm_device *dev = fb_helper->dev; int count = 0; u32 max_width, max_height, bpp_sel; bool bound = false, crtcs_bound = false; struct drm_crtc *crtc; if (!fb_helper->fb) - return false; + return 0; - list_for_each_entry(crtc, &fb_helper->dev->mode_config.crtc_list, head) { + mutex_lock(&dev->mode_config.mutex); + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { if (crtc->fb) crtcs_bound = true; if (crtc->fb == fb_helper->fb) @@ -1535,7 +1551,8 @@ bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) if (!bound && crtcs_bound) { fb_helper->delayed_hotplug = true; - return false; + mutex_unlock(&dev->mode_config.mutex); + return 0; } DRM_DEBUG_KMS("\n"); @@ -1546,6 +1563,7 @@ bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) count = drm_fb_helper_probe_connector_modes(fb_helper, max_width, max_height); drm_setup_crtcs(fb_helper); + mutex_unlock(&dev->mode_config.mutex); return drm_fb_helper_single_fb_probe(fb_helper, bpp_sel); } diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index ade09d7..c99c3d3 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -127,7 +127,7 @@ void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch, int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info); -bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper); +int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper); bool drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper, int bpp_sel); int drm_fb_helper_single_add_all_connectors(struct drm_fb_helper *fb_helper); int drm_fb_helper_debug_enter(struct fb_info *info); -- cgit v0.10.2 From e503f9e4b092e2349a9477a333543de8f3c7f5d9 Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Fri, 22 Apr 2011 00:22:43 +0800 Subject: x86, apic: Fix spurious error interrupts triggering on all non-boot APs This patch fixes a bug reported by a 
customer, who found that many unreasonable error interrupts were reported on all non-boot CPUs (APs) during the system boot stage. According to Chapter 10 of Intel Software Developer Manual Volume 3A, Local APIC may signal an illegal vector error when an LVT entry is set as an illegal vector value (0~15) under FIXED delivery mode (bits 8-10 are 0), regardless of whether the mask bit is set or an interrupt actually happens. These errors are seen as error interrupts. The initial value of thermal LVT entries on all APs always reads 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI sequence to them and LVT registers are reset to 0s except for the mask bits which are set to 1s when APs receive INIT IPI. When the BIOS takes over the thermal throttling interrupt, the LVT thermal delivery mode should be SMI and it is required from the kernel to keep AP's LVT thermal monitoring register programmed as such as well. This issue happens when the BIOS does not take over the thermal throttling interrupt: AP's LVT thermal monitor register will then be restored to 0x10000, which means vector 0 and fixed delivery mode, so all APs will signal illegal vector error interrupts. This patch checks that the interrupt delivery mode is not fixed mode before restoring AP's LVT thermal monitor register.
Signed-off-by: Youquan Song Acked-by: Suresh Siddha Acked-by: Yong Wang Cc: hpa@linux.intel.com Cc: joe@perches.com Cc: jbaron@redhat.com Cc: trenn@suse.de Cc: kent.liu@intel.com Cc: chaohong.guo@intel.com Cc: # As far back as possible Link: http://lkml.kernel.org/r/1303402963-17738-1-git-send-email-youquan.song@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index d87988b..34595d5 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -78,6 +78,7 @@ #define APIC_DEST_LOGICAL 0x00800 #define APIC_DEST_PHYSICAL 0x00000 #define APIC_DM_FIXED 0x00000 +#define APIC_DM_FIXED_MASK 0x00700 #define APIC_DM_LOWEST 0x00100 #define APIC_DM_SMI 0x00200 #define APIC_DM_REMRD 0x00300 diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 6f8c5e9..0f03446 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -446,18 +446,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); + h = lvtthmr_init; /* * The initial value of thermal LVT entries on all APs always reads * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI * sequence to them and LVT registers are reset to 0s except for * the mask bits which are set to 1s when APs receive INIT IPI. - * Always restore the value that BIOS has programmed on AP based on - * BSP's info we saved since BIOS is always setting the same value - * for all threads/cores + * If BIOS takes over the thermal interrupt and sets its interrupt + * delivery mode to SMI (not fixed), it restores the value that the + * BIOS has programmed on AP based on BSP's info we saved since BIOS + * is always setting the same value for all threads/cores. 
*/ - apic_write(APIC_LVTTHMR, lvtthmr_init); + if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED) + apic_write(APIC_LVTTHMR, lvtthmr_init); - h = lvtthmr_init; if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG -- cgit v0.10.2 From 70087dc38cc77ca8f46059564c00338777734762 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 16 May 2011 15:24:08 +0200 Subject: blk-throttle: Use task_subsys_state() to determine a task's blkio_cgroup Currently we first map the task to cgroup and then cgroup to blkio_cgroup. There is a more direct way to get to blkio_cgroup from task using task_subsys_state(). Use that. The real reason for the fix is that it also avoids a race in generic cgroup code. During remount/umount rebind_subsystems() is called and it can do the following without any rcu protection. cgrp->subsys[i] = NULL; That means if somebody got hold of cgroup under rcu and then it tried to do cgroup->subsys[] to get to blkio_cgroup, it would get NULL which is wrong. I was running into this race condition with LTP running on an upstream derived kernel and that led to a crash. So ideally we should also fix cgroup generic code to wait for rcu grace period before setting pointer to NULL. Li Zefan is not very keen on introducing synchronize_wait() as he thinks it will slow down mount/remount/umount operations. So for the time being at least fix the kernel crash by taking a more direct route to blkio_cgroup. One tester had reported a crash while running LTP on a derived kernel and with this fix the crash is no longer seen while the test has been running for over 6 days.
Signed-off-by: Vivek Goyal Reviewed-by: Li Zefan Signed-off-by: Jens Axboe diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index f0605ab..471fdcc 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -114,6 +114,13 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) } EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup); +struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk) +{ + return container_of(task_subsys_state(tsk, blkio_subsys_id), + struct blkio_cgroup, css); +} +EXPORT_SYMBOL_GPL(task_blkio_cgroup); + static inline void blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight) { diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 10919fa..c774930 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -291,6 +291,7 @@ static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg) {} #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) extern struct blkio_cgroup blkio_root_cgroup; extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); +extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk); extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, struct blkio_group *blkg, void *key, dev_t dev, enum blkio_policy_id plid); @@ -314,6 +315,8 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg, struct cgroup; static inline struct blkio_cgroup * cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; } +static inline struct blkio_cgroup * +task_blkio_cgroup(struct task_struct *tsk) { return NULL; } static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, struct blkio_group *blkg, void *key, dev_t dev, diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 0475a22..252a81a 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -160,9 +160,8 @@ static void throtl_put_tg(struct throtl_grp *tg) } static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td, - struct cgroup *cgroup) + 
struct blkio_cgroup *blkcg) { - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); struct throtl_grp *tg = NULL; void *key = td; struct backing_dev_info *bdi = &td->queue->backing_dev_info; @@ -229,12 +228,12 @@ done: static struct throtl_grp * throtl_get_tg(struct throtl_data *td) { - struct cgroup *cgroup; struct throtl_grp *tg = NULL; + struct blkio_cgroup *blkcg; rcu_read_lock(); - cgroup = task_cgroup(current, blkio_subsys_id); - tg = throtl_find_alloc_tg(td, cgroup); + blkcg = task_blkio_cgroup(current); + tg = throtl_find_alloc_tg(td, blkcg); if (!tg) tg = &td->root_tg; rcu_read_unlock(); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 5b52011..ab7a9e6 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1014,10 +1014,9 @@ void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg, cfqg->needs_update = true; } -static struct cfq_group * -cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) +static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd, + struct blkio_cgroup *blkcg, int create) { - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); struct cfq_group *cfqg = NULL; void *key = cfqd; int i, j; @@ -1079,12 +1078,12 @@ done: */ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) { - struct cgroup *cgroup; + struct blkio_cgroup *blkcg; struct cfq_group *cfqg = NULL; rcu_read_lock(); - cgroup = task_cgroup(current, blkio_subsys_id); - cfqg = cfq_find_alloc_cfqg(cfqd, cgroup, create); + blkcg = task_blkio_cgroup(current); + cfqg = cfq_find_alloc_cfqg(cfqd, blkcg, create); if (!cfqg && create) cfqg = &cfqd->root_group; rcu_read_unlock(); -- cgit v0.10.2 From 86f315bbb2374f1f077500ad131dd9b71856e697 Mon Sep 17 00:00:00 2001 From: Chris Ball Date: Mon, 16 May 2011 11:32:26 -0400 Subject: Revert "mmc: fix a race between card-detect rescan and clock-gate work instances" This reverts commit 26fc8775b51484d8c0a671198639c6d5ae60533e, which has been 
reported to cause boot/resume-time crashes for some users: https://bbs.archlinux.org/viewtopic.php?id=118751. Signed-off-by: Chris Ball Cc: diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 2b200c1..461e6a1 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -94,7 +94,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host) spin_unlock_irqrestore(&host->clk_lock, flags); return; } - mmc_claim_host(host); + mutex_lock(&host->clk_gate_mutex); spin_lock_irqsave(&host->clk_lock, flags); if (!host->clk_requests) { spin_unlock_irqrestore(&host->clk_lock, flags); @@ -104,7 +104,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host) pr_debug("%s: gated MCI clock\n", mmc_hostname(host)); } spin_unlock_irqrestore(&host->clk_lock, flags); - mmc_release_host(host); + mutex_unlock(&host->clk_gate_mutex); } /* @@ -130,7 +130,7 @@ void mmc_host_clk_ungate(struct mmc_host *host) { unsigned long flags; - mmc_claim_host(host); + mutex_lock(&host->clk_gate_mutex); spin_lock_irqsave(&host->clk_lock, flags); if (host->clk_gated) { spin_unlock_irqrestore(&host->clk_lock, flags); @@ -140,7 +140,7 @@ void mmc_host_clk_ungate(struct mmc_host *host) } host->clk_requests++; spin_unlock_irqrestore(&host->clk_lock, flags); - mmc_release_host(host); + mutex_unlock(&host->clk_gate_mutex); } /** @@ -215,6 +215,7 @@ static inline void mmc_host_clk_init(struct mmc_host *host) host->clk_gated = false; INIT_WORK(&host->clk_gate_work, mmc_host_clk_gate_work); spin_lock_init(&host->clk_lock); + mutex_init(&host->clk_gate_mutex); } /** diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index eb792cb..bcb793e 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -183,6 +183,7 @@ struct mmc_host { struct work_struct clk_gate_work; /* delayed clock gate */ unsigned int clk_old; /* old clock value cache */ spinlock_t clk_lock; /* lock for clk fields */ + struct mutex clk_gate_mutex; /* mutex for clock gating */ #endif 
/* host specific block data */ -- cgit v0.10.2 From 867955f5682f7157fdafe8670804b9f8ea077bc7 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 16 May 2011 06:13:49 +0000 Subject: sfc: Fix oops in register dump after mapping change Commit 747df2258b1b9a2e25929ef496262c339c380009 ('sfc: Always map MCDI shared memory as uncacheable') introduced a separate mapping for the MCDI shared memory (MC_TREG_SMEM). This means we can no longer easily include it in the register dump. Since it is not particularly useful in debugging, substitute a recognisable dummy value. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller diff --git a/drivers/net/sfc/nic.c b/drivers/net/sfc/nic.c index 10f1cb7..9b29a8d 100644 --- a/drivers/net/sfc/nic.c +++ b/drivers/net/sfc/nic.c @@ -1937,6 +1937,13 @@ void efx_nic_get_regs(struct efx_nic *efx, void *buf) size = min_t(size_t, table->step, 16); + if (table->offset >= efx->type->mem_map_size) { + /* No longer mapped; return dummy data */ + memcpy(buf, "\xde\xc0\xad\xde", 4); + buf += table->rows * size; + continue; + } + for (i = 0; i < table->rows; i++) { switch (table->step) { case 4: /* 32-bit register or SRAM */ -- cgit v0.10.2 From ebde6f8acba92abfc203585198a54f47e83e2cd0 Mon Sep 17 00:00:00 2001 From: Thomas Jarosch Date: Mon, 16 May 2011 06:28:15 +0000 Subject: vmxnet3: Fix inconsistent LRO state after initialization During initialization of vmxnet3, the state of LRO gets out of sync with netdev->features. This leads to very poor TCP performance in a IP forwarding setup and is hitting many VMware users. Simplified call sequence: 1. vmxnet3_declare_features() initializes "adapter->lro" to true. 2. The kernel automatically disables LRO if IP forwarding is enabled, so vmxnet3_set_flags() gets called. This also updates netdev->features. 3. Now vmxnet3_setup_driver_shared() is called. "adapter->lro" is still set to true and LRO gets enabled again, even though netdev->features shows it's disabled. 
Fix it by updating "adapter->lro", too. The private vmxnet3 adapter flags are scheduled for removal in net-next, see commit a0d2730c9571aeba793cb5d3009094ee1d8fda35 "net: vmxnet3: convert to hw_features". Patch applies to 2.6.37 / 2.6.38 and 2.6.39-rc6. Please CC: comments. Signed-off-by: Thomas Jarosch Acked-by: Stephen Hemminger Signed-off-by: David S. Miller diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 51f2ef1..9764672 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -311,6 +311,9 @@ vmxnet3_set_flags(struct net_device *netdev, u32 data) /* toggle the LRO feature*/ netdev->features ^= NETIF_F_LRO; + /* Update private LRO flag */ + adapter->lro = lro_requested; + /* update harware LRO capability accordingly */ if (lro_requested) adapter->shared->devRead.misc.uptFeatures |= -- cgit v0.10.2 From 6f404e441d169afc90929ef5e451ec9779c1f11a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Mon, 16 May 2011 15:14:21 -0400 Subject: net: Change netdev_fix_features messages loglevel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those reduced to DEBUG can possibly be triggered by unprivileged processes and are nothing exceptional. Illegal checksum combinations can only be caused by driver bug, so promote those messages to WARN. Since GSO without SG will now only cause DEBUG message from netdev_fix_features(), remove the workaround from register_netdevice(). Signed-off-by: Michał Mirosław Signed-off-by: David S. 
Miller diff --git a/net/core/dev.c b/net/core/dev.c index 9200944..b624fe4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5186,27 +5186,27 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) /* Fix illegal checksum combinations */ if ((features & NETIF_F_HW_CSUM) && (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - netdev_info(dev, "mixed HW and IP checksum settings.\n"); + netdev_warn(dev, "mixed HW and IP checksum settings.\n"); features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); } if ((features & NETIF_F_NO_CSUM) && (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - netdev_info(dev, "mixed no checksumming and other settings.\n"); + netdev_warn(dev, "mixed no checksumming and other settings.\n"); features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); } /* Fix illegal SG+CSUM combinations. */ if ((features & NETIF_F_SG) && !(features & NETIF_F_ALL_CSUM)) { - netdev_info(dev, - "Dropping NETIF_F_SG since no checksum feature.\n"); + netdev_dbg(dev, + "Dropping NETIF_F_SG since no checksum feature.\n"); features &= ~NETIF_F_SG; } /* TSO requires that SG is present as well. */ if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { - netdev_info(dev, "Dropping TSO features since no SG feature.\n"); + netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); features &= ~NETIF_F_ALL_TSO; } @@ -5216,7 +5216,7 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) /* Software GSO depends on SG. 
*/ if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { - netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); + netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); features &= ~NETIF_F_GSO; } @@ -5226,13 +5226,13 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) if (!((features & NETIF_F_GEN_CSUM) || (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - netdev_info(dev, + netdev_dbg(dev, "Dropping NETIF_F_UFO since no checksum offload features.\n"); features &= ~NETIF_F_UFO; } if (!(features & NETIF_F_SG)) { - netdev_info(dev, + netdev_dbg(dev, "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); features &= ~NETIF_F_UFO; } @@ -5414,12 +5414,6 @@ int register_netdevice(struct net_device *dev) dev->features |= NETIF_F_SOFT_FEATURES; dev->wanted_features = dev->features & dev->hw_features; - /* Avoid warning from netdev_fix_features() for GSO without SG */ - if (!(dev->wanted_features & NETIF_F_SG)) { - dev->wanted_features &= ~NETIF_F_GSO; - dev->features &= ~NETIF_F_GSO; - } - /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features * are enabled only if supported by underlying device. -- cgit v0.10.2 From 07f4beb0b5bbfaf36a64aa00d59e670ec578a95a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 May 2011 11:07:48 +0200 Subject: tick: Clear broadcast active bit when switching to oneshot The first cpu which switches from periodic to oneshot mode switches also the broadcast device into oneshot mode. The broadcast device serves as a backup for per cpu timers which stop in deeper C-states. To avoid starvation of the cpus which might be in idle and depend on broadcast mode it marks the other cpus as broadcast active and sets the brodcast expiry value of those cpus to the next tick. The oneshot mode broadcast bit for the other cpus is sticky and gets only cleared when those cpus exit idle. 
If a cpu was not idle while the bit got set in consequence the bit prevents that the broadcast device is armed on behalf of that cpu when it enters idle for the first time after it switched to oneshot mode. In most cases that goes unnoticed as one of the other cpus has usually a timer pending which keeps the broadcast device armed with a short timeout. Now if the only cpu which has a short timer active has the bit set then the broadcast device will not be armed on behalf of that cpu and will fire way after the expected timer expiry. In the case of Christians bug report it took ~145 seconds which is about half of the wrap around time of HPET (the limit for that device) due to the fact that all other cpus had no timers armed which expired before the 145 seconds timeframe. The solution is simply to clear the broadcast active bit unconditionally when a cpu switches to oneshot mode after the first cpu switched the broadcast device over. It's not idle at that point otherwise it would not be executing that code. [ I fundamentally hate that broadcast crap. Why the heck thought some folks that when going into deep idle it's a brilliant concept to switch off the last device which brings the cpu back from that state? ] Thanks to Christian for providing all the valuable debug information! Reported-and-tested-by: Christian Hoffmann Cc: John Stultz Link: http://lkml.kernel.org/r/%3Calpine.LFD.2.02.1105161105170.3078%40ionos%3E Cc: stable@kernel.org Signed-off-by: Thomas Gleixner diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index da800ff..723c763 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -522,10 +522,11 @@ static void tick_broadcast_init_next_event(struct cpumask *mask, */ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { + int cpu = smp_processor_id(); + /* Set it up only once ! 
*/ if (bc->event_handler != tick_handle_oneshot_broadcast) { int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC; - int cpu = smp_processor_id(); bc->event_handler = tick_handle_oneshot_broadcast; clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); @@ -551,6 +552,15 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) tick_broadcast_set_event(tick_next_period, 1); } else bc->next_event.tv64 = KTIME_MAX; + } else { + /* + * The first cpu which switches to oneshot mode sets + * the bit for all other cpus which are in the general + * (periodic) broadcast mask. So the bit is set and + * would prevent the first broadcast enter after this + * to program the bc device. + */ + tick_broadcast_clear_oneshot(cpu); } } -- cgit v0.10.2 From 93d2175d3d31f11ba04fcfa0e9a496a1b4bc8b34 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 13 May 2011 18:06:17 -0700 Subject: PCI: Clear bridge resource flags if requested size is 0 During pci remove/rescan testing found: pci 0000:c0:03.0: PCI bridge to [bus c4-c9] pci 0000:c0:03.0: bridge window [io 0x1000-0x0fff] pci 0000:c0:03.0: bridge window [mem 0xf0000000-0xf00fffff] pci 0000:c0:03.0: bridge window [mem 0xfc180000000-0xfc197ffffff 64bit pref] pci 0000:c0:03.0: device not available (can't reserve [io 0x1000-0x0fff]) pci 0000:c0:03.0: Error enabling bridge (-22), continuing pci 0000:c0:03.0: enabling bus mastering pci 0000:c0:03.0: setting latency timer to 64 pcieport 0000:c0:03.0: device not available (can't reserve [io 0x1000-0x0fff]) pcieport: probe of 0000:c0:03.0 failed with error -22 This bug was caused by commit c8adf9a3e873 ("PCI: pre-allocate additional resources to devices only after successful allocation of essential resources.") After that commit, pci_hotplug_io_size is changed to additional_io_size from minium size. So it will not go through resource_size(res) != 0 path, and will not be reset. 
The root cause is: pci_bridge_check_ranges will set RESOURCE_IO flag for pci bridge, and later if children do not need IO resource. those bridge resources will not need to be allocated. but flags is still there. that will confuse the pci_enable_bridges later. related code: static void assign_requested_resources_sorted(struct resource_list *head, struct resource_list_x *fail_head) { struct resource *res; struct resource_list *list; int idx; for (list = head->next; list; list = list->next) { res = list->res; idx = res - &list->dev->resource[0]; if (resource_size(res) && pci_assign_resource(list->dev, idx)) { ... reset_resource(res); } } } At last, We have to clear the flags in pbus_size_mem/io when requested size == 0 and !add_head. because this case it will not go through adjust_resources_sorted(). Just make size1 = size0 when !add_head. it will make flags get cleared. At the same time when requested size == 0, add_size != 0, will still have in head and add_list. because we do not clear the flags for it. After this, we will get right result: pci 0000:c0:03.0: PCI bridge to [bus c4-c9] pci 0000:c0:03.0: bridge window [io disabled] pci 0000:c0:03.0: bridge window [mem 0xf0000000-0xf00fffff] pci 0000:c0:03.0: bridge window [mem 0xfc180000000-0xfc197ffffff 64bit pref] pci 0000:c0:03.0: enabling bus mastering pci 0000:c0:03.0: setting latency timer to 64 pcieport 0000:c0:03.0: setting latency timer to 64 pcieport 0000:c0:03.0: irq 160 for MSI/MSI-X pcieport 0000:c0:03.0: Signaling PME through PCIe PME interrupt pci 0000:c4:00.0: Signaling PME through PCIe PME interrupt pcie_pme 0000:c0:03.0:pcie01: service driver pcie_pme loaded aer 0000:c0:03.0:pcie02: service driver aer loaded pciehp 0000:c0:03.0:pcie04: Hotplug Controller: v3: more simple fix.
also fix one typo in pbus_size_mem Signed-off-by: Yinghai Lu Reviewed-by: Ram Pai Cc: Jesse Barnes Cc: Bjorn Helgaas Signed-off-by: Linus Torvalds diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index ebf51ad..a806cb3 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -579,7 +579,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, } size0 = calculate_iosize(size, min_size, size1, resource_size(b_res), 4096); - size1 = !add_size? size0: + size1 = (!add_head || (add_head && !add_size)) ? size0 : calculate_iosize(size, min_size+add_size, size1, resource_size(b_res), 4096); if (!size0 && !size1) { @@ -677,7 +677,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, align += aligns[order]; } size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), min_align); - size1 = !add_size ? size : + size1 = (!add_head || (add_head && !add_size)) ? size0 : calculate_memsize(size, min_size+add_size, 0, resource_size(b_res), min_align); if (!size0 && !size1) { -- cgit v0.10.2 From b5e6ab589d570ac79cc939517fab05c87a23c262 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 16 May 2011 13:16:54 -0700 Subject: mm: fix kernel-doc warning in page_alloc.c Fix new kernel-doc warning in mm/page_alloc.c: Warning(mm/page_alloc.c:2370): No description found for parameter 'nid' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 570d944..3f8bce2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2358,6 +2358,7 @@ EXPORT_SYMBOL(alloc_pages_exact); /** * alloc_pages_exact_nid - allocate an exact number of physically-contiguous * pages on a node. 
+ * @nid: the preferred node ID where memory should be allocated * @size: the number of bytes to allocate * @gfp_mask: GFP flags for the allocation * -- cgit v0.10.2 From 9937a5e2f32892db0dbeefc2b3bc74b3ae3ea9c7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 May 2011 11:04:44 +0200 Subject: scsi: remove performance regression due to async queue run Commit c21e6beb removed our queue request_fn re-enter protection, and defaulted to always running the queues from kblockd to be safe. This was a known potential slow down, but should be safe. Unfortunately this is causing big performance regressions for some, so we need to improve this logic. Looking into the details of the re-enter, the real issue is on requeue of requests. Requeue of requests upon seeing a BUSY condition from the device ends up re-running the queue, causing traces like this: scsi_request_fn() scsi_dispatch_cmd() scsi_queue_insert() __scsi_queue_insert() scsi_run_queue() scsi_request_fn() ... potentially causing the issue we want to avoid. So special case the requeue re-run of the queue, but improve it to offload the entire run of local queue and starved queue from a single workqueue callback. This is a lot better than potentially kicking off a workqueue run for each device seen. This also fixes the issue of the local device going into recursion, since the above mentioned commit never moved that queue run out of line. 
Signed-off-by: Jens Axboe diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e9901b8..01e4e51 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -74,8 +74,6 @@ struct kmem_cache *scsi_sdb_cache; */ #define SCSI_QUEUE_DELAY 3 -static void scsi_run_queue(struct request_queue *q); - /* * Function: scsi_unprep_request() * @@ -161,7 +159,7 @@ static int __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy) blk_requeue_request(q, cmd->request); spin_unlock_irqrestore(q->queue_lock, flags); - scsi_run_queue(q); + kblockd_schedule_work(q, &device->requeue_work); return 0; } @@ -433,7 +431,11 @@ static void scsi_run_queue(struct request_queue *q) continue; } - blk_run_queue_async(sdev->request_queue); + spin_unlock(shost->host_lock); + spin_lock(sdev->request_queue->queue_lock); + __blk_run_queue(sdev->request_queue); + spin_unlock(sdev->request_queue->queue_lock); + spin_lock(shost->host_lock); } /* put any unprocessed entries back */ list_splice(&starved_list, &shost->starved_list); @@ -442,6 +444,16 @@ static void scsi_run_queue(struct request_queue *q) blk_run_queue(q); } +void scsi_requeue_run_queue(struct work_struct *work) +{ + struct scsi_device *sdev; + struct request_queue *q; + + sdev = container_of(work, struct scsi_device, requeue_work); + q = sdev->request_queue; + scsi_run_queue(q); +} + /* * Function: scsi_requeue_command() * diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 087821f..58584dc 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -242,6 +242,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, int display_failure_msg = 1, ret; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); extern void scsi_evt_thread(struct work_struct *work); + extern void scsi_requeue_run_queue(struct work_struct *work); sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size, GFP_ATOMIC); @@ -264,6 +265,7 @@ static struct scsi_device 
*scsi_alloc_sdev(struct scsi_target *starget, INIT_LIST_HEAD(&sdev->event_list); spin_lock_init(&sdev->list_lock); INIT_WORK(&sdev->event_work, scsi_evt_thread); + INIT_WORK(&sdev->requeue_work, scsi_requeue_run_queue); sdev->sdev_gendev.parent = get_device(&starget->dev); sdev->sdev_target = starget; diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 2d3ec50..dd82e02 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -169,6 +169,7 @@ struct scsi_device { sdev_dev; struct execute_work ew; /* used to get process context on put */ + struct work_struct requeue_work; struct scsi_dh_data *scsi_dh_data; enum scsi_device_state sdev_state; -- cgit v0.10.2 From 328935e6348c6a7cb34798a68c326f4b8372e68a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 17 May 2011 14:55:18 +0200 Subject: Revert "x86, AMD: Fix APIC timer erratum 400 affecting K8 Rev.A-E processors" This reverts commit e20a2d205c05cef6b5783df339a7d54adeb50962, as it crashes certain boxes with specific AMD CPU models. Moving the lower endpoint of the Erratum 400 check to accomodate earlier K8 revisions (A-E) opens a can of worms which is simply not worth to fix properly by tweaking the errata checking framework: * missing IntPenging MSR on revisions < CG cause #GP: http://marc.info/?l=linux-kernel&m=130541471818831 * makes earlier revisions use the LAPIC timer instead of the C1E idle routine which switches to HPET, thus not waking up in deeper C-states: http://lkml.org/lkml/2011/4/24/20 Therefore, leave the original boundary starting with K8-revF. 
Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bb9eb29..3532d3b 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -698,7 +698,7 @@ cpu_dev_register(amd_cpu_dev); */ const int amd_erratum_400[] = - AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf), + AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); EXPORT_SYMBOL_GPL(amd_erratum_400); -- cgit v0.10.2 From 14fb57dccb6e1defe9f89a66f548fcb24c374c1d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 17 May 2011 14:55:19 +0200 Subject: x86, AMD: Fix ARAT feature setting again Trying to enable the local APIC timer on early K8 revisions uncovers a number of other issues with it, in conjunction with the C1E enter path on AMD. Fixing those causes much more churn and troubles than the benefit of using that timer brings so don't enable it on K8 at all, falling back to the original functionality the kernel had wrt to that. 
Reported-and-bisected-by: Nick Bowler Cc: Boris Ostrovsky Cc: Andreas Herrmann Cc: Greg Kroah-Hartman Cc: Hans Rosenfeld Cc: Nick Bowler Cc: Joerg-Volker-Peetz Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1305636919-31165-3-git-send-email-bp@amd64.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 3532d3b..6f9d1f6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -613,7 +613,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) #endif /* As a rule processors have APIC timer running in deep C states */ - if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400)) + if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400)) set_cpu_cap(c, X86_FEATURE_ARAT); /* -- cgit v0.10.2 From 221d1d797202984cb874e3ed9f1388593d34ee22 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 17 May 2011 06:40:30 -0400 Subject: cifs: add fallback in is_path_accessible for old servers The is_path_accessible check uses a QPathInfo call, which isn't supported by ancient win9x era servers. Fall back to an older SMBQueryInfo call if it fails with the magic error codes. 
Cc: stable@kernel.org Reported-and-Tested-by: Sandro Bonazzola Signed-off-by: Jeff Layton Signed-off-by: Steve French diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 05f1dcf..277262a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2673,6 +2673,11 @@ is_path_accessible(int xid, struct cifsTconInfo *tcon, 0 /* not legacy */, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + + if (rc == -EOPNOTSUPP || rc == -EINVAL) + rc = SMBQueryInformation(xid, tcon, full_path, pfile_info, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); kfree(pfile_info); return rc; } -- cgit v0.10.2 From 11379b5e33950048ad66825da7f462b0d0da9d73 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 17 May 2011 15:28:21 -0400 Subject: cifs: fix cifsConvertToUCS() for the mapchars case As Metze pointed out, commit 84cdf74e broke mapchars option: Commit "cifs: fix unaligned accesses in cifsConvertToUCS" (84cdf74e8096a10dd6acbb870dd404b92f07a756) does multiple steps in just one commit (moving the function and changing it without testing). put_unaligned_le16(temp, &target[j]); is never called for any codepoint the goes via the 'default' switch statement. As a result we put just zero (or maybe uninitialized) bytes into the target buffer. His proposed patch looks correct, but doesn't apply to the current head of the tree. This patch should also fix it. 
Cc: # .38.x: 581ade4: cifs: clean up various nits in unicode routines (try #2) Reported-by: Stefan Metzmacher Signed-off-by: Jeff Layton Signed-off-by: Steve French diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 23d43cd..1b2e180 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -277,6 +277,7 @@ cifsConvertToUCS(__le16 *target, const char *source, int srclen, for (i = 0, j = 0; i < srclen; j++) { src_char = source[i]; + charlen = 1; switch (src_char) { case 0: put_unaligned(0, &target[j]); @@ -316,16 +317,13 @@ cifsConvertToUCS(__le16 *target, const char *source, int srclen, dst_char = cpu_to_le16(0x003f); charlen = 1; } - /* - * character may take more than one byte in the source - * string, but will take exactly two bytes in the - * target string - */ - i += charlen; - continue; } + /* + * character may take more than one byte in the source string, + * but will take exactly two bytes in the target string + */ + i += charlen; put_unaligned(dst_char, &target[j]); - i++; /* move to next char in source string */ } ctoUCS_out: -- cgit v0.10.2 From 0bf2461fdd9008290cf429e50e4f362dafab4249 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Tue, 17 May 2011 15:44:08 -0700 Subject: rapidio: fix default routing initialization Fix switch initialization to ensure that all switches have default routing disabled. This guarantees that no unexpected RapidIO packets arrive to the default port set by reset and there is no default routing destination until it is properly configured by software. This update also unifies handling of unmapped destinations by tsi57x, IDT Gen1 and IDT Gen2 switches. 
Signed-off-by: Alexandre Bounine Cc: Kumar Gala Cc: Matt Porter Cc: Li Yang Cc: Thomas Moll Cc: [2.6.37+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rapidio/switches/idt_gen2.c b/drivers/rapidio/switches/idt_gen2.c index ac2701b..043ee31 100644 --- a/drivers/rapidio/switches/idt_gen2.c +++ b/drivers/rapidio/switches/idt_gen2.c @@ -95,6 +95,9 @@ idtg2_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount, else table++; + if (route_port == RIO_INVALID_ROUTE) + route_port = IDT_DEFAULT_ROUTE; + rio_mport_write_config_32(mport, destid, hopcount, LOCAL_RTE_CONF_DESTID_SEL, table); @@ -411,6 +414,12 @@ static int idtg2_switch_init(struct rio_dev *rdev, int do_enum) rdev->rswitch->em_handle = idtg2_em_handler; rdev->rswitch->sw_sysfs = idtg2_sysfs; + if (do_enum) { + /* Ensure that default routing is disabled on startup */ + rio_write_config_32(rdev, + RIO_STD_RTE_DEFAULT_PORT, IDT_NO_ROUTE); + } + return 0; } diff --git a/drivers/rapidio/switches/idtcps.c b/drivers/rapidio/switches/idtcps.c index 3a97107..d06ee2d 100644 --- a/drivers/rapidio/switches/idtcps.c +++ b/drivers/rapidio/switches/idtcps.c @@ -26,6 +26,9 @@ idtcps_route_add_entry(struct rio_mport *mport, u16 destid, u8 hopcount, { u32 result; + if (route_port == RIO_INVALID_ROUTE) + route_port = CPS_DEFAULT_ROUTE; + if (table == RIO_GLOBAL_TABLE) { rio_mport_write_config_32(mport, destid, hopcount, RIO_STD_RTE_CONF_DESTID_SEL_CSR, route_destid); @@ -130,6 +133,9 @@ static int idtcps_switch_init(struct rio_dev *rdev, int do_enum) /* set TVAL = ~50us */ rio_write_config_32(rdev, rdev->phys_efptr + RIO_PORT_LINKTO_CTL_CSR, 0x8e << 8); + /* Ensure that default routing is disabled on startup */ + rio_write_config_32(rdev, + RIO_STD_RTE_DEFAULT_PORT, CPS_NO_ROUTE); } return 0; diff --git a/drivers/rapidio/switches/tsi57x.c b/drivers/rapidio/switches/tsi57x.c index 1a62934..db8b802 100644 --- a/drivers/rapidio/switches/tsi57x.c +++ b/drivers/rapidio/switches/tsi57x.c @@ 
-303,6 +303,12 @@ static int tsi57x_switch_init(struct rio_dev *rdev, int do_enum) rdev->rswitch->em_init = tsi57x_em_init; rdev->rswitch->em_handle = tsi57x_em_handler; + if (do_enum) { + /* Ensure that default routing is disabled on startup */ + rio_write_config_32(rdev, RIO_STD_RTE_DEFAULT_PORT, + RIO_INVALID_ROUTE); + } + return 0; } -- cgit v0.10.2 From d5f33d45e4c0e306e8d16b4573891a65d9ad544f Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 17 May 2011 15:44:09 -0700 Subject: drivers/leds/leds-lm3530.c: add MODULE_DEVICE_TABLE Adding the necessary MODULE_DEVICE_TABLE() information allows the driver to be automatically loaded by udev. Signed-off-by: Axel Lin Cc: Shreshtha Kumar SAHU Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/leds/leds-lm3530.c b/drivers/leds/leds-lm3530.c index e7089a1..b37e618 100644 --- a/drivers/leds/leds-lm3530.c +++ b/drivers/leds/leds-lm3530.c @@ -349,6 +349,7 @@ static const struct i2c_device_id lm3530_id[] = { {LM3530_NAME, 0}, {} }; +MODULE_DEVICE_TABLE(i2c, lm3530_id); static struct i2c_driver lm3530_i2c_driver = { .probe = lm3530_probe, -- cgit v0.10.2 From d6c438b6cd733834a3cec55af8577a8fc3548016 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 17 May 2011 15:44:10 -0700 Subject: memcg: fix zone congestion ZONE_CONGESTED should be a state of global memory reclaim. If not, a busy memcg sets this and give unnecessary throttoling in wait_iff_congested() against memory recalim in other contexts. This makes system performance bad. I'll think about "memcg is congested!" flag is required or not, later. But this fix is required first. 
Signed-off-by: KAMEZAWA Hiroyuki Reviewed-by: Minchan Kim Cc: Daisuke Nishimura Acked-by: Ying Han Cc: Balbir Singh Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index f6b435c..8bfd450 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -937,7 +937,7 @@ keep_lumpy: * back off and wait for congestion to clear because further reclaim * will encounter the same problem */ - if (nr_dirty == nr_congested && nr_dirty != 0) + if (nr_dirty && nr_dirty == nr_congested && scanning_global_lru(sc)) zone_set_flag(zone, ZONE_CONGESTED); free_page_list(&free_pages); -- cgit v0.10.2 From b2db21997f43907f54500edaf063253ca2a186f9 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 17 May 2011 15:44:11 -0700 Subject: um: fix abort os_dump_core() uses abort() to terminate UML in case of an fatal error. glibc's abort() calls raise(SIGABRT) which makes use of tgkill(). tgkill() has no effect within UML's kernel threads because they are not pthreads. As fallback abort() executes an invalid instruction to terminate the process. Therefore UML gets killed by SIGSEGV and leaves a ugly log entry in the host's kernel ring buffer. To get rid of this we use our own abort routine. Signed-off-by: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index 6ea7797..42827ca 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -75,6 +76,26 @@ void setup_hostinfo(char *buf, int len) host.release, host.version, host.machine); } +/* + * We cannot use glibc's abort(). It makes use of tgkill() which + * has no effect within UML's kernel threads. + * After that glibc would execute an invalid instruction to kill + * the calling process and UML crashes with SIGSEGV. 
+ */ +static inline void __attribute__ ((noreturn)) uml_abort(void) +{ + sigset_t sig; + + fflush(NULL); + + if (!sigemptyset(&sig) && !sigaddset(&sig, SIGABRT)) + sigprocmask(SIG_UNBLOCK, &sig, 0); + + for (;;) + if (kill(getpid(), SIGABRT) < 0) + exit(127); +} + void os_dump_core(void) { int pid; @@ -116,5 +137,5 @@ void os_dump_core(void) while ((pid = waitpid(-1, NULL, WNOHANG | __WALL)) > 0) os_kill_ptraced_process(pid, 0); - abort(); + uml_abort(); } -- cgit v0.10.2 From f12a20fc9bfba4218ecbc4e40c8e08dc2a85dc99 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 17 May 2011 15:44:12 -0700 Subject: procfs: add stub for proc_mkdir_mode() Provide a stub for proc_mkdir_mode() when CONFIG_PROC_FS is not enabled, just like the stub for proc_mkdir(). Fixes this linux-next build error: drivers/net/wireless/airo.c:4504: error: implicit declaration of function 'proc_mkdir_mode' Signed-off-by: Randy Dunlap Cc: Stephen Rothwell Cc: Alexey Dobriyan Cc: "John W. Linville" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 838c114..eaf4350 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -208,6 +208,8 @@ static inline struct proc_dir_entry *proc_symlink(const char *name, struct proc_dir_entry *parent,const char *dest) {return NULL;} static inline struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) {return NULL;} +static inline struct proc_dir_entry *proc_mkdir_mode(const char *name, + mode_t mode, struct proc_dir_entry *parent) { return NULL; } static inline struct proc_dir_entry *create_proc_read_entry(const char *name, mode_t mode, struct proc_dir_entry *base, -- cgit v0.10.2 From 3ec717b7ca4ee1d75d77e4f6286430d8f01d1dbd Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 18 May 2011 11:22:43 +0200 Subject: block: don't delay blk_run_queue_async Let's check a scenario: 1. blk_delay_queue(q, SCSI_QUEUE_DELAY); 2. 
blk_run_queue_async(); the second one will become a noop, because q->delay_work already has WORK_STRUCT_PENDING_BIT set, so the delayed work will still run after SCSI_QUEUE_DELAY. But blk_run_queue_async actually hopes the delayed work runs immediately. Fix this by doing a cancel on potentially pending delayed work before queuing an immediate run of the workqueue. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe diff --git a/block/blk-core.c b/block/blk-core.c index a2e58ee..3fe00a1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -316,8 +316,10 @@ EXPORT_SYMBOL(__blk_run_queue); */ void blk_run_queue_async(struct request_queue *q) { - if (likely(!blk_queue_stopped(q))) + if (likely(!blk_queue_stopped(q))) { + __cancel_delayed_work(&q->delay_work); queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); + } } EXPORT_SYMBOL(blk_run_queue_async); -- cgit v0.10.2 From df7f99670a4c76f269ae57ce91876b309417a316 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 22 Feb 2011 01:09:49 -0800 Subject: configfs: Don't try to d_delete() negative dentries. When configfs is faking mkdir() on its subsystem or default group objects, it starts by adding a negative dentry. It then tries to instantiate the group. If that should fail, it must clean up after itself. I was using d_delete() here, but configfs_attach_group() promises to return an empty dentry on error. d_delete() explodes with the empty dentry. Let's try d_drop() instead. The unhashing is what we want for our dentry.
Signed-off-by: Joel Becker diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 3313dd1..b11d734 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -689,7 +689,8 @@ static int create_default_group(struct config_group *parent_group, sd = child->d_fsdata; sd->s_type |= CONFIGFS_USET_DEFAULT; } else { - d_delete(child); + BUG_ON(child->d_inode); + d_drop(child); dput(child); } } @@ -1683,7 +1684,8 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) err = configfs_attach_group(sd->s_element, &group->cg_item, dentry); if (err) { - d_delete(dentry); + BUG_ON(dentry->d_inode); + d_drop(dentry); dput(dentry); } else { spin_lock(&configfs_dirent_lock); -- cgit v0.10.2 From 24307aa1e707b31613be92deaba7990e16bc1aec Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 18 May 2011 04:08:16 -0700 Subject: configfs: Fix race between configfs_readdir() and configfs_d_iput() configfs_readdir() will use the existing inode numbers of inodes in the dcache, but it makes them up for attribute files that aren't currently instantiated. There is a race where a closing attribute file can be tearing down at the same time as configfs_readdir() is trying to get its inode number. We want to get the inode number of open attribute files, because they should match while instantiated. We can't lock down the transition where dentry->d_inode is set to NULL, so we just check for NULL there. We can, however, ensure that an inode we find isn't iput() in configfs_d_iput() until after we've accessed it. 
Signed-off-by: Joel Becker diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index b11d734..9a37a9b 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -53,11 +53,14 @@ DEFINE_SPINLOCK(configfs_dirent_lock); static void configfs_d_iput(struct dentry * dentry, struct inode * inode) { - struct configfs_dirent * sd = dentry->d_fsdata; + struct configfs_dirent *sd = dentry->d_fsdata; if (sd) { BUG_ON(sd->s_dentry != dentry); + /* Coordinate with configfs_readdir */ + spin_lock(&configfs_dirent_lock); sd->s_dentry = NULL; + spin_unlock(&configfs_dirent_lock); configfs_put(sd); } iput(inode); @@ -1546,7 +1549,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir struct configfs_dirent * parent_sd = dentry->d_fsdata; struct configfs_dirent *cursor = filp->private_data; struct list_head *p, *q = &cursor->s_sibling; - ino_t ino; + ino_t ino = 0; int i = filp->f_pos; switch (i) { @@ -1574,6 +1577,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir struct configfs_dirent *next; const char * name; int len; + struct inode *inode = NULL; next = list_entry(p, struct configfs_dirent, s_sibling); @@ -1582,9 +1586,28 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir name = configfs_get_name(next); len = strlen(name); - if (next->s_dentry) - ino = next->s_dentry->d_inode->i_ino; - else + + /* + * We'll have a dentry and an inode for + * PINNED items and for open attribute + * files. We lock here to prevent a race + * with configfs_d_iput() clearing + * s_dentry before calling iput(). + * + * Why do we go to the trouble? If + * someone has an attribute file open, + * the inode number should match until + * they close it. Beyond that, we don't + * care. 
+ */ + spin_lock(&configfs_dirent_lock); + dentry = next->s_dentry; + if (dentry) + inode = dentry->d_inode; + if (inode) + ino = inode->i_ino; + spin_unlock(&configfs_dirent_lock); + if (!inode) ino = iunique(configfs_sb, 2); if (filldir(dirent, name, len, filp->f_pos, ino, -- cgit v0.10.2 From 3436830af53c38b7674097c00b02b7a4064476f2 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 12 May 2011 13:55:48 +0100 Subject: MIPS: RB532: Fix iomap resource size miscalculation. This is the MIPS portion of Joe Perches 's https://patchwork.linux-mips.org/patch/2172/ which seems to have been lost in time and space. Signed-off-by: Ralf Baechle diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c index 37de05d..6c47dfe 100644 --- a/arch/mips/rb532/gpio.c +++ b/arch/mips/rb532/gpio.c @@ -185,7 +185,7 @@ int __init rb532_gpio_init(void) struct resource *r; r = rb532_gpio_reg0_res; - rb532_gpio_chip->regbase = ioremap_nocache(r->start, r->end - r->start); + rb532_gpio_chip->regbase = ioremap_nocache(r->start, resource_size(r)); if (!rb532_gpio_chip->regbase) { printk(KERN_ERR "rb532: cannot remap GPIO register 0\n"); -- cgit v0.10.2 From 10423c91ffc8e59d4f99d401f7beb3115cdc117a Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Fri, 13 May 2011 10:33:28 +0100 Subject: MIPS: Fix duplicate invocation of notify_die. Initial patch by Yury Polyanskiy . 
Signed-off-by: Ralf Baechle Patchwork: https://patchwork.linux-mips.org/patch/2373/ diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 71350f7..e9b3af2 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -374,7 +374,8 @@ void __noreturn die(const char *str, struct pt_regs *regs) unsigned long dvpret = dvpe(); #endif /* CONFIG_MIPS_MT_SMTC */ - notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV); + if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP) + sig = 0; console_verbose(); spin_lock_irq(&die_lock); @@ -383,9 +384,6 @@ void __noreturn die(const char *str, struct pt_regs *regs) mips_mt_regdump(dvpret); #endif /* CONFIG_MIPS_MT_SMTC */ - if (notify_die(DIE_OOPS, str, regs, 0, regs_to_trapnr(regs), SIGSEGV) == NOTIFY_STOP) - sig = 0; - printk("%s[#%d]:\n", str, ++die_counter); show_registers(regs); add_taint(TAINT_DIE); -- cgit v0.10.2 From 3e9957b4866f3767f19bf0e543b322ad7906c564 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 13 May 2011 17:41:21 +0200 Subject: MIPS: AR7: Fix GPIO register size for Titan variant. The 'size' variable contains the correct register size for both AR7 and Titan, but we never used it to ioremap the correct register size. This problem only shows up on Titan. [ralf@linux-mips.org: Fixed the fix. The original patch as in patchwork recognizes the problem correctly then fails to fix it ...] 
Reported-by: Alexander Clouter Signed-off-by: Florian Fainelli Patchwork: https://patchwork.linux-mips.org/patch/2380/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c index 425dfa5..bb571bc 100644 --- a/arch/mips/ar7/gpio.c +++ b/arch/mips/ar7/gpio.c @@ -325,9 +325,7 @@ int __init ar7_gpio_init(void) size = 0x1f; } - gpch->regs = ioremap_nocache(AR7_REGS_GPIO, - AR7_REGS_GPIO + 0x10); - + gpch->regs = ioremap_nocache(AR7_REGS_GPIO, size); if (!gpch->regs) { printk(KERN_ERR "%s: failed to ioremap regs\n", gpch->chip.label); -- cgit v0.10.2 From a5602a3273774c720aaf165ff670e5b85e5910a5 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 18 May 2011 13:14:36 +0100 Subject: MIPS: Kludge IP27 build for 2.6.39. Signed-off-by: Ralf Baechle diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index 655f849..7aa37dd 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h @@ -5,7 +5,9 @@ #include #include +#ifndef CONFIG_SGI_IP27 /* Kludge to fix 2.6.39 build for IP27 */ #include +#endif extern struct dma_map_ops *mips_dma_map_ops; -- cgit v0.10.2 From 01294d82622d6d9d64bde8e4530c7e2c6dbb6ee6 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Wed, 18 May 2011 10:27:39 -0500 Subject: of: fix race when matching drivers If two drivers are probing devices at the same time, both will write their match table result to the dev->of_match cache at the same time. Only write the result if the device matches. In a thread titled "SBus devices sometimes detected, sometimes not", Meelis reported his SBus hme was not detected about 50% of the time. From the debug suggested by Grant it was obvious another driver matched some devices between the call to match the hme and the hme discovery failling. 
Reported-by: Meelis Roos Signed-off-by: Milton Miller [grant.likely: modified to only call of_match_device() once] Signed-off-by: Grant Likely diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 8bfe6c1..b33d688 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -21,8 +21,12 @@ extern void of_device_make_bus_id(struct device *dev); static inline int of_driver_match_device(struct device *dev, const struct device_driver *drv) { - dev->of_match = of_match_device(drv->of_match_table, dev); - return dev->of_match != NULL; + const struct of_device_id *match; + + match = of_match_device(drv->of_match_table, dev); + if (match) + dev->of_match = match; + return match != NULL; } extern struct platform_device *of_dev_get(struct platform_device *dev); -- cgit v0.10.2 From b1608d69cb804e414d0887140ba08a9398e4e638 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 18 May 2011 11:19:24 -0600 Subject: drivercore: revert addition of of_match to struct device Commit b826291c, "drivercore/dt: add a match table pointer to struct device" added an of_match pointer to struct device to cache the of_match_table entry discovered at driver match time. This was unsafe because matching is not an atomic operation with probing a driver. If two or more drivers are attempted to be matched to a driver at the same time, then the cached matching entry pointer could get overwritten. This patch reverts the of_match cache pointer and reworks all users to call of_match_device() directly instead. 
Signed-off-by: Grant Likely diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 1882729..104faa8 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -318,17 +318,20 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = { .end = mpc83xx_suspend_end, }; +static struct of_device_id pmc_match[]; static int pmc_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct resource res; struct pmc_type *type; int ret = 0; - if (!ofdev->dev.of_match) + match = of_match_device(pmc_match, &ofdev->dev); + if (!match) return -EINVAL; - type = ofdev->dev.of_match->data; + type = match->data; if (!of_device_is_available(np)) return -ENODEV; diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index d5679dc..01cd2f0 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -304,8 +304,10 @@ static int __devinit fsl_msi_setup_hwirq(struct fsl_msi *msi, return 0; } +static const struct of_device_id fsl_of_msi_ids[]; static int __devinit fsl_of_msi_probe(struct platform_device *dev) { + const struct of_device_id *match; struct fsl_msi *msi; struct resource res; int err, i, j, irq_index, count; @@ -316,9 +318,10 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev) u32 offset; static const u32 all_avail[] = { 0, NR_MSI_IRQS }; - if (!dev->dev.of_match) + match = of_match_device(fsl_of_msi_ids, &dev->dev); + if (!match) return -EINVAL; - features = dev->dev.of_match->data; + features = match->data; printk(KERN_DEBUG "Setting up Freescale MSI support\n"); diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c index 948068a..d1840db 100644 --- a/arch/sparc/kernel/pci_sabre.c +++ b/arch/sparc/kernel/pci_sabre.c @@ -452,8 +452,10 @@ static void __devinit sabre_pbm_init(struct pci_pbm_info *pbm, sabre_scan_bus(pbm, &op->dev); } +static const 
struct of_device_id sabre_match[]; static int __devinit sabre_probe(struct platform_device *op) { + const struct of_device_id *match; const struct linux_prom64_registers *pr_regs; struct device_node *dp = op->dev.of_node; struct pci_pbm_info *pbm; @@ -463,7 +465,8 @@ static int __devinit sabre_probe(struct platform_device *op) const u32 *vdma; u64 clear_irq; - hummingbird_p = op->dev.of_match && (op->dev.of_match->data != NULL); + match = of_match_device(sabre_match, &op->dev); + hummingbird_p = match && (match->data != NULL); if (!hummingbird_p) { struct device_node *cpu_dp; diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c index fecfcb2..283fbc3 100644 --- a/arch/sparc/kernel/pci_schizo.c +++ b/arch/sparc/kernel/pci_schizo.c @@ -1458,11 +1458,15 @@ out_err: return err; } +static const struct of_device_id schizo_match[]; static int __devinit schizo_probe(struct platform_device *op) { - if (!op->dev.of_match) + const struct of_device_id *match; + + match = of_match_device(schizo_match, &op->dev); + if (!match) return -EINVAL; - return __schizo_init(op, (unsigned long) op->dev.of_match->data); + return __schizo_init(op, (unsigned long)match->data); } /* The ordering of this table is very important. 
Some Tomatillo diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index bdd2719..bc9e702 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -2643,16 +2643,19 @@ fore200e_init(struct fore200e* fore200e, struct device *parent) } #ifdef CONFIG_SBUS +static const struct of_device_id fore200e_sba_match[]; static int __devinit fore200e_sba_probe(struct platform_device *op) { + const struct of_device_id *match; const struct fore200e_bus *bus; struct fore200e *fore200e; static int index = 0; int err; - if (!op->dev.of_match) + match = of_match_device(fore200e_sba_match, &op->dev); + if (!match) return -EINVAL; - bus = op->dev.of_match->data; + bus = match->data; fore200e = kzalloc(sizeof(struct fore200e), GFP_KERNEL); if (!fore200e) diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c index 43ac619..ac6739e 100644 --- a/drivers/char/hw_random/n2-drv.c +++ b/drivers/char/hw_random/n2-drv.c @@ -619,15 +619,18 @@ static void __devinit n2rng_driver_version(void) pr_info("%s", version); } +static const struct of_device_id n2rng_match[]; static int __devinit n2rng_probe(struct platform_device *op) { + const struct of_device_id *match; int victoria_falls; int err = -ENOMEM; struct n2rng *np; - if (!op->dev.of_match) + match = of_match_device(n2rng_match, &op->dev); + if (!match) return -EINVAL; - victoria_falls = (op->dev.of_match->data != NULL); + victoria_falls = (match->data != NULL); n2rng_driver_version(); np = kzalloc(sizeof(*np), GFP_KERNEL); diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index cc6c9b2..64c6b85 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2554,9 +2554,11 @@ static struct pci_driver ipmi_pci_driver = { }; #endif /* CONFIG_PCI */ +static struct of_device_id ipmi_match[]; static int __devinit ipmi_probe(struct platform_device *dev) { #ifdef CONFIG_OF + const struct of_device_id *match; struct smi_info *info; struct resource 
resource; const __be32 *regsize, *regspacing, *regshift; @@ -2566,7 +2568,8 @@ static int __devinit ipmi_probe(struct platform_device *dev) dev_info(&dev->dev, "probing via device tree\n"); - if (!dev->dev.of_match) + match = of_match_device(ipmi_match, &dev->dev); + if (!match) return -EINVAL; ret = of_address_to_resource(np, 0, &resource); @@ -2601,7 +2604,7 @@ static int __devinit ipmi_probe(struct platform_device *dev) return -ENOMEM; } - info->si_type = (enum si_type) dev->dev.of_match->data; + info->si_type = (enum si_type) match->data; info->addr_source = SI_DEVICETREE; info->irq_setup = std_irq_setup; diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/drivers/char/xilinx_hwicap/xilinx_hwicap.c index d6412c1..39ccdea 100644 --- a/drivers/char/xilinx_hwicap/xilinx_hwicap.c +++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.c @@ -715,13 +715,13 @@ static int __devexit hwicap_remove(struct device *dev) } #ifdef CONFIG_OF -static int __devinit hwicap_of_probe(struct platform_device *op) +static int __devinit hwicap_of_probe(struct platform_device *op, + const struct hwicap_driver_config *config) { struct resource res; const unsigned int *id; const char *family; int rc; - const struct hwicap_driver_config *config = op->dev.of_match->data; const struct config_registers *regs; @@ -751,20 +751,24 @@ static int __devinit hwicap_of_probe(struct platform_device *op) regs); } #else -static inline int hwicap_of_probe(struct platform_device *op) +static inline int hwicap_of_probe(struct platform_device *op, + const struct hwicap_driver_config *config) { return -EINVAL; } #endif /* CONFIG_OF */ +static const struct of_device_id __devinitconst hwicap_of_match[]; static int __devinit hwicap_drv_probe(struct platform_device *pdev) { + const struct of_device_id *match; struct resource *res; const struct config_registers *regs; const char *family; - if (pdev->dev.of_match) - return hwicap_of_probe(pdev); + match = of_match_device(hwicap_of_match, &pdev->dev); + if (match) + 
return hwicap_of_probe(pdev, match->data); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index c1f0045..af8e7b1 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -1019,7 +1019,7 @@ ppc4xx_edac_mc_init(struct mem_ctl_info *mci, struct ppc4xx_edac_pdata *pdata = NULL; const struct device_node *np = op->dev.of_node; - if (op->dev.of_match == NULL) + if (of_match_device(ppc4xx_edac_match, &op->dev) == NULL) return -EINVAL; /* Initial driver pointers and private data */ diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 75b984c..107397a 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -560,15 +560,18 @@ static struct i2c_adapter mpc_ops = { .timeout = HZ, }; +static const struct of_device_id mpc_i2c_of_match[]; static int __devinit fsl_i2c_probe(struct platform_device *op) { + const struct of_device_id *match; struct mpc_i2c *i2c; const u32 *prop; u32 clock = MPC_I2C_CLOCK_LEGACY; int result = 0; int plen; - if (!op->dev.of_match) + match = of_match_device(mpc_i2c_of_match, &op->dev); + if (!match) return -EINVAL; i2c = kzalloc(sizeof(*i2c), GFP_KERNEL); @@ -605,8 +608,8 @@ static int __devinit fsl_i2c_probe(struct platform_device *op) clock = *prop; } - if (op->dev.of_match->data) { - struct mpc_i2c_data *data = op->dev.of_match->data; + if (match->data) { + struct mpc_i2c_data *data = match->data; data->setup(op->dev.of_node, i2c, clock, data->prescaler); } else { /* Backwards compatibility */ diff --git a/drivers/mmc/host/sdhci-of-core.c b/drivers/mmc/host/sdhci-of-core.c index f9b611f..60e4186 100644 --- a/drivers/mmc/host/sdhci-of-core.c +++ b/drivers/mmc/host/sdhci-of-core.c @@ -124,8 +124,10 @@ static bool __devinit sdhci_of_wp_inverted(struct device_node *np) #endif } +static const struct of_device_id sdhci_of_match[]; static int __devinit sdhci_of_probe(struct platform_device *ofdev) { + 
const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct sdhci_of_data *sdhci_of_data; struct sdhci_host *host; @@ -134,9 +136,10 @@ static int __devinit sdhci_of_probe(struct platform_device *ofdev) int size; int ret; - if (!ofdev->dev.of_match) + match = of_match_device(sdhci_of_match, &ofdev->dev); + if (!match) return -EINVAL; - sdhci_of_data = ofdev->dev.of_match->data; + sdhci_of_data = match->data; if (!of_device_is_available(np)) return -ENODEV; diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c index bd483f0..c1d3346 100644 --- a/drivers/mtd/maps/physmap_of.c +++ b/drivers/mtd/maps/physmap_of.c @@ -214,11 +214,13 @@ static void __devinit of_free_probes(const char **probes) } #endif +static struct of_device_id of_flash_match[]; static int __devinit of_flash_probe(struct platform_device *dev) { #ifdef CONFIG_MTD_PARTITIONS const char **part_probe_types; #endif + const struct of_device_id *match; struct device_node *dp = dev->dev.of_node; struct resource res; struct of_flash *info; @@ -232,9 +234,10 @@ static int __devinit of_flash_probe(struct platform_device *dev) struct mtd_info **mtd_list = NULL; resource_size_t res_size; - if (!dev->dev.of_match) + match = of_match_device(of_flash_match, &dev->dev); + if (!match) return -EINVAL; - probe_type = dev->dev.of_match->data; + probe_type = match->data; reg_tuple_size = (of_n_addr_cells(dp) + of_n_size_cells(dp)) * sizeof(u32); diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c index bd1d811..5fedc33 100644 --- a/drivers/net/can/mscan/mpc5xxx_can.c +++ b/drivers/net/can/mscan/mpc5xxx_can.c @@ -247,8 +247,10 @@ static u32 __devinit mpc512x_can_get_clock(struct platform_device *ofdev, } #endif /* CONFIG_PPC_MPC512x */ +static struct of_device_id mpc5xxx_can_table[]; static int __devinit mpc5xxx_can_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct mpc5xxx_can_data *data; struct device_node *np = 
ofdev->dev.of_node; struct net_device *dev; @@ -258,9 +260,10 @@ static int __devinit mpc5xxx_can_probe(struct platform_device *ofdev) int irq, mscan_clksrc = 0; int err = -ENOMEM; - if (!ofdev->dev.of_match) + match = of_match_device(mpc5xxx_can_table, &ofdev->dev); + if (!match) return -EINVAL; - data = (struct mpc5xxx_can_data *)ofdev->dev.of_match->data; + data = match->data; base = of_iomap(np, 0); if (!base) { diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index 24cb953..5131e61 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -998,8 +998,10 @@ static const struct net_device_ops fs_enet_netdev_ops = { #endif }; +static struct of_device_id fs_enet_match[]; static int __devinit fs_enet_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct net_device *ndev; struct fs_enet_private *fep; struct fs_platform_info *fpi; @@ -1007,14 +1009,15 @@ static int __devinit fs_enet_probe(struct platform_device *ofdev) const u8 *mac_addr; int privsize, len, ret = -ENODEV; - if (!ofdev->dev.of_match) + match = of_match_device(fs_enet_match, &ofdev->dev); + if (!match) return -EINVAL; fpi = kzalloc(sizeof(*fpi), GFP_KERNEL); if (!fpi) return -ENOMEM; - if (!IS_FEC(ofdev->dev.of_match)) { + if (!IS_FEC(match)) { data = of_get_property(ofdev->dev.of_node, "fsl,cpm-command", &len); if (!data || len != 4) goto out_free_fpi; @@ -1049,7 +1052,7 @@ static int __devinit fs_enet_probe(struct platform_device *ofdev) fep->dev = &ofdev->dev; fep->ndev = ndev; fep->fpi = fpi; - fep->ops = ofdev->dev.of_match->data; + fep->ops = match->data; ret = fep->ops->setup_data(ndev); if (ret) diff --git a/drivers/net/fs_enet/mii-fec.c b/drivers/net/fs_enet/mii-fec.c index 7e840d3..6a2e150 100644 --- a/drivers/net/fs_enet/mii-fec.c +++ b/drivers/net/fs_enet/mii-fec.c @@ -101,17 +101,20 @@ static int fs_enet_fec_mii_reset(struct mii_bus *bus) return 0; } +static struct of_device_id 
fs_enet_mdio_fec_match[]; static int __devinit fs_enet_mdio_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct resource res; struct mii_bus *new_bus; struct fec_info *fec; int (*get_bus_freq)(struct device_node *); int ret = -ENOMEM, clock, speed; - if (!ofdev->dev.of_match) + match = of_match_device(fs_enet_mdio_fec_match, &ofdev->dev); + if (!match) return -EINVAL; - get_bus_freq = ofdev->dev.of_match->data; + get_bus_freq = match->data; new_bus = mdiobus_alloc(); if (!new_bus) diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c index eb4f59f..bff2f79 100644 --- a/drivers/net/sunhme.c +++ b/drivers/net/sunhme.c @@ -3237,15 +3237,18 @@ static void happy_meal_pci_exit(void) #endif #ifdef CONFIG_SBUS +static const struct of_device_id hme_sbus_match[]; static int __devinit hme_sbus_probe(struct platform_device *op) { + const struct of_device_id *match; struct device_node *dp = op->dev.of_node; const char *model = of_get_property(dp, "model", NULL); int is_qfe; - if (!op->dev.of_match) + match = of_match_device(hme_sbus_match, &op->dev); + if (!match) return -EINVAL; - is_qfe = (op->dev.of_match->data != NULL); + is_qfe = (match->data != NULL); if (!is_qfe && model && !strcmp(model, "SUNW,sbus-qfe")) is_qfe = 1; diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c index e2d45c9..9689d41c 100644 --- a/drivers/scsi/qlogicpti.c +++ b/drivers/scsi/qlogicpti.c @@ -1292,8 +1292,10 @@ static struct scsi_host_template qpti_template = { .use_clustering = ENABLE_CLUSTERING, }; +static const struct of_device_id qpti_match[]; static int __devinit qpti_sbus_probe(struct platform_device *op) { + const struct of_device_id *match; struct scsi_host_template *tpnt; struct device_node *dp = op->dev.of_node; struct Scsi_Host *host; @@ -1301,9 +1303,10 @@ static int __devinit qpti_sbus_probe(struct platform_device *op) static int nqptis; const char *fcode; - if (!op->dev.of_match) + match = of_match_device(qpti_match, &op->dev); + if 
(!match) return -EINVAL; - tpnt = op->dev.of_match->data; + tpnt = match->data; /* Sometimes Antares cards come up not completely * setup, and we get a report of a zero IRQ. diff --git a/drivers/tty/serial/of_serial.c b/drivers/tty/serial/of_serial.c index 0e8eec5..c911b24 100644 --- a/drivers/tty/serial/of_serial.c +++ b/drivers/tty/serial/of_serial.c @@ -80,14 +80,17 @@ static int __devinit of_platform_serial_setup(struct platform_device *ofdev, /* * Try to register a serial port */ +static struct of_device_id of_platform_serial_table[]; static int __devinit of_platform_serial_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct of_serial_info *info; struct uart_port port; int port_type; int ret; - if (!ofdev->dev.of_match) + match = of_match_device(of_platform_serial_table, &ofdev->dev); + if (!match) return -EINVAL; if (of_find_property(ofdev->dev.of_node, "used-by-rtas", NULL)) @@ -97,7 +100,7 @@ static int __devinit of_platform_serial_probe(struct platform_device *ofdev) if (info == NULL) return -ENOMEM; - port_type = (unsigned long)ofdev->dev.of_match->data; + port_type = (unsigned long)match->data; ret = of_platform_serial_setup(ofdev, port_type, &port); if (ret) goto out; diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c index 36613b3..3a68e09 100644 --- a/drivers/usb/gadget/fsl_qe_udc.c +++ b/drivers/usb/gadget/fsl_qe_udc.c @@ -2539,15 +2539,18 @@ static void qe_udc_release(struct device *dev) } /* Driver probe functions */ +static const struct of_device_id qe_udc_match[]; static int __devinit qe_udc_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct qe_ep *ep; unsigned int ret = 0; unsigned int i; const void *prop; - if (!ofdev->dev.of_match) + match = of_match_device(qe_udc_match, &ofdev->dev); + if (!match) return -EINVAL; prop = of_get_property(np, "mode", NULL); @@ -2561,7 +2564,7 @@ static int __devinit 
qe_udc_probe(struct platform_device *ofdev) return -ENOMEM; } - udc_controller->soc_type = (unsigned long)ofdev->dev.of_match->data; + udc_controller->soc_type = (unsigned long)match->data; udc_controller->usb_regs = of_iomap(np, 0); if (!udc_controller->usb_regs) { ret = -ENOMEM; diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c index 528bceb..eed5436f 100644 --- a/drivers/watchdog/mpc8xxx_wdt.c +++ b/drivers/watchdog/mpc8xxx_wdt.c @@ -185,17 +185,20 @@ static struct miscdevice mpc8xxx_wdt_miscdev = { .fops = &mpc8xxx_wdt_fops, }; +static const struct of_device_id mpc8xxx_wdt_match[]; static int __devinit mpc8xxx_wdt_probe(struct platform_device *ofdev) { int ret; + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct mpc8xxx_wdt_type *wdt_type; u32 freq = fsl_get_sys_freq(); bool enabled; - if (!ofdev->dev.of_match) + match = of_match_device(mpc8xxx_wdt_match, &ofdev->dev); + if (!match) return -EINVAL; - wdt_type = ofdev->dev.of_match->data; + wdt_type = match->data; if (!freq || freq == -1) return -EINVAL; diff --git a/include/linux/device.h b/include/linux/device.h index ab8dfc0..d08399d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -442,7 +442,6 @@ struct device { struct dev_archdata archdata; struct device_node *of_node; /* associated device tree node */ - const struct of_device_id *of_match; /* matching of_device_id from driver */ dev_t devt; /* dev_t, creates the sysfs "dev" */ diff --git a/include/linux/of_device.h b/include/linux/of_device.h index b33d688..ae56384 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -21,12 +21,7 @@ extern void of_device_make_bus_id(struct device *dev); static inline int of_driver_match_device(struct device *dev, const struct device_driver *drv) { - const struct of_device_id *match; - - match = of_match_device(drv->of_match_table, dev); - if (match) - dev->of_match = match; - return match != NULL; + return 
of_match_device(drv->of_match_table, dev) != NULL; } extern struct platform_device *of_dev_get(struct platform_device *dev); @@ -62,6 +57,11 @@ static inline int of_device_uevent(struct device *dev, static inline void of_device_node_put(struct device *dev) { } +static inline const struct of_device_id *of_match_device( + const struct of_device_id *matches, const struct device *dev) +{ + return NULL; +} #endif /* CONFIG_OF_DEVICE */ #endif /* _LINUX_OF_DEVICE_H */ -- cgit v0.10.2 From 61c4f2c81c61f73549928dfd9f3e8f26aa36a8cf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 18 May 2011 21:06:34 -0700 Subject: Linux 2.6.39 diff --git a/Makefile b/Makefile index 41ea6fb..123d858 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 39 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = Flesh-Eating Bats with Fangs # *DOCUMENTATION* -- cgit v0.10.2 From 90d231f7673e20acc4f8b5c3effb5c12098179a7 Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Fri, 29 Apr 2011 11:26:22 +0200 Subject: OMAP3 cpuidle: remove useless SDP specific timings The cpuidle states settings can be overridden by some board-specific settings, by calling omap3_pm_init_cpuidle. Remove the 3430SDP specific states settings registration since the figures are identical to the default ones (in cpuidle34xx.c). 
Signed-off-by: Jean Pihet Signed-off-by: Kevin Hilman diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c index 9afd087..7ffad7b 100644 --- a/arch/arm/mach-omap2/board-3430sdp.c +++ b/arch/arm/mach-omap2/board-3430sdp.c @@ -59,24 +59,6 @@ #define TWL4030_MSECURE_GPIO 22 -/* FIXME: These values need to be updated based on more profiling on 3430sdp*/ -static struct cpuidle_params omap3_cpuidle_params_table[] = { - /* C1 */ - {1, 2, 2, 5}, - /* C2 */ - {1, 10, 10, 30}, - /* C3 */ - {1, 50, 50, 300}, - /* C4 */ - {1, 1500, 1800, 4000}, - /* C5 */ - {1, 2500, 7500, 12000}, - /* C6 */ - {1, 3000, 8500, 15000}, - /* C7 */ - {1, 10000, 30000, 300000}, -}; - static uint32_t board_keymap[] = { KEY(0, 0, KEY_LEFT), KEY(0, 1, KEY_RIGHT), @@ -883,7 +865,6 @@ static void __init omap_3430sdp_init(void) omap3_mux_init(board_mux, OMAP_PACKAGE_CBB); omap_board_config = sdp3430_config; omap_board_config_size = ARRAY_SIZE(sdp3430_config); - omap3_pm_init_cpuidle(omap3_cpuidle_params_table); omap3430_i2c_init(); omap_display_init(&sdp3430_dss_data); if (omap_rev() > OMAP3430_REV_ES1_0) -- cgit v0.10.2 From 866ba0ef967c693dae952afafcb1582a390a82a0 Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Mon, 9 May 2011 12:02:13 +0200 Subject: OMAP3: clean-up mach specific cpuidle data structures - sleep_latency and wake_latency are not used, replace them by exit_latency which is used by cpuidle. exit_latency simply is the sum of sleep_latency and wake_latency, - replace threshold by target_residency, - changed the OMAP3 specific cpuidle code accordingly, - changed the OMAP3 board code accordingly. 
Signed-off-by: Jean Pihet Signed-off-by: Kevin Hilman diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c index f8ba20a..fec4cac 100644 --- a/arch/arm/mach-omap2/board-rx51.c +++ b/arch/arm/mach-omap2/board-rx51.c @@ -58,21 +58,25 @@ static struct platform_device leds_gpio = { }, }; +/* + * cpuidle C-states definition override from the default values. + * The 'exit_latency' field is the sum of sleep and wake-up latencies. + */ static struct cpuidle_params rx51_cpuidle_params[] = { /* C1 */ - {1, 110, 162, 5}, + {110 + 162, 5 , 1}, /* C2 */ - {1, 106, 180, 309}, + {106 + 180, 309, 1}, /* C3 */ - {0, 107, 410, 46057}, + {107 + 410, 46057, 0}, /* C4 */ - {0, 121, 3374, 46057}, + {121 + 3374, 46057, 0}, /* C5 */ - {1, 855, 1146, 46057}, + {855 + 1146, 46057, 1}, /* C6 */ - {0, 7580, 4134, 484329}, + {7580 + 4134, 484329, 0}, /* C7 */ - {1, 7505, 15274, 484329}, + {7505 + 15274, 484329, 1}, }; static struct omap_lcd_config rx51_lcd_config = { diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c index 1c240ef..d7bc31a 100644 --- a/arch/arm/mach-omap2/cpuidle34xx.c +++ b/arch/arm/mach-omap2/cpuidle34xx.c @@ -52,11 +52,10 @@ struct omap3_processor_cx { u8 valid; u8 type; - u32 sleep_latency; - u32 wakeup_latency; + u32 exit_latency; u32 mpu_state; u32 core_state; - u32 threshold; + u32 target_residency; u32 flags; const char *desc; }; @@ -75,19 +74,19 @@ struct powerdomain *cam_pd; */ static struct cpuidle_params cpuidle_params_table[] = { /* C1 */ - {1, 2, 2, 5}, + {2 + 2, 5, 1}, /* C2 */ - {1, 10, 10, 30}, + {10 + 10, 30, 1}, /* C3 */ - {1, 50, 50, 300}, + {50 + 50, 300, 1}, /* C4 */ - {1, 1500, 1800, 4000}, + {1500 + 1800, 4000, 1}, /* C5 */ - {1, 2500, 7500, 12000}, + {2500 + 7500, 12000, 1}, /* C6 */ - {1, 3000, 8500, 15000}, + {3000 + 8500, 15000, 1}, /* C7 */ - {1, 10000, 30000, 300000}, + {10000 + 30000, 300000, 1}, }; static int omap3_idle_bm_check(void) @@ -330,12 +329,10 @@ void 
omap3_pm_init_cpuidle(struct cpuidle_params *cpuidle_board_params) for (i = OMAP3_STATE_C1; i < OMAP3_MAX_STATES; i++) { cpuidle_params_table[i].valid = cpuidle_board_params[i].valid; - cpuidle_params_table[i].sleep_latency = - cpuidle_board_params[i].sleep_latency; - cpuidle_params_table[i].wake_latency = - cpuidle_board_params[i].wake_latency; - cpuidle_params_table[i].threshold = - cpuidle_board_params[i].threshold; + cpuidle_params_table[i].exit_latency = + cpuidle_board_params[i].exit_latency; + cpuidle_params_table[i].target_residency = + cpuidle_board_params[i].target_residency; } return; } @@ -357,12 +354,10 @@ void omap_init_power_states(void) omap3_power_states[OMAP3_STATE_C1].valid = cpuidle_params_table[OMAP3_STATE_C1].valid; omap3_power_states[OMAP3_STATE_C1].type = OMAP3_STATE_C1; - omap3_power_states[OMAP3_STATE_C1].sleep_latency = - cpuidle_params_table[OMAP3_STATE_C1].sleep_latency; - omap3_power_states[OMAP3_STATE_C1].wakeup_latency = - cpuidle_params_table[OMAP3_STATE_C1].wake_latency; - omap3_power_states[OMAP3_STATE_C1].threshold = - cpuidle_params_table[OMAP3_STATE_C1].threshold; + omap3_power_states[OMAP3_STATE_C1].exit_latency = + cpuidle_params_table[OMAP3_STATE_C1].exit_latency; + omap3_power_states[OMAP3_STATE_C1].target_residency = + cpuidle_params_table[OMAP3_STATE_C1].target_residency; omap3_power_states[OMAP3_STATE_C1].mpu_state = PWRDM_POWER_ON; omap3_power_states[OMAP3_STATE_C1].core_state = PWRDM_POWER_ON; omap3_power_states[OMAP3_STATE_C1].flags = CPUIDLE_FLAG_TIME_VALID; @@ -372,12 +367,10 @@ void omap_init_power_states(void) omap3_power_states[OMAP3_STATE_C2].valid = cpuidle_params_table[OMAP3_STATE_C2].valid; omap3_power_states[OMAP3_STATE_C2].type = OMAP3_STATE_C2; - omap3_power_states[OMAP3_STATE_C2].sleep_latency = - cpuidle_params_table[OMAP3_STATE_C2].sleep_latency; - omap3_power_states[OMAP3_STATE_C2].wakeup_latency = - cpuidle_params_table[OMAP3_STATE_C2].wake_latency; - omap3_power_states[OMAP3_STATE_C2].threshold = - 
cpuidle_params_table[OMAP3_STATE_C2].threshold; + omap3_power_states[OMAP3_STATE_C2].exit_latency = + cpuidle_params_table[OMAP3_STATE_C2].exit_latency; + omap3_power_states[OMAP3_STATE_C2].target_residency = + cpuidle_params_table[OMAP3_STATE_C2].target_residency; omap3_power_states[OMAP3_STATE_C2].mpu_state = PWRDM_POWER_ON; omap3_power_states[OMAP3_STATE_C2].core_state = PWRDM_POWER_ON; omap3_power_states[OMAP3_STATE_C2].flags = CPUIDLE_FLAG_TIME_VALID | @@ -388,12 +381,10 @@ void omap_init_power_states(void) omap3_power_states[OMAP3_STATE_C3].valid = cpuidle_params_table[OMAP3_STATE_C3].valid; omap3_power_states[OMAP3_STATE_C3].type = OMAP3_STATE_C3; - omap3_power_states[OMAP3_STATE_C3].sleep_latency = - cpuidle_params_table[OMAP3_STATE_C3].sleep_latency; - omap3_power_states[OMAP3_STATE_C3].wakeup_latency = - cpuidle_params_table[OMAP3_STATE_C3].wake_latency; - omap3_power_states[OMAP3_STATE_C3].threshold = - cpuidle_params_table[OMAP3_STATE_C3].threshold; + omap3_power_states[OMAP3_STATE_C3].exit_latency = + cpuidle_params_table[OMAP3_STATE_C3].exit_latency; + omap3_power_states[OMAP3_STATE_C3].target_residency = + cpuidle_params_table[OMAP3_STATE_C3].target_residency; omap3_power_states[OMAP3_STATE_C3].mpu_state = PWRDM_POWER_RET; omap3_power_states[OMAP3_STATE_C3].core_state = PWRDM_POWER_ON; omap3_power_states[OMAP3_STATE_C3].flags = CPUIDLE_FLAG_TIME_VALID | @@ -404,12 +395,10 @@ void omap_init_power_states(void) omap3_power_states[OMAP3_STATE_C4].valid = cpuidle_params_table[OMAP3_STATE_C4].valid; omap3_power_states[OMAP3_STATE_C4].type = OMAP3_STATE_C4; - omap3_power_states[OMAP3_STATE_C4].sleep_latency = - cpuidle_params_table[OMAP3_STATE_C4].sleep_latency; - omap3_power_states[OMAP3_STATE_C4].wakeup_latency = - cpuidle_params_table[OMAP3_STATE_C4].wake_latency; - omap3_power_states[OMAP3_STATE_C4].threshold = - cpuidle_params_table[OMAP3_STATE_C4].threshold; + omap3_power_states[OMAP3_STATE_C4].exit_latency = + 
cpuidle_params_table[OMAP3_STATE_C4].exit_latency; + omap3_power_states[OMAP3_STATE_C4].target_residency = + cpuidle_params_table[OMAP3_STATE_C4].target_residency; omap3_power_states[OMAP3_STATE_C4].mpu_state = PWRDM_POWER_OFF; omap3_power_states[OMAP3_STATE_C4].core_state = PWRDM_POWER_ON; omap3_power_states[OMAP3_STATE_C4].flags = CPUIDLE_FLAG_TIME_VALID | @@ -420,12 +409,10 @@ void omap_init_power_states(void) omap3_power_states[OMAP3_STATE_C5].valid = cpuidle_params_table[OMAP3_STATE_C5].valid; omap3_power_states[OMAP3_STATE_C5].type = OMAP3_STATE_C5; - omap3_power_states[OMAP3_STATE_C5].sleep_latency = - cpuidle_params_table[OMAP3_STATE_C5].sleep_latency; - omap3_power_states[OMAP3_STATE_C5].wakeup_latency = - cpuidle_params_table[OMAP3_STATE_C5].wake_latency; - omap3_power_states[OMAP3_STATE_C5].threshold = - cpuidle_params_table[OMAP3_STATE_C5].threshold; + omap3_power_states[OMAP3_STATE_C5].exit_latency = + cpuidle_params_table[OMAP3_STATE_C5].exit_latency; + omap3_power_states[OMAP3_STATE_C5].target_residency = + cpuidle_params_table[OMAP3_STATE_C5].target_residency; omap3_power_states[OMAP3_STATE_C5].mpu_state = PWRDM_POWER_RET; omap3_power_states[OMAP3_STATE_C5].core_state = PWRDM_POWER_RET; omap3_power_states[OMAP3_STATE_C5].flags = CPUIDLE_FLAG_TIME_VALID | @@ -436,12 +423,10 @@ void omap_init_power_states(void) omap3_power_states[OMAP3_STATE_C6].valid = cpuidle_params_table[OMAP3_STATE_C6].valid; omap3_power_states[OMAP3_STATE_C6].type = OMAP3_STATE_C6; - omap3_power_states[OMAP3_STATE_C6].sleep_latency = - cpuidle_params_table[OMAP3_STATE_C6].sleep_latency; - omap3_power_states[OMAP3_STATE_C6].wakeup_latency = - cpuidle_params_table[OMAP3_STATE_C6].wake_latency; - omap3_power_states[OMAP3_STATE_C6].threshold = - cpuidle_params_table[OMAP3_STATE_C6].threshold; + omap3_power_states[OMAP3_STATE_C6].exit_latency = + cpuidle_params_table[OMAP3_STATE_C6].exit_latency; + omap3_power_states[OMAP3_STATE_C6].target_residency = + 
cpuidle_params_table[OMAP3_STATE_C6].target_residency; omap3_power_states[OMAP3_STATE_C6].mpu_state = PWRDM_POWER_OFF; omap3_power_states[OMAP3_STATE_C6].core_state = PWRDM_POWER_RET; omap3_power_states[OMAP3_STATE_C6].flags = CPUIDLE_FLAG_TIME_VALID | @@ -452,12 +437,10 @@ void omap_init_power_states(void) omap3_power_states[OMAP3_STATE_C7].valid = cpuidle_params_table[OMAP3_STATE_C7].valid; omap3_power_states[OMAP3_STATE_C7].type = OMAP3_STATE_C7; - omap3_power_states[OMAP3_STATE_C7].sleep_latency = - cpuidle_params_table[OMAP3_STATE_C7].sleep_latency; - omap3_power_states[OMAP3_STATE_C7].wakeup_latency = - cpuidle_params_table[OMAP3_STATE_C7].wake_latency; - omap3_power_states[OMAP3_STATE_C7].threshold = - cpuidle_params_table[OMAP3_STATE_C7].threshold; + omap3_power_states[OMAP3_STATE_C7].exit_latency = + cpuidle_params_table[OMAP3_STATE_C7].exit_latency; + omap3_power_states[OMAP3_STATE_C7].target_residency = + cpuidle_params_table[OMAP3_STATE_C7].target_residency; omap3_power_states[OMAP3_STATE_C7].mpu_state = PWRDM_POWER_OFF; omap3_power_states[OMAP3_STATE_C7].core_state = PWRDM_POWER_OFF; omap3_power_states[OMAP3_STATE_C7].flags = CPUIDLE_FLAG_TIME_VALID | @@ -512,8 +495,8 @@ int __init omap3_idle_init(void) if (!cx->valid) continue; cpuidle_set_statedata(state, cx); - state->exit_latency = cx->sleep_latency + cx->wakeup_latency; - state->target_residency = cx->threshold; + state->exit_latency = cx->exit_latency; + state->target_residency = cx->target_residency; state->flags = cx->flags; state->enter = (state->flags & CPUIDLE_FLAG_CHECK_BM) ? 
omap3_enter_idle_bm : omap3_enter_idle; diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h index 797bfd1..32dbc13 100644 --- a/arch/arm/mach-omap2/pm.h +++ b/arch/arm/mach-omap2/pm.h @@ -36,11 +36,16 @@ static inline int omap4_opp_init(void) } #endif +/* + * cpuidle mach specific parameters + * + * The board code can override the default C-states definition using + * omap3_pm_init_cpuidle + */ struct cpuidle_params { - u8 valid; - u32 sleep_latency; - u32 wake_latency; - u32 threshold; + u32 exit_latency; /* exit_latency = sleep + wake-up latencies */ + u32 target_residency; + u8 valid; /* validates the C-state */ }; #if defined(CONFIG_PM) && defined(CONFIG_CPU_IDLE) -- cgit v0.10.2 From badc303a862ba6c5fd3d324f5332db07877f8159 Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Mon, 9 May 2011 12:02:14 +0200 Subject: OMAP3: cpuidle: re-organize the C-states data The current implementation defines an internal structure and a C-states array. Using those structures is redundant to the structs used by the cpuidle framework. This patch provides a clean-up of the internal struct, removes the internal C-states array, stores the data using the existing cpuidle per C-state struct and registers the mach specific data to cpuidle C-state driver_data (accessed using cpuidle_[gs]et_statedata). Also removes unused macros, fields and code and compacts the repeating code using an inline helper function. The result is more compact and more readable code as well as reduced data RAM usage. Also retain C1 as the only always valid C-state and system safe state. 
Signed-off-by: Jean Pihet Signed-off-by: Kevin Hilman diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c index d7bc31a..f9c8676 100644 --- a/arch/arm/mach-omap2/cpuidle34xx.c +++ b/arch/arm/mach-omap2/cpuidle34xx.c @@ -36,35 +36,6 @@ #ifdef CONFIG_CPU_IDLE -#define OMAP3_MAX_STATES 7 -#define OMAP3_STATE_C1 0 /* C1 - MPU WFI + Core active */ -#define OMAP3_STATE_C2 1 /* C2 - MPU WFI + Core inactive */ -#define OMAP3_STATE_C3 2 /* C3 - MPU CSWR + Core inactive */ -#define OMAP3_STATE_C4 3 /* C4 - MPU OFF + Core iactive */ -#define OMAP3_STATE_C5 4 /* C5 - MPU RET + Core RET */ -#define OMAP3_STATE_C6 5 /* C6 - MPU OFF + Core RET */ -#define OMAP3_STATE_C7 6 /* C7 - MPU OFF + Core OFF */ - -#define OMAP3_STATE_MAX OMAP3_STATE_C7 - -#define CPUIDLE_FLAG_CHECK_BM 0x10000 /* use omap3_enter_idle_bm() */ - -struct omap3_processor_cx { - u8 valid; - u8 type; - u32 exit_latency; - u32 mpu_state; - u32 core_state; - u32 target_residency; - u32 flags; - const char *desc; -}; - -struct omap3_processor_cx omap3_power_states[OMAP3_MAX_STATES]; -struct omap3_processor_cx current_cx_state; -struct powerdomain *mpu_pd, *core_pd, *per_pd; -struct powerdomain *cam_pd; - /* * The latencies/thresholds for various C states have * to be configured from the respective board files. 
@@ -88,6 +59,17 @@ static struct cpuidle_params cpuidle_params_table[] = { /* C7 */ {10000 + 30000, 300000, 1}, }; +#define OMAP3_NUM_STATES ARRAY_SIZE(cpuidle_params_table) + +/* Mach specific information to be recorded in the C-state driver_data */ +struct omap3_idle_statedata { + u32 mpu_state; + u32 core_state; + u8 valid; +}; +struct omap3_idle_statedata omap3_idle_data[OMAP3_NUM_STATES]; + +struct powerdomain *mpu_pd, *core_pd, *per_pd, *cam_pd; static int omap3_idle_bm_check(void) { @@ -121,12 +103,10 @@ static int _cpuidle_deny_idle(struct powerdomain *pwrdm, static int omap3_enter_idle(struct cpuidle_device *dev, struct cpuidle_state *state) { - struct omap3_processor_cx *cx = cpuidle_get_statedata(state); + struct omap3_idle_statedata *cx = cpuidle_get_statedata(state); struct timespec ts_preidle, ts_postidle, ts_idle; u32 mpu_state = cx->mpu_state, core_state = cx->core_state; - current_cx_state = *cx; - /* Used to keep track of the total time in idle */ getnstimeofday(&ts_preidle); @@ -139,7 +119,8 @@ static int omap3_enter_idle(struct cpuidle_device *dev, if (omap_irq_pending() || need_resched()) goto return_sleep_time; - if (cx->type == OMAP3_STATE_C1) { + /* Deny idle for C1 */ + if (state == &dev->states[0]) { pwrdm_for_each_clkdm(mpu_pd, _cpuidle_deny_idle); pwrdm_for_each_clkdm(core_pd, _cpuidle_deny_idle); } @@ -147,7 +128,8 @@ static int omap3_enter_idle(struct cpuidle_device *dev, /* Execute ARM wfi */ omap_sram_idle(); - if (cx->type == OMAP3_STATE_C1) { + /* Re-allow idle for C1 */ + if (state == &dev->states[0]) { pwrdm_for_each_clkdm(mpu_pd, _cpuidle_allow_idle); pwrdm_for_each_clkdm(core_pd, _cpuidle_allow_idle); } @@ -169,26 +151,26 @@ return_sleep_time: * * If the current state is valid, it is returned back to the caller. * Else, this function searches for a lower c-state which is still - * valid (as defined in omap3_power_states[]). + * valid. 
*/ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, - struct cpuidle_state *curr) + struct cpuidle_state *curr) { struct cpuidle_state *next = NULL; - struct omap3_processor_cx *cx; + struct omap3_idle_statedata *cx; - cx = (struct omap3_processor_cx *)cpuidle_get_statedata(curr); + cx = cpuidle_get_statedata(curr); /* Check if current state is valid */ if (cx->valid) { return curr; } else { - u8 idx = OMAP3_STATE_MAX; + int idx = OMAP3_NUM_STATES - 1; /* * Reach the current state starting at highest C-state */ - for (; idx >= OMAP3_STATE_C1; idx--) { + for (; idx >= 0; idx--) { if (&dev->states[idx] == curr) { next = &dev->states[idx]; break; @@ -205,9 +187,7 @@ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, * Start search from the next (lower) state. */ idx--; - for (; idx >= OMAP3_STATE_C1; idx--) { - struct omap3_processor_cx *cx; - + for (; idx >= 0; idx--) { cx = cpuidle_get_statedata(&dev->states[idx]); if (cx->valid) { next = &dev->states[idx]; @@ -215,7 +195,7 @@ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, } } /* - * C1 and C2 are always valid. + * C1 is always valid. * So, no need to check for 'next==NULL' outside this loop. */ } @@ -228,9 +208,8 @@ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, * @dev: cpuidle device * @state: The target state to be programmed * - * Used for C states with CPUIDLE_FLAG_CHECK_BM flag set. This - * function checks for any pending activity and then programs the - * device to the specified or a safer state. + * This function checks for any pending activity and then programs + * the device to the specified or a safer state. 
*/ static int omap3_enter_idle_bm(struct cpuidle_device *dev, struct cpuidle_state *state) @@ -238,10 +217,10 @@ static int omap3_enter_idle_bm(struct cpuidle_device *dev, struct cpuidle_state *new_state = next_valid_state(dev, state); u32 core_next_state, per_next_state = 0, per_saved_state = 0; u32 cam_state; - struct omap3_processor_cx *cx; + struct omap3_idle_statedata *cx; int ret; - if ((state->flags & CPUIDLE_FLAG_CHECK_BM) && omap3_idle_bm_check()) { + if (omap3_idle_bm_check()) { BUG_ON(!dev->safe_state); new_state = dev->safe_state; goto select_state; @@ -307,8 +286,8 @@ void omap3_cpuidle_update_states(u32 mpu_deepest_state, u32 core_deepest_state) { int i; - for (i = OMAP3_STATE_C1; i < OMAP3_MAX_STATES; i++) { - struct omap3_processor_cx *cx = &omap3_power_states[i]; + for (i = 0; i < OMAP3_NUM_STATES; i++) { + struct omap3_idle_statedata *cx = &omap3_idle_data[i]; if ((cx->mpu_state >= mpu_deepest_state) && (cx->core_state >= core_deepest_state)) { @@ -326,9 +305,8 @@ void omap3_pm_init_cpuidle(struct cpuidle_params *cpuidle_board_params) if (!cpuidle_board_params) return; - for (i = OMAP3_STATE_C1; i < OMAP3_MAX_STATES; i++) { - cpuidle_params_table[i].valid = - cpuidle_board_params[i].valid; + for (i = 0; i < OMAP3_NUM_STATES; i++) { + cpuidle_params_table[i].valid = cpuidle_board_params[i].valid; cpuidle_params_table[i].exit_latency = cpuidle_board_params[i].exit_latency; cpuidle_params_table[i].target_residency = @@ -337,185 +315,104 @@ void omap3_pm_init_cpuidle(struct cpuidle_params *cpuidle_board_params) return; } -/* omap3_init_power_states - Initialises the OMAP3 specific C states. - * - * Below is the desciption of each C state. - * C1 . MPU WFI + Core active - * C2 . MPU WFI + Core inactive - * C3 . MPU CSWR + Core inactive - * C4 . MPU OFF + Core inactive - * C5 . MPU CSWR + Core CSWR - * C6 . MPU OFF + Core CSWR - * C7 . MPU OFF + Core OFF - */ -void omap_init_power_states(void) -{ - /* C1 . 
MPU WFI + Core active */ - omap3_power_states[OMAP3_STATE_C1].valid = - cpuidle_params_table[OMAP3_STATE_C1].valid; - omap3_power_states[OMAP3_STATE_C1].type = OMAP3_STATE_C1; - omap3_power_states[OMAP3_STATE_C1].exit_latency = - cpuidle_params_table[OMAP3_STATE_C1].exit_latency; - omap3_power_states[OMAP3_STATE_C1].target_residency = - cpuidle_params_table[OMAP3_STATE_C1].target_residency; - omap3_power_states[OMAP3_STATE_C1].mpu_state = PWRDM_POWER_ON; - omap3_power_states[OMAP3_STATE_C1].core_state = PWRDM_POWER_ON; - omap3_power_states[OMAP3_STATE_C1].flags = CPUIDLE_FLAG_TIME_VALID; - omap3_power_states[OMAP3_STATE_C1].desc = "MPU ON + CORE ON"; - - /* C2 . MPU WFI + Core inactive */ - omap3_power_states[OMAP3_STATE_C2].valid = - cpuidle_params_table[OMAP3_STATE_C2].valid; - omap3_power_states[OMAP3_STATE_C2].type = OMAP3_STATE_C2; - omap3_power_states[OMAP3_STATE_C2].exit_latency = - cpuidle_params_table[OMAP3_STATE_C2].exit_latency; - omap3_power_states[OMAP3_STATE_C2].target_residency = - cpuidle_params_table[OMAP3_STATE_C2].target_residency; - omap3_power_states[OMAP3_STATE_C2].mpu_state = PWRDM_POWER_ON; - omap3_power_states[OMAP3_STATE_C2].core_state = PWRDM_POWER_ON; - omap3_power_states[OMAP3_STATE_C2].flags = CPUIDLE_FLAG_TIME_VALID | - CPUIDLE_FLAG_CHECK_BM; - omap3_power_states[OMAP3_STATE_C2].desc = "MPU ON + CORE ON"; - - /* C3 . 
MPU CSWR + Core inactive */ - omap3_power_states[OMAP3_STATE_C3].valid = - cpuidle_params_table[OMAP3_STATE_C3].valid; - omap3_power_states[OMAP3_STATE_C3].type = OMAP3_STATE_C3; - omap3_power_states[OMAP3_STATE_C3].exit_latency = - cpuidle_params_table[OMAP3_STATE_C3].exit_latency; - omap3_power_states[OMAP3_STATE_C3].target_residency = - cpuidle_params_table[OMAP3_STATE_C3].target_residency; - omap3_power_states[OMAP3_STATE_C3].mpu_state = PWRDM_POWER_RET; - omap3_power_states[OMAP3_STATE_C3].core_state = PWRDM_POWER_ON; - omap3_power_states[OMAP3_STATE_C3].flags = CPUIDLE_FLAG_TIME_VALID | - CPUIDLE_FLAG_CHECK_BM; - omap3_power_states[OMAP3_STATE_C3].desc = "MPU RET + CORE ON"; - - /* C4 . MPU OFF + Core inactive */ - omap3_power_states[OMAP3_STATE_C4].valid = - cpuidle_params_table[OMAP3_STATE_C4].valid; - omap3_power_states[OMAP3_STATE_C4].type = OMAP3_STATE_C4; - omap3_power_states[OMAP3_STATE_C4].exit_latency = - cpuidle_params_table[OMAP3_STATE_C4].exit_latency; - omap3_power_states[OMAP3_STATE_C4].target_residency = - cpuidle_params_table[OMAP3_STATE_C4].target_residency; - omap3_power_states[OMAP3_STATE_C4].mpu_state = PWRDM_POWER_OFF; - omap3_power_states[OMAP3_STATE_C4].core_state = PWRDM_POWER_ON; - omap3_power_states[OMAP3_STATE_C4].flags = CPUIDLE_FLAG_TIME_VALID | - CPUIDLE_FLAG_CHECK_BM; - omap3_power_states[OMAP3_STATE_C4].desc = "MPU OFF + CORE ON"; - - /* C5 . 
MPU CSWR + Core CSWR*/ - omap3_power_states[OMAP3_STATE_C5].valid = - cpuidle_params_table[OMAP3_STATE_C5].valid; - omap3_power_states[OMAP3_STATE_C5].type = OMAP3_STATE_C5; - omap3_power_states[OMAP3_STATE_C5].exit_latency = - cpuidle_params_table[OMAP3_STATE_C5].exit_latency; - omap3_power_states[OMAP3_STATE_C5].target_residency = - cpuidle_params_table[OMAP3_STATE_C5].target_residency; - omap3_power_states[OMAP3_STATE_C5].mpu_state = PWRDM_POWER_RET; - omap3_power_states[OMAP3_STATE_C5].core_state = PWRDM_POWER_RET; - omap3_power_states[OMAP3_STATE_C5].flags = CPUIDLE_FLAG_TIME_VALID | - CPUIDLE_FLAG_CHECK_BM; - omap3_power_states[OMAP3_STATE_C5].desc = "MPU RET + CORE RET"; - - /* C6 . MPU OFF + Core CSWR */ - omap3_power_states[OMAP3_STATE_C6].valid = - cpuidle_params_table[OMAP3_STATE_C6].valid; - omap3_power_states[OMAP3_STATE_C6].type = OMAP3_STATE_C6; - omap3_power_states[OMAP3_STATE_C6].exit_latency = - cpuidle_params_table[OMAP3_STATE_C6].exit_latency; - omap3_power_states[OMAP3_STATE_C6].target_residency = - cpuidle_params_table[OMAP3_STATE_C6].target_residency; - omap3_power_states[OMAP3_STATE_C6].mpu_state = PWRDM_POWER_OFF; - omap3_power_states[OMAP3_STATE_C6].core_state = PWRDM_POWER_RET; - omap3_power_states[OMAP3_STATE_C6].flags = CPUIDLE_FLAG_TIME_VALID | - CPUIDLE_FLAG_CHECK_BM; - omap3_power_states[OMAP3_STATE_C6].desc = "MPU OFF + CORE RET"; - - /* C7 . 
MPU OFF + Core OFF */ - omap3_power_states[OMAP3_STATE_C7].valid = - cpuidle_params_table[OMAP3_STATE_C7].valid; - omap3_power_states[OMAP3_STATE_C7].type = OMAP3_STATE_C7; - omap3_power_states[OMAP3_STATE_C7].exit_latency = - cpuidle_params_table[OMAP3_STATE_C7].exit_latency; - omap3_power_states[OMAP3_STATE_C7].target_residency = - cpuidle_params_table[OMAP3_STATE_C7].target_residency; - omap3_power_states[OMAP3_STATE_C7].mpu_state = PWRDM_POWER_OFF; - omap3_power_states[OMAP3_STATE_C7].core_state = PWRDM_POWER_OFF; - omap3_power_states[OMAP3_STATE_C7].flags = CPUIDLE_FLAG_TIME_VALID | - CPUIDLE_FLAG_CHECK_BM; - omap3_power_states[OMAP3_STATE_C7].desc = "MPU OFF + CORE OFF"; - - /* - * Erratum i583: implementation for ES rev < Es1.2 on 3630. We cannot - * enable OFF mode in a stable form for previous revisions. - * we disable C7 state as a result. - */ - if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583)) { - omap3_power_states[OMAP3_STATE_C7].valid = 0; - cpuidle_params_table[OMAP3_STATE_C7].valid = 0; - pr_warn("%s: core off state C7 disabled due to i583\n", - __func__); - } -} - struct cpuidle_driver omap3_idle_driver = { .name = "omap3_idle", .owner = THIS_MODULE, }; +/* Fill in the state data from the mach tables and register the driver_data */ +static inline struct omap3_idle_statedata *_fill_cstate( + struct cpuidle_device *dev, + int idx, const char *descr) +{ + struct omap3_idle_statedata *cx = &omap3_idle_data[idx]; + struct cpuidle_state *state = &dev->states[idx]; + + state->exit_latency = cpuidle_params_table[idx].exit_latency; + state->target_residency = cpuidle_params_table[idx].target_residency; + state->flags = CPUIDLE_FLAG_TIME_VALID; + state->enter = omap3_enter_idle_bm; + cx->valid = cpuidle_params_table[idx].valid; + sprintf(state->name, "C%d", idx + 1); + strncpy(state->desc, descr, CPUIDLE_DESC_LEN); + cpuidle_set_statedata(state, cx); + + return cx; +} + /** * omap3_idle_init - Init routine for OMAP3 idle * - * Registers the OMAP3 specific 
cpuidle driver with the cpuidle + * Registers the OMAP3 specific cpuidle driver to the cpuidle * framework with the valid set of states. */ int __init omap3_idle_init(void) { - int i, count = 0; - struct omap3_processor_cx *cx; - struct cpuidle_state *state; struct cpuidle_device *dev; + struct omap3_idle_statedata *cx; mpu_pd = pwrdm_lookup("mpu_pwrdm"); core_pd = pwrdm_lookup("core_pwrdm"); per_pd = pwrdm_lookup("per_pwrdm"); cam_pd = pwrdm_lookup("cam_pwrdm"); - omap_init_power_states(); cpuidle_register_driver(&omap3_idle_driver); - dev = &per_cpu(omap3_idle_dev, smp_processor_id()); - for (i = OMAP3_STATE_C1; i < OMAP3_MAX_STATES; i++) { - cx = &omap3_power_states[i]; - state = &dev->states[count]; - - if (!cx->valid) - continue; - cpuidle_set_statedata(state, cx); - state->exit_latency = cx->exit_latency; - state->target_residency = cx->target_residency; - state->flags = cx->flags; - state->enter = (state->flags & CPUIDLE_FLAG_CHECK_BM) ? - omap3_enter_idle_bm : omap3_enter_idle; - if (cx->type == OMAP3_STATE_C1) - dev->safe_state = state; - sprintf(state->name, "C%d", count+1); - strncpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); - count++; - } + /* C1 . MPU WFI + Core active */ + cx = _fill_cstate(dev, 0, "MPU ON + CORE ON"); + (&dev->states[0])->enter = omap3_enter_idle; + dev->safe_state = &dev->states[0]; + cx->valid = 1; /* C1 is always valid */ + cx->mpu_state = PWRDM_POWER_ON; + cx->core_state = PWRDM_POWER_ON; + + /* C2 . MPU WFI + Core inactive */ + cx = _fill_cstate(dev, 1, "MPU ON + CORE ON"); + cx->mpu_state = PWRDM_POWER_ON; + cx->core_state = PWRDM_POWER_ON; + + /* C3 . MPU CSWR + Core inactive */ + cx = _fill_cstate(dev, 2, "MPU RET + CORE ON"); + cx->mpu_state = PWRDM_POWER_RET; + cx->core_state = PWRDM_POWER_ON; + + /* C4 . MPU OFF + Core inactive */ + cx = _fill_cstate(dev, 3, "MPU OFF + CORE ON"); + cx->mpu_state = PWRDM_POWER_OFF; + cx->core_state = PWRDM_POWER_ON; + + /* C5 . 
MPU RET + Core RET */ + cx = _fill_cstate(dev, 4, "MPU RET + CORE RET"); + cx->mpu_state = PWRDM_POWER_RET; + cx->core_state = PWRDM_POWER_RET; - if (!count) - return -EINVAL; - dev->state_count = count; + /* C6 . MPU OFF + Core RET */ + cx = _fill_cstate(dev, 5, "MPU OFF + CORE RET"); + cx->mpu_state = PWRDM_POWER_OFF; + cx->core_state = PWRDM_POWER_RET; + + /* C7 . MPU OFF + Core OFF */ + cx = _fill_cstate(dev, 6, "MPU OFF + CORE OFF"); + /* + * Erratum i583: implementation for ES rev < Es1.2 on 3630. We cannot + * enable OFF mode in a stable form for previous revisions. + * We disable C7 state as a result. + */ + if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583)) { + cx->valid = 0; + pr_warn("%s: core off state C7 disabled due to i583\n", + __func__); + } + cx->mpu_state = PWRDM_POWER_OFF; + cx->core_state = PWRDM_POWER_OFF; if (enable_off_mode) omap3_cpuidle_update_states(PWRDM_POWER_OFF, PWRDM_POWER_OFF); else omap3_cpuidle_update_states(PWRDM_POWER_RET, PWRDM_POWER_RET); + dev->state_count = OMAP3_NUM_STATES; if (cpuidle_register_device(dev)) { printk(KERN_ERR "%s: CPUidle register device failed\n", __func__); -- cgit v0.10.2 From c6cd91de1cb4694f2dfcc7df831e276fffdffffc Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Mon, 9 May 2011 12:02:15 +0200 Subject: OMAP3: cpuidle: code rework for improved readability - fix single and multi-lines comments format - removed the omap3_idle_bm_check function and replaced the test in omap3_enter_idle_bm by the equivalent code - re-organize omap3_enter_idle_bm code path, assign local variables only when needed - reword some comments Signed-off-by: Jean Pihet Signed-off-by: Kevin Hilman diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c index f9c8676..dd31e53 100644 --- a/arch/arm/mach-omap2/cpuidle34xx.c +++ b/arch/arm/mach-omap2/cpuidle34xx.c @@ -71,13 +71,6 @@ struct omap3_idle_statedata omap3_idle_data[OMAP3_NUM_STATES]; struct powerdomain *mpu_pd, *core_pd, *per_pd, *cam_pd; -static int 
omap3_idle_bm_check(void) -{ - if (!omap3_can_sleep()) - return 1; - return 0; -} - static int _cpuidle_allow_idle(struct powerdomain *pwrdm, struct clockdomain *clkdm) { @@ -157,9 +150,7 @@ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, struct cpuidle_state *curr) { struct cpuidle_state *next = NULL; - struct omap3_idle_statedata *cx; - - cx = cpuidle_get_statedata(curr); + struct omap3_idle_statedata *cx = cpuidle_get_statedata(curr); /* Check if current state is valid */ if (cx->valid) { @@ -167,9 +158,7 @@ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, } else { int idx = OMAP3_NUM_STATES - 1; - /* - * Reach the current state starting at highest C-state - */ + /* Reach the current state starting at highest C-state */ for (; idx >= 0; idx--) { if (&dev->states[idx] == curr) { next = &dev->states[idx]; @@ -177,9 +166,7 @@ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, } } - /* - * Should never hit this condition. - */ + /* Should never hit this condition */ WARN_ON(next == NULL); /* @@ -214,29 +201,16 @@ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, static int omap3_enter_idle_bm(struct cpuidle_device *dev, struct cpuidle_state *state) { - struct cpuidle_state *new_state = next_valid_state(dev, state); - u32 core_next_state, per_next_state = 0, per_saved_state = 0; - u32 cam_state; + struct cpuidle_state *new_state; + u32 core_next_state, per_next_state = 0, per_saved_state = 0, cam_state; struct omap3_idle_statedata *cx; int ret; - if (omap3_idle_bm_check()) { - BUG_ON(!dev->safe_state); + if (!omap3_can_sleep()) { new_state = dev->safe_state; goto select_state; } - cx = cpuidle_get_statedata(state); - core_next_state = cx->core_state; - - /* - * FIXME: we currently manage device-specific idle states - * for PER and CORE in combination with CPU-specific - * idle states. 
This is wrong, and device-specific - * idle management needs to be separated out into - * its own code. - */ - /* * Prevent idle completely if CAM is active. * CAM does not have wakeup capability in OMAP3. @@ -248,9 +222,19 @@ static int omap3_enter_idle_bm(struct cpuidle_device *dev, } /* + * FIXME: we currently manage device-specific idle states + * for PER and CORE in combination with CPU-specific + * idle states. This is wrong, and device-specific + * idle management needs to be separated out into + * its own code. + */ + + /* * Prevent PER off if CORE is not in retention or off as this * would disable PER wakeups completely. */ + cx = cpuidle_get_statedata(state); + core_next_state = cx->core_state; per_next_state = per_saved_state = pwrdm_read_next_pwrst(per_pd); if ((per_next_state == PWRDM_POWER_OFF) && (core_next_state > PWRDM_POWER_RET)) @@ -260,6 +244,8 @@ static int omap3_enter_idle_bm(struct cpuidle_device *dev, if (per_next_state != per_saved_state) pwrdm_set_next_pwrst(per_pd, per_next_state); + new_state = next_valid_state(dev, state); + select_state: dev->last_state = new_state; ret = omap3_enter_idle(dev, new_state); @@ -320,7 +306,7 @@ struct cpuidle_driver omap3_idle_driver = { .owner = THIS_MODULE, }; -/* Fill in the state data from the mach tables and register the driver_data */ +/* Helper to fill the C-state common data and register the driver_data */ static inline struct omap3_idle_statedata *_fill_cstate( struct cpuidle_device *dev, int idx, const char *descr) -- cgit v0.10.2 From 04908918191f7926ec7af99890fb4ddb3b769c13 Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Mon, 9 May 2011 12:02:16 +0200 Subject: OMAP3: cpuidle: change the power domains modes determination logic The achievable power modes of the power domains in cpuidle depend on the system wide 'enable_off_mode' knob in debugfs. Upon changing enable_off_mode, do not change the C-states 'valid' field but instead dynamically restrict the power modes when entering idle.
The C-states 'valid' field is just used to enable/disable some C-states at init and shall not be changed later on. Signed-off-by: Jean Pihet Signed-off-by: Kevin Hilman diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c index dd31e53..4bf6e6e 100644 --- a/arch/arm/mach-omap2/cpuidle34xx.c +++ b/arch/arm/mach-omap2/cpuidle34xx.c @@ -138,22 +138,40 @@ return_sleep_time: } /** - * next_valid_state - Find next valid c-state + * next_valid_state - Find next valid C-state * @dev: cpuidle device - * @state: Currently selected c-state + * @state: Currently selected C-state * * If the current state is valid, it is returned back to the caller. * Else, this function searches for a lower c-state which is still * valid. + * + * A state is valid if the 'valid' field is enabled and + * if it satisfies the enable_off_mode condition. */ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, struct cpuidle_state *curr) { struct cpuidle_state *next = NULL; struct omap3_idle_statedata *cx = cpuidle_get_statedata(curr); + u32 mpu_deepest_state = PWRDM_POWER_RET; + u32 core_deepest_state = PWRDM_POWER_RET; + + if (enable_off_mode) { + mpu_deepest_state = PWRDM_POWER_OFF; + /* + * Erratum i583: valable for ES rev < Es1.2 on 3630. + * CORE OFF mode is not supported in a stable form, restrict + * instead the CORE state to RET. 
+ */ + if (!IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583)) + core_deepest_state = PWRDM_POWER_OFF; + } /* Check if current state is valid */ - if (cx->valid) { + if ((cx->valid) && + (cx->mpu_state >= mpu_deepest_state) && + (cx->core_state >= core_deepest_state)) { return curr; } else { int idx = OMAP3_NUM_STATES - 1; @@ -176,7 +194,9 @@ static struct cpuidle_state *next_valid_state(struct cpuidle_device *dev, idx--; for (; idx >= 0; idx--) { cx = cpuidle_get_statedata(&dev->states[idx]); - if (cx->valid) { + if ((cx->valid) && + (cx->mpu_state >= mpu_deepest_state) && + (cx->core_state >= core_deepest_state)) { next = &dev->states[idx]; break; } @@ -259,31 +279,6 @@ select_state: DEFINE_PER_CPU(struct cpuidle_device, omap3_idle_dev); -/** - * omap3_cpuidle_update_states() - Update the cpuidle states - * @mpu_deepest_state: Enable states up to and including this for mpu domain - * @core_deepest_state: Enable states up to and including this for core domain - * - * This goes through the list of states available and enables and disables the - * validity of C states based on deepest state that can be achieved for the - * variable domain - */ -void omap3_cpuidle_update_states(u32 mpu_deepest_state, u32 core_deepest_state) -{ - int i; - - for (i = 0; i < OMAP3_NUM_STATES; i++) { - struct omap3_idle_statedata *cx = &omap3_idle_data[i]; - - if ((cx->mpu_state >= mpu_deepest_state) && - (cx->core_state >= core_deepest_state)) { - cx->valid = 1; - } else { - cx->valid = 0; - } - } -} - void omap3_pm_init_cpuidle(struct cpuidle_params *cpuidle_board_params) { int i; @@ -393,11 +388,6 @@ int __init omap3_idle_init(void) cx->mpu_state = PWRDM_POWER_OFF; cx->core_state = PWRDM_POWER_OFF; - if (enable_off_mode) - omap3_cpuidle_update_states(PWRDM_POWER_OFF, PWRDM_POWER_OFF); - else - omap3_cpuidle_update_states(PWRDM_POWER_RET, PWRDM_POWER_RET); - dev->state_count = OMAP3_NUM_STATES; if (cpuidle_register_device(dev)) { printk(KERN_ERR "%s: CPUidle register device failed\n", 
diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h index 32dbc13..45bcfce 100644 --- a/arch/arm/mach-omap2/pm.h +++ b/arch/arm/mach-omap2/pm.h @@ -78,10 +78,6 @@ extern u32 sleep_while_idle; #define sleep_while_idle 0 #endif -#if defined(CONFIG_CPU_IDLE) -extern void omap3_cpuidle_update_states(u32, u32); -#endif - #if defined(CONFIG_PM_DEBUG) && defined(CONFIG_DEBUG_FS) extern void pm_dbg_update_time(struct powerdomain *pwrdm, int prev); extern int pm_dbg_regset_save(int reg_set); diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 0c5e3a4..caf9f6c 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -779,18 +779,6 @@ void omap3_pm_off_mode_enable(int enable) else state = PWRDM_POWER_RET; -#ifdef CONFIG_CPU_IDLE - /* - * Erratum i583: implementation for ES rev < Es1.2 on 3630. We cannot - * enable OFF mode in a stable form for previous revisions, restrict - * instead to RET - */ - if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583)) - omap3_cpuidle_update_states(state, PWRDM_POWER_RET); - else - omap3_cpuidle_update_states(state, state); -#endif - list_for_each_entry(pwrst, &pwrst_list, node) { if (IS_PM34XX_ERRATUM(PM_SDRC_WAKEUP_ERRATUM_i583) && pwrst->pwrdm == core_pwrdm && -- cgit v0.10.2 From 99aa18278e867574d72201b806f82ace07d4804b Mon Sep 17 00:00:00 2001 From: Sanjeev Premi Date: Wed, 18 May 2011 14:44:40 +0530 Subject: OMAP3: PM: Boot message is not an error, and not helpful, remove it It shows up on the console despite using "silent" in the bootargs, and it's really just noise in the boot log since PM init is always called. 
Signed-off-by: Sanjeev Premi Cc: jhnikula@gmail.com [khilman@ti.com: minor changelog edits] Signed-off-by: Kevin Hilman diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index caf9f6c..c155c9d 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -883,8 +883,6 @@ static int __init omap3_pm_init(void) pm_errata_configure(); - printk(KERN_ERR "Power Management for TI OMAP3.\n"); - /* XXX prcm_setup_regs needs to be before enabling hw * supervised mode for powerdomains */ prcm_setup_regs(); -- cgit v0.10.2