From 74b2107543da4ed9607ec484f63c42362dc9fca6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Apr 2011 12:02:53 -0400 Subject: Btrfs: make sure to use the delalloc reserve when filling delalloc In the prealloc filling code and compressed code we don't set trans->block_rsv to the delalloc block reserve properly, which is going to make us use metadata from the wrong pool, this patch fixes that. Thanks, Signed-off-by: Josef Bacik diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7cd8ab0..3b9f164 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -619,6 +619,7 @@ retry: trans = btrfs_join_transaction(root, 1); BUG_ON(IS_ERR(trans)); + trans->block_rsv = &root->fs_info->delalloc_block_rsv; ret = btrfs_reserve_extent(trans, root, async_extent->compressed_size, async_extent->compressed_size, @@ -1060,6 +1061,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, trans = btrfs_join_transaction(root, 1); } BUG_ON(IS_ERR(trans)); + trans->block_rsv = &root->fs_info->delalloc_block_rsv; cow_start = (u64)-1; cur_offset = start; -- cgit v0.10.2 From 7a7eaa40a39bde4eefc91aadeb1ce3dc4e6a1252 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Apr 2011 12:54:33 -0400 Subject: Btrfs: take away the num_items argument from btrfs_join_transaction I keep forgetting that btrfs_join_transaction() just ignores the num_items argument, which leads me to sending pointless patches and looking stupid :). So just kill the num_items argument from btrfs_join_transaction and btrfs_start_ioctl_transaction, since neither of them use it. Thanks, Signed-off-by: Josef Bacik diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 228cf36..9d6c9e3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1568,7 +1568,7 @@ static int transaction_kthread(void *arg) transid = cur->transid; spin_unlock(&root->fs_info->new_trans_lock); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); if (transid == trans->transid) { ret = btrfs_commit_transaction(trans, root); @@ -2495,13 +2495,13 @@ int btrfs_commit_super(struct btrfs_root *root) down_write(&root->fs_info->cleanup_work_sem); up_write(&root->fs_info->cleanup_work_sem); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); ret = btrfs_commit_transaction(trans, root); BUG_ON(ret); /* run commit again to drop the original snapshot */ - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9ee6bd5..941b28e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3174,7 +3174,7 @@ again: spin_unlock(&data_sinfo->lock); alloc: alloc_target = btrfs_get_alloc_profile(root, 1); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); @@ -3202,7 +3202,7 @@ alloc: commit_trans: if (!committed && !root->fs_info->open_ioctl_trans) { committed = 1; - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); ret = btrfs_commit_transaction(trans, root); @@ -3589,7 +3589,7 @@ again: goto out; ret = -ENOSPC; - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) goto out; ret = btrfs_commit_transaction(trans, root); @@ -3816,7 +3816,7 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, if (trans) return -EAGAIN; - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); ret = btrfs_commit_transaction(trans, root); return 0; @@ -7649,7 +7649,7 @@ int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) BUG_ON(reloc_root->commit_root != NULL); while (1) { - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); mutex_lock(&root->fs_info->drop_mutex); @@ -8176,7 +8176,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, BUG_ON(cache->ro); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); alloc_flags = update_block_group_flags(root, cache->flags); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3b9f164..e47bdf0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -420,7 +420,7 @@ again: } } if (start == 0) { - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -617,7 +617,7 @@ retry: async_extent->start + async_extent->ram_size - 1, GFP_NOFS); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; ret = btrfs_reserve_extent(trans, root, @@ -779,7 +779,7 @@ static noinline int cow_file_range(struct inode *inode, int ret = 0; BUG_ON(root == root->fs_info->tree_root); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -1056,9 +1056,9 @@ static noinline int run_delalloc_nocow(struct inode *inode, BUG_ON(!path); if (root == root->fs_info->tree_root) { nolock = true; - trans = btrfs_join_transaction_nolock(root, 1); + trans = btrfs_join_transaction_nolock(root); } else { - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); } BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -1718,9 +1718,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); if (!ret) { if (nolock) - trans = btrfs_join_transaction_nolock(root, 1); + trans = btrfs_join_transaction_nolock(root); else - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -1735,9 +1735,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) 0, &cached_state, GFP_NOFS); if (nolock) - trans = btrfs_join_transaction_nolock(root, 1); + trans = btrfs_join_transaction_nolock(root); else - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -2415,7 +2415,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) (u64)-1); if (root->orphan_block_rsv || root->orphan_item_inserted) { - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (!IS_ERR(trans)) btrfs_end_transaction(trans, root); } @@ -4378,9 +4378,9 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) if (wbc->sync_mode == WB_SYNC_ALL) { if (nolock) - trans = btrfs_join_transaction_nolock(root, 1); + trans = btrfs_join_transaction_nolock(root); else - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); btrfs_set_trans_block_group(trans, inode); @@ -4407,7 +4407,7 @@ void btrfs_dirty_inode(struct inode *inode) if (BTRFS_I(inode)->dummy_inode) return; - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); @@ -5226,7 +5226,7 @@ again: free_extent_map(em); em = NULL; btrfs_release_path(root, path); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return ERR_CAST(trans); goto again; @@ -5470,7 +5470,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, btrfs_drop_extent_cache(inode, start, start + len - 1, 0); } - trans = btrfs_join_transaction(root, 0); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return ERR_CAST(trans); @@ -5703,7 +5703,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, * to make sure the current transaction stays open * while we look for nocow cross refs */ - trans = btrfs_join_transaction(root, 0); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) goto must_cow; @@ -5841,7 +5841,7 @@ again: BUG_ON(!ordered); - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { err = -ENOMEM; goto out; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2616f7e..908c3d4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -242,7 +242,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); } - trans = btrfs_join_transaction(root, 1); + trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); ret = btrfs_update_inode(trans, root, inode); @@ -2182,7 +2182,7 @@ static long btrfs_ioctl_trans_start(struct file *file) mutex_unlock(&root->fs_info->trans_mutex); ret = -ENOMEM; - trans = btrfs_start_ioctl_transaction(root, 0); + trans = btrfs_start_ioctl_transaction(root); if (IS_ERR(trans)) goto out_drop; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 199a801..8bb2566 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2149,7 +2149,7 @@ again: err = ret; } - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) { if (!err) btrfs_block_rsv_release(rc->extent_root, @@ -3233,7 +3233,7 @@ truncate: goto out; } - trans = btrfs_join_transaction(root, 0); + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { btrfs_free_path(path); ret = PTR_ERR(trans); @@ -3642,7 +3642,7 @@ int prepare_to_relocate(struct reloc_control *rc) rc->create_reloc_tree = 1; set_reloc_control(rc); - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); BUG_ON(IS_ERR(trans)); btrfs_commit_transaction(trans, rc->extent_root); return 0; @@ -3831,7 +3831,7 @@ restart: btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); /* get rid of pinned extents */ - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) err = PTR_ERR(trans); else @@ -4156,7 +4156,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) set_reloc_control(rc); - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) { unset_reloc_control(rc); err = PTR_ERR(trans); @@ -4190,7 +4190,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) unset_reloc_control(rc); - trans = btrfs_join_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root); if (IS_ERR(trans)) err = PTR_ERR(trans); else diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c571734..70bfb26 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -257,22 +257,19 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, { return start_transaction(root, num_items, TRANS_START); } -struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, - int num_blocks) +struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) { return start_transaction(root, 0, TRANS_JOIN); } -struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, - int num_blocks) +struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root) { return start_transaction(root, 0, TRANS_JOIN_NOLOCK); } -struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, - int num_blocks) +struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root) { - return start_transaction(r, 0, TRANS_USERSPACE); + return start_transaction(root, 0, TRANS_USERSPACE); } /* wait for a transaction commit to be fully complete */ @@ -1171,7 +1168,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, INIT_DELAYED_WORK(&ac->work, do_async_commit); ac->root = root; - ac->newtrans = btrfs_join_transaction(root, 0); + ac->newtrans = btrfs_join_transaction(root); if (IS_ERR(ac->newtrans)) { int err = PTR_ERR(ac->newtrans); kfree(ac); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index e441acc..1f573f0 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -92,12 +92,9 @@ int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_items); -struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, - int num_blocks); -struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, - int num_blocks); -struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, - int num_blocks); +struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); +struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); +struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); -- cgit v0.10.2 From 2a1eb4614d984d5cd4c928784e9afcf5c07f93be Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Apr 2011 15:15:59 -0400 Subject: Btrfs: if we've already started a trans handle, use that one We currently track trans handles in current->journal_info, but we don't actually use it. This patch fixes it. This will cover the case where we have multiple people starting transactions down the call chain. This keeps us from having to allocate a new handle and all of that, we just increase the use count of the current handle, save the old block_rsv, and return. I tested this with xfstests and it worked out fine. Thanks, Signed-off-by: Josef Bacik diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 70bfb26..46f4056 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -184,6 +184,15 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) return ERR_PTR(-EROFS); + + if (current->journal_info) { + WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK); + h = current->journal_info; + h->use_count++; + h->orig_rsv = h->block_rsv; + h->block_rsv = NULL; + goto got_it; + } again: h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); if (!h) @@ -213,7 +222,9 @@ again: h->block_group = 0; h->bytes_reserved = 0; h->delayed_ref_updates = 0; + h->use_count = 1; h->block_rsv = NULL; + h->orig_rsv = NULL; smp_mb(); if (cur_trans->blocked && may_wait_transaction(root, type)) { @@ -241,6 +252,7 @@ again: } } +got_it: if (type != TRANS_JOIN_NOLOCK) mutex_lock(&root->fs_info->trans_mutex); record_root_in_trans(h, root); @@ -428,6 +440,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_fs_info *info = root->fs_info; int count = 0; + if (--trans->use_count) { + trans->block_rsv = trans->orig_rsv; + return 0; + } + while (count < 4) { unsigned long cur = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 1f573f0..154314f 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -47,11 +47,13 @@ struct btrfs_trans_handle { u64 transid; u64 block_group; u64 bytes_reserved; + unsigned long use_count; unsigned long blocks_reserved; unsigned long blocks_used; unsigned long delayed_ref_updates; struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; + struct btrfs_block_rsv *orig_rsv; }; struct btrfs_pending_snapshot { -- cgit v0.10.2 From a4abeea41adfa3c143c289045f4625dfaeba2212 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 11 Apr 2011 17:25:13 -0400 Subject: Btrfs: kill trans_mutex We use trans_mutex for lots of things, here's a basic list 1) To serialize trans_handles joining the currently running transaction 2) To make sure that no new trans handles are started while we are committing 3) To protect the dead_roots list and the transaction lists Really the serializing trans_handles joining is not too hard, and can really get bogged down in acquiring a reference to the transaction. So replace the trans_mutex with a trans_lock spinlock and use it to do the following 1) Protect fs_info->running_transaction. All trans handles have to do is check this, and then take a reference of the transaction and keep on going. 2) Protect the fs_info->trans_list. This doesn't get used too much, basically it just holds the current transactions, which will usually just be the currently committing transaction and the currently running transaction at most. 3) Protect the dead roots list. This is only ever processed by splicing the list so this is relatively simple. 4) Protect the fs_info->reloc_ctl stuff. This is very lightweight and was using the trans_mutex before, so this is a pretty straightforward change. 5) Protect fs_info->no_trans_join. Because we don't hold the trans_lock over the entirety of the commit we need to have a way to block new people from creating a new transaction while we're doing our work. So we set no_trans_join and in join_transaction we test to see if that is set, and if it is we do a wait_on_commit. 6) Make the transaction use count atomic so we don't need to take locks to modify it when we're dropping references. 7) Add a commit_lock to the transaction to make sure multiple people trying to commit the same transaction don't race and commit at the same time. 8) Make open_ioctl_trans an atomic so we don't have to take any locks for ioctl trans. I have tested this with xfstests, but obviously it is a pretty hairy change so lots of testing is greatly appreciated. Thanks, Signed-off-by: Josef Bacik diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8f4b81d..522a39b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -919,7 +919,6 @@ struct btrfs_fs_info { * is required instead of the faster short fsync log commits */ u64 last_trans_log_full_commit; - u64 open_ioctl_trans; unsigned long mount_opt:20; unsigned long compress_type:4; u64 max_inline; @@ -936,7 +935,6 @@ struct btrfs_fs_info { struct super_block *sb; struct inode *btree_inode; struct backing_dev_info bdi; - struct mutex trans_mutex; struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; struct mutex cleaner_mutex; @@ -957,6 +955,7 @@ struct btrfs_fs_info { struct rw_semaphore subvol_sem; struct srcu_struct subvol_srcu; + spinlock_t trans_lock; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; @@ -969,6 +968,7 @@ struct btrfs_fs_info { atomic_t async_submit_draining; atomic_t nr_async_bios; atomic_t async_delalloc_pages; + atomic_t open_ioctl_trans; /* * this is used by the balancing code to wait for all the pending @@ -1032,6 +1032,7 @@ struct btrfs_fs_info { int closing; int log_root_recovering; int enospc_unlink; + int trans_no_join; u64 total_pinned; @@ -1053,7 +1054,6 @@ struct btrfs_fs_info { struct reloc_control *reloc_ctl; spinlock_t delalloc_lock; - spinlock_t new_trans_lock; u64 delalloc_bytes; /* data_alloc_cluster is only used in ssd mode */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9d6c9e3..93ef254 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1551,22 +1551,22 @@ static int transaction_kthread(void *arg) vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->transaction_kthread_mutex); - spin_lock(&root->fs_info->new_trans_lock); + spin_lock(&root->fs_info->trans_lock); cur = root->fs_info->running_transaction; if (!cur) { - spin_unlock(&root->fs_info->new_trans_lock); + spin_unlock(&root->fs_info->trans_lock); goto sleep; } now = get_seconds(); if (!cur->blocked && (now < cur->start_time || now - cur->start_time < 30)) { - spin_unlock(&root->fs_info->new_trans_lock); + spin_unlock(&root->fs_info->trans_lock); delay = HZ * 5; goto sleep; } transid = cur->transid; - spin_unlock(&root->fs_info->new_trans_lock); + spin_unlock(&root->fs_info->trans_lock); trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); @@ -1658,7 +1658,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->ordered_operations); INIT_LIST_HEAD(&fs_info->caching_block_groups); spin_lock_init(&fs_info->delalloc_lock); - spin_lock_init(&fs_info->new_trans_lock); + spin_lock_init(&fs_info->trans_lock); spin_lock_init(&fs_info->ref_cache_lock); spin_lock_init(&fs_info->fs_roots_radix_lock); spin_lock_init(&fs_info->delayed_iput_lock); @@ -1687,6 +1687,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->sb = sb; fs_info->max_inline = 8192 * 1024; fs_info->metadata_ratio = 0; + fs_info->trans_no_join = 0; fs_info->thread_pool_size = min_t(unsigned long, num_online_cpus() + 2, 8); @@ -1735,7 +1736,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->do_barriers = 1; - mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->ordered_operations_mutex); mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->chunk_mutex); @@ -3006,10 +3006,13 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) WARN_ON(1); - mutex_lock(&root->fs_info->trans_mutex); mutex_lock(&root->fs_info->transaction_kthread_mutex); + spin_lock(&root->fs_info->trans_lock); list_splice_init(&root->fs_info->trans_list, &list); + root->fs_info->trans_no_join = 1; + spin_unlock(&root->fs_info->trans_lock); + while (!list_empty(&list)) { t = list_entry(list.next, struct btrfs_transaction, list); if (!t) @@ -3034,23 +3037,18 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) t->blocked = 0; if (waitqueue_active(&root->fs_info->transaction_wait)) wake_up(&root->fs_info->transaction_wait); - mutex_unlock(&root->fs_info->trans_mutex); - mutex_lock(&root->fs_info->trans_mutex); t->commit_done = 1; if (waitqueue_active(&t->commit_wait)) wake_up(&t->commit_wait); - mutex_unlock(&root->fs_info->trans_mutex); - - mutex_lock(&root->fs_info->trans_mutex); btrfs_destroy_pending_snapshots(t); btrfs_destroy_delalloc_inodes(root); - spin_lock(&root->fs_info->new_trans_lock); + spin_lock(&root->fs_info->trans_lock); root->fs_info->running_transaction = NULL; - spin_unlock(&root->fs_info->new_trans_lock); + spin_unlock(&root->fs_info->trans_lock); btrfs_destroy_marked_extents(root, &t->dirty_pages, EXTENT_DIRTY); @@ -3064,8 +3062,10 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) kmem_cache_free(btrfs_transaction_cachep, t); } + spin_lock(&root->fs_info->trans_lock); + root->fs_info->trans_no_join = 0; + spin_unlock(&root->fs_info->trans_lock); mutex_unlock(&root->fs_info->transaction_kthread_mutex); - mutex_unlock(&root->fs_info->trans_mutex); return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 941b28e..ca59965 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3200,7 +3200,8 @@ alloc: /* commit the current transaction and try again */ commit_trans: - if (!committed && !root->fs_info->open_ioctl_trans) { + if (!committed && + !atomic_read(&root->fs_info->open_ioctl_trans)) { committed = 1; trans = btrfs_join_transaction(root); if (IS_ERR(trans)) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 75899a0..cd5e82e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1222,14 +1222,12 @@ int btrfs_sync_file(struct file *file, int datasync) * the current transaction, we can bail out now without any * syncing */ - mutex_lock(&root->fs_info->trans_mutex); + smp_mb(); if (BTRFS_I(inode)->last_trans <= root->fs_info->last_trans_committed) { BTRFS_I(inode)->last_trans = 0; - mutex_unlock(&root->fs_info->trans_mutex); goto out; } - mutex_unlock(&root->fs_info->trans_mutex); /* * ok we haven't committed the transaction yet, lets do a commit diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 908c3d4..a578620 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2177,9 +2177,7 @@ static long btrfs_ioctl_trans_start(struct file *file) if (ret) goto out; - mutex_lock(&root->fs_info->trans_mutex); - root->fs_info->open_ioctl_trans++; - mutex_unlock(&root->fs_info->trans_mutex); + atomic_inc(&root->fs_info->open_ioctl_trans); ret = -ENOMEM; trans = btrfs_start_ioctl_transaction(root); @@ -2190,9 +2188,7 @@ static long btrfs_ioctl_trans_start(struct file *file) return 0; out_drop: - mutex_lock(&root->fs_info->trans_mutex); - root->fs_info->open_ioctl_trans--; - mutex_unlock(&root->fs_info->trans_mutex); + atomic_dec(&root->fs_info->open_ioctl_trans); mnt_drop_write(file->f_path.mnt); out: return ret; @@ -2426,9 +2422,7 @@ long btrfs_ioctl_trans_end(struct file *file) btrfs_end_transaction(trans, root); - mutex_lock(&root->fs_info->trans_mutex); - root->fs_info->open_ioctl_trans--; - mutex_unlock(&root->fs_info->trans_mutex); + atomic_dec(&root->fs_info->open_ioctl_trans); mnt_drop_write(file->f_path.mnt); return 0; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 8bb2566..09c30d3 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2136,10 +2136,10 @@ int prepare_to_merge(struct reloc_control *rc, int err) u64 num_bytes = 0; int ret; - mutex_lock(&root->fs_info->trans_mutex); + spin_lock(&root->fs_info->trans_lock); rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; rc->merging_rsv_size += rc->nodes_relocated * 2; - mutex_unlock(&root->fs_info->trans_mutex); + spin_unlock(&root->fs_info->trans_lock); again: if (!err) { num_bytes = rc->merging_rsv_size; @@ -2208,9 +2208,9 @@ int merge_reloc_roots(struct reloc_control *rc) int ret; again: root = rc->extent_root; - mutex_lock(&root->fs_info->trans_mutex); + spin_lock(&root->fs_info->trans_lock); list_splice_init(&rc->reloc_roots, &reloc_roots); - mutex_unlock(&root->fs_info->trans_mutex); + spin_unlock(&root->fs_info->trans_lock); while (!list_empty(&reloc_roots)) { found = 1; @@ -3583,17 +3583,17 @@ next: static void set_reloc_control(struct reloc_control *rc) { struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - mutex_lock(&fs_info->trans_mutex); + spin_lock(&fs_info->trans_lock); fs_info->reloc_ctl = rc; - mutex_unlock(&fs_info->trans_mutex); + spin_unlock(&fs_info->trans_lock); } static void unset_reloc_control(struct reloc_control *rc) { struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - mutex_lock(&fs_info->trans_mutex); + spin_lock(&fs_info->trans_lock); fs_info->reloc_ctl = NULL; - mutex_unlock(&fs_info->trans_mutex); + spin_unlock(&fs_info->trans_lock); } static int check_extent_flags(u64 flags) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 46f4056..43816f8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -34,6 +34,7 @@ static noinline void put_transaction(struct btrfs_transaction *transaction) { WARN_ON(atomic_read(&transaction->use_count) == 0); if (atomic_dec_and_test(&transaction->use_count)) { + BUG_ON(!list_empty(&transaction->list)); memset(transaction, 0, sizeof(*transaction)); kmem_cache_free(btrfs_transaction_cachep, transaction); } @@ -48,47 +49,73 @@ static noinline void switch_commit_root(struct btrfs_root *root) /* * either allocate a new transaction or hop into the existing one */ -static noinline int join_transaction(struct btrfs_root *root) +static noinline int join_transaction(struct btrfs_root *root, int nofail) { struct btrfs_transaction *cur_trans; + + spin_lock(&root->fs_info->trans_lock); + if (root->fs_info->trans_no_join) { + if (!nofail) { + spin_unlock(&root->fs_info->trans_lock); + return -EBUSY; + } + } + cur_trans = root->fs_info->running_transaction; - if (!cur_trans) { - cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, - GFP_NOFS); - if (!cur_trans) - return -ENOMEM; - root->fs_info->generation++; - atomic_set(&cur_trans->num_writers, 1); - cur_trans->num_joined = 0; - cur_trans->transid = root->fs_info->generation; - init_waitqueue_head(&cur_trans->writer_wait); - init_waitqueue_head(&cur_trans->commit_wait); - cur_trans->in_commit = 0; - cur_trans->blocked = 0; - atomic_set(&cur_trans->use_count, 1); - cur_trans->commit_done = 0; - cur_trans->start_time = get_seconds(); - - cur_trans->delayed_refs.root = RB_ROOT; - cur_trans->delayed_refs.num_entries = 0; - cur_trans->delayed_refs.num_heads_ready = 0; - cur_trans->delayed_refs.num_heads = 0; - cur_trans->delayed_refs.flushing = 0; - cur_trans->delayed_refs.run_delayed_start = 0; - spin_lock_init(&cur_trans->delayed_refs.lock); - - INIT_LIST_HEAD(&cur_trans->pending_snapshots); - list_add_tail(&cur_trans->list, &root->fs_info->trans_list); - extent_io_tree_init(&cur_trans->dirty_pages, - root->fs_info->btree_inode->i_mapping, - GFP_NOFS); - spin_lock(&root->fs_info->new_trans_lock); - root->fs_info->running_transaction = cur_trans; - spin_unlock(&root->fs_info->new_trans_lock); - } else { + if (cur_trans) { + atomic_inc(&cur_trans->use_count); + atomic_inc(&cur_trans->num_writers); + cur_trans->num_joined++; + spin_unlock(&root->fs_info->trans_lock); + return 0; + } + spin_unlock(&root->fs_info->trans_lock); + + cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); + if (!cur_trans) + return -ENOMEM; + spin_lock(&root->fs_info->trans_lock); + if (root->fs_info->running_transaction) { + kmem_cache_free(btrfs_transaction_cachep, cur_trans); + cur_trans = root->fs_info->running_transaction; + atomic_inc(&cur_trans->use_count); atomic_inc(&cur_trans->num_writers); cur_trans->num_joined++; + spin_unlock(&root->fs_info->trans_lock); + return 0; } + atomic_set(&cur_trans->num_writers, 1); + cur_trans->num_joined = 0; + init_waitqueue_head(&cur_trans->writer_wait); + init_waitqueue_head(&cur_trans->commit_wait); + cur_trans->in_commit = 0; + cur_trans->blocked = 0; + /* + * One for this trans handle, one so it will live on until we + * commit the transaction. + */ + atomic_set(&cur_trans->use_count, 2); + cur_trans->commit_done = 0; + cur_trans->start_time = get_seconds(); + + cur_trans->delayed_refs.root = RB_ROOT; + cur_trans->delayed_refs.num_entries = 0; + cur_trans->delayed_refs.num_heads_ready = 0; + cur_trans->delayed_refs.num_heads = 0; + cur_trans->delayed_refs.flushing = 0; + cur_trans->delayed_refs.run_delayed_start = 0; + spin_lock_init(&cur_trans->commit_lock); + spin_lock_init(&cur_trans->delayed_refs.lock); + + INIT_LIST_HEAD(&cur_trans->pending_snapshots); + list_add_tail(&cur_trans->list, &root->fs_info->trans_list); + extent_io_tree_init(&cur_trans->dirty_pages, + root->fs_info->btree_inode->i_mapping, + GFP_NOFS); + root->fs_info->generation++; + cur_trans->transid = root->fs_info->generation; + root->fs_info->running_transaction = cur_trans; + spin_unlock(&root->fs_info->trans_lock); return 0; } @@ -99,39 +126,28 @@ static noinline int join_transaction(struct btrfs_root *root) * to make sure the old root from before we joined the transaction is deleted * when the transaction commits */ -static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root) { if (root->ref_cows && root->last_trans < trans->transid) { WARN_ON(root == root->fs_info->extent_root); WARN_ON(root->commit_root != root->node); + spin_lock(&root->fs_info->fs_roots_radix_lock); + if (root->last_trans == trans->transid) { + spin_unlock(&root->fs_info->fs_roots_radix_lock); + return 0; + } + root->last_trans = trans->transid; radix_tree_tag_set(&root->fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); - root->last_trans = trans->transid; + spin_unlock(&root->fs_info->fs_roots_radix_lock); btrfs_init_reloc_root(trans, root); } return 0; } -int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - if (!root->ref_cows) - return 0; - - mutex_lock(&root->fs_info->trans_mutex); - if (root->last_trans == trans->transid) { - mutex_unlock(&root->fs_info->trans_mutex); - return 0; - } - - record_root_in_trans(trans, root); - mutex_unlock(&root->fs_info->trans_mutex); - return 0; -} - /* wait for commit against the current transaction to become unblocked * when this is done, it is safe to start a new transaction, but the current * transaction might not be fully on disk. @@ -140,21 +156,23 @@ static void wait_current_trans(struct btrfs_root *root) { struct btrfs_transaction *cur_trans; + spin_lock(&root->fs_info->trans_lock); cur_trans = root->fs_info->running_transaction; if (cur_trans && cur_trans->blocked) { DEFINE_WAIT(wait); atomic_inc(&cur_trans->use_count); + spin_unlock(&root->fs_info->trans_lock); while (1) { prepare_to_wait(&root->fs_info->transaction_wait, &wait, TASK_UNINTERRUPTIBLE); if (!cur_trans->blocked) break; - mutex_unlock(&root->fs_info->trans_mutex); schedule(); - mutex_lock(&root->fs_info->trans_mutex); } finish_wait(&root->fs_info->transaction_wait, &wait); put_transaction(cur_trans); + } else { + spin_unlock(&root->fs_info->trans_lock); } } @@ -167,10 +185,16 @@ enum btrfs_trans_type { static int may_wait_transaction(struct btrfs_root *root, int type) { - if (!root->fs_info->log_root_recovering && - ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || - type == TRANS_USERSPACE)) + if (root->fs_info->log_root_recovering) + return 0; + + if (type == TRANS_USERSPACE) + return 1; + + if (type == TRANS_START && + !atomic_read(&root->fs_info->open_ioctl_trans)) return 1; + return 0; } @@ -198,23 +222,21 @@ again: if (!h) return ERR_PTR(-ENOMEM); - if (type != TRANS_JOIN_NOLOCK) - mutex_lock(&root->fs_info->trans_mutex); if (may_wait_transaction(root, type)) wait_current_trans(root); - ret = join_transaction(root); + do { + ret = join_transaction(root, type == TRANS_JOIN_NOLOCK); + if (ret == -EBUSY) + wait_current_trans(root); + } while (ret == -EBUSY); + if (ret < 0) { kmem_cache_free(btrfs_trans_handle_cachep, h); - if (type != TRANS_JOIN_NOLOCK) - mutex_unlock(&root->fs_info->trans_mutex); return ERR_PTR(ret); } cur_trans = root->fs_info->running_transaction; - atomic_inc(&cur_trans->use_count); - if (type != TRANS_JOIN_NOLOCK) - mutex_unlock(&root->fs_info->trans_mutex); h->transid = cur_trans->transid; h->transaction = cur_trans; @@ -253,11 +275,7 @@ again: } got_it: - if (type != TRANS_JOIN_NOLOCK) - mutex_lock(&root->fs_info->trans_mutex); - record_root_in_trans(h, root); - if (type != TRANS_JOIN_NOLOCK) - mutex_unlock(&root->fs_info->trans_mutex); + btrfs_record_root_in_trans(h, root); if (!current->journal_info && type != TRANS_USERSPACE) current->journal_info = h; @@ -289,17 +307,13 @@ static noinline int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { DEFINE_WAIT(wait); - mutex_lock(&root->fs_info->trans_mutex); while (!commit->commit_done) { prepare_to_wait(&commit->commit_wait, &wait, TASK_UNINTERRUPTIBLE); if (commit->commit_done) break; - mutex_unlock(&root->fs_info->trans_mutex); schedule(); - mutex_lock(&root->fs_info->trans_mutex); } - mutex_unlock(&root->fs_info->trans_mutex); finish_wait(&commit->commit_wait, &wait); return 0; } @@ -309,50 +323,49 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) struct btrfs_transaction *cur_trans = NULL, *t; int ret; - mutex_lock(&root->fs_info->trans_mutex); - ret = 0; if (transid) { if (transid <= root->fs_info->last_trans_committed) - goto out_unlock; + goto out; /* find specified transaction */ + spin_lock(&root->fs_info->trans_lock); list_for_each_entry(t, &root->fs_info->trans_list, list) { if (t->transid == transid) { cur_trans = t; + atomic_inc(&cur_trans->use_count); break; } if (t->transid > transid) break; } + spin_unlock(&root->fs_info->trans_lock); ret = -EINVAL; if (!cur_trans) - goto out_unlock; /* bad transid */ + goto out; /* bad transid */ } else { /* find newest transaction that is committing | committed */ + spin_lock(&root->fs_info->trans_lock); list_for_each_entry_reverse(t, &root->fs_info->trans_list, list) { if (t->in_commit) { if (t->commit_done) - goto out_unlock; + goto out; cur_trans = t; + atomic_inc(&cur_trans->use_count); break; } } + spin_unlock(&root->fs_info->trans_lock); if (!cur_trans) - goto out_unlock; /* nothing committing|committed */ + goto out; /* nothing committing|committed */ } - atomic_inc(&cur_trans->use_count); - mutex_unlock(&root->fs_info->trans_mutex); - wait_for_commit(root, cur_trans); - mutex_lock(&root->fs_info->trans_mutex); put_transaction(cur_trans); ret = 0; -out_unlock: - mutex_unlock(&root->fs_info->trans_mutex); +out: return ret; } @@ -401,10 +414,8 @@ harder: void btrfs_throttle(struct btrfs_root *root) { - mutex_lock(&root->fs_info->trans_mutex); - if (!root->fs_info->open_ioctl_trans) + if (!atomic_read(&root->fs_info->open_ioctl_trans)) wait_current_trans(root); - mutex_unlock(&root->fs_info->trans_mutex); } static int should_end_transaction(struct btrfs_trans_handle *trans, @@ -422,6 +433,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans = trans->transaction; int updates; + smp_mb(); if (cur_trans->blocked || cur_trans->delayed_refs.flushing) return 1; @@ -467,9 +479,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, btrfs_trans_release_metadata(trans, root); - if (lock && !root->fs_info->open_ioctl_trans && - should_end_transaction(trans, root)) + if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && + should_end_transaction(trans, root)) { trans->transaction->blocked = 1; + smp_wmb(); + } if (lock && cur_trans->blocked && !cur_trans->in_commit) { if (throttle) @@ -739,9 +753,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, */ int btrfs_add_dead_root(struct btrfs_root *root) { - mutex_lock(&root->fs_info->trans_mutex); + spin_lock(&root->fs_info->trans_lock); list_add(&root->root_list, &root->fs_info->dead_roots); - mutex_unlock(&root->fs_info->trans_mutex); + spin_unlock(&root->fs_info->trans_lock); return 0; } @@ -757,6 +771,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, int ret; int err = 0; + spin_lock(&fs_info->fs_roots_radix_lock); while (1) { ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, (void **)gang, 0, @@ -769,6 +784,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, radix_tree_tag_clear(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); + spin_unlock(&fs_info->fs_roots_radix_lock); btrfs_free_log(trans, root); btrfs_update_reloc_root(trans, root); @@ -783,10 +799,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, err = btrfs_update_root(trans, fs_info->tree_root, &root->root_key, &root->root_item); + spin_lock(&fs_info->fs_roots_radix_lock); if (err) break; } } + spin_unlock(&fs_info->fs_roots_radix_lock); return err; } @@ -972,7 +990,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, parent = dget_parent(dentry); parent_inode = parent->d_inode; parent_root = BTRFS_I(parent_inode)->root; - record_root_in_trans(trans, parent_root); + btrfs_record_root_in_trans(trans, parent_root); /* * insert the directory item @@ -990,7 +1008,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, parent_root, parent_inode); BUG_ON(ret); - record_root_in_trans(trans, root); + btrfs_record_root_in_trans(trans, root); btrfs_set_root_last_snapshot(&root->root_item, trans->transid); memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); btrfs_check_and_init_root_item(new_root_item); @@ -1080,20 +1098,20 @@ static void update_super_roots(struct btrfs_root *root) int btrfs_transaction_in_commit(struct btrfs_fs_info *info) { int ret = 0; - spin_lock(&info->new_trans_lock); + spin_lock(&info->trans_lock); if (info->running_transaction) ret = info->running_transaction->in_commit; - spin_unlock(&info->new_trans_lock); + spin_unlock(&info->trans_lock); return ret; } int btrfs_transaction_blocked(struct btrfs_fs_info *info) { int ret = 0; - spin_lock(&info->new_trans_lock); + spin_lock(&info->trans_lock); if (info->running_transaction) ret = info->running_transaction->blocked; - spin_unlock(&info->new_trans_lock); + spin_unlock(&info->trans_lock); return ret; } @@ -1117,9 +1135,7 @@ static void wait_current_trans_commit_start(struct btrfs_root *root, &wait); break; } - mutex_unlock(&root->fs_info->trans_mutex); schedule(); - mutex_lock(&root->fs_info->trans_mutex); finish_wait(&root->fs_info->transaction_blocked_wait, &wait); } } @@ -1145,9 +1161,7 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, &wait); break; } - mutex_unlock(&root->fs_info->trans_mutex); schedule(); - mutex_lock(&root->fs_info->trans_mutex); finish_wait(&root->fs_info->transaction_wait, &wait); } @@ -1193,22 +1207,18 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, } /* take transaction reference */ - mutex_lock(&root->fs_info->trans_mutex); cur_trans = trans->transaction; atomic_inc(&cur_trans->use_count); - mutex_unlock(&root->fs_info->trans_mutex); btrfs_end_transaction(trans, root); schedule_delayed_work(&ac->work, 0); /* wait for transaction to start and unblock */ - mutex_lock(&root->fs_info->trans_mutex); if (wait_for_unblock) wait_current_trans_commit_start_and_unblock(root, cur_trans); else wait_current_trans_commit_start(root, cur_trans); put_transaction(cur_trans); - mutex_unlock(&root->fs_info->trans_mutex); return 0; } @@ -1252,38 +1262,41 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_run_delayed_refs(trans, root, 0); BUG_ON(ret); - mutex_lock(&root->fs_info->trans_mutex); + spin_lock(&cur_trans->commit_lock); if (cur_trans->in_commit) { + spin_unlock(&cur_trans->commit_lock); atomic_inc(&cur_trans->use_count); - mutex_unlock(&root->fs_info->trans_mutex); btrfs_end_transaction(trans, root); ret = wait_for_commit(root, cur_trans); BUG_ON(ret); - mutex_lock(&root->fs_info->trans_mutex); put_transaction(cur_trans); - mutex_unlock(&root->fs_info->trans_mutex); return 0; } trans->transaction->in_commit = 1; trans->transaction->blocked = 1; + spin_unlock(&cur_trans->commit_lock); wake_up(&root->fs_info->transaction_blocked_wait); + spin_lock(&root->fs_info->trans_lock); if (cur_trans->list.prev != &root->fs_info->trans_list) { prev_trans = list_entry(cur_trans->list.prev, struct btrfs_transaction, list); if (!prev_trans->commit_done) { atomic_inc(&prev_trans->use_count); - mutex_unlock(&root->fs_info->trans_mutex); + spin_unlock(&root->fs_info->trans_lock); wait_for_commit(root, prev_trans); - mutex_lock(&root->fs_info->trans_mutex); put_transaction(prev_trans); + } else { + spin_unlock(&root->fs_info->trans_lock); } + } else { + spin_unlock(&root->fs_info->trans_lock); } if (now < cur_trans->start_time || now - cur_trans->start_time < 1) @@ -1291,12 +1304,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, do { int snap_pending = 0; + joined = cur_trans->num_joined; if (!list_empty(&trans->transaction->pending_snapshots)) snap_pending = 1; WARN_ON(cur_trans != trans->transaction); - mutex_unlock(&root->fs_info->trans_mutex); if (flush_on_commit || snap_pending) { btrfs_start_delalloc_inodes(root, 1); @@ -1316,14 +1329,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, prepare_to_wait(&cur_trans->writer_wait, &wait, TASK_UNINTERRUPTIBLE); - smp_mb(); if (atomic_read(&cur_trans->num_writers) > 1) schedule_timeout(MAX_SCHEDULE_TIMEOUT); else if (should_grow) schedule_timeout(1); - mutex_lock(&root->fs_info->trans_mutex); finish_wait(&cur_trans->writer_wait, &wait); + spin_lock(&root->fs_info->trans_lock); + root->fs_info->trans_no_join = 1; + spin_unlock(&root->fs_info->trans_lock); } while (atomic_read(&cur_trans->num_writers) > 1 || (should_grow && cur_trans->num_joined != joined)); @@ -1364,9 +1378,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_prepare_extent_commit(trans, root); cur_trans = root->fs_info->running_transaction; - spin_lock(&root->fs_info->new_trans_lock); - root->fs_info->running_transaction = NULL; - spin_unlock(&root->fs_info->new_trans_lock); btrfs_set_root_node(&root->fs_info->tree_root->root_item, root->fs_info->tree_root->node); @@ -1387,10 +1398,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, sizeof(root->fs_info->super_copy)); trans->transaction->blocked = 0; + spin_lock(&root->fs_info->trans_lock); + root->fs_info->running_transaction = NULL; + root->fs_info->trans_no_join = 0; + spin_unlock(&root->fs_info->trans_lock); wake_up(&root->fs_info->transaction_wait); - mutex_unlock(&root->fs_info->trans_mutex); ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root, 0); @@ -1403,22 +1417,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_finish_extent_commit(trans, root); - mutex_lock(&root->fs_info->trans_mutex); - cur_trans->commit_done = 1; root->fs_info->last_trans_committed = cur_trans->transid; wake_up(&cur_trans->commit_wait); + spin_lock(&root->fs_info->trans_lock); list_del_init(&cur_trans->list); + spin_unlock(&root->fs_info->trans_lock); + put_transaction(cur_trans); put_transaction(cur_trans); trace_btrfs_transaction_commit(root); - mutex_unlock(&root->fs_info->trans_mutex); - if (current->journal_info == trans) current->journal_info = NULL; @@ -1438,9 +1451,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) LIST_HEAD(list); struct btrfs_fs_info *fs_info = root->fs_info; - mutex_lock(&fs_info->trans_mutex); + spin_lock(&fs_info->trans_lock); list_splice_init(&fs_info->dead_roots, &list); - mutex_unlock(&fs_info->trans_mutex); + spin_unlock(&fs_info->trans_lock); while (!list_empty(&list)) { root = list_entry(list.next, struct btrfs_root, root_list); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 154314f..11c6efc 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -28,10 +28,12 @@ struct btrfs_transaction { * transaction can end */ atomic_t num_writers; + atomic_t use_count; unsigned long num_joined; + + spinlock_t commit_lock; int in_commit; - atomic_t use_count; int commit_done; int blocked; struct list_head list; -- cgit v0.10.2 From fcb80c2affd63237cff5b34cba5756be7c976a5a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 3 May 2011 10:40:22 -0400 Subject: Btrfs: fix how we do space reservation for truncate The ceph guys keep running into problems where we have space reserved in our orphan block rsv when freeing it up. This is because they tend to do snapshots alot, so their truncates tend to use a bunch of space, so when we go to do things like update the inode we have to steal reservation space in order to make the reservation happen. This happens because truncate can use as much space as it freaking feels like, but we still have to hold space for removing the orphan item and updating the inode, which will definitely always happen. So in order to fix this we need to split all of the reservation stuf up. So with this patch we have 1) The orphan block reserve which only holds the space for deleting our orphan item when everything is over. 2) The truncate block reserve which gets allocated and used specifically for the space that the truncate will use on a per truncate basis. 3) The transaction will always have 1 item's worth of data reserved so we can update the inode normally. Hopefully this will make the ceph problem go away. Thanks, Signed-off-by: Josef Bacik diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 522a39b..f31aed7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2224,6 +2224,9 @@ int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, void btrfs_block_rsv_release(struct btrfs_root *root, struct btrfs_block_rsv *block_rsv, u64 num_bytes); +int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_block_rsv *rsv); int btrfs_set_block_group_ro(struct btrfs_root *root, struct btrfs_block_group_cache *cache); int btrfs_set_block_group_rw(struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index ca59965..a2ca561 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3980,6 +3980,37 @@ static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items) 3 * num_items; } +int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_block_rsv *rsv) +{ + struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv; + u64 num_bytes; + int ret; + + /* + * Truncate should be freeing data, but give us 2 items just in case it + * needs to use some space. We may want to be smarter about this in the + * future. + */ + num_bytes = calc_trans_metadata_size(root, 2); + + /* We already have enough bytes, just return */ + if (rsv->reserved >= num_bytes) + return 0; + + num_bytes -= rsv->reserved; + + /* + * You should have reserved enough space before hand to do this, so this + * should not fail. + */ + ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes); + BUG_ON(ret); + + return 0; +} + int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, struct btrfs_root *root, int num_items) @@ -4020,23 +4051,18 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; /* - * one for deleting orphan item, one for updating inode and - * two for calling btrfs_truncate_inode_items. - * - * btrfs_truncate_inode_items is a delete operation, it frees - * more space than it uses in most cases. So two units of - * metadata space should be enough for calling it many times. - * If all of the metadata space is used, we can commit - * transaction and use space it freed. + * We need to hold space in order to delete our orphan item once we've + * added it, so this takes the reservation so we can release it later + * when we are truly done with the orphan item. */ - u64 num_bytes = calc_trans_metadata_size(root, 4); + u64 num_bytes = calc_trans_metadata_size(root, 1); return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); } void btrfs_orphan_release_metadata(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; - u64 num_bytes = calc_trans_metadata_size(root, 4); + u64 num_bytes = calc_trans_metadata_size(root, 1); btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e47bdf0..bc12ba2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6591,6 +6591,7 @@ out: static int btrfs_truncate(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_block_rsv *rsv; int ret; int err = 0; struct btrfs_trans_handle *trans; @@ -6604,28 +6605,83 @@ static int btrfs_truncate(struct inode *inode) btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); btrfs_ordered_update_i_size(inode, inode->i_size, NULL); - trans = btrfs_start_transaction(root, 5); - if (IS_ERR(trans)) - return PTR_ERR(trans); + /* + * Yes ladies and gentelment, this is indeed ugly. The fact is we have + * 3 things going on here + * + * 1) We need to reserve space for our orphan item and the space to + * delete our orphan item. Lord knows we don't want to have a dangling + * orphan item because we didn't reserve space to remove it. + * + * 2) We need to reserve space to update our inode. + * + * 3) We need to have something to cache all the space that is going to + * be free'd up by the truncate operation, but also have some slack + * space reserved in case it uses space during the truncate (thank you + * very much snapshotting). + * + * And we need these to all be seperate. The fact is we can use alot of + * space doing the truncate, and we have no earthly idea how much space + * we will use, so we need the truncate reservation to be seperate so it + * doesn't end up using space reserved for updating the inode or + * removing the orphan item. We also need to be able to stop the + * transaction and start a new one, which means we need to be able to + * update the inode several times, and we have no idea of knowing how + * many times that will be, so we can't just reserve 1 item for the + * entirety of the opration, so that has to be done seperately as well. + * Then there is the orphan item, which does indeed need to be held on + * to for the whole operation, and we need nobody to touch this reserved + * space except the orphan code. + * + * So that leaves us with + * + * 1) root->orphan_block_rsv - for the orphan deletion. + * 2) rsv - for the truncate reservation, which we will steal from the + * transaction reservation. + * 3) fs_info->trans_block_rsv - this will have 1 items worth left for + * updating the inode. + */ + rsv = btrfs_alloc_block_rsv(root); + if (!rsv) + return -ENOMEM; + btrfs_add_durable_block_rsv(root->fs_info, rsv); + + trans = btrfs_start_transaction(root, 4); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out; + } btrfs_set_trans_block_group(trans, inode); + /* + * Reserve space for the truncate process. Truncate should be adding + * space, but if there are snapshots it may end up using space. + */ + ret = btrfs_truncate_reserve_metadata(trans, root, rsv); + BUG_ON(ret); + ret = btrfs_orphan_add(trans, inode); if (ret) { btrfs_end_transaction(trans, root); - return ret; + goto out; } nr = trans->blocks_used; btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); - /* Now start a transaction for the truncate */ - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); + /* + * Ok so we've already migrated our bytes over for the truncate, so here + * just reserve the one slot we need for updating the inode. + */ + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out; + } btrfs_set_trans_block_group(trans, inode); - trans->block_rsv = root->orphan_block_rsv; + trans->block_rsv = rsv; /* * setattr is responsible for setting the ordered_data_close flag, @@ -6649,24 +6705,18 @@ static int btrfs_truncate(struct inode *inode) while (1) { if (!trans) { - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, inode); - trans->block_rsv = root->orphan_block_rsv; - } + trans = btrfs_start_transaction(root, 3); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out; + } - ret = btrfs_block_rsv_check(trans, root, - root->orphan_block_rsv, 0, 5); - if (ret == -EAGAIN) { - ret = btrfs_commit_transaction(trans, root); - if (ret) - return ret; - trans = NULL; - continue; - } else if (ret) { - err = ret; - break; + ret = btrfs_truncate_reserve_metadata(trans, root, + rsv); + BUG_ON(ret); + + btrfs_set_trans_block_group(trans, inode); + trans->block_rsv = rsv; } ret = btrfs_truncate_inode_items(trans, root, inode, @@ -6677,6 +6727,7 @@ static int btrfs_truncate(struct inode *inode) break; } + trans->block_rsv = &root->fs_info->trans_block_rsv; ret = btrfs_update_inode(trans, root, inode); if (ret) { err = ret; @@ -6690,6 +6741,7 @@ static int btrfs_truncate(struct inode *inode) } if (ret == 0 && inode->i_nlink > 0) { + trans->block_rsv = root->orphan_block_rsv; ret = btrfs_orphan_del(trans, inode); if (ret) err = ret; @@ -6701,15 +6753,20 @@ static int btrfs_truncate(struct inode *inode) ret = btrfs_orphan_del(NULL, inode); } + trans->block_rsv = &root->fs_info->trans_block_rsv; ret = btrfs_update_inode(trans, root, inode); if (ret && !err) err = ret; nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); + btrfs_btree_balance_dirty(root, nr); + +out: + btrfs_free_block_rsv(root, rsv); + if (ret && !err) err = ret; - btrfs_btree_balance_dirty(root, nr); return err; } -- cgit v0.10.2 From af60bed24eb0e3b6d93eaa6bb395a5721e6c09a8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 4 May 2011 11:11:17 -0400 Subject: Btrfs: set range_start to the right start in count_range_bits In count_range_bits we are adjusting total_bytes based on the range we are searching for, but we don't adjust the range start according to the range we are searching for, which makes for weird results. For example, if the range [0-8192] is set DELALLOC, but I search for 4096-8192, I will get back 4096 for the number of bytes found, but the range_start will be 0, which makes it look like the range is [0-4096]. So instead set range_start = max(cur_start, state->start). This makes everything come out right. Thanks, Signed-off-by: Josef Bacik diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ba41da5..b5f6f22 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1480,7 +1480,7 @@ u64 count_range_bits(struct extent_io_tree *tree, if (total_bytes >= max_bytes) break; if (!found) { - *start = state->start; + *start = max(cur_start, state->start); found = 1; } last = state->end; -- cgit v0.10.2 From cb25c2ea6a79702ab7895b873c6c43e0d3bc3c72 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 11 May 2011 12:17:34 -0400 Subject: Btrfs: map the node block when looking for readahead targets If we have particularly full nodes, we could call btrfs_node_blockptr up to 32 times, which is 32 pairs of kmap/kunmap, which _sucks_. So go ahead and map the extent buffer while we look for readahead targets. Thanks, Signed-off-by: Josef Bacik diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 84d7ca1..009bcf7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1229,6 +1229,7 @@ static void reada_for_search(struct btrfs_root *root, u64 search; u64 target; u64 nread = 0; + u64 gen; int direction = path->reada; struct extent_buffer *eb; u32 nr; @@ -1256,6 +1257,15 @@ static void reada_for_search(struct btrfs_root *root, nritems = btrfs_header_nritems(node); nr = slot; while (1) { + if (!node->map_token) { + unsigned long offset = btrfs_node_key_ptr_offset(nr); + map_private_extent_buffer(node, offset, + sizeof(struct btrfs_key_ptr), + &node->map_token, + &node->kaddr, + &node->map_start, + &node->map_len, KM_USER1); + } if (direction < 0) { if (nr == 0) break; @@ -1273,14 +1283,23 @@ static void reada_for_search(struct btrfs_root *root, search = btrfs_node_blockptr(node, nr); if ((search <= target && target - search <= 65536) || (search > target && search - target <= 65536)) { - readahead_tree_block(root, search, blocksize, - btrfs_node_ptr_generation(node, nr)); + gen = btrfs_node_ptr_generation(node, nr); + if (node->map_token) { + unmap_extent_buffer(node, node->map_token, + KM_USER1); + node->map_token = NULL; + } + readahead_tree_block(root, search, blocksize, gen); nread += blocksize; } nscan++; if ((nread > 65536 || nscan > 32)) break; } + if (node->map_token) { + unmap_extent_buffer(node, node->map_token, KM_USER1); + node->map_token = NULL; + } } /* -- cgit v0.10.2 From 7e2355ba1a11649f0b212a29fdb9f47476f1248e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 11 May 2011 12:25:37 -0400 Subject: Btrfs: don't look at the extent buffer level 3 times in a row We have a bit of debugging in btrfs_search_slot to make sure the level of the cow block is the same as the original block we were cow'ing. I don't think I've ever seen this tripped, so kill it. This saves us 2 kmap's per level in our search. Thanks, Signed-off-by: Josef Bacik diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 009bcf7..f7a0a64 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1672,9 +1672,6 @@ again: } cow_done: BUG_ON(!cow && ins_len); - if (level != btrfs_header_level(b)) - WARN_ON(1); - level = btrfs_header_level(b); p->nodes[level] = b; if (!p->skip_locking) -- cgit v0.10.2 From d82a6f1d7e8b61ed5996334d0db66651bb43641d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 11 May 2011 15:26:06 -0400 Subject: Btrfs: kill BTRFS_I(inode)->block_group Originally this was going to be used as a way to give hints to the allocator, but frankly we can get much better hints elsewhere and it's not even used at all for anything usefull. In addition to be completely useless, when we initialize an inode we try and find a freeish block group to set as the inodes block group, and with a completely full 40gb fs this takes _forever_, so I imagine with say 1tb fs this is just unbearable. So just axe the thing altoghether, we don't need it and it saves us 8 bytes in the inode and saves us 500 microseconds per inode lookup in my testcase. Thanks, Signed-off-by: Josef Bacik diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 57c3bb2..4bc852d 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -120,9 +120,6 @@ struct btrfs_inode { */ u64 index_cnt; - /* the start of block group preferred for allocations. */ - u64 block_group; - /* the fsync log has some corner cases that mean we have to check * directories to see if any unlinks have been done before * the directory was logged. See tree-log.c for all the diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f31aed7..0f8c489 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2512,8 +2512,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc); int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, - struct btrfs_root *new_root, - u64 new_dirid, u64 alloc_hint); + struct btrfs_root *new_root, u64 new_dirid); int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a2ca561..9f0a4e3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5319,6 +5319,7 @@ checks: btrfs_add_free_space(block_group, offset, search_start - offset); BUG_ON(offset > search_start); + btrfs_put_block_group(block_group); break; loop: failed_cluster_refill = false; @@ -5411,14 +5412,7 @@ loop: ret = -ENOSPC; } else if (!ins->objectid) { ret = -ENOSPC; - } - - /* we found what we needed */ - if (ins->objectid) { - if (!(data & BTRFS_BLOCK_GROUP_DATA)) - trans->block_group = block_group->key.objectid; - - btrfs_put_block_group(block_group); + } else if (ins->objectid) { ret = 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bc12ba2..dd5938a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -136,7 +136,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, return -ENOMEM; path->leave_spinning = 1; - btrfs_set_trans_block_group(trans, inode); key.objectid = inode->i_ino; key.offset = start; @@ -422,7 +421,6 @@ again: if (start == 0) { trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; /* lets try to make an inline extent */ @@ -781,7 +779,6 @@ static noinline int cow_file_range(struct inode *inode, BUG_ON(root == root->fs_info->tree_root); trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; num_bytes = (end - start + blocksize) & ~(blocksize - 1); @@ -1502,8 +1499,6 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, { struct btrfs_ordered_sum *sum; - btrfs_set_trans_block_group(trans, inode); - list_for_each_entry(sum, list, list) { btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root->fs_info->csum_root, sum); @@ -1722,7 +1717,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) else trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); @@ -1739,7 +1733,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) else trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = &root->fs_info->delalloc_block_rsv; if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) @@ -2495,7 +2488,6 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; int maybe_acls; - u64 alloc_group_block; u32 rdev; int ret; @@ -2539,8 +2531,6 @@ static void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->index_cnt = (u64)-1; BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); - alloc_group_block = btrfs_inode_block_group(leaf, inode_item); - /* * try to precache a NULL acl entry for files that don't have * any xattrs or acls @@ -2549,8 +2539,6 @@ static void btrfs_read_locked_inode(struct inode *inode) if (!maybe_acls) cache_no_acl(inode); - BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, - alloc_group_block, 0); btrfs_free_path(path); inode_item = NULL; @@ -2630,7 +2618,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_inode_transid(leaf, item, trans->transid); btrfs_set_inode_rdev(leaf, item, inode->i_rdev); btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); - btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); + btrfs_set_inode_block_group(leaf, item, 0); if (leaf->map_token) { unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); @@ -2971,8 +2959,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); - btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, @@ -3068,8 +3054,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); - if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { err = btrfs_unlink_subvol(trans, root, dir, BTRFS_I(inode)->location.objectid, @@ -3649,7 +3633,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) err = PTR_ERR(trans); break; } - btrfs_set_trans_block_group(trans, inode); err = btrfs_drop_extents(trans, inode, cur_offset, cur_offset + hole_size, @@ -3785,7 +3768,6 @@ void btrfs_evict_inode(struct inode *inode) while (1) { trans = btrfs_start_transaction(root, 0); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = root->orphan_block_rsv; ret = btrfs_block_rsv_check(trans, root, @@ -4383,7 +4365,6 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, inode); if (nolock) ret = btrfs_end_transaction_nolock(trans, root); else @@ -4409,7 +4390,6 @@ void btrfs_dirty_inode(struct inode *inode) trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); - btrfs_set_trans_block_group(trans, inode); ret = btrfs_update_inode(trans, root, inode); if (ret && ret == -ENOSPC) { @@ -4424,7 +4404,6 @@ void btrfs_dirty_inode(struct inode *inode) } return; } - btrfs_set_trans_block_group(trans, inode); ret = btrfs_update_inode(trans, root, inode); if (ret) { @@ -4519,8 +4498,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, const char *name, int name_len, - u64 ref_objectid, u64 objectid, - u64 alloc_hint, int mode, u64 *index) + u64 ref_objectid, u64 objectid, int mode, + u64 *index) { struct inode *inode; struct btrfs_inode_item *inode_item; @@ -4567,8 +4546,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, owner = 0; else owner = 1; - BTRFS_I(inode)->block_group = - btrfs_find_block_group(root, 0, alloc_hint, owner); key[0].objectid = objectid; btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); @@ -4729,11 +4706,9 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); - inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, - BTRFS_I(dir)->block_group, mode, &index); + mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_unlock; @@ -4745,7 +4720,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, goto out_unlock; } - btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; @@ -4754,8 +4728,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, inode->i_mode, rdev); btrfs_update_inode(trans, root, inode); } - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); @@ -4791,11 +4763,9 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); - inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, - BTRFS_I(dir)->block_group, mode, &index); + mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_unlock; @@ -4807,7 +4777,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; } - btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; @@ -4818,8 +4787,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); @@ -4866,8 +4833,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, btrfs_inc_nlink(inode); inode->i_ctime = CURRENT_TIME; - - btrfs_set_trans_block_group(trans, dir); ihold(inode); err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); @@ -4876,7 +4841,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, drop_inode = 1; } else { struct dentry *parent = dget_parent(dentry); - btrfs_update_inode_block_group(trans, dir); err = btrfs_update_inode(trans, root, inode); BUG_ON(err); btrfs_log_new_name(trans, inode, NULL, parent); @@ -4917,12 +4881,10 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) trans = btrfs_start_transaction(root, 5); if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, - BTRFS_I(dir)->block_group, S_IFDIR | mode, - &index); + S_IFDIR | mode, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fail; @@ -4936,7 +4898,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; - btrfs_set_trans_block_group(trans, inode); btrfs_i_size_write(inode, 0); err = btrfs_update_inode(trans, root, inode); @@ -4950,8 +4911,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_instantiate(dentry, inode); drop_on_err = 0; - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); out_fail: nr = trans->blocks_used; @@ -6652,8 +6611,6 @@ static int btrfs_truncate(struct inode *inode) goto out; } - btrfs_set_trans_block_group(trans, inode); - /* * Reserve space for the truncate process. Truncate should be adding * space, but if there are snapshots it may end up using space. @@ -6680,7 +6637,6 @@ static int btrfs_truncate(struct inode *inode) err = PTR_ERR(trans); goto out; } - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = rsv; /* @@ -6715,7 +6671,6 @@ static int btrfs_truncate(struct inode *inode) rsv); BUG_ON(ret); - btrfs_set_trans_block_group(trans, inode); trans->block_rsv = rsv; } @@ -6775,15 +6730,14 @@ out: * create a new subvolume directory/inode (helper for the ioctl). */ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, - struct btrfs_root *new_root, - u64 new_dirid, u64 alloc_hint) + struct btrfs_root *new_root, u64 new_dirid) { struct inode *inode; int err; u64 index = 0; inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, - new_dirid, alloc_hint, S_IFDIR | 0700, &index); + new_dirid, S_IFDIR | 0700, &index); if (IS_ERR(inode)) return PTR_ERR(inode); inode->i_op = &btrfs_dir_inode_operations; @@ -6893,21 +6847,6 @@ void btrfs_destroy_inode(struct inode *inode) spin_unlock(&root->fs_info->ordered_extent_lock); } - if (root == root->fs_info->tree_root) { - struct btrfs_block_group_cache *block_group; - - block_group = btrfs_lookup_block_group(root->fs_info, - BTRFS_I(inode)->block_group); - if (block_group && block_group->inode == inode) { - spin_lock(&block_group->lock); - block_group->inode = NULL; - spin_unlock(&block_group->lock); - btrfs_put_block_group(block_group); - } else if (block_group) { - btrfs_put_block_group(block_group); - } - } - spin_lock(&root->orphan_lock); if (!list_empty(&BTRFS_I(inode)->i_orphan)) { printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", @@ -7091,8 +7030,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_notrans; } - btrfs_set_trans_block_group(trans, new_dir); - if (dest != root) btrfs_record_root_in_trans(trans, dest); @@ -7331,12 +7268,9 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, dir); - inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dir->i_ino, objectid, - BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, - &index); + S_IFLNK|S_IRWXUGO, &index); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_unlock; @@ -7348,7 +7282,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock; } - btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; @@ -7359,8 +7292,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); if (drop_inode) goto out_unlock; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a578620..8e90ccf 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -413,8 +413,7 @@ static noinline int create_subvol(struct btrfs_root *root, btrfs_record_root_in_trans(trans, new_root); - ret = btrfs_create_subvol_root(trans, new_root, new_dirid, - BTRFS_I(dir)->block_group); + ret = btrfs_create_subvol_root(trans, new_root, new_dirid); /* * insert the directory item */ diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 43816f8..f4ea695 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -241,7 +241,6 @@ again: h->transid = cur_trans->transid; h->transaction = cur_trans; h->blocks_used = 0; - h->block_group = 0; h->bytes_reserved = 0; h->delayed_ref_updates = 0; h->use_count = 1; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 11c6efc..da7289e 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -47,7 +47,6 @@ struct btrfs_transaction { struct btrfs_trans_handle { u64 transid; - u64 block_group; u64 bytes_reserved; unsigned long use_count; unsigned long blocks_reserved; @@ -70,19 +69,6 @@ struct btrfs_pending_snapshot { struct list_head list; }; -static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, - struct inode *inode) -{ - trans->block_group = BTRFS_I(inode)->block_group; -} - -static inline void btrfs_update_inode_block_group( - struct btrfs_trans_handle *trans, - struct inode *inode) -{ - BTRFS_I(inode)->block_group = trans->block_group; -} - static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, struct inode *inode) { diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index cfd6605..72ab029 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -158,8 +158,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, inode); - ret = do_setxattr(trans, inode, name, value, size, flags); if (ret) goto out; -- cgit v0.10.2 From 589d8ade83f07c0f11c8191c0ca309f34d7a2c14 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 11 May 2011 17:30:53 -0400 Subject: Btrfs: try not to sleep as much when doing slow caching When the fs is super full and we unmount the fs, we could get stuck in this thing where unmount is waiting for the caching kthread to make progress and the caching kthread keeps scheduling because we're in the middle of a commit. So instead just let the caching kthread keep going and only yeild if need_resched(). This makes my horrible umount case go from taking up to 10 minutes to taking less than 20 seconds. Thanks, Signed-off-by: Josef Bacik diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9f0a4e3..96be624 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -378,15 +378,18 @@ again: if (ret) break; - caching_ctl->progress = last; - btrfs_release_path(extent_root, path); - up_read(&fs_info->extent_commit_sem); - mutex_unlock(&caching_ctl->mutex); - if (btrfs_transaction_in_commit(fs_info)) - schedule_timeout(1); - else + if (need_resched() || + btrfs_next_leaf(extent_root, path)) { + caching_ctl->progress = last; + btrfs_release_path(extent_root, path); + up_read(&fs_info->extent_commit_sem); + mutex_unlock(&caching_ctl->mutex); cond_resched(); - goto again; + goto again; + } + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + continue; } if (key.objectid < block_group->key.objectid) { -- cgit v0.10.2 From 026fd317828500524cdc7e5ff9e8e7923abb2868 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 May 2011 10:32:11 -0400 Subject: Btrfs: don't always do readahead Our readahead is sort of sloppy, and really isn't always needed. For example if ls is doing a stating ls (which is the default) it's going to stat in non-disk order, so if say you have a directory with a stupid amount of files, readahead is going to do nothing but waste time in the case of doing the stat. Taking the unconditional readahead out made my test go from 57 minutes to 36 minutes. This means that everywhere we do loop through the tree we want to make sure we do set path->reada properly, so I went through and found all of the places where we loop through the path and set reada to 1. Thanks, Signed-off-by: Josef Bacik diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f7a0a64..f61c16c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -48,8 +48,6 @@ struct btrfs_path *btrfs_alloc_path(void) { struct btrfs_path *path; path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); - if (path) - path->reada = 1; return path; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 96be624..1ba2cc5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -347,7 +347,7 @@ static int caching_kthread(void *data) */ path->skip_locking = 1; path->search_commit_root = 1; - path->reada = 2; + path->reada = 1; key.objectid = last; key.offset = 0; @@ -8556,6 +8556,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); if (cache_gen != 0 && diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dd5938a..6228a30 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4242,7 +4242,9 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, filp->f_pos = 2; } path = btrfs_alloc_path(); - path->reada = 2; + if (!path) + return -ENOMEM; + path->reada = 1; btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; @@ -5043,7 +5045,15 @@ again: if (!path) { path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + err = -ENOMEM; + goto out; + } + /* + * Chances are we'll be called again, so go ahead and do + * readahead + */ + path->reada = 1; } ret = btrfs_lookup_file_extent(trans, root, path, diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 09c30d3..5872b41 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -676,6 +676,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, err = -ENOMEM; goto out; } + path1->reada = 1; + path2->reada = 2; node = alloc_backref_node(cache); if (!node) { @@ -1996,6 +1998,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; reloc_root = root->reloc_root; root_item = &reloc_root->root_item; @@ -3297,6 +3300,7 @@ static int find_data_references(struct reloc_control *rc, path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; root = read_fs_root(rc->extent_root->fs_info, ref_root); if (IS_ERR(root)) { @@ -3665,6 +3669,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; ret = prepare_to_relocate(rc); if (ret) { @@ -4090,6 +4095,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = -1; key.objectid = BTRFS_TREE_RELOC_OBJECTID; key.type = BTRFS_ROOT_ITEM_KEY; -- cgit v0.10.2 From cca1c81f43e26ab60c0d1090fb90992358d69bdf Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 May 2011 11:07:12 -0400 Subject: Btrfs: don't try to allocate from a block group that doesn't have enough space If we have a very large filesystem, we can spend a lot of time in find_free_extent just trying to allocate from empty block groups. So instead check to see if the block group even has enough space for the allocation, and if not go on to the next block group. Signed-off-by: Josef Bacik diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1ba2cc5..c8c3184 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5159,6 +5159,14 @@ have_block_group: if (unlikely(block_group->ro)) goto loop; + spin_lock(&block_group->tree_lock); + if (cached && + block_group->free_space < num_bytes + empty_size) { + spin_unlock(&block_group->tree_lock); + goto loop; + } + spin_unlock(&block_group->tree_lock); + /* * Ok we want to try and use the cluster allocator, so lets look * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will -- cgit v0.10.2 From 207dde8289d9b005b665cb9d8d2bb9464256101d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 13 May 2011 14:49:23 -0400 Subject: Btrfs: check for duplicate entries in the free space cache If there are duplicate entries in the free space cache, discard the entire cache and load it the old fashioned way. Thanks, Signed-off-by: Josef Bacik diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 63731a1..d634a7e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -420,7 +420,14 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, spin_lock(&block_group->tree_lock); ret = link_free_space(block_group, e); spin_unlock(&block_group->tree_lock); - BUG_ON(ret); + if (ret) { + printk(KERN_ERR "Duplicate entries in " + "free space cache, dumping\n"); + kunmap(page); + unlock_page(page); + page_cache_release(page); + goto free_cache; + } } else { e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); if (!e->bitmap) { @@ -437,6 +444,14 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, recalculate_thresholds(block_group); spin_unlock(&block_group->tree_lock); list_add_tail(&e->list, &bitmaps); + if (ret) { + printk(KERN_ERR "Duplicate entries in " + "free space cache, dumping\n"); + kunmap(page); + unlock_page(page); + page_cache_release(page); + goto free_cache; + } } num_entries--; @@ -909,10 +924,16 @@ static int tree_insert_offset(struct rb_root *root, u64 offset, * logically. */ if (bitmap) { - WARN_ON(info->bitmap); + if (info->bitmap) { + WARN_ON_ONCE(1); + return -EEXIST; + } p = &(*p)->rb_right; } else { - WARN_ON(!info->bitmap); + if (!info->bitmap) { + WARN_ON_ONCE(1); + return -EEXIST; + } p = &(*p)->rb_left; } } -- cgit v0.10.2 From d90c732122a1f6d0efe388a8a204f67f144b2eb3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 17 May 2011 09:50:54 -0400 Subject: Btrfs: leave spinning on lookup and map the leaf On lookup we only want to read the inode item, so leave the path spinning. Also we're just wholesale reading the leaf off, so map the leaf so we don't do a bunch of kmap/kunmaps. Thanks, Signed-off-by: Josef Bacik diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6228a30..dc8fb2b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2493,6 +2493,7 @@ static void btrfs_read_locked_inode(struct inode *inode) path = btrfs_alloc_path(); BUG_ON(!path); + path->leave_spinning = 1; memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); @@ -2502,6 +2503,12 @@ static void btrfs_read_locked_inode(struct inode *inode) leaf = path->nodes[0]; inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); + if (!leaf->map_token) + map_private_extent_buffer(leaf, (unsigned long)inode_item, + sizeof(struct btrfs_inode_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); inode->i_mode = btrfs_inode_mode(leaf, inode_item); inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); @@ -2539,6 +2546,11 @@ static void btrfs_read_locked_inode(struct inode *inode) if (!maybe_acls) cache_no_acl(inode); + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } + btrfs_free_path(path); inode_item = NULL; -- cgit v0.10.2 From a3793a0d86e2d65dda6beb86fa295be25b238159 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Tue, 22 Feb 2011 09:57:44 +0000 Subject: sh: switch ap325rxa to dynamically manage the platform camera Use soc_camera_platform helper functions to dynamically manage the camera device. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c index 618bd56..969421f 100644 --- a/arch/sh/boards/mach-ap325rxa/setup.c +++ b/arch/sh/boards/mach-ap325rxa/setup.c @@ -359,37 +359,31 @@ static struct soc_camera_link camera_link = { .priv = &camera_info, }; -static void dummy_release(struct device *dev) +static struct platform_device *camera_device; + +static void ap325rxa_camera_release(struct device *dev) { + soc_camera_platform_release(&camera_device); } -static struct platform_device camera_device = { - .name = "soc_camera_platform", - .dev = { - .platform_data = &camera_info, - .release = dummy_release, - }, -}; - static int ap325rxa_camera_add(struct soc_camera_link *icl, struct device *dev) { - if (icl != &camera_link || camera_probe() <= 0) - return -ENODEV; + int ret = soc_camera_platform_add(icl, dev, &camera_device, &camera_link, + ap325rxa_camera_release, 0); + if (ret < 0) + return ret; - camera_info.dev = dev; + ret = camera_probe(); + if (ret < 0) + soc_camera_platform_del(icl, camera_device, &camera_link); - return platform_device_register(&camera_device); + return ret; } static void ap325rxa_camera_del(struct soc_camera_link *icl) { - if (icl != &camera_link) - return; - - platform_device_unregister(&camera_device); - memset(&camera_device.dev.kobj, 0, - sizeof(camera_device.dev.kobj)); + soc_camera_platform_del(icl, camera_device, &camera_link); } #endif /* CONFIG_I2C */ -- cgit v0.10.2 From 52b96c2592e8fdb93231c9644b68112518916a57 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 15 Apr 2011 18:30:55 +0000 Subject: sh: add MMCIF runtime PM support on ecovec Signed-off-by: Guennadi Liakhovetski Cc: Simon Horman Cc: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index bb13d0e..3a32741 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -885,6 +885,9 @@ static struct platform_device sh_mmcif_device = { }, .num_resources = ARRAY_SIZE(sh_mmcif_resources), .resource = sh_mmcif_resources, + .archdata = { + .hwblk_id = HWBLK_MMC, + }, }; #endif -- cgit v0.10.2 From 0f0ebd980e0e8a2fd33ab3ef0d5b0cffbcddd8a1 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 24 May 2011 17:25:23 +0900 Subject: sh: arch/sh/kernel/process_32.c needs linux/prefetch.h. Trivial build fix for certain configurations that don't grab linux/prefetch.h via alternate means (specifically SH-2 and SH-3 parts). Signed-off-by: Paul Mundt diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 762a139..b473f0c 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From 4e2b1084b0fb79c963b73586815e1ef034dc2b57 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 24 May 2011 17:33:51 +0900 Subject: sh: Update shmin to reflect PIO dependency. shmin uses __set_io_port_base() for legacy I/O mapping that ethernet and other SuperI/O functions depend on. Ensure that PIO support is built in until the board is updated for MMIO properly. Signed-off-by: Paul Mundt diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index b44e377..a164c9e 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -167,7 +167,7 @@ config ARCH_HAS_CPU_IDLE_WAIT config NO_IOPORT def_bool !PCI - depends on !SH_CAYMAN && !SH_SH4202_MICRODEV + depends on !SH_CAYMAN && !SH_SH4202_MICRODEV && !SH_SHMIN config IO_TRAPPED bool -- cgit v0.10.2 From b779260b097dec295e8798277ed08ec5ecd3b2c1 Mon Sep 17 00:00:00 2001 From: Joseph Cihula Date: Tue, 3 May 2011 00:08:37 -0700 Subject: intel-iommu: fix VT-d PMR disable for TXT on S3 resume This patch is a follow on to https://lkml.org/lkml/2011/3/21/239, which was merged as commit 51a63e67da6056c13b5b597dcc9e1b3bd7ceaa55. This patch adds support for S3, as pointed out by Chris Wright. Signed-off-by: Joseph Cihula Signed-off-by: David Woodhouse diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index d552d2c..4e4e020 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -142,6 +142,12 @@ static void __init check_tylersburg_isoch(void); static int rwbf_quirk; /* + * set to 1 to panic kernel if can't successfully enable VT-d + * (used when kernel is launched w/ TXT) + */ +static int force_on = 0; + +/* * 0: Present * 1-11: Reserved * 12-63: Context Ptr (12 - (haw-1)) @@ -2217,7 +2223,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw) return 0; } -static int __init init_dmars(int force_on) +static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; struct dmar_rmrr_unit *rmrr; @@ -3117,7 +3123,17 @@ static int init_iommu_hw(void) if (iommu->qi) dmar_reenable_qi(iommu); - for_each_active_iommu(iommu, drhd) { + for_each_iommu(iommu, drhd) { + if (drhd->ignored) { + /* + * we always have to disable PMRs or DMA may fail on + * this device + */ + if (force_on) + iommu_disable_protect_mem_regions(iommu); + continue; + } + iommu_flush_write_buffer(iommu); iommu_set_root_entry(iommu); @@ -3126,7 +3142,8 @@ static int init_iommu_hw(void) DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); - iommu_enable_translation(iommu); + if (iommu_enable_translation(iommu)) + return 1; iommu_disable_protect_mem_regions(iommu); } @@ -3193,7 +3210,10 @@ static void iommu_resume(void) unsigned long flag; if (init_iommu_hw()) { - WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); + if (force_on) + panic("tboot: IOMMU setup failed, DMAR can not resume!\n"); + else + WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); return; } @@ -3270,7 +3290,6 @@ static struct notifier_block device_nb = { int __init intel_iommu_init(void) { int ret = 0; - int force_on = 0; /* VT-d is required for a TXT/tboot launch, so enforce that */ force_on = tboot_force_iommu(); @@ -3308,7 +3327,7 @@ int __init intel_iommu_init(void) init_no_remapping_devices(); - ret = init_dmars(force_on); + ret = init_dmars(); if (ret) { if (force_on) panic("tboot: Failed to initialize DMARs\n"); -- cgit v0.10.2 From b3a530e4e734cab7043a3ab7d135f539042443a7 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 15 May 2011 12:34:55 +0200 Subject: intel-iommu: Remove obsolete comment from detect_intel_iommu Since cacd4213d8, this comment no longer applies. Signed-off-by: Jan Kiszka Signed-off-by: David Woodhouse diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 12e02bf..3dc9bef 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -698,12 +698,7 @@ int __init detect_intel_iommu(void) { #ifdef CONFIG_INTR_REMAP struct acpi_table_dmar *dmar; - /* - * for now we will disable dma-remapping when interrupt - * remapping is enabled. - * When support for queued invalidation for IOTLB invalidation - * is added, we will not need this any more. - */ + dmar = (struct acpi_table_dmar *) dmar_tbl; if (ret && cpu_has_x2apic && dmar->flags & 0x1) printk(KERN_INFO -- cgit v0.10.2 From 7b668357810ecb5fdda4418689d50f5d95aea6a8 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 24 May 2011 12:02:41 +0100 Subject: intel-iommu: Flush unmaps at domain_exit We typically batch unmaps to be lazily flushed out at regular intervals. When we destroy a domain, we need to force a flush of these lazy unmaps to be sure none reference the domain we're about to free. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=35062 Signed-off-by: Alex Williamson Signed-off-by: David Woodhouse Cc: stable@kernel.org diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 4e4e020..395f253 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1422,6 +1422,10 @@ static void domain_exit(struct dmar_domain *domain) if (!domain) return; + /* Flush any lazy unmaps that may reference this domain */ + if (!intel_iommu_strict) + flush_unmaps_timeout(0); + domain_remove_dev_info(domain); /* destroy iovas */ put_iova_domain(&domain->iovad); -- cgit v0.10.2 From 1c7fcbed1adee44708dafb65ac40b186c203d7e8 Mon Sep 17 00:00:00 2001 From: Damian Date: Tue, 24 May 2011 07:01:22 +0000 Subject: sh_mobile_meram: MERAM platform data for LCDC Based on the patch by Takanari Hayama Add the necessary platform data to add MERAM functionality to LCDC Includes platform data for both the AP4EVB and mackerel Signed-off-by: Damian Hobson-Garcia Signed-off-by: Paul Mundt diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 1e35fa9..c9aad5e 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -249,6 +249,29 @@ static int slot_cn7_get_cd(struct platform_device *pdev) { return !gpio_get_value(GPIO_PORT41); } +/* MERAM */ +static struct sh_mobile_meram_info meram_info = { + .addr_mode = SH_MOBILE_MERAM_MODE1, +}; + +static struct resource meram_resources[] = { + [0] = { + .name = "MERAM", + .start = 0xe8000000, + .end = 0xe81fffff, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device meram_device = { + .name = "sh_mobile_meram", + .id = 0, + .num_resources = ARRAY_SIZE(meram_resources), + .resource = meram_resources, + .dev = { + .platform_data = &meram_info, + }, +}; /* SH_MMCIF */ static struct resource sh_mmcif_resources[] = { @@ -431,13 +454,29 @@ const static struct fb_videomode ap4evb_lcdc_modes[] = { #endif }, }; +static struct sh_mobile_meram_cfg lcd_meram_cfg = { + .icb[0] = { + .marker_icb = 28, + .cache_icb = 24, + .meram_offset = 0x0, + .meram_size = 0x40, + }, + .icb[1] = { + .marker_icb = 29, + .cache_icb = 25, + .meram_offset = 0x40, + .meram_size = 0x40, + }, +}; static struct sh_mobile_lcdc_info lcdc_info = { + .meram_dev = &meram_info, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, .lcd_cfg = ap4evb_lcdc_modes, .num_cfg = ARRAY_SIZE(ap4evb_lcdc_modes), + .meram_cfg = &lcd_meram_cfg, } }; @@ -708,15 +747,31 @@ static struct platform_device fsi_device = { static struct platform_device fsi_ak4643_device = { .name = "sh_fsi2_a_ak4643", }; +static struct sh_mobile_meram_cfg hdmi_meram_cfg = { + .icb[0] = { + .marker_icb = 30, + .cache_icb = 26, + .meram_offset = 0x80, + .meram_size = 0x100, + }, + .icb[1] = { + .marker_icb = 31, + .cache_icb = 27, + .meram_offset = 0x180, + .meram_size = 0x100, + }, +}; static struct sh_mobile_lcdc_info sh_mobile_lcdc1_info = { .clock_source = LCDC_CLK_EXTERNAL, + .meram_dev = &meram_info, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, .interface_type = RGB24, .clock_divider = 1, .flags = LCDC_FLAGS_DWPOL, + .meram_cfg = &hdmi_meram_cfg, } }; @@ -945,6 +1000,7 @@ static struct platform_device *ap4evb_devices[] __initdata = { &csi2_device, &ceu_device, &ap4evb_camera, + &meram_device, }; static void __init hdmi_init_pm_clock(void) diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index 7da2ca2..a165a9e 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -279,6 +279,30 @@ static struct platform_device smc911x_device = { }, }; +/* MERAM */ +static struct sh_mobile_meram_info mackerel_meram_info = { + .addr_mode = SH_MOBILE_MERAM_MODE1, +}; + +static struct resource meram_resources[] = { + [0] = { + .name = "MERAM", + .start = 0xe8000000, + .end = 0xe81fffff, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device meram_device = { + .name = "sh_mobile_meram", + .id = 0, + .num_resources = ARRAY_SIZE(meram_resources), + .resource = meram_resources, + .dev = { + .platform_data = &mackerel_meram_info, + }, +}; + /* LCDC */ static struct fb_videomode mackerel_lcdc_modes[] = { { @@ -307,7 +331,23 @@ static int mackerel_get_brightness(void *board_data) return gpio_get_value(GPIO_PORT31); } +static struct sh_mobile_meram_cfg lcd_meram_cfg = { + .icb[0] = { + .marker_icb = 28, + .cache_icb = 24, + .meram_offset = 0x0, + .meram_size = 0x40, + }, + .icb[1] = { + .marker_icb = 29, + .cache_icb = 25, + .meram_offset = 0x40, + .meram_size = 0x40, + }, +}; + static struct sh_mobile_lcdc_info lcdc_info = { + .meram_dev = &mackerel_meram_info, .clock_source = LCDC_CLK_BUS, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, @@ -327,6 +367,7 @@ static struct sh_mobile_lcdc_info lcdc_info = { .name = "sh_mobile_lcdc_bl", .max_brightness = 1, }, + .meram_cfg = &lcd_meram_cfg, } }; @@ -353,8 +394,23 @@ static struct platform_device lcdc_device = { }, }; +static struct sh_mobile_meram_cfg hdmi_meram_cfg = { + .icb[0] = { + .marker_icb = 30, + .cache_icb = 26, + .meram_offset = 0x80, + .meram_size = 0x100, + }, + .icb[1] = { + .marker_icb = 31, + .cache_icb = 27, + .meram_offset = 0x180, + .meram_size = 0x100, + }, +}; /* HDMI */ static struct sh_mobile_lcdc_info hdmi_lcdc_info = { + .meram_dev = &mackerel_meram_info, .clock_source = LCDC_CLK_EXTERNAL, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, @@ -362,6 +418,7 @@ static struct sh_mobile_lcdc_info hdmi_lcdc_info = { .interface_type = RGB24, .clock_divider = 1, .flags = LCDC_FLAGS_DWPOL, + .meram_cfg = &hdmi_meram_cfg, } }; @@ -949,6 +1006,7 @@ static struct platform_device *mackerel_devices[] __initdata = { &mackerel_camera, &hdmi_lcdc_device, &hdmi_device, + &meram_device, }; /* Keypad Initialization */ -- cgit v0.10.2 From 54525552c6ccfd867e819845da14be994e303218 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Tue, 24 May 2011 10:23:59 +0000 Subject: sh: mark DMA slave ID 0 as invalid This makes it possible to leave DMA slave IDs in the platform data at default 0 value without hitting DMA channel allocation error paths. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt diff --git a/arch/sh/include/cpu-sh4/cpu/sh7722.h b/arch/sh/include/cpu-sh4/cpu/sh7722.h index 7a5b8a3..bd06227 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7722.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7722.h @@ -236,6 +236,7 @@ enum { }; enum { + SHDMA_SLAVE_INVALID, SHDMA_SLAVE_SCIF0_TX, SHDMA_SLAVE_SCIF0_RX, SHDMA_SLAVE_SCIF1_TX, diff --git a/arch/sh/include/cpu-sh4/cpu/sh7724.h b/arch/sh/include/cpu-sh4/cpu/sh7724.h index 7eb4359..3daef8e 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7724.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7724.h @@ -285,6 +285,7 @@ enum { }; enum { + SHDMA_SLAVE_INVALID, SHDMA_SLAVE_SCIF0_TX, SHDMA_SLAVE_SCIF0_RX, SHDMA_SLAVE_SCIF1_TX, diff --git a/arch/sh/include/cpu-sh4/cpu/sh7757.h b/arch/sh/include/cpu-sh4/cpu/sh7757.h index 05b8196..41f9f8b 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7757.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7757.h @@ -252,6 +252,7 @@ enum { }; enum { + SHDMA_SLAVE_INVALID, SHDMA_SLAVE_SDHI_TX, SHDMA_SLAVE_SDHI_RX, SHDMA_SLAVE_MMCIF_TX, -- cgit v0.10.2 From 4bff4a7ee4b9e487a1bc1d31082e77b6a381418a Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 11 May 2011 16:51:20 +0000 Subject: ARM: arch-shmobile: support SDHI card detection on mackerel, using a GPIO On sh7372 the card-detection pin of SDHI0 can also produce interrupts, when configured as GPIO. Use this feature to power down SDHI0, when no card is plugged in. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index d4fe740..776f205 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -913,6 +914,17 @@ static int slot_cn7_get_cd(struct platform_device *pdev) } /* SDHI0 */ +static irqreturn_t mackerel_sdhi0_gpio_cd(int irq, void *arg) +{ + struct device *dev = arg; + struct sh_mobile_sdhi_info *info = dev->platform_data; + struct tmio_mmc_data *pdata = info->pdata; + + tmio_mmc_cd_wakeup(pdata); + + return IRQ_HANDLED; +} + static struct sh_mobile_sdhi_info sdhi0_info = { .dma_slave_tx = SHDMA_SLAVE_SDHI0_TX, .dma_slave_rx = SHDMA_SLAVE_SDHI0_RX, @@ -1296,6 +1308,7 @@ static void __init mackerel_init(void) { u32 srcr4; struct clk *clk; + int ret; sh7372_pinmux_init(); @@ -1401,6 +1414,13 @@ static void __init mackerel_init(void) gpio_request(GPIO_FN_SDHID0_1, NULL); gpio_request(GPIO_FN_SDHID0_0, NULL); + ret = request_irq(evt2irq(0x3340), mackerel_sdhi0_gpio_cd, + IRQF_TRIGGER_FALLING, "sdhi0 cd", &sdhi0_device.dev); + if (!ret) + sdhi0_info.tmio_flags |= TMIO_MMC_HAS_COLD_CD; + else + pr_err("Cannot get IRQ #%d: %d\n", evt2irq(0x3340), ret); + #if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE) /* enable SDHI1 */ gpio_request(GPIO_FN_SDHICMD1, NULL); -- cgit v0.10.2 From 21bc7af6e5e684b44725b20f679e701e38ceef15 Mon Sep 17 00:00:00 2001 From: Yogesh Ashok Powar Date: Wed, 18 May 2011 12:02:03 -0700 Subject: mwifiex: correct event header length While decoding received event packet from firmware, 4 bytes of interface header are already removed unconditionally. So for handling event only 4 more bytes needs to be pulled. This is achieved by changing event header length to 4. Almost all the events, except BA stream related and AMSDU aggregation control events, do not have the payload in their event skb. Such events handling depends only on the event ID. This event ID is the first four bytes of the event skb, which is copied to a separate variable before pulling the skb header. Hence event handling worked only for those events that didn't have payload in event skb. This patch fixes the broken event path of the events with payload in their event skb without harming existing working event path for the events without payload. Signed-off-by: Yogesh Ashok Powar Signed-off-by: Kiran Divekar Signed-off-by: Bing Zhao Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/mwifiex/sdio.h b/drivers/net/wireless/mwifiex/sdio.h index a0e9bc5..4e97e90 100644 --- a/drivers/net/wireless/mwifiex/sdio.h +++ b/drivers/net/wireless/mwifiex/sdio.h @@ -167,8 +167,8 @@ /* Rx unit register */ #define CARD_RX_UNIT_REG 0x63 -/* Event header Len*/ -#define MWIFIEX_EVENT_HEADER_LEN 8 +/* Event header len w/o 4 bytes of interface header */ +#define MWIFIEX_EVENT_HEADER_LEN 4 /* Max retry number of CMD53 write */ #define MAX_WRITE_IOMEM_RETRY 2 -- cgit v0.10.2 From 208c72f4fe44fe09577e7975ba0e7fa0278f3d03 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Thu, 19 May 2011 00:43:38 +0300 Subject: nl80211: fix check for valid SSID size in scan operations In both trigger_scan and sched_scan operations, we were checking for the SSID length before assigning the value correctly. Since the memory was just kzalloc'ed, the check was always failing and SSID with over 32 characters were allowed to go through. This was causing a buffer overflow when copying the actual SSID to the proper place. This bug has been there since 2.6.29-rc4. Cc: stable@kernel.org Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ec83f41..88a565f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3406,12 +3406,12 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) i = 0; if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { + request->ssids[i].ssid_len = nla_len(attr); if (request->ssids[i].ssid_len > IEEE80211_MAX_SSID_LEN) { err = -EINVAL; goto out_free; } memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr)); - request->ssids[i].ssid_len = nla_len(attr); i++; } } @@ -3572,6 +3572,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { + request->ssids[i].ssid_len = nla_len(attr); if (request->ssids[i].ssid_len > IEEE80211_MAX_SSID_LEN) { err = -EINVAL; @@ -3579,7 +3580,6 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, } memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr)); - request->ssids[i].ssid_len = nla_len(attr); i++; } } -- cgit v0.10.2 From a9e12869758430424804dd4332e0d2afdfdf00b0 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 19 May 2011 10:17:04 -0500 Subject: rtlwifi: Fix kernel panic resulting from RX buffer allocation failure To handle amsdu_8k capability, the PCI routine of this driver must allocate receive buffers of order 2. Under heavy load, this causes fragmentation of memory. The present code releases the current buffer before checking to see if a new one is availble. Recovery from allocation failures is not possible, which results in kernel panics. The fix is to reorder the code to check that a new buffer can be allocated before the old one is released. If not possible, the received frame is dropped and the old one is reused. Without this change, it is impossible to transfer a 2 GB file without a kernel panic. Signed-off-by: Larry Finger Cc: Stable [2.6.{37,38,39}] Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index a409528..c2b83a5 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -669,11 +669,6 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) &rx_status, (u8 *) pdesc, skb); - pci_unmap_single(rtlpci->pdev, - *((dma_addr_t *) skb->cb), - rtlpci->rxbuffersize, - PCI_DMA_FROMDEVICE); - skb_put(skb, rtlpriv->cfg->ops->get_desc((u8 *) pdesc, false, HW_DESC_RXPKT_LEN)); @@ -690,6 +685,21 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) hdr = rtl_get_hdr(skb); fc = rtl_get_fc(skb); + /* try for new buffer - if allocation fails, drop + * frame and reuse old buffer + */ + new_skb = dev_alloc_skb(rtlpci->rxbuffersize); + if (unlikely(!new_skb)) { + RT_TRACE(rtlpriv, (COMP_INTR | COMP_RECV), + DBG_DMESG, + ("can't alloc skb for rx\n")); + goto done; + } + pci_unmap_single(rtlpci->pdev, + *((dma_addr_t *) skb->cb), + rtlpci->rxbuffersize, + PCI_DMA_FROMDEVICE); + if (!stats.crc || !stats.hwerror) { memcpy(IEEE80211_SKB_RXCB(skb), &rx_status, sizeof(rx_status)); @@ -758,15 +768,7 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) rtl_lps_leave(hw); } - new_skb = dev_alloc_skb(rtlpci->rxbuffersize); - if (unlikely(!new_skb)) { - RT_TRACE(rtlpriv, (COMP_INTR | COMP_RECV), - DBG_DMESG, - ("can't alloc skb for rx\n")); - goto done; - } skb = new_skb; - /*skb->dev = dev; */ rtlpci->rx_ring[rx_queue_idx].rx_buf[rtlpci-> rx_ring -- cgit v0.10.2 From 0019a2c9277bf6d083032a5a9857249e75407a8c Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 19 May 2011 11:48:45 -0500 Subject: rtlwifi: Use order 2 RX buffer allocation only if necessary Although a previous fix handles the kernel panics that result from failure to allocate a new RX buffer, memory fragmentation can be reduced if the amsdu_8k capability is disabled as new buffers need only be of O(0), not O(2). Signed-off-by: Larry Finger Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index c2b83a5..89100e7 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -1115,6 +1115,13 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw) rtlpci->rx_ring[rx_queue_idx].idx = 0; + /* If amsdu_8k is disabled, set buffersize to 4096. This + * change will reduce memory fragmentation. + */ + if (rtlpci->rxbuffersize > 4096 && + rtlpriv->rtlhal.disable_amsdu_8k) + rtlpci->rxbuffersize = 4096; + for (i = 0; i < rtlpci->rxringcount; i++) { struct sk_buff *skb = dev_alloc_skb(rtlpci->rxbuffersize); -- cgit v0.10.2 From fb23d86382a088d50020fd05024d40af5b00f885 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 20 May 2011 01:04:46 +0200 Subject: b43: N-PHY: initialize last var in calibration function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported-by: Larry Finger Signed-off-by: Rafał Miłecki Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/b43/phy_n.c b/drivers/net/wireless/b43/phy_n.c index 9ed6515..05960dd 100644 --- a/drivers/net/wireless/b43/phy_n.c +++ b/drivers/net/wireless/b43/phy_n.c @@ -3093,7 +3093,7 @@ static int b43_nphy_cal_tx_iq_lo(struct b43_wldev *dev, int freq; bool avoid = false; u8 length; - u16 tmp, core, type, count, max, numb, last, cmd; + u16 tmp, core, type, count, max, numb, last = 0, cmd; const u16 *table; bool phy6or5x; -- cgit v0.10.2 From a4d86d953b8593791cb29cf2acffd48f9ee6c4f9 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Fri, 20 May 2011 17:52:10 +0530 Subject: ath9k: Reset chip on baseband hang Resetting hardware helps to recover from baseband hang/panic for AR9003 based chips. Cc: stable@kernel.org Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index bd18b1d..409ef58 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -670,7 +670,8 @@ void ath9k_tasklet(unsigned long data) u32 status = sc->intrstatus; u32 rxmask; - if (status & ATH9K_INT_FATAL) { + if ((status & ATH9K_INT_FATAL) || + (status & ATH9K_INT_BB_WATCHDOG)) { ath_reset(sc, true); return; } @@ -737,6 +738,7 @@ irqreturn_t ath_isr(int irq, void *dev) { #define SCHED_INTR ( \ ATH9K_INT_FATAL | \ + ATH9K_INT_BB_WATCHDOG | \ ATH9K_INT_RXORN | \ ATH9K_INT_RXEOL | \ ATH9K_INT_RX | \ -- cgit v0.10.2 From 51ac8cbb2176dc159ee910d7074c6796079c3068 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Fri, 20 May 2011 17:52:13 +0530 Subject: ath9k_hw: disable phy restart on baseband panic caused by RXSM While receiving unsupported rate frame rx state machine gets into a state 0xb and if phy_restart happens in that state, BB would go hang. If RXSM is in 0xb state after first bb panic, ensure to disable the phy_restart. Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c index eee23ec..892c48b 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c @@ -1381,3 +1381,25 @@ void ar9003_hw_bb_watchdog_dbg_info(struct ath_hw *ah) "==== BB update: done ====\n\n"); } EXPORT_SYMBOL(ar9003_hw_bb_watchdog_dbg_info); + +void ar9003_hw_disable_phy_restart(struct ath_hw *ah) +{ + u32 val; + + /* While receiving unsupported rate frame rx state machine + * gets into a state 0xb and if phy_restart happens in that + * state, BB would go hang. If RXSM is in 0xb state after + * first bb panic, ensure to disable the phy_restart. + */ + if (!((MS(ah->bb_watchdog_last_status, + AR_PHY_WATCHDOG_RX_OFDM_SM) == 0xb) || + ah->bb_hang_rx_ofdm)) + return; + + ah->bb_hang_rx_ofdm = true; + val = REG_READ(ah, AR_PHY_RESTART); + val &= ~AR_PHY_RESTART_ENA; + + REG_WRITE(ah, AR_PHY_RESTART, val); +} +EXPORT_SYMBOL(ar9003_hw_disable_phy_restart); diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 72543ce..1be7c8b 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -1555,9 +1555,12 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan, if (ah->btcoex_hw.enabled) ath9k_hw_btcoex_enable(ah); - if (AR_SREV_9300_20_OR_LATER(ah)) + if (AR_SREV_9300_20_OR_LATER(ah)) { ar9003_hw_bb_watchdog_config(ah); + ar9003_hw_disable_phy_restart(ah); + } + ath9k_hw_apply_gpio_override(ah); return 0; diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index 57435ce..4b157c5 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -842,6 +842,7 @@ struct ath_hw { u32 bb_watchdog_last_status; u32 bb_watchdog_timeout_ms; /* in ms, 0 to disable */ + u8 bb_hang_rx_ofdm; /* true if bb hang due to rx_ofdm */ unsigned int paprd_target_power; unsigned int paprd_training_power; @@ -990,6 +991,7 @@ void ar9002_hw_enable_wep_aggregation(struct ath_hw *ah); void ar9003_hw_bb_watchdog_config(struct ath_hw *ah); void ar9003_hw_bb_watchdog_read(struct ath_hw *ah); void ar9003_hw_bb_watchdog_dbg_info(struct ath_hw *ah); +void ar9003_hw_disable_phy_restart(struct ath_hw *ah); void ar9003_paprd_enable(struct ath_hw *ah, bool val); void ar9003_paprd_populate_single_table(struct ath_hw *ah, struct ath9k_hw_cal_data *caldata, -- cgit v0.10.2 From 41e2b05b9598d6bdf91fc20280bfc538d853f769 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Fri, 20 May 2011 17:52:14 +0530 Subject: ath9k: set 40 Mhz rate only if hw is configured in ht40 Whenever there is a channel width change from 40 Mhz to 20 Mhz, the hardware is reconfigured to ht20. Meantime before doing the rate control updation, the packets are being transmitted are selected rate with IEEE80211_TX_RC_40_MHZ_WIDTH. While transmitting ht40 rate packets in ht20 mode is causing baseband panic with AR9003 based chips. ==== BB update: BB status=0x02001109 ==== ath: ** BB state: wd=1 det=1 rdar=0 rOFDM=1 rCCK=1 tOFDM=0 tCCK=0 agc=2 src=0 ** ath: ** BB WD cntl: cntl1=0xffff0085 cntl2=0x00000004 ** ath: ** BB mode: BB_gen_controls=0x000033c0 ** ath: ** BB busy times: rx_clear=99%, rx_frame=0%, tx_frame=0% ** ath: ==== BB update: done ==== Cc: stable@kernel.org Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c index 133502b..c0d0773 100644 --- a/drivers/net/wireless/ath/ath9k/rc.c +++ b/drivers/net/wireless/ath/ath9k/rc.c @@ -689,7 +689,8 @@ static void ath_rc_rate_set_series(const struct ath_rate_table *rate_table, if (WLAN_RC_PHY_HT(rate_table->info[rix].phy)) { rate->flags |= IEEE80211_TX_RC_MCS; - if (WLAN_RC_PHY_40(rate_table->info[rix].phy)) + if (WLAN_RC_PHY_40(rate_table->info[rix].phy) && + conf_is_ht40(&txrc->hw->conf)) rate->flags |= IEEE80211_TX_RC_40_MHZ_WIDTH; if (WLAN_RC_PHY_SGI(rate_table->info[rix].phy)) rate->flags |= IEEE80211_TX_RC_SHORT_GI; -- cgit v0.10.2 From 1d38c16ce4156f63b45abbd09dd28ca2ef5172b4 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Fri, 20 May 2011 17:52:15 +0530 Subject: mac80211: stop queues before rate control updation Stop tx queues before updating rate control to ensure proper rate selection. Otherwise packets can be transmitted in 40 Mhz whereas hw is configured in HT20. Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. Linville diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4f6b267..7465955 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -232,6 +232,9 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, WARN_ON(!ieee80211_set_channel_type(local, sdata, channel_type)); } + ieee80211_stop_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CSA); + /* channel_type change automatically detected */ ieee80211_hw_config(local, 0); @@ -245,6 +248,9 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); } + ieee80211_wake_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CSA); + ht_opmode = le16_to_cpu(hti->operation_mode); /* if bss configuration changed store the new one */ -- cgit v0.10.2 From 1007da0604b1d2f064bfecece0f131d57237b03f Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 26 May 2011 09:57:33 -0600 Subject: ASoC: Fix dapm_is_shared_kcontrol so everything isn't shared Commit af46800 ("ASoC: Implement mux control sharing") introduced function dapm_is_shared_kcontrol. When this function returns true, the naming of DAPM controls is derived from the kcontrol_new. Otherwise, the name comes from the widget (and possibly a widget's naming prefix). A bug in the implementation of dapm_is_shared_kcontrol made it return 1 in all cases. Hence, that commit caused a change in control naming for all controls instead of just shared controls. Specifically, a control is always considered shared because it is always compared against itself. Solve this by never comparing against the widget containing the control being created. Equally, controls should never be shared between DAPM contexts; when the same codec is instantiated multiple times, the same kcontrol_new will be used. However, the control should no be shared between the multiple instances. I tested that with the Tegra WM8903 driver: * Shared is now mostly 0 as expected, and sometimes 1. * The expected controls are still generated after this change. However, I don't have any systems that have a widget/control naming prefix, so I can't test that aspect. Thanks for Jarkko Nikula for pointing out how to fix this. Reported-by: Liam Girdwood Tested-by: Jarkko Nikula Signed-off-by: Stephen Warren Acked-by: Liam Girdwood Signed-off-by: Mark Brown diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 999bb08..776e6f4 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -325,6 +325,7 @@ static int dapm_connect_mixer(struct snd_soc_dapm_context *dapm, } static int dapm_is_shared_kcontrol(struct snd_soc_dapm_context *dapm, + struct snd_soc_dapm_widget *kcontrolw, const struct snd_kcontrol_new *kcontrol_new, struct snd_kcontrol **kcontrol) { @@ -334,6 +335,8 @@ static int dapm_is_shared_kcontrol(struct snd_soc_dapm_context *dapm, *kcontrol = NULL; list_for_each_entry(w, &dapm->card->widgets, list) { + if (w == kcontrolw || w->dapm != kcontrolw->dapm) + continue; for (i = 0; i < w->num_kcontrols; i++) { if (&w->kcontrol_news[i] == kcontrol_new) { if (w->kcontrols) @@ -468,7 +471,7 @@ static int dapm_new_mux(struct snd_soc_dapm_context *dapm, return -EINVAL; } - shared = dapm_is_shared_kcontrol(dapm, &w->kcontrol_news[0], + shared = dapm_is_shared_kcontrol(dapm, w, &w->kcontrol_news[0], &kcontrol); if (kcontrol) { wlist = kcontrol->private_data; -- cgit v0.10.2 From ea02c63d57d7ec099f66ddb2942b4022e865cd5f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 27 May 2011 21:56:16 +0800 Subject: ASoC: Fix wm_hubs input PGA ZC bits Signed-off-by: Mark Brown Acked-by: Liam Girdwood diff --git a/sound/soc/codecs/wm_hubs.c b/sound/soc/codecs/wm_hubs.c index e55b298..9e370d1 100644 --- a/sound/soc/codecs/wm_hubs.c +++ b/sound/soc/codecs/wm_hubs.c @@ -215,23 +215,23 @@ static const struct snd_kcontrol_new analogue_snd_controls[] = { SOC_SINGLE_TLV("IN1L Volume", WM8993_LEFT_LINE_INPUT_1_2_VOLUME, 0, 31, 0, inpga_tlv), SOC_SINGLE("IN1L Switch", WM8993_LEFT_LINE_INPUT_1_2_VOLUME, 7, 1, 1), -SOC_SINGLE("IN1L ZC Switch", WM8993_LEFT_LINE_INPUT_1_2_VOLUME, 7, 1, 0), +SOC_SINGLE("IN1L ZC Switch", WM8993_LEFT_LINE_INPUT_1_2_VOLUME, 6, 1, 0), SOC_SINGLE_TLV("IN1R Volume", WM8993_RIGHT_LINE_INPUT_1_2_VOLUME, 0, 31, 0, inpga_tlv), SOC_SINGLE("IN1R Switch", WM8993_RIGHT_LINE_INPUT_1_2_VOLUME, 7, 1, 1), -SOC_SINGLE("IN1R ZC Switch", WM8993_RIGHT_LINE_INPUT_1_2_VOLUME, 7, 1, 0), +SOC_SINGLE("IN1R ZC Switch", WM8993_RIGHT_LINE_INPUT_1_2_VOLUME, 6, 1, 0), SOC_SINGLE_TLV("IN2L Volume", WM8993_LEFT_LINE_INPUT_3_4_VOLUME, 0, 31, 0, inpga_tlv), SOC_SINGLE("IN2L Switch", WM8993_LEFT_LINE_INPUT_3_4_VOLUME, 7, 1, 1), -SOC_SINGLE("IN2L ZC Switch", WM8993_LEFT_LINE_INPUT_3_4_VOLUME, 7, 1, 0), +SOC_SINGLE("IN2L ZC Switch", WM8993_LEFT_LINE_INPUT_3_4_VOLUME, 6, 1, 0), SOC_SINGLE_TLV("IN2R Volume", WM8993_RIGHT_LINE_INPUT_3_4_VOLUME, 0, 31, 0, inpga_tlv), SOC_SINGLE("IN2R Switch", WM8993_RIGHT_LINE_INPUT_3_4_VOLUME, 7, 1, 1), -SOC_SINGLE("IN2R ZC Switch", WM8993_RIGHT_LINE_INPUT_3_4_VOLUME, 7, 1, 0), +SOC_SINGLE("IN2R ZC Switch", WM8993_RIGHT_LINE_INPUT_3_4_VOLUME, 6, 1, 0), SOC_SINGLE_TLV("MIXINL IN2L Volume", WM8993_INPUT_MIXER3, 7, 1, 0, inmix_sw_tlv), -- cgit v0.10.2 From aac11c1b351413aa3412e258e2b2dcba31777209 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 24 May 2011 16:28:55 +0200 Subject: iwl4965: fix 5GHz operation rx_status.band is used uninitialized, what disallow to work on 5GHz . Cc: stable@kernel.org # 2.6.39+ Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/iwlegacy/iwl-4965-lib.c b/drivers/net/wireless/iwlegacy/iwl-4965-lib.c index 7e5e85a..a7a4739 100644 --- a/drivers/net/wireless/iwlegacy/iwl-4965-lib.c +++ b/drivers/net/wireless/iwlegacy/iwl-4965-lib.c @@ -628,11 +628,11 @@ void iwl4965_rx_reply_rx(struct iwl_priv *priv, /* rx_status carries information about the packet to mac80211 */ rx_status.mactime = le64_to_cpu(phy_res->timestamp); + rx_status.band = (phy_res->phy_flags & RX_RES_PHY_FLAGS_BAND_24_MSK) ? + IEEE80211_BAND_2GHZ : IEEE80211_BAND_5GHZ; rx_status.freq = ieee80211_channel_to_frequency(le16_to_cpu(phy_res->channel), rx_status.band); - rx_status.band = (phy_res->phy_flags & RX_RES_PHY_FLAGS_BAND_24_MSK) ? - IEEE80211_BAND_2GHZ : IEEE80211_BAND_5GHZ; rx_status.rate_idx = iwl4965_hwrate_to_mac80211_idx(rate_n_flags, rx_status.band); rx_status.flag = 0; -- cgit v0.10.2 From 64bd0821a3b66c3307d7a4ee5523e3e35ec2df0e Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Wed, 25 May 2011 09:44:05 +0800 Subject: wireless: Default to 'n' for 2 new added devices in Kconfig. We make oldconfig every time when a new kernel arrives, but if we don't have such a device(I guess this is the most common case for a new device), the default value should be 'n' so that the kernel size we build doesn't grow up too much quickly. For anyone who has the device, it is OK for them to turn it on by themselves. Cc: "John W. Linville" Cc: Johannes Berg Signed-off-by: Tao Ma Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/Kconfig b/drivers/net/wireless/ath/ath9k/Kconfig index d9ff841..d9c08c6 100644 --- a/drivers/net/wireless/ath/ath9k/Kconfig +++ b/drivers/net/wireless/ath/ath9k/Kconfig @@ -26,7 +26,6 @@ config ATH9K config ATH9K_PCI bool "Atheros ath9k PCI/PCIe bus support" depends on ATH9K && PCI - default PCI ---help--- This option enables the PCI bus support in ath9k. diff --git a/drivers/net/wireless/rt2x00/Kconfig b/drivers/net/wireless/rt2x00/Kconfig index 9def1e5..b2f8b8f 100644 --- a/drivers/net/wireless/rt2x00/Kconfig +++ b/drivers/net/wireless/rt2x00/Kconfig @@ -166,7 +166,6 @@ config RT2800USB_RT35XX config RT2800USB_RT53XX bool "rt2800usb - Include support for rt53xx devices (EXPERIMENTAL)" depends on EXPERIMENTAL - default y ---help--- This adds support for rt53xx wireless chipset family to the rt2800pci driver. -- cgit v0.10.2 From a331400bf01231253a0d9ab211c83212d2ac4edb Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Thu, 26 May 2011 11:46:37 +0300 Subject: mac80211: clear local->ps_data on disassoc local->ps_data wasn't cleared on disassociation, which (in some corner cases) caused reconnections to enter psm before association completed. Signed-off-by: Eliad Peller Signed-off-by: John W. Linville diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7465955..456cccf 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1095,6 +1095,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, local->hw.conf.flags &= ~IEEE80211_CONF_PS; config_changed |= IEEE80211_CONF_CHANGE_PS; } + local->ps_sdata = NULL; ieee80211_hw_config(local, config_changed); -- cgit v0.10.2 From 64c754ed3b0009e4fa248f739000dc234eb0d2c9 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Thu, 26 May 2011 10:53:17 +0200 Subject: mac80211: Remove duplicate linux/slab.h include from net/mac80211/scan.c Commit 79f460ca49d8d5700756ab7071c951311c7f29cc add a duplicate linux/slab.h include to net/mac80211/scan.c - remove it. Signed-off-by: Jesper Juhl Acked-by: Luciano Coelho Signed-off-by: John W. Linville diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 27af672..58ffa7d 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include -- cgit v0.10.2 From 1df85ecec36ad5da3f0165760704310d6c03f65f Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Fri, 27 May 2011 01:08:04 +0800 Subject: ath9k: Fix AR9287 calibration The AR9287 calibration code was not being called because of an incorrect MAC revision check. This forced the AR9287 to use the AR9285 initial calibration code and bypass the AR9287 code entirely. Signed-off-by: Adrian Chadd Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/ar9002_calib.c b/drivers/net/wireless/ath/ath9k/ar9002_calib.c index 015d974..2d4c091 100644 --- a/drivers/net/wireless/ath/ath9k/ar9002_calib.c +++ b/drivers/net/wireless/ath/ath9k/ar9002_calib.c @@ -829,7 +829,7 @@ static bool ar9002_hw_init_cal(struct ath_hw *ah, struct ath9k_channel *chan) if (AR_SREV_9271(ah)) { if (!ar9285_hw_cl_cal(ah, chan)) return false; - } else if (AR_SREV_9285_12_OR_LATER(ah)) { + } else if (AR_SREV_9285(ah) && AR_SREV_9285_12_OR_LATER(ah)) { if (!ar9285_hw_clc(ah, chan)) return false; } else { -- cgit v0.10.2 From 89e1be50c68eb5e58b873dce87bbac627ee18d1f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 May 2011 23:11:24 -0400 Subject: x86: Put back -pg to tsc.o and add no GCOV to vread_tsc_64.o The commit 44259b1abfaa8bb819d25d41d71e8e33e25dd36a Author: Andy Lutomirski x86-64: Move vread_tsc into a new file with sensible options Removed the -pg from tsc.o which caused the function graph tracer to go into an infinite function call recursion as it uses the tsc internally outside its recursion protection, thus tracing the tsc breaks the function graph tracer. This commit also added the file vread_tsc_64.c that gets used by vdso but failed to prevent GCOV from monkeying with it, causing userspace to try to access kernel data when GCOV was enabled. Thanks to Thomas Gleixner for pointing out GCOV as the likely culprit that added strange kernel accesses into the vread_tsc() call. Cc: Author: Andy Lutomirski Cc: Thomas Gleixner Signed-off-by: Steven Rostedt diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f5abe3a..90b06d4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -8,6 +8,7 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities +CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg CFLAGS_REMOVE_paravirt-spinlocks.o = -pg CFLAGS_REMOVE_pvclock.o = -pg @@ -28,6 +29,7 @@ CFLAGS_paravirt.o := $(nostackp) GCOV_PROFILE_vsyscall_64.o := n GCOV_PROFILE_hpet.o := n GCOV_PROFILE_tsc.o := n +GCOV_PROFILE_vread_tsc_64.o := n GCOV_PROFILE_paravirt.o := n # vread_tsc_64 is hot and should be fully optimized: -- cgit v0.10.2 From 6df87e65dbe4528ef07b917af89913abb8caaaba Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 28 May 2011 14:44:46 +0200 Subject: block: improve the bio_add_page() and bio_add_pc_page() descriptions The descriptions of bio_add_page() and bio_add_pc_page() are slightly inconsistent; improve them. Signed-off-by: Andreas Gruenbacher Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe diff --git a/fs/bio.c b/fs/bio.c index 840a0d7..9bfade8 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -638,10 +638,11 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page * @offset: vec entry offset * * Attempt to add a page to the bio_vec maplist. This can fail for a - * number of reasons, such as the bio being full or target block - * device limitations. The target block device must allow bio's - * smaller than PAGE_SIZE, so it is always possible to add a single - * page to an empty bio. This should only be used by REQ_PC bios. + * number of reasons, such as the bio being full or target block device + * limitations. The target block device must allow bio's up to PAGE_SIZE, + * so it is always possible to add a single page to an empty bio. + * + * This should only be used by REQ_PC bios. */ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset) @@ -659,10 +660,9 @@ EXPORT_SYMBOL(bio_add_pc_page); * @offset: vec entry offset * * Attempt to add a page to the bio_vec maplist. This can fail for a - * number of reasons, such as the bio being full or target block - * device limitations. The target block device must allow bio's - * smaller than PAGE_SIZE, so it is always possible to add a single - * page to an empty bio. + * number of reasons, such as the bio being full or target block device + * limitations. The target block device must allow bio's up to PAGE_SIZE, + * so it is always possible to add a single page to an empty bio. */ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset) -- cgit v0.10.2 From 35fbf5bcf497d6ddbe7b6478141e7526d1474ff5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 28 May 2011 14:44:46 +0200 Subject: nbd: pass MSG_* flags to kernel_recvmsg() Unlike kernel_sendmsg(), kernel_recvmsg() requires passing flags explicitly via last parameter instead of struct msghdr.msg_flags. Therefore calls to sock_xmit(lo, 0, ..., MSG_WAITALL) have not been processed properly by tcp layer wrt. the flag. Fix it. Signed-off-by: Namhyung Kim Cc: Paul Clements Signed-off-by: Jens Axboe diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index e6fc716..1df3bfe 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -192,7 +192,8 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size, if (lo->xmit_timeout) del_timer_sync(&ti); } else - result = kernel_recvmsg(sock, &msg, &iov, 1, size, 0); + result = kernel_recvmsg(sock, &msg, &iov, 1, size, + msg.msg_flags); if (signal_pending(current)) { siginfo_t info; -- cgit v0.10.2 From 3b2710824e00d238554c13b5add347e6c701ab1a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 28 May 2011 14:44:46 +0200 Subject: nbd: limit module parameters to a sane value The 'max_part' parameter controls the number of maximum partition a nbd device can have. However if a user specifies very large value it would exceed the limitation of device minor number and can cause a kernel oops (or, at least, produce invalid device nodes in some cases). In addition, specifying large 'nbds_max' value causes same problem for the same reason. On my desktop, following command results to the kernel bug: $ sudo modprobe nbd max_part=100000 kernel BUG at /media/Linux_Data/project/linux/fs/sysfs/group.c:65! invalid opcode: 0000 [#1] SMP last sysfs file: /sys/devices/virtual/block/nbd4/range CPU 1 Modules linked in: nbd(+) bridge stp llc kvm_intel kvm asus_atk0110 sg sr_mod cdrom Pid: 2522, comm: modprobe Tainted: G W 2.6.39-leonard+ #159 System manufacturer System Product Name/P5G41TD-M PRO RIP: 0010:[] [] internal_create_group+0x2f/0x166 RSP: 0018:ffff8801009f1de8 EFLAGS: 00010246 RAX: 00000000ffffffef RBX: ffff880103920478 RCX: 00000000000a7bd3 RDX: ffffffff81a2dbe0 RSI: 0000000000000000 RDI: ffff880103920478 RBP: ffff8801009f1e38 R08: ffff880103920468 R09: ffff880103920478 R10: ffff8801009f1de8 R11: ffff88011eccbb68 R12: ffffffff81a2dbe0 R13: ffff880103920468 R14: 0000000000000000 R15: ffff880103920400 FS: 00007f3c49de9700(0000) GS:ffff88011f800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f3b7fe7c000 CR3: 00000000cd58d000 CR4: 00000000000406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process modprobe (pid: 2522, threadinfo ffff8801009f0000, task ffff8801009a93a0) Stack: ffff8801009f1e58 ffffffff812e8f6e ffff8801009f1e58 ffffffff812e7a80 ffff880000000010 ffff880103920400 ffff8801002fd0c0 ffff880103920468 0000000000000011 ffff880103920400 ffff8801009f1e48 ffffffff8115ab6a Call Trace: [] ? device_add+0x4f1/0x5e4 [] ? dev_set_name+0x41/0x43 [] sysfs_create_group+0x13/0x15 [] blk_trace_init_sysfs+0x14/0x16 [] blk_register_queue+0x4c/0xfd [] add_disk+0xe4/0x29c [] nbd_init+0x2ab/0x30d [nbd] [] ? 0xffffffffa007dfff [] do_one_initcall+0x7f/0x13e [] sys_init_module+0xa1/0x1e3 [] system_call_fastpath+0x16/0x1b Code: 41 57 41 56 41 55 41 54 53 48 83 ec 28 0f 1f 44 00 00 48 89 fb 41 89 f6 49 89 d4 48 85 ff 74 0b 85 f6 75 0b 48 83 7f 30 00 75 14 <0f> 0b eb fe b9 ea ff ff ff 48 83 7f 30 00 0f 84 09 01 00 00 49 RIP [] internal_create_group+0x2f/0x166 RSP ---[ end trace 753285ffbf72c57c ]--- Signed-off-by: Namhyung Kim Cc: Laurent Vivier Cc: Paul Clements Cc: stable@kernel.org Signed-off-by: Jens Axboe diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 1df3bfe..fdee756 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -757,6 +757,12 @@ static int __init nbd_init(void) if (max_part > 0) part_shift = fls(max_part); + if ((1UL << part_shift) > DISK_MAX_PARTS) + return -EINVAL; + + if (nbds_max > 1UL << (MINORBITS - part_shift)) + return -EINVAL; + for (i = 0; i < nbds_max; i++) { struct gendisk *disk = alloc_disk(1 << part_shift); if (!disk) -- cgit v0.10.2 From 5988ce239682854d4e632fb58bff000700830394 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 28 May 2011 14:44:46 +0200 Subject: nbd: adjust 'max_part' according to part_shift The 'max_part' parameter determines how many partitions are supported on each nbd device. However the actual number can be changed to the power of 2 minus 1 form during the module initialization as alloc_disk() is called with (1 << part_shift) for some reason. So adjust 'max_part' also at least for consistency with loop and brd. It is exported via sysfs already, and a user should check this value after module loading if [s]he wants to use that number correctly (i.e. fdisk or something). Signed-off-by: Namhyung Kim Cc: Laurent Vivier Cc: Paul Clements Signed-off-by: Jens Axboe diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index fdee756..f533f33 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -754,9 +754,20 @@ static int __init nbd_init(void) return -ENOMEM; part_shift = 0; - if (max_part > 0) + if (max_part > 0) { part_shift = fls(max_part); + /* + * Adjust max_part according to part_shift as it is exported + * to user space so that user can know the max number of + * partition kernel should be able to manage. + * + * Note that -1 is required because partition 0 is reserved + * for the whole disk. + */ + max_part = (1UL << part_shift) - 1; + } + if ((1UL << part_shift) > DISK_MAX_PARTS) return -EINVAL; -- cgit v0.10.2 From 15517f7c213442e4d8a098cf0732b237f764c576 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 May 2011 11:14:08 -0600 Subject: lguest: fix timer interrupt setup Without an IRQ chip set, we now get a WARN_ON and no timer interrupt. This prevents booting. Fortunately, the fix is a one-liner: set up the timer IRQ like everything else. Signed-off-by: Rusty Russell Cc: stable@kernel.org # .39.x diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index e191c09..db832fd 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -993,6 +993,7 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) static void lguest_time_init(void) { /* Set up the timer interrupt (0) to go to our simple timer routine */ + lguest_setup_irq(0); irq_set_handler(0, lguest_time_irq); clocksource_register_hz(&lguest_clock, NSEC_PER_SEC); -- cgit v0.10.2 From bc805a03c26e1e25171bc627c6264553d27f746c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 May 2011 11:14:11 -0600 Subject: lguest: fix up compilation after move ed16648eb5b86917f0b90bdcdbc857202da72f90 "Move kvm, uml, and lguest subdirectories" broke the lguest example launcher. Signed-off-by: Rusty Russell diff --git a/Documentation/virtual/lguest/Makefile b/Documentation/virtual/lguest/Makefile index bebac6b..0ac3420 100644 --- a/Documentation/virtual/lguest/Makefile +++ b/Documentation/virtual/lguest/Makefile @@ -1,5 +1,5 @@ # This creates the demonstration utility "lguest" which runs a Linux guest. -# Missing headers? Add "-I../../include -I../../arch/x86/include" +# Missing headers? Add "-I../../../include -I../../../arch/x86/include" CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE all: lguest diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c index d9da7e1..711ba9e 100644 --- a/Documentation/virtual/lguest/lguest.c +++ b/Documentation/virtual/lguest/lguest.c @@ -49,7 +49,7 @@ #include #include #include -#include "../../include/linux/lguest_launcher.h" +#include "../../../include/linux/lguest_launcher.h" /*L:110 * We can ignore the 42 include files we need for this program, but I do want * to draw attention to the use of kernel-style types. -- cgit v0.10.2 From 990c91f0af46c57f0291060d928c7ab82f9d5667 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 May 2011 11:14:12 -0600 Subject: lguest: remove support for VIRTIO_F_NOTIFY_ON_EMPTY. No virtio device does this any more, so no need to clutter lguest with it. Signed-off-by: Rusty Russell diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c index 711ba9e..cd9d6af 100644 --- a/Documentation/virtual/lguest/lguest.c +++ b/Documentation/virtual/lguest/lguest.c @@ -135,9 +135,6 @@ struct device { /* Is it operational */ bool running; - /* Does Guest want an intrrupt on empty? */ - bool irq_on_empty; - /* Device-specific data. */ void *priv; }; @@ -637,10 +634,7 @@ static void trigger_irq(struct virtqueue *vq) /* If they don't want an interrupt, don't send one... */ if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) { - /* ... unless they've asked us to force one on empty. */ - if (!vq->dev->irq_on_empty - || lg_last_avail(vq) != vq->vring.avail->idx) - return; + return; } /* Send the Guest an interrupt tell them we used something up. */ @@ -1057,15 +1051,6 @@ static void create_thread(struct virtqueue *vq) close(vq->eventfd); } -static bool accepted_feature(struct device *dev, unsigned int bit) -{ - const u8 *features = get_feature_bits(dev) + dev->feature_len; - - if (dev->feature_len < bit / CHAR_BIT) - return false; - return features[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT)); -} - static void start_device(struct device *dev) { unsigned int i; @@ -1079,8 +1064,6 @@ static void start_device(struct device *dev) verbose(" %02x", get_feature_bits(dev) [dev->feature_len+i]); - dev->irq_on_empty = accepted_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); - for (vq = dev->vq; vq; vq = vq->next) { if (vq->service) create_thread(vq); @@ -1564,7 +1547,6 @@ static void setup_tun_net(char *arg) /* Set up the tun device. */ configure_device(ipfd, tapif, ip); - add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); /* Expect Guest to handle everything except UFO */ add_feature(dev, VIRTIO_NET_F_CSUM); add_feature(dev, VIRTIO_NET_F_GUEST_CSUM); -- cgit v0.10.2 From 7a7c924cf03da2a76ea4dc0aac1a788cf95a9c29 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Feb 2011 21:43:48 +0100 Subject: virtio_blk: allow re-reading config space at runtime Wire up the virtio_driver config_changed method to get notified about config changes raised by the host. For now we just re-read the device size to support online resizing of devices, but once we add more attributes that might be changeable they could be added as well. Note that the config_changed method is called from irq context, so we'll have to use the workqueue infrastructure to provide us a proper user context for our changes. Signed-off-by: Christoph Hellwig Signed-off-by: Rusty Russell diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6ecf89c..33a48a8 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -6,10 +6,12 @@ #include #include #include +#include #define PART_BITS 4 static int major, index; +struct workqueue_struct *virtblk_wq; struct virtio_blk { @@ -26,6 +28,9 @@ struct virtio_blk mempool_t *pool; + /* Process context for config space updates */ + struct work_struct config_work; + /* What host tells us, plus 2 for header & tailer. */ unsigned int sg_elems; @@ -291,6 +296,46 @@ static ssize_t virtblk_serial_show(struct device *dev, } DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL); +static void virtblk_config_changed_work(struct work_struct *work) +{ + struct virtio_blk *vblk = + container_of(work, struct virtio_blk, config_work); + struct virtio_device *vdev = vblk->vdev; + struct request_queue *q = vblk->disk->queue; + char cap_str_2[10], cap_str_10[10]; + u64 capacity, size; + + /* Host must always specify the capacity. */ + vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), + &capacity, sizeof(capacity)); + + /* If capacity is too big, truncate with warning. */ + if ((sector_t)capacity != capacity) { + dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n", + (unsigned long long)capacity); + capacity = (sector_t)-1; + } + + size = capacity * queue_logical_block_size(q); + string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); + string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); + + dev_notice(&vdev->dev, + "new size: %llu %d-byte logical blocks (%s/%s)\n", + (unsigned long long)capacity, + queue_logical_block_size(q), + cap_str_10, cap_str_2); + + set_capacity(vblk->disk, capacity); +} + +static void virtblk_config_changed(struct virtio_device *vdev) +{ + struct virtio_blk *vblk = vdev->priv; + + queue_work(virtblk_wq, &vblk->config_work); +} + static int __devinit virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; @@ -327,6 +372,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) vblk->vdev = vdev; vblk->sg_elems = sg_elems; sg_init_table(vblk->sg, vblk->sg_elems); + INIT_WORK(&vblk->config_work, virtblk_config_changed_work); /* We expect one virtqueue, for output. */ vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests"); @@ -477,6 +523,8 @@ static void __devexit virtblk_remove(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; + flush_work(&vblk->config_work); + /* Nothing should be pending. */ BUG_ON(!list_empty(&vblk->reqs)); @@ -508,27 +556,47 @@ static unsigned int features[] = { * Use __refdata to avoid this warning. */ static struct virtio_driver __refdata virtio_blk = { - .feature_table = features, - .feature_table_size = ARRAY_SIZE(features), - .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, - .id_table = id_table, - .probe = virtblk_probe, - .remove = __devexit_p(virtblk_remove), + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = virtblk_probe, + .remove = __devexit_p(virtblk_remove), + .config_changed = virtblk_config_changed, }; static int __init init(void) { + int error; + + virtblk_wq = alloc_workqueue("virtio-blk", 0, 0); + if (!virtblk_wq) + return -ENOMEM; + major = register_blkdev(0, "virtblk"); - if (major < 0) - return major; - return register_virtio_driver(&virtio_blk); + if (major < 0) { + error = major; + goto out_destroy_workqueue; + } + + error = register_virtio_driver(&virtio_blk); + if (error) + goto out_unregister_blkdev; + return 0; + +out_unregister_blkdev: + unregister_blkdev(major, "virtblk"); +out_destroy_workqueue: + destroy_workqueue(virtblk_wq); + return error; } static void __exit fini(void) { unregister_blkdev(major, "virtblk"); unregister_virtio_driver(&virtio_blk); + destroy_workqueue(virtblk_wq); } module_init(init); module_exit(fini); -- cgit v0.10.2 From 6917f83ffe5e6b6414ccc845263b792ed201c0f1 Mon Sep 17 00:00:00 2001 From: Liu Yuan Date: Sun, 24 Apr 2011 02:49:26 +0800 Subject: drivers, block: virtio_blk: Replace cryptic number with the macro It is easier to figure out the context by reading SCSI_SENSE_BUFFERSIZE instead of plain '96'. Signed-off-by: Liu Yuan Signed-off-by: Rusty Russell diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 33a48a8..079c088 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -7,6 +7,7 @@ #include #include #include +#include #define PART_BITS 4 @@ -146,7 +147,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, num = blk_rq_map_sg(q, vbr->req, vblk->sg + out); if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) { - sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96); + sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE); sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr, sizeof(vbr->in_hdr)); } -- cgit v0.10.2 From 177dbd95637a52b9118aca757d5856ec3995d3e7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 May 2011 11:14:13 -0600 Subject: virtio console: don't manually set or finalize VIRTIO_CONSOLE_F_MULTIPORT. That's already been done by the virtio infrastructure before the probe function is called. Reported-by: alexey.kardashevskiy@au1.ibm.com Acked-by: Amit Shah Tested-by: Amit Shah Signed-off-by: Rusty Russell diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 838568a..fb68b12 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1677,17 +1677,12 @@ static int __devinit virtcons_probe(struct virtio_device *vdev) portdev->config.max_nr_ports = 1; if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT)) { multiport = true; - vdev->features[0] |= 1 << VIRTIO_CONSOLE_F_MULTIPORT; - vdev->config->get(vdev, offsetof(struct virtio_console_config, max_nr_ports), &portdev->config.max_nr_ports, sizeof(portdev->config.max_nr_ports)); } - /* Let the Host know we support multiple ports.*/ - vdev->config->finalize_features(vdev); - err = init_vqs(portdev); if (err < 0) { dev_err(&vdev->dev, "Error %d initializing vqs\n", err); -- cgit v0.10.2 From bf50e69f63d21091e525185c3ae761412be0ba72 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Apr 2011 10:43:25 -0700 Subject: virtio balloon: kill tell-host-first logic The virtio balloon driver has a VIRTIO_BALLOON_F_MUST_TELL_HOST feature bit. Whenever the bit is set, the guest kernel must always tell the host before we free pages back to the allocator. Without this feature, we might free a page (and have another user touch it) while the hypervisor is unprepared for it. But, if the bit is _not_ set, we are under no obligation to reverse the order; we're under no obligation to do _anything_. As of now, qemu-kvm defines the bit, but doesn't set it. This patch makes the "tell host first" logic the only case. This should make everybody happy, and reduce the amount of untested or untestable code in the kernel. This _also_ means that we don't have to preserve a pfn list after the pages are freed, which should let us get rid of some temporary storage (vb->pfns) eventually. Signed-off-by: Dave Hansen Signed-off-by: Rusty Russell diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 0f1da45..e058ace 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -40,9 +40,6 @@ struct virtio_balloon /* Waiting for host to ack the pages we released. */ struct completion acked; - /* Do we have to tell Host *before* we reuse pages? */ - bool tell_host_first; - /* The pages we've told the Host we're not using. */ unsigned int num_pages; struct list_head pages; @@ -151,13 +148,14 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) vb->num_pages--; } - if (vb->tell_host_first) { - tell_host(vb, vb->deflate_vq); - release_pages_by_pfn(vb->pfns, vb->num_pfns); - } else { - release_pages_by_pfn(vb->pfns, vb->num_pfns); - tell_host(vb, vb->deflate_vq); - } + + /* + * Note that if + * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); + * is true, we *have* to do it in this order + */ + tell_host(vb, vb->deflate_vq); + release_pages_by_pfn(vb->pfns, vb->num_pfns); } static inline void update_stat(struct virtio_balloon *vb, int idx, @@ -325,9 +323,6 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_del_vqs; } - vb->tell_host_first - = virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); - return 0; out_del_vqs: -- cgit v0.10.2 From a1b383870a28cfbd1657d4922c0fafc634a62ebd Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 May 2011 11:14:13 -0600 Subject: virtio: add full three-clause BSD text to headers. It's unclear to me if it's important, but it's obviously causing my technical colleages some headaches and I'd hate such imprecision to slow virtio adoption. I've emailed this to all non-trivial contributors for approval, too. Signed-off-by: Rusty Russell Acked-by: Grant Likely Acked-by: Ryan Harper Acked-by: Anthony Liguori Acked-by: Eric Van Hensbergen Acked-by: john cooper Acked-by: Aneesh Kumar K.V Acked-by: Christian Borntraeger Acked-by: Fernando Luis Vazquez Cao diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index e68b439..277c4ad 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_9P_H #define _LINUX_VIRTIO_9P_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include #include diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index a50ecd1..652dc8b 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_BALLOON_H #define _LINUX_VIRTIO_BALLOON_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 167720d..e0edb40 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_BLK_H #define _LINUX_VIRTIO_BLK_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include #include diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 800617b..39c88c5 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_CONFIG_H #define _LINUX_VIRTIO_CONFIG_H /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers. */ + * anyone can use the definitions to implement compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ /* Virtio devices use a standardized configuration space to define their * features and pass configuration information, but each implementation can diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index e4d3335..bdf4b00 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -5,7 +5,31 @@ #include /* * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers. + * anyone can use the definitions to implement compatible drivers/servers: + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. * * Copyright (C) Red Hat, Inc., 2009, 2010, 2011 * Copyright (C) Amit Shah , 2009, 2010, 2011 diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h index 06660c0..85bb0bb 100644 --- a/include/linux/virtio_ids.h +++ b/include/linux/virtio_ids.h @@ -5,7 +5,29 @@ * * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. - */ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #define VIRTIO_ID_NET 1 /* virtio net */ #define VIRTIO_ID_BLOCK 2 /* virtio block */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 085e422..136040b 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_NET_H #define _LINUX_VIRTIO_NET_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include #include diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index 9a3d7c4..ea66f3f 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -11,6 +11,29 @@ * * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #ifndef _LINUX_VIRTIO_PCI_H diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index e4d144b..a813e5d 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -7,6 +7,29 @@ * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * * Copyright Rusty Russell IBM Corporation 2007. */ #include -- cgit v0.10.2 From 770b31a85e000b0194974922f238a30ade4246b6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:10:17 +0300 Subject: virtio: event index interface Define a new feature bit for the guest and host to utilize an event index (like Xen) instead if a flag bit to enable/disable interrupts and kicks. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index a813e5d..c4eef73 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -52,6 +52,12 @@ /* We support indirect buffer descriptors */ #define VIRTIO_RING_F_INDIRECT_DESC 28 +/* The Guest publishes the used index for which it expects an interrupt + * at the end of the avail ring. Host should ignore the avail->flags field. */ +/* The Host publishes the avail index for which it expects a kick + * at the end of the used ring. Guest should ignore the used->flags field. */ +#define VIRTIO_RING_F_EVENT_IDX 29 + /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ struct vring_desc { /* Address (guest-physical). */ @@ -106,6 +112,7 @@ struct vring { * __u16 avail_flags; * __u16 avail_idx; * __u16 available[num]; + * __u16 used_event_idx; * * // Padding to the next align boundary. * char pad[]; @@ -114,8 +121,14 @@ struct vring { * __u16 used_flags; * __u16 used_idx; * struct vring_used_elem used[num]; + * __u16 avail_event_idx; * }; */ +/* We publish the used event index at the end of the available ring, and vice + * versa. They are at the end for backwards compatibility. */ +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(__u16 *)&(vr)->used->ring[(vr)->num]) + static inline void vring_init(struct vring *vr, unsigned int num, void *p, unsigned long align) { @@ -130,7 +143,7 @@ static inline unsigned vring_size(unsigned int num, unsigned long align) { return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) + align - 1) & ~(align - 1)) - + sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num; + + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num; } #ifdef __KERNEL__ -- cgit v0.10.2 From bf7035bf20563a6cadcb9e870406e7b21daf5e30 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:10:27 +0300 Subject: virtio ring: inline function to check for events With the new used_event and avail_event and features, both host and guest need similar logic to check whether events are enabled, so it helps to put the common code in the header. Note that Xen has similar logic for notification hold-off in include/xen/interface/io/ring.h with req_event and req_prod corresponding to event_idx + 1 and new_idx respectively. +1 comes from the fact that req_event and req_prod in Xen start at 1, while event index in virtio starts at 0. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index c4eef73..4a32cb6 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -146,6 +146,20 @@ static inline unsigned vring_size(unsigned int num, unsigned long align) + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num; } +/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ +/* Assuming a given event_idx value from the other size, if + * we have just incremented index from old to new_idx, + * should we trigger an event? */ +static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) +{ + /* Note: Xen has similar logic for notification hold-off + * in include/xen/interface/io/ring.h with req_event and req_prod + * corresponding to event_idx + 1 and new_idx respectively. + * Note also that req_event and req_prod in Xen start at 1, + * event indexes in virtio start at 0. */ + return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); +} + #ifdef __KERNEL__ #include struct virtio_device; -- cgit v0.10.2 From a5c262c5fd83ece01bd649fb08416c501d4c59d7 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:10:44 +0300 Subject: virtio_ring: support event idx feature Support for the new event idx feature: 1. When enabling interrupts, publish the current avail index value to the host to get interrupts on the next update. 2. Use the new avail_event feature to reduce the number of exits from the guest. Simple test with the simulator: [virtio]# time ./virtio_test spurious wakeus: 0x7 real 0m0.169s user 0m0.140s sys 0m0.019s [virtio]# time ./virtio_test --no-event-idx spurious wakeus: 0x11 real 0m0.649s user 0m0.295s sys 0m0.335s Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index b0043fb..a5fadc4 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -82,6 +82,9 @@ struct vring_virtqueue /* Host supports indirect buffers */ bool indirect; + /* Host publishes avail event idx */ + bool event; + /* Number of free buffers */ unsigned int num_free; /* Head of free buffer list. */ @@ -237,18 +240,22 @@ EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp); void virtqueue_kick(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + u16 new, old; START_USE(vq); /* Descriptors and available array need to be set before we expose the * new available array entries. */ virtio_wmb(); - vq->vring.avail->idx += vq->num_added; + old = vq->vring.avail->idx; + new = vq->vring.avail->idx = old + vq->num_added; vq->num_added = 0; /* Need to update avail index before checking if we should notify */ virtio_mb(); - if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) + if (vq->event ? + vring_need_event(vring_avail_event(&vq->vring), new, old) : + !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) /* Prod other side to tell it about changes. */ vq->notify(&vq->vq); @@ -324,6 +331,14 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) ret = vq->data[i]; detach_buf(vq, i); vq->last_used_idx++; + /* If we expect an interrupt for the next entry, tell host + * by writing event index and flush out the write before + * the read in the next get_buf call. */ + if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { + vring_used_event(&vq->vring) = vq->last_used_idx; + virtio_mb(); + } + END_USE(vq); return ret; } @@ -345,7 +360,11 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) /* We optimistically turn back on interrupts, then check if there was * more to do. */ + /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to + * either clear the flags bit or point the event index at the next + * entry. Always do both to keep code simple. */ vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + vring_used_event(&vq->vring) = vq->last_used_idx; virtio_mb(); if (unlikely(more_used(vq))) { END_USE(vq); @@ -438,6 +457,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, #endif vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); + vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); /* No callback? Tell other side not to bother us. */ if (!callback) @@ -472,6 +492,8 @@ void vring_transport_features(struct virtio_device *vdev) switch (i) { case VIRTIO_RING_F_INDIRECT_DESC: break; + case VIRTIO_RING_F_EVENT_IDX: + break; default: /* We don't understand this bit. */ clear_bit(i, vdev->features); -- cgit v0.10.2 From 8ea8cf89e19aeb596b818ee5f2bec8a8b0586b60 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:10:54 +0300 Subject: vhost: support event index Support the new event index feature. When acked, utilize it to reduce the # of interrupts sent to the guest. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 2f7c76a..e224a92 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -144,7 +144,7 @@ static void handle_tx(struct vhost_net *net) } mutex_lock(&vq->mutex); - vhost_disable_notify(vq); + vhost_disable_notify(&net->dev, vq); if (wmem < sock->sk->sk_sndbuf / 2) tx_poll_stop(net); @@ -166,8 +166,8 @@ static void handle_tx(struct vhost_net *net) set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); break; } - if (unlikely(vhost_enable_notify(vq))) { - vhost_disable_notify(vq); + if (unlikely(vhost_enable_notify(&net->dev, vq))) { + vhost_disable_notify(&net->dev, vq); continue; } break; @@ -315,7 +315,7 @@ static void handle_rx(struct vhost_net *net) return; mutex_lock(&vq->mutex); - vhost_disable_notify(vq); + vhost_disable_notify(&net->dev, vq); vhost_hlen = vq->vhost_hlen; sock_hlen = vq->sock_hlen; @@ -334,10 +334,10 @@ static void handle_rx(struct vhost_net *net) break; /* OK, now we need to know about added descriptors. */ if (!headcount) { - if (unlikely(vhost_enable_notify(vq))) { + if (unlikely(vhost_enable_notify(&net->dev, vq))) { /* They have slipped one in as we were * doing that: check again. */ - vhost_disable_notify(vq); + vhost_disable_notify(&net->dev, vq); continue; } /* Nothing new? Wait for eventfd to tell us diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 099f302..734e1d7 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -49,7 +49,7 @@ static void handle_vq(struct vhost_test *n) return; mutex_lock(&vq->mutex); - vhost_disable_notify(vq); + vhost_disable_notify(&n->dev, vq); for (;;) { head = vhost_get_vq_desc(&n->dev, vq, vq->iov, @@ -61,8 +61,8 @@ static void handle_vq(struct vhost_test *n) break; /* Nothing new? Wait for eventfd to tell us they refilled. */ if (head == vq->num) { - if (unlikely(vhost_enable_notify(vq))) { - vhost_disable_notify(vq); + if (unlikely(vhost_enable_notify(&n->dev, vq))) { + vhost_disable_notify(&n->dev, vq); continue; } break; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 7aa4eea..ea966b3 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -37,6 +37,9 @@ enum { VHOST_MEMORY_F_LOG = 0x1, }; +#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num]) +#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num]) + static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { @@ -161,6 +164,8 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->last_avail_idx = 0; vq->avail_idx = 0; vq->last_used_idx = 0; + vq->signalled_used = 0; + vq->signalled_used_valid = false; vq->used_flags = 0; vq->log_used = false; vq->log_addr = -1ull; @@ -489,16 +494,17 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, return 1; } -static int vq_access_ok(unsigned int num, +static int vq_access_ok(struct vhost_dev *d, unsigned int num, struct vring_desc __user *desc, struct vring_avail __user *avail, struct vring_used __user *used) { + size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return access_ok(VERIFY_READ, desc, num * sizeof *desc) && access_ok(VERIFY_READ, avail, - sizeof *avail + num * sizeof *avail->ring) && + sizeof *avail + num * sizeof *avail->ring + s) && access_ok(VERIFY_WRITE, used, - sizeof *used + num * sizeof *used->ring); + sizeof *used + num * sizeof *used->ring + s); } /* Can we log writes? */ @@ -514,9 +520,11 @@ int vhost_log_access_ok(struct vhost_dev *dev) /* Verify access for write logging. */ /* Caller should have vq mutex and device mutex */ -static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) +static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq, + void __user *log_base) { struct vhost_memory *mp; + size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; mp = rcu_dereference_protected(vq->dev->memory, lockdep_is_held(&vq->mutex)); @@ -524,15 +532,15 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && (!vq->log_used || log_access_ok(log_base, vq->log_addr, sizeof *vq->used + - vq->num * sizeof *vq->used->ring)); + vq->num * sizeof *vq->used->ring + s)); } /* Can we start vq? */ /* Caller should have vq mutex and device mutex */ int vhost_vq_access_ok(struct vhost_virtqueue *vq) { - return vq_access_ok(vq->num, vq->desc, vq->avail, vq->used) && - vq_log_access_ok(vq, vq->log_base); + return vq_access_ok(vq->dev, vq->num, vq->desc, vq->avail, vq->used) && + vq_log_access_ok(vq->dev, vq, vq->log_base); } static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) @@ -577,6 +585,7 @@ static int init_used(struct vhost_virtqueue *vq, if (r) return r; + vq->signalled_used_valid = false; return get_user(vq->last_used_idx, &used->idx); } @@ -674,7 +683,7 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) * If it is not, we don't as size might not have been setup. * We will verify when backend is configured. */ if (vq->private_data) { - if (!vq_access_ok(vq->num, + if (!vq_access_ok(d, vq->num, (void __user *)(unsigned long)a.desc_user_addr, (void __user *)(unsigned long)a.avail_user_addr, (void __user *)(unsigned long)a.used_user_addr)) { @@ -818,7 +827,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg) vq = d->vqs + i; mutex_lock(&vq->mutex); /* If ring is inactive, will check when it's enabled. */ - if (vq->private_data && !vq_log_access_ok(vq, base)) + if (vq->private_data && !vq_log_access_ok(d, vq, base)) r = -EFAULT; else vq->log_base = base; @@ -1219,6 +1228,10 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, /* On success, increment avail index. */ vq->last_avail_idx++; + + /* Assume notifications from guest are disabled at this point, + * if they aren't we would need to update avail_event index. */ + BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); return head; } @@ -1267,6 +1280,12 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) eventfd_signal(vq->log_ctx, 1); } vq->last_used_idx++; + /* If the driver never bothers to signal in a very long while, + * used index might wrap around. If that happens, invalidate + * signalled_used index we stored. TODO: make sure driver + * signals at least once in 2^16 and remove this. */ + if (unlikely(vq->last_used_idx == vq->signalled_used)) + vq->signalled_used_valid = false; return 0; } @@ -1275,6 +1294,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, unsigned count) { struct vring_used_elem __user *used; + u16 old, new; int start; start = vq->last_used_idx % vq->num; @@ -1292,7 +1312,14 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, ((void __user *)used - (void __user *)vq->used), count * sizeof *used); } - vq->last_used_idx += count; + old = vq->last_used_idx; + new = (vq->last_used_idx += count); + /* If the driver never bothers to signal in a very long while, + * used index might wrap around. If that happens, invalidate + * signalled_used index we stored. TODO: make sure driver + * signals at least once in 2^16 and remove this. */ + if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old))) + vq->signalled_used_valid = false; return 0; } @@ -1331,29 +1358,47 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, return r; } -/* This actually signals the guest, using eventfd. */ -void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) +static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { - __u16 flags; - + __u16 old, new, event; + bool v; /* Flush out used index updates. This is paired * with the barrier that the Guest executes when enabling * interrupts. */ smp_mb(); - if (__get_user(flags, &vq->avail->flags)) { - vq_err(vq, "Failed to get flags"); - return; + if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) && + unlikely(vq->avail_idx == vq->last_avail_idx)) + return true; + + if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + __u16 flags; + if (__get_user(flags, &vq->avail->flags)) { + vq_err(vq, "Failed to get flags"); + return true; + } + return !(flags & VRING_AVAIL_F_NO_INTERRUPT); } + old = vq->signalled_used; + v = vq->signalled_used_valid; + new = vq->signalled_used = vq->last_used_idx; + vq->signalled_used_valid = true; - /* If they don't want an interrupt, don't signal, unless empty. */ - if ((flags & VRING_AVAIL_F_NO_INTERRUPT) && - (vq->avail_idx != vq->last_avail_idx || - !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY))) - return; + if (unlikely(!v)) + return true; + if (get_user(event, vhost_used_event(vq))) { + vq_err(vq, "Failed to get used event idx"); + return true; + } + return vring_need_event(event, new, old); +} + +/* This actually signals the guest, using eventfd. */ +void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) +{ /* Signal the Guest tell them we used something up. */ - if (vq->call_ctx) + if (vq->call_ctx && vhost_notify(dev, vq)) eventfd_signal(vq->call_ctx, 1); } @@ -1376,7 +1421,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev, } /* OK, now we need to know about added descriptors. */ -bool vhost_enable_notify(struct vhost_virtqueue *vq) +bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { u16 avail_idx; int r; @@ -1384,11 +1429,34 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq) if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) return false; vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; - r = put_user(vq->used_flags, &vq->used->flags); - if (r) { - vq_err(vq, "Failed to enable notification at %p: %d\n", - &vq->used->flags, r); - return false; + if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + r = put_user(vq->used_flags, &vq->used->flags); + if (r) { + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); + return false; + } + } else { + r = put_user(vq->avail_idx, vhost_avail_event(vq)); + if (r) { + vq_err(vq, "Failed to update avail event index at %p: %d\n", + vhost_avail_event(vq), r); + return false; + } + } + if (unlikely(vq->log_used)) { + void __user *used; + /* Make sure data is seen before log. */ + smp_wmb(); + used = vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX) ? + &vq->used->flags : vhost_avail_event(vq); + /* Log used flags or event index entry write. Both are 16 bit + * fields. */ + log_write(vq->log_base, vq->log_addr + + (used - (void __user *)vq->used), + sizeof(u16)); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); } /* They could have slipped one in as we were doing that: make * sure it's written, then check again. */ @@ -1404,15 +1472,17 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq) } /* We don't need to be notified again. */ -void vhost_disable_notify(struct vhost_virtqueue *vq) +void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { int r; if (vq->used_flags & VRING_USED_F_NO_NOTIFY) return; vq->used_flags |= VRING_USED_F_NO_NOTIFY; - r = put_user(vq->used_flags, &vq->used->flags); - if (r) - vq_err(vq, "Failed to enable notification at %p: %d\n", - &vq->used->flags, r); + if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + r = put_user(vq->used_flags, &vq->used->flags); + if (r) + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); + } } diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index b3363ae..8e03379 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -84,6 +84,12 @@ struct vhost_virtqueue { /* Used flags */ u16 used_flags; + /* Last used index value we have signalled on */ + u16 signalled_used; + + /* Last used index value we have signalled on */ + bool signalled_used_valid; + /* Log writes to used structure. */ bool log_used; u64 log_addr; @@ -149,8 +155,8 @@ void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *, void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *, struct vring_used_elem *heads, unsigned count); void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); -void vhost_disable_notify(struct vhost_virtqueue *); -bool vhost_enable_notify(struct vhost_virtqueue *); +void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *); +bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, unsigned int log_num, u64 len); @@ -162,11 +168,12 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, } while (0) enum { - VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) | - (1 << VIRTIO_RING_F_INDIRECT_DESC) | - (1 << VHOST_F_LOG_ALL) | - (1 << VHOST_NET_F_VIRTIO_NET_HDR) | - (1 << VIRTIO_NET_F_MRG_RXBUF), + VHOST_FEATURES = (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | + (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | + (1ULL << VIRTIO_RING_F_EVENT_IDX) | + (1ULL << VHOST_F_LOG_ALL) | + (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | + (1ULL << VIRTIO_NET_F_MRG_RXBUF), }; static inline int vhost_has_feature(struct vhost_dev *dev, int bit) -- cgit v0.10.2 From 4423fe40b03f32b11e72ecfa03077e702e55d5a9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:11:05 +0300 Subject: virtio_test: support event index Add ability to test the new event idx feature, enable by default. Signed-off-by: Rusty Russell diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index df0c6d2..74d3331 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -198,6 +198,14 @@ const struct option longopts[] = { .val = 'h', }, { + .name = "event-idx", + .val = 'E', + }, + { + .name = "no-event-idx", + .val = 'e', + }, + { .name = "indirect", .val = 'I', }, @@ -211,13 +219,17 @@ const struct option longopts[] = { static void help() { - fprintf(stderr, "Usage: virtio_test [--help] [--no-indirect]\n"); + fprintf(stderr, "Usage: virtio_test [--help]" + " [--no-indirect]" + " [--no-event-idx]" + "\n"); } int main(int argc, char **argv) { struct vdev_info dev; - unsigned long long features = 1ULL << VIRTIO_RING_F_INDIRECT_DESC; + unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | + (1ULL << VIRTIO_RING_F_EVENT_IDX); int o; for (;;) { @@ -228,6 +240,9 @@ int main(int argc, char **argv) case '?': help(); exit(2); + case 'e': + features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX); + break; case 'h': help(); goto done; -- cgit v0.10.2 From 7ab358c23cbf15cea08129cd722d1ce77433a94d Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:11:14 +0300 Subject: virtio: add api for delayed callbacks Add an API that tells the other side that callbacks should be delayed until a lot of work has been done. Implement using the new event_idx feature. Note: it might seem advantageous to let the drivers ask for a callback after a specific capacity has been reached. However, as a single head can free many entries in the descriptor table, we don't really have a clue about capacity until get_buf is called. The API is the simplest to implement at the moment, we'll see what kind of hints drivers can pass when there's more than one user of the feature. Signed-off-by: Michael S. Tsirkin Signed-off-by: Rusty Russell diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index a5fadc4..68b9136 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -376,6 +376,33 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_enable_cb); +bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + u16 bufs; + + START_USE(vq); + + /* We optimistically turn back on interrupts, then check if there was + * more to do. */ + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to + * either clear the flags bit or point the event index at the next + * entry. Always do both to keep code simple. */ + vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + /* TODO: tune this threshold */ + bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4; + vring_used_event(&vq->vring) = vq->last_used_idx + bufs; + virtio_mb(); + if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) { + END_USE(vq); + return false; + } + + END_USE(vq); + return true; +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); + void *virtqueue_detach_unused_buf(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index aff5b4f..7108857 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -51,6 +51,13 @@ struct virtqueue { * This re-enables callbacks; it returns "false" if there are pending * buffers in the queue, to detect a possible race between the driver * checking for more work, and enabling callbacks. + * virtqueue_enable_cb_delayed: restart callbacks after disable_cb. + * vq: the struct virtqueue we're talking about. + * This re-enables callbacks but hints to the other side to delay + * interrupts until most of the available buffers have been processed; + * it returns "false" if there are many pending buffers in the queue, + * to detect a possible race between the driver checking for more work, + * and enabling callbacks. * virtqueue_detach_unused_buf: detach first unused buffer * vq: the struct virtqueue we're talking about. * Returns NULL or the "data" token handed to add_buf @@ -86,6 +93,8 @@ void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); +bool virtqueue_enable_cb_delayed(struct virtqueue *vq); + void *virtqueue_detach_unused_buf(struct virtqueue *vq); /** -- cgit v0.10.2 From 7a66f784375c5922315bbe879b789ee50b924d26 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 20 May 2011 02:11:23 +0300 Subject: virtio_net: delay TX callbacks Ask for delayed callbacks on TX ring full, to give the other side more of a chance to make progress. Signed-off-by: Michael S. Tsirkin Acked-by: David S. Miller Signed-off-by: Rusty Russell diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0cb0b06..f685324 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -609,7 +609,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) * before it gets out of hand. Naturally, this wastes entries. */ if (capacity < 2+MAX_SKB_FRAGS) { netif_stop_queue(dev); - if (unlikely(!virtqueue_enable_cb(vi->svq))) { + if (unlikely(!virtqueue_enable_cb_delayed(vi->svq))) { /* More just got used, free them then recheck. */ capacity += free_old_xmit_skbs(vi); if (capacity >= 2+MAX_SKB_FRAGS) { -- cgit v0.10.2 From a1706ac4c0201ea0143dc0db0659001b26ceeabb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 30 May 2011 07:42:51 +0200 Subject: Revert "block: Remove extra discard_alignment from hd_struct." It was not a good idea to start dereferencing disk->queue from the fs sysfs strategy for displaying discard alignment. We ran into first a NULL pointer deref, and after fixing that we sometimes see unvalid disk->queue pointer values. Since discard is the only one of the bunch actually looking into the queue, just revert the change. This reverts commit 23ceb5b7719e9276d4fa72a3ecf94dd396755276. Conflicts: fs/partitions/check.c diff --git a/fs/partitions/check.c b/fs/partitions/check.c index f82e762..d545e97 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -255,13 +255,7 @@ ssize_t part_discard_alignment_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); - struct gendisk *disk = dev_to_disk(dev); - unsigned int alignment = 0; - - if (disk->queue) - alignment = queue_limit_discard_alignment(&disk->queue->limits, - p->start_sect); - return sprintf(buf, "%u\n", alignment); + return sprintf(buf, "%u\n", p->discard_alignment); } ssize_t part_stat_show(struct device *dev, @@ -455,6 +449,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, p->start_sect = start; p->alignment_offset = queue_limit_alignment_offset(&disk->queue->limits, start); + p->discard_alignment = + queue_limit_discard_alignment(&disk->queue->limits, start); p->nr_sects = len; p->partno = partno; p->policy = get_disk_ro(disk); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index b78956b..300d758 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -100,6 +100,7 @@ struct hd_struct { sector_t start_sect; sector_t nr_sects; sector_t alignment_offset; + unsigned int discard_alignment; struct device __dev; struct kobject *holder_dir; int policy, partno; -- cgit v0.10.2 From 3cebde2413ba42504cf2c10ec1d47582912435cd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 29 May 2011 21:20:59 -0700 Subject: vfs: shrink_dcache_parent before rmdir, dir rename The dentry_unhash push-down series missed that shink_dcache_parent needs to be called prior to rmdir or dir rename to clear DCACHE_REFERENCED and allow efficient dentry reclaim. Reported-by: Dave Chinner Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 1ab641f..e2e4e8d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2579,6 +2579,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) if (error) goto out; + shrink_dcache_parent(dentry); error = dir->i_op->rmdir(dir, dentry); if (error) goto out; @@ -2993,6 +2994,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) goto out; + if (target) + shrink_dcache_parent(new_dentry); error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); if (error) goto out; -- cgit v0.10.2 From c7427d23f7ed695ac226dbe3a84d7f19091d34ce Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 30 May 2011 01:50:53 -0400 Subject: autofs4: bogus dentry_unhash() added in ->unlink() Signed-off-by: Al Viro diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 87d95a8..f55ae23 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -583,8 +583,6 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) return -EACCES; - dentry_unhash(dentry); - if (atomic_dec_and_test(&ino->count)) { p_ino = autofs4_dentry_ino(dentry->d_parent); if (p_ino && dentry->d_parent != dentry) -- cgit v0.10.2 From 598e887d8b01655780c81cc86a9e7820ed091580 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 30 May 2011 11:38:06 +0200 Subject: x86 idle: Fix mwait deprecation warning message Fix: arch/x86/kernel/process.c:645:1: warning: unknown escape sequence '\i' due to missing escape backslash, introduced by this commit: 5d4c47e0195b: x86 idle: deprecate mwait_idle() and "idle=mwait" cmdline param Signed-off-by: Borislav Petkov Cc: Len Brown Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1306748286-24701-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 426a5b6..3d83a71 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -642,7 +642,7 @@ static int __init idle_setup(char *str) boot_option_idle_override = IDLE_POLL; } else if (!strcmp(str, "mwait")) { boot_option_idle_override = IDLE_FORCE_MWAIT; - WARN_ONCE(1, "\idle=mwait\" will be removed in 2012\"\n"); + WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\"\n"); } else if (!strcmp(str, "halt")) { /* * When the boot option of idle=halt is added, halt is -- cgit v0.10.2 From 4f3c125c7420c85eaff627145557e392a871922d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 30 May 2011 08:23:57 -0400 Subject: x86: Fix mwait_play_dead() faulting on mwait-incapable cpus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A logic error in mwait_play_dead() causes the kernel to use mwait even on cpus which don't support it, such as KVM virtual cpus. Introduced by: 349c004e3d31: x86: A fast way to check capabilities of the current cpu Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=36222 Reported-by: Török Edwin Signed-off-by: Avi Kivity Cc: Christoph Lameter Cc: Tejun Heo Link: http://lkml.kernel.org/r/1306758237-9327-1-git-send-email-avi@redhat.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index eefd967..33a0c11 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1332,7 +1332,7 @@ static inline void mwait_play_dead(void) void *mwait_ptr; struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); - if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)) + if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))) return; if (!this_cpu_has(X86_FEATURE_CLFLSH)) return; -- cgit v0.10.2 From 194cd8dfc9e4f29249b3bd45c5cb5c0a33a6438c Mon Sep 17 00:00:00 2001 From: Nobuhiro Iwamatsu Date: Tue, 31 May 2011 13:27:41 +0900 Subject: sh: asm/tlb.h needs linux/swap.h Commit 1e56a56410bb64bce62d44563e35a143fc2d515f introduced the mmu_gather rework for sh, but missed a linux/swap.h include: CC arch/sh/mm/tlb-urb.o In file included from arch/sh/mm/tlb-urb.c:14:0: arch/sh/include/asm/tlb.h: In function '__tlb_remove_page': arch/sh/include/asm/tlb.h:92:2: error: implicit declaration of function 'free_page_and_swap_cache' Signed-off-by: Nobuhiro Iwamatsu CC: Peter Zijlstra Signed-off-by: Paul Mundt diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 6c308d8..ec88bfc 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -9,6 +9,7 @@ #include #ifdef CONFIG_MMU +#include #include #include #include -- cgit v0.10.2 From 65d517eb7224d24ee4206416161390f30d69e622 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 31 May 2011 14:37:44 +0900 Subject: sh64: asm/pgtable.h needs asm/mmu.h Needed to satisfy the __in_29bit_mode() check. Signed-off-by: Paul Mundt diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h index db85916..9210e93 100644 --- a/arch/sh/include/asm/pgtable.h +++ b/arch/sh/include/asm/pgtable.h @@ -18,6 +18,7 @@ #include #endif #include +#include #ifndef __ASSEMBLY__ #include -- cgit v0.10.2 From 3f9b8520b06013939ad247ba08b69529b5f14be1 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 31 May 2011 14:38:29 +0900 Subject: sh64: Move from P1SEG to CAC_ADDR for consistent sync. sh64 doesn't define a P1SEGADDR, resulting in a build failure. The proper mapping can be attained for both sh32 and 64 via the CAC_ADDR macro, so switch to that instead. Signed-off-by: Paul Mundt diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index 40733a9..f251b5f 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -82,7 +82,7 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size, void *addr; addr = __in_29bit_mode() ? - (void *)P1SEGADDR((unsigned long)vaddr) : vaddr; + (void *)CAC_ADDR((unsigned long)vaddr) : vaddr; switch (direction) { case DMA_FROM_DEVICE: /* invalidate only */ -- cgit v0.10.2 From db7eba292e913390fa881272bfbc3da0a5380513 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 31 May 2011 14:39:49 +0900 Subject: sh: Fix up asm-generic/ptrace.h fallout. There was an ordering issue with regards to instruction_pointer() being used in profile_pc() prior to the asm-generic/ptrace.h include, which subsequently provided the instruction_pointer() definition. In the interest of simplicity we simply open-code the regs->pc deref for the profile_pc() definition instead. The FP functions were also broken due to a lack of a common regs->fp, so provide a common GET_FP() that is safe for both architectures in order to fix up the frame pointer helpers too. Signed-off-by: Paul Mundt diff --git a/arch/sh/include/asm/ptrace.h b/arch/sh/include/asm/ptrace.h index 40725b4..88bd6be 100644 --- a/arch/sh/include/asm/ptrace.h +++ b/arch/sh/include/asm/ptrace.h @@ -41,7 +41,9 @@ #define user_mode(regs) (((regs)->sr & 0x40000000)==0) #define kernel_stack_pointer(_regs) ((unsigned long)(_regs)->regs[15]) -#define GET_USP(regs) ((regs)->regs[15]) + +#define GET_FP(regs) ((regs)->regs[14]) +#define GET_USP(regs) ((regs)->regs[15]) extern void show_regs(struct pt_regs *); @@ -131,7 +133,7 @@ extern void ptrace_triggered(struct perf_event *bp, int nmi, static inline unsigned long profile_pc(struct pt_regs *regs) { - unsigned long pc = instruction_pointer(regs); + unsigned long pc = regs->pc; if (virt_addr_uncached(pc)) return CAC_ADDR(pc); -- cgit v0.10.2 From d4905ce38c73964b868037e49a5945e1cf47a7f2 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 31 May 2011 15:23:20 +0900 Subject: Revert "clocksource: sh_tmu: Runtime PM support" This reverts commit 1b842e91fea9447eff5eb687e28ad61c02f5033e. There is a fundamental ordering race between the early and late probe paths and the runtime PM tie-in that results in __pm_runtime_resume() attempting to take a lock that hasn't been initialized yet (which by proxy also suggests that pm_runtime_init() hasn't yet been run on the device either, making the entire thing unsafe) -- resulting in instant death on SMP or on UP with spinlock debugging enabled: sh_tmu.0: used for clock events sh_tmu.0: used for periodic clock events BUG: spinlock trylock failure on UP on CPU#0, swapper/0 lock: 804db198, .magic: 00000000, .owner: /-1, .owner_cpu: 0 ... Revert it for now until the ordering issues can be resolved, or we can get some more help from the runtime PM framework to make this possible. Signed-off-by: Paul Mundt diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c index 1729628..8081357 100644 --- a/drivers/clocksource/sh_tmu.c +++ b/drivers/clocksource/sh_tmu.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -110,12 +109,10 @@ static int sh_tmu_enable(struct sh_tmu_priv *p) { int ret; - /* wake up device and enable clock */ - pm_runtime_get_sync(&p->pdev->dev); + /* enable clock */ ret = clk_enable(p->clk); if (ret) { dev_err(&p->pdev->dev, "cannot enable clock\n"); - pm_runtime_put_sync(&p->pdev->dev); return ret; } @@ -144,9 +141,8 @@ static void sh_tmu_disable(struct sh_tmu_priv *p) /* disable interrupts in TMU block */ sh_tmu_write(p, TCR, 0x0000); - /* stop clock and mark device as idle */ + /* stop clock */ clk_disable(p->clk); - pm_runtime_put_sync(&p->pdev->dev); } static void sh_tmu_set_next(struct sh_tmu_priv *p, unsigned long delta, @@ -415,7 +411,6 @@ static int __devinit sh_tmu_probe(struct platform_device *pdev) if (p) { dev_info(&pdev->dev, "kept as earlytimer\n"); - pm_runtime_enable(&pdev->dev); return 0; } @@ -430,9 +425,6 @@ static int __devinit sh_tmu_probe(struct platform_device *pdev) kfree(p); platform_set_drvdata(pdev, NULL); } - - if (!is_early_platform_device(pdev)) - pm_runtime_enable(&pdev->dev); return ret; } -- cgit v0.10.2 From 9436b4abec28a22edd961ae375535d940625f1f2 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 31 May 2011 15:26:42 +0900 Subject: Revert "clocksource: sh_cmt: Runtime PM support" This reverts commit 01fa68b58492a5d6708a91c1f474b6a099a9509e. The same note as per the sh_tmu change applies here, too. Signed-off-by: Paul Mundt diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 036e586..dc7c033 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -153,12 +152,10 @@ static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate) { int ret; - /* wake up device and enable clock */ - pm_runtime_get_sync(&p->pdev->dev); + /* enable clock */ ret = clk_enable(p->clk); if (ret) { dev_err(&p->pdev->dev, "cannot enable clock\n"); - pm_runtime_put_sync(&p->pdev->dev); return ret; } @@ -190,9 +187,8 @@ static void sh_cmt_disable(struct sh_cmt_priv *p) /* disable interrupts in CMT block */ sh_cmt_write(p, CMCSR, 0); - /* stop clock and mark device as idle */ + /* stop clock */ clk_disable(p->clk); - pm_runtime_put_sync(&p->pdev->dev); } /* private flags */ @@ -664,7 +660,6 @@ static int __devinit sh_cmt_probe(struct platform_device *pdev) if (p) { dev_info(&pdev->dev, "kept as earlytimer\n"); - pm_runtime_enable(&pdev->dev); return 0; } @@ -679,9 +674,6 @@ static int __devinit sh_cmt_probe(struct platform_device *pdev) kfree(p); platform_set_drvdata(pdev, NULL); } - - if (!is_early_platform_device(pdev)) - pm_runtime_enable(&pdev->dev); return ret; } -- cgit v0.10.2 From 5c2de44417523385010b529599a2b30f290831a3 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 31 May 2011 15:53:03 +0900 Subject: dmaengine: shdma: Fix up fallout from runtime PM changes. The runtime PM changes introduce sh_dmae_rst() wrapping via the runtime_resume helper, depending on dev_get_drvdata() to fetch the platform data needed for the DMAOR initialization default at a time where drvdata hasn't yet been established by the probe path, resulting in general probe misery: Unable to handle kernel NULL pointer dereference at virtual address 000000c4 pc = 8025adee *pde = 00000000 Oops: 0000 [#1] Modules linked in: Pid : 1, Comm: swapper CPU : 0 Not tainted (3.0.0-rc1-00012-g9436b4a-dirty #1456) PC is at sh_dmae_rst+0x28/0x86 PR is at sh_dmae_rst+0x22/0x86 PC : 8025adee SP : 9e803d10 SR : 400080f1 TEA : 000000c4 R0 : 000000c4 R1 : 0000fff8 R2 : 00000000 R3 : 00000040 R4 : 000000f0 R5 : 00000000 R6 : 00000000 R7 : 804f184c R8 : 00000000 R9 : 804dd0e8 R10 : 80283204 R11 : ffffffda R12 : 000000a0 R13 : 804dd18c R14 : 9e803d10 MACH: 00000000 MACL: 00008f20 GBR : 00000000 PR : 8025ade8 Call trace: [<8025ae70>] sh_dmae_runtime_resume+0x24/0x34 [<80283238>] pm_generic_runtime_resume+0x34/0x3c [<80283370>] rpm_callback+0x4a/0x7e [<80283efc>] rpm_resume+0x240/0x384 [<80283f54>] rpm_resume+0x298/0x384 [<8028428c>] __pm_runtime_resume+0x44/0x7c [<8038a358>] __ioremap_caller+0x0/0xec [<80284296>] __pm_runtime_resume+0x4e/0x7c [<8038a358>] __ioremap_caller+0x0/0xec [<80666254>] sh_dmae_probe+0x180/0x6a0 [<802803ae>] platform_drv_probe+0x26/0x2e Fix up the ordering accordingly. Signed-off-by: Paul Mundt diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index 636e409..727e76f 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -1144,6 +1144,8 @@ static int __init sh_dmae_probe(struct platform_device *pdev) /* platform data */ shdev->pdata = pdata; + platform_set_drvdata(pdev, shdev); + pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); @@ -1256,7 +1258,6 @@ static int __init sh_dmae_probe(struct platform_device *pdev) pm_runtime_put(&pdev->dev); - platform_set_drvdata(pdev, shdev); dma_async_device_register(&shdev->common); return err; @@ -1278,6 +1279,8 @@ rst_err: if (dmars) iounmap(shdev->dmars); + + platform_set_drvdata(pdev, NULL); emapdmars: iounmap(shdev->chan_reg); synchronize_rcu(); @@ -1316,6 +1319,8 @@ static int __exit sh_dmae_remove(struct platform_device *pdev) iounmap(shdev->dmars); iounmap(shdev->chan_reg); + platform_set_drvdata(pdev, NULL); + synchronize_rcu(); kfree(shdev); -- cgit v0.10.2 From d72bce0e67e8afc6eb959f656013cbb577426f1e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 May 2011 13:34:51 +0200 Subject: rcu: Cure load woes Commit cc3ce5176d83 (rcu: Start RCU kthreads in TASK_INTERRUPTIBLE state) fudges a sleeping task' state, resulting in the scheduler seeing a TASK_UNINTERRUPTIBLE task going to sleep, but a TASK_INTERRUPTIBLE task waking up. The result is unbalanced load calculation. The problem that patch tried to address is that the RCU threads could stay in UNINTERRUPTIBLE state for quite a while and triggering the hung task detector due to on-demand wake-ups. Cure the problem differently by always giving the tasks at least one wake-up once the CPU is fully up and running, this will kick them out of the initial UNINTERRUPTIBLE state and into the regular INTERRUPTIBLE wait state. [ The alternative would be teaching kthread_create() to start threads as INTERRUPTIBLE but that needs a tad more thought. ] Reported-by: Damien Wyart Signed-off-by: Peter Zijlstra Acked-by: Paul E. McKenney Link: http://lkml.kernel.org/r/1306755291.1200.2872.camel@twins Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 77a7671..89419ff 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1648,7 +1648,6 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) if (IS_ERR(t)) return PTR_ERR(t); kthread_bind(t, cpu); - set_task_state(t, TASK_INTERRUPTIBLE); per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); per_cpu(rcu_cpu_kthread_task, cpu) = t; @@ -1756,7 +1755,6 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, if (IS_ERR(t)) return PTR_ERR(t); raw_spin_lock_irqsave(&rnp->lock, flags); - set_task_state(t, TASK_INTERRUPTIBLE); rnp->node_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); sp.sched_priority = 99; @@ -1765,6 +1763,8 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); } +static void rcu_wake_one_boost_kthread(struct rcu_node *rnp); + /* * Spawn all kthreads -- called as soon as the scheduler is running. */ @@ -1772,18 +1772,30 @@ static int __init rcu_spawn_kthreads(void) { int cpu; struct rcu_node *rnp; + struct task_struct *t; rcu_kthreads_spawnable = 1; for_each_possible_cpu(cpu) { per_cpu(rcu_cpu_has_work, cpu) = 0; - if (cpu_online(cpu)) + if (cpu_online(cpu)) { (void)rcu_spawn_one_cpu_kthread(cpu); + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (t) + wake_up_process(t); + } } rnp = rcu_get_root(rcu_state); (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + if (rnp->node_kthread_task) + wake_up_process(rnp->node_kthread_task); if (NUM_RCU_NODES > 1) { - rcu_for_each_leaf_node(rcu_state, rnp) + rcu_for_each_leaf_node(rcu_state, rnp) { (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + t = rnp->node_kthread_task; + if (t) + wake_up_process(t); + rcu_wake_one_boost_kthread(rnp); + } } return 0; } @@ -2188,14 +2200,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) raw_spin_unlock_irqrestore(&rsp->onofflock, flags); } -static void __cpuinit rcu_online_cpu(int cpu) +static void __cpuinit rcu_prepare_cpu(int cpu) { rcu_init_percpu_data(cpu, &rcu_sched_state, 0); rcu_init_percpu_data(cpu, &rcu_bh_state, 0); rcu_preempt_init_percpu_data(cpu); } -static void __cpuinit rcu_online_kthreads(int cpu) +static void __cpuinit rcu_prepare_kthreads(int cpu) { struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); struct rcu_node *rnp = rdp->mynode; @@ -2209,6 +2221,31 @@ static void __cpuinit rcu_online_kthreads(int cpu) } /* + * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state, + * but the RCU threads are woken on demand, and if demand is low this + * could be a while triggering the hung task watchdog. + * + * In order to avoid this, poke all tasks once the CPU is fully + * up and running. + */ +static void __cpuinit rcu_online_kthreads(int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); + struct rcu_node *rnp = rdp->mynode; + struct task_struct *t; + + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (t) + wake_up_process(t); + + t = rnp->node_kthread_task; + if (t) + wake_up_process(t); + + rcu_wake_one_boost_kthread(rnp); +} + +/* * Handle CPU online/offline notification events. */ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, @@ -2221,10 +2258,11 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - rcu_online_cpu(cpu); - rcu_online_kthreads(cpu); + rcu_prepare_cpu(cpu); + rcu_prepare_kthreads(cpu); break; case CPU_ONLINE: + rcu_online_kthreads(cpu); case CPU_DOWN_FAILED: rcu_node_kthread_setaffinity(rnp, -1); rcu_cpu_kthread_setrt(cpu, 1); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index a767b7d..c8bff30 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1295,7 +1295,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, if (IS_ERR(t)) return PTR_ERR(t); raw_spin_lock_irqsave(&rnp->lock, flags); - set_task_state(t, TASK_INTERRUPTIBLE); rnp->boost_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); sp.sched_priority = RCU_KTHREAD_PRIO; @@ -1303,6 +1302,12 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, return 0; } +static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) +{ + if (rnp->boost_kthread_task) + wake_up_process(rnp->boost_kthread_task); +} + #else /* #ifdef CONFIG_RCU_BOOST */ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) @@ -1326,6 +1331,10 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, return 0; } +static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) +{ +} + #endif /* #else #ifdef CONFIG_RCU_BOOST */ #ifndef CONFIG_SMP -- cgit v0.10.2 From 339dedf709e21d5718d6596750166f70e8bed40a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 31 May 2011 18:01:23 +1000 Subject: powerpc/pmac: Don't register pmac PIC syscore ops when HW not present The Apple custom PIC only exist in some earlier machine models, anything with an MPIC will crash on suspend if we register those syscore ops unconditionally. This is a regression caused by commit f5a592f7d74e ("PM / PowerPC: Use struct syscore_ops instead of sysdevs for PM") Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 9089b04..7667db4 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -715,7 +715,8 @@ static struct syscore_ops pmacpic_syscore_ops = { static int __init init_pmacpic_syscore(void) { - register_syscore_ops(&pmacpic_syscore_ops); + if (pmac_irq_hw[0]) + register_syscore_ops(&pmacpic_syscore_ops); return 0; } -- cgit v0.10.2 From ea9d6553b3b3044e7374774cc33bb1b2eee19dd3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 31 May 2011 13:45:53 +0200 Subject: block: remove unwanted semicolons Since those defined functions require additional semicolon from the caller, they could cause potential syntax errors when used in if-else statements. Signed-off-by: Namhyung Kim Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ae9091a..1a23722 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1282,8 +1282,8 @@ queue_max_integrity_segments(struct request_queue *q) #define blk_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) -#define blk_integrity_unregister(a) do { } while (0); -#define blk_queue_max_integrity_segments(a, b) do { } while (0); +#define blk_integrity_unregister(a) do { } while (0) +#define blk_queue_max_integrity_segments(a, b) do { } while (0) #define queue_max_integrity_segments(a) (0) #define blk_integrity_merge_rq(a, b, c) (0) #define blk_integrity_merge_bio(a, b, c) (0) -- cgit v0.10.2 From 83caba8436b28bd9081013fd840424983127d4ca Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 31 May 2011 10:09:24 -0700 Subject: [IA64] wire up sendmmsg() syscall for Itanium Add entries in unistd.h and entry.S to make this new syscall visible. Signed-off-by: Tony Luck diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 1cf0f49..7c928da 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -320,11 +320,12 @@ #define __NR_clock_adjtime 1328 #define __NR_syncfs 1329 #define __NR_setns 1330 +#define __NR_sendmmsg 1331 #ifdef __KERNEL__ -#define NR_syscalls 307 /* length of syscall table */ +#define NR_syscalls 308 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 9ca8019..97dd2ab 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1776,6 +1776,7 @@ sys_call_table: data8 sys_clock_adjtime data8 sys_syncfs data8 sys_setns // 1330 + data8 sys_sendmmsg .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ -- cgit v0.10.2 From 4495a7d41dbda03841c2a1c2a5ce7135a45131ba Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Tue, 31 May 2011 10:04:09 +0200 Subject: CFQ: Fix typo and remove unnecessary semicolon Fix comment typo and remove unnecessary semicolon at macro Signed-off-by: Kyungmin Park Signed-off-by: Jens Axboe diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 7c52d68..8a02c95 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -185,7 +185,7 @@ struct cfq_group { int nr_cfqq; /* - * Per group busy queus average. Useful for workload slice calc. We + * Per group busy queues average. Useful for workload slice calc. We * create the array for each prio class but at run time it is used * only for RT and BE class and slot for IDLE class remains unused. * This is primarily done to avoid confusion and a gcc warning. @@ -369,16 +369,16 @@ CFQ_CFQQ_FNS(wait_busy); #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ - blkg_path(&(cfqq)->cfqg->blkg), ##args); + blkg_path(&(cfqq)->cfqg->blkg), ##args) #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) \ blk_add_trace_msg((cfqd)->queue, "%s " fmt, \ - blkg_path(&(cfqg)->blkg), ##args); \ + blkg_path(&(cfqg)->blkg), ##args) \ #else #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args) -#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0); +#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0) #endif #define cfq_log(cfqd, fmt, args...) \ blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args) -- cgit v0.10.2 From 71005be40a7fc95edda3cc462361ce0243e4f5fa Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 26 May 2011 21:31:08 +0100 Subject: libertas: Set command sequence number later to ensure consistency Before this patch, the command sequence number is being set before lbs_queue_cmd() adds the command to the queue. However, lbs_queue_cmd() sometimes forces commands to queue-jump (e.g. CMD_802_11_WAKEUP_CONFIRM). It currently does this without considering that sequence numbers might need adjusting to keep things running in order. Fix this by setting the sequence number at a later stage, just before we're actually submitting the command to the hardware. Also fixes a possible race where seqnum was being modified outside of the driver lock. Signed-off-by: Daniel Drake Acked-by: Dan Williams Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c index 84566db..71c8f3f 100644 --- a/drivers/net/wireless/libertas/cmd.c +++ b/drivers/net/wireless/libertas/cmd.c @@ -994,6 +994,8 @@ static void lbs_submit_command(struct lbs_private *priv, cmd = cmdnode->cmdbuf; spin_lock_irqsave(&priv->driver_lock, flags); + priv->seqnum++; + cmd->seqnum = cpu_to_le16(priv->seqnum); priv->cur_cmd = cmdnode; spin_unlock_irqrestore(&priv->driver_lock, flags); @@ -1621,11 +1623,9 @@ struct cmd_ctrl_node *__lbs_cmd_async(struct lbs_private *priv, /* Copy the incoming command to the buffer */ memcpy(cmdnode->cmdbuf, in_cmd, in_cmd_size); - /* Set sequence number, clean result, move to buffer */ - priv->seqnum++; + /* Set command, clean result, move to buffer */ cmdnode->cmdbuf->command = cpu_to_le16(command); cmdnode->cmdbuf->size = cpu_to_le16(in_cmd_size); - cmdnode->cmdbuf->seqnum = cpu_to_le16(priv->seqnum); cmdnode->cmdbuf->result = 0; lbs_deb_host("PREP_CMD: command 0x%04x\n", command); -- cgit v0.10.2 From dd08682150e1815fe5cdd0673a2f2e9cd2d55a7a Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 27 May 2011 15:34:45 +0300 Subject: wl12xx: fix passive and radar channel generation for scheduled scan We were comparing bitwise AND results with a boolean, so when the boolean was set to true, it was not matching as it should. Fix this by booleanizing the bitwise AND results with !!. Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/wl12xx/scan.c b/drivers/net/wireless/wl12xx/scan.c index f37e5a3..f223e0e 100644 --- a/drivers/net/wireless/wl12xx/scan.c +++ b/drivers/net/wireless/wl12xx/scan.c @@ -338,8 +338,8 @@ wl1271_scan_get_sched_scan_channels(struct wl1271 *wl, flags = req->channels[i]->flags; if (!(flags & IEEE80211_CHAN_DISABLED) && - ((flags & IEEE80211_CHAN_PASSIVE_SCAN) == passive) && - ((flags & IEEE80211_CHAN_RADAR) == radar) && + (!!(flags & IEEE80211_CHAN_PASSIVE_SCAN) == passive) && + (!!(flags & IEEE80211_CHAN_RADAR) == radar) && (req->channels[i]->band == band)) { wl1271_debug(DEBUG_SCAN, "band %d, center_freq %d ", req->channels[i]->band, -- cgit v0.10.2 From 2497a246e880d1fb537f754f551177c01fa39242 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 27 May 2011 15:34:46 +0300 Subject: wl12xx: fix DFS channels handling in scheduled scan DFS channels were never getting included in the scheduled scans, because they always contain the passive flag as well and the call was asking for DFS and active channels. Fix this by ignoring the passive flag when collecting DFS channels. Also, move the DFS channels in the channel list before the 5GHz active channels (this was implemented in the FW differently than specified). Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/wl12xx/scan.c b/drivers/net/wireless/wl12xx/scan.c index f223e0e..8ccbb91 100644 --- a/drivers/net/wireless/wl12xx/scan.c +++ b/drivers/net/wireless/wl12xx/scan.c @@ -337,10 +337,12 @@ wl1271_scan_get_sched_scan_channels(struct wl1271 *wl, i++) { flags = req->channels[i]->flags; - if (!(flags & IEEE80211_CHAN_DISABLED) && - (!!(flags & IEEE80211_CHAN_PASSIVE_SCAN) == passive) && + if ((req->channels[i]->band == band) && + !(flags & IEEE80211_CHAN_DISABLED) && (!!(flags & IEEE80211_CHAN_RADAR) == radar) && - (req->channels[i]->band == band)) { + /* if radar is set, we ignore the passive flag */ + (radar || + !!(flags & IEEE80211_CHAN_PASSIVE_SCAN) == passive)) { wl1271_debug(DEBUG_SCAN, "band %d, center_freq %d ", req->channels[i]->band, req->channels[i]->center_freq); @@ -350,6 +352,8 @@ wl1271_scan_get_sched_scan_channels(struct wl1271 *wl, wl1271_debug(DEBUG_SCAN, "max_power %d", req->channels[i]->max_power); + if (flags & IEEE80211_CHAN_RADAR) + channels[j].flags |= SCAN_CHANNEL_FLAGS_DFS; if (flags & IEEE80211_CHAN_PASSIVE_SCAN) { channels[j].passive_duration = cpu_to_le16(c->dwell_time_passive); @@ -359,7 +363,7 @@ wl1271_scan_get_sched_scan_channels(struct wl1271 *wl, channels[j].max_duration = cpu_to_le16(c->max_dwell_time_active); } - channels[j].tx_power_att = req->channels[j]->max_power; + channels[j].tx_power_att = req->channels[i]->max_power; channels[j].channel = req->channels[i]->hw_value; j++; @@ -386,7 +390,11 @@ wl1271_scan_sched_scan_channels(struct wl1271 *wl, wl1271_scan_get_sched_scan_channels(wl, req, cfg->channels, IEEE80211_BAND_2GHZ, false, false, idx); - idx += cfg->active[0]; + /* + * 5GHz channels always start at position 14, not immediately + * after the last 2.4GHz channel + */ + idx = 14; cfg->passive[1] = wl1271_scan_get_sched_scan_channels(wl, req, cfg->channels, @@ -394,22 +402,23 @@ wl1271_scan_sched_scan_channels(struct wl1271 *wl, false, true, idx); idx += cfg->passive[1]; - cfg->active[1] = + cfg->dfs = wl1271_scan_get_sched_scan_channels(wl, req, cfg->channels, IEEE80211_BAND_5GHZ, - false, false, 14); - idx += cfg->active[1]; + true, true, idx); + idx += cfg->dfs; - cfg->dfs = + cfg->active[1] = wl1271_scan_get_sched_scan_channels(wl, req, cfg->channels, IEEE80211_BAND_5GHZ, - true, false, idx); - idx += cfg->dfs; + false, false, idx); + idx += cfg->active[1]; wl1271_debug(DEBUG_SCAN, " 2.4GHz: active %d passive %d", cfg->active[0], cfg->passive[0]); wl1271_debug(DEBUG_SCAN, " 5GHz: active %d passive %d", cfg->active[1], cfg->passive[1]); + wl1271_debug(DEBUG_SCAN, " DFS: %d", cfg->dfs); return idx; } diff --git a/drivers/net/wireless/wl12xx/scan.h b/drivers/net/wireless/wl12xx/scan.h index c833195..a0b6c5d 100644 --- a/drivers/net/wireless/wl12xx/scan.h +++ b/drivers/net/wireless/wl12xx/scan.h @@ -137,6 +137,9 @@ enum { SCAN_BSS_TYPE_ANY, }; +#define SCAN_CHANNEL_FLAGS_DFS BIT(0) +#define SCAN_CHANNEL_FLAGS_DFS_ENABLED BIT(1) + struct conn_scan_ch_params { __le16 min_duration; __le16 max_duration; -- cgit v0.10.2 From 50a66d7f04adbfab9db55144c58dc693358cb635 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 27 May 2011 15:34:47 +0300 Subject: wl12xx: add separate config value for DFS dwell time on sched scan Use a different value for DFS dwell time when performing a scheduled scan. Previously we were using the same value as for normal passive scans. This adds some flexibility between these two different types of passive scan. For now we use 150 TUs for DFS channel dwell time. This may need to be fine-tuned in the future. Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/wl12xx/conf.h b/drivers/net/wireless/wl12xx/conf.h index 1ab6c86..c83fefb 100644 --- a/drivers/net/wireless/wl12xx/conf.h +++ b/drivers/net/wireless/wl12xx/conf.h @@ -1157,6 +1157,9 @@ struct conf_sched_scan_settings { /* time to wait on the channel for passive scans (in TUs) */ u32 dwell_time_passive; + /* time to wait on the channel for DFS scans (in TUs) */ + u32 dwell_time_dfs; + /* number of probe requests to send on each channel in active scans */ u8 num_probe_reqs; diff --git a/drivers/net/wireless/wl12xx/main.c b/drivers/net/wireless/wl12xx/main.c index bc00e52..e6497dc 100644 --- a/drivers/net/wireless/wl12xx/main.c +++ b/drivers/net/wireless/wl12xx/main.c @@ -311,6 +311,7 @@ static struct conf_drv_settings default_conf = { .min_dwell_time_active = 8, .max_dwell_time_active = 30, .dwell_time_passive = 100, + .dwell_time_dfs = 150, .num_probe_reqs = 2, .rssi_threshold = -90, .snr_threshold = 0, diff --git a/drivers/net/wireless/wl12xx/scan.c b/drivers/net/wireless/wl12xx/scan.c index 8ccbb91..28ec0ad 100644 --- a/drivers/net/wireless/wl12xx/scan.c +++ b/drivers/net/wireless/wl12xx/scan.c @@ -352,9 +352,12 @@ wl1271_scan_get_sched_scan_channels(struct wl1271 *wl, wl1271_debug(DEBUG_SCAN, "max_power %d", req->channels[i]->max_power); - if (flags & IEEE80211_CHAN_RADAR) + if (flags & IEEE80211_CHAN_RADAR) { channels[j].flags |= SCAN_CHANNEL_FLAGS_DFS; - if (flags & IEEE80211_CHAN_PASSIVE_SCAN) { + channels[j].passive_duration = + cpu_to_le16(c->dwell_time_dfs); + } + else if (flags & IEEE80211_CHAN_PASSIVE_SCAN) { channels[j].passive_duration = cpu_to_le16(c->dwell_time_passive); } else { -- cgit v0.10.2 From 66870b1ccd5c1460e437c18b0026e2dcaab1ece9 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 27 May 2011 15:34:48 +0300 Subject: wl12xx: fix oops in sched_scan when forcing a passive scan Fix kernel oops when trying to use passive scheduled scans. The reason was that in passive scans there are no SSIDs, so there was a NULL pointer dereference. To solve the problem, we now check the number of SSIDs provided in the sched_scan request and only access the list if there's one or more (ie. passive scan is not forced). We also force all the channels to be passive by adding the IEEE80211_CHAN_PASSIVE_SCAN flag locally before the checks in the wl1271_scan_get_sched_scan_channels() function. Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/wl12xx/scan.c b/drivers/net/wireless/wl12xx/scan.c index 28ec0ad..56f76ab 100644 --- a/drivers/net/wireless/wl12xx/scan.c +++ b/drivers/net/wireless/wl12xx/scan.c @@ -331,12 +331,16 @@ wl1271_scan_get_sched_scan_channels(struct wl1271 *wl, struct conf_sched_scan_settings *c = &wl->conf.sched_scan; int i, j; u32 flags; + bool force_passive = !req->n_ssids; for (i = 0, j = start; i < req->n_channels && j < MAX_CHANNELS_ALL_BANDS; i++) { flags = req->channels[i]->flags; + if (force_passive) + flags |= IEEE80211_CHAN_PASSIVE_SCAN; + if ((req->channels[i]->band == band) && !(flags & IEEE80211_CHAN_DISABLED) && (!!(flags & IEEE80211_CHAN_RADAR) == radar) && @@ -433,6 +437,7 @@ int wl1271_scan_sched_scan_config(struct wl1271 *wl, struct wl1271_cmd_sched_scan_config *cfg = NULL; struct conf_sched_scan_settings *c = &wl->conf.sched_scan; int i, total_channels, ret; + bool force_passive = !req->n_ssids; wl1271_debug(DEBUG_CMD, "cmd sched_scan scan config"); @@ -456,7 +461,7 @@ int wl1271_scan_sched_scan_config(struct wl1271 *wl, for (i = 0; i < SCAN_MAX_CYCLE_INTERVALS; i++) cfg->intervals[i] = cpu_to_le32(req->interval); - if (req->ssids[0].ssid_len && req->ssids[0].ssid) { + if (!force_passive && req->ssids[0].ssid_len && req->ssids[0].ssid) { cfg->filter_type = SCAN_SSID_FILTER_SPECIFIC; cfg->ssid_len = req->ssids[0].ssid_len; memcpy(cfg->ssid, req->ssids[0].ssid, @@ -473,7 +478,7 @@ int wl1271_scan_sched_scan_config(struct wl1271 *wl, goto out; } - if (cfg->active[0]) { + if (!force_passive && cfg->active[0]) { ret = wl1271_cmd_build_probe_req(wl, req->ssids[0].ssid, req->ssids[0].ssid_len, ies->ie[IEEE80211_BAND_2GHZ], @@ -485,7 +490,7 @@ int wl1271_scan_sched_scan_config(struct wl1271 *wl, } } - if (cfg->active[1]) { + if (!force_passive && cfg->active[1]) { ret = wl1271_cmd_build_probe_req(wl, req->ssids[0].ssid, req->ssids[0].ssid_len, ies->ie[IEEE80211_BAND_5GHZ], -- cgit v0.10.2 From 59342f6a6bc35df623fb44784daa5e1077063b8f Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Mon, 30 May 2011 10:15:47 +0300 Subject: zd1211rw: fix to work on OHCI zd1211 devices register 'EP 4 OUT' endpoint as Interrupt type on USB 2.0: Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x04 EP 4 OUT bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 1 However on USB 1.1 endpoint becomes Bulk: Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x04 EP 4 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 0 Commit 37939810b937aba830dd751291fcdc51cae1a6cb assumed that endpoint is always interrupt type and changed usb_bulk_msg() calls to usb_interrupt_msg(). Problem here is that usb_bulk_msg() on interrupt endpoint selfcorrects the call and changes requested pipe to interrupt type (see usb_bulk_msg). However with usb_interrupt_msg() on bulk endpoint does not correct the pipe type to bulk, but instead URB is submitted with interrupt type pipe. So pre-2.6.39 used usb_bulk_msg() and therefore worked with both endpoint types, however in 2.6.39 usb_interrupt_msg() with bulk endpoint causes ohci_hcd to fail submitted URB instantly with -ENOSPC and preventing zd1211rw from working with OHCI. Fix this by detecting endpoint type and using correct endpoint/pipe types for URB. Also fix asynchronous zd_usb_iowrite16v_async() to use right URB type on 'EP 4 OUT'. Cc: stable@kernel.org Signed-off-by: Jussi Kivilinna Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c index 0e81994..631194d 100644 --- a/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zd1211rw/zd_usb.c @@ -1533,6 +1533,31 @@ static void __exit usb_exit(void) module_init(usb_init); module_exit(usb_exit); +static int zd_ep_regs_out_msg(struct usb_device *udev, void *data, int len, + int *actual_length, int timeout) +{ + /* In USB 2.0 mode EP_REGS_OUT endpoint is interrupt type. However in + * USB 1.1 mode endpoint is bulk. Select correct type URB by endpoint + * descriptor. + */ + struct usb_host_endpoint *ep; + unsigned int pipe; + + pipe = usb_sndintpipe(udev, EP_REGS_OUT); + ep = usb_pipe_endpoint(udev, pipe); + if (!ep) + return -EINVAL; + + if (usb_endpoint_xfer_int(&ep->desc)) { + return usb_interrupt_msg(udev, pipe, data, len, + actual_length, timeout); + } else { + pipe = usb_sndbulkpipe(udev, EP_REGS_OUT); + return usb_bulk_msg(udev, pipe, data, len, actual_length, + timeout); + } +} + static int usb_int_regs_length(unsigned int count) { return sizeof(struct usb_int_regs) + count * sizeof(struct reg_data); @@ -1648,15 +1673,14 @@ int zd_usb_ioread16v(struct zd_usb *usb, u16 *values, udev = zd_usb_to_usbdev(usb); prepare_read_regs_int(usb); - r = usb_interrupt_msg(udev, usb_sndintpipe(udev, EP_REGS_OUT), - req, req_len, &actual_req_len, 50 /* ms */); + r = zd_ep_regs_out_msg(udev, req, req_len, &actual_req_len, 50 /*ms*/); if (r) { dev_dbg_f(zd_usb_dev(usb), - "error in usb_interrupt_msg(). Error number %d\n", r); + "error in zd_ep_regs_out_msg(). Error number %d\n", r); goto error; } if (req_len != actual_req_len) { - dev_dbg_f(zd_usb_dev(usb), "error in usb_interrupt_msg()\n" + dev_dbg_f(zd_usb_dev(usb), "error in zd_ep_regs_out_msg()\n" " req_len %d != actual_req_len %d\n", req_len, actual_req_len); r = -EIO; @@ -1818,9 +1842,17 @@ int zd_usb_iowrite16v_async(struct zd_usb *usb, const struct zd_ioreq16 *ioreqs, rw->value = cpu_to_le16(ioreqs[i].value); } - usb_fill_int_urb(urb, udev, usb_sndintpipe(udev, EP_REGS_OUT), - req, req_len, iowrite16v_urb_complete, usb, - ep->desc.bInterval); + /* In USB 2.0 mode endpoint is interrupt type. However in USB 1.1 mode + * endpoint is bulk. Select correct type URB by endpoint descriptor. + */ + if (usb_endpoint_xfer_int(&ep->desc)) + usb_fill_int_urb(urb, udev, usb_sndintpipe(udev, EP_REGS_OUT), + req, req_len, iowrite16v_urb_complete, usb, + ep->desc.bInterval); + else + usb_fill_bulk_urb(urb, udev, usb_sndbulkpipe(udev, EP_REGS_OUT), + req, req_len, iowrite16v_urb_complete, usb); + urb->transfer_flags |= URB_FREE_BUFFER; /* Submit previous URB */ @@ -1924,15 +1956,14 @@ int zd_usb_rfwrite(struct zd_usb *usb, u32 value, u8 bits) } udev = zd_usb_to_usbdev(usb); - r = usb_interrupt_msg(udev, usb_sndintpipe(udev, EP_REGS_OUT), - req, req_len, &actual_req_len, 50 /* ms */); + r = zd_ep_regs_out_msg(udev, req, req_len, &actual_req_len, 50 /*ms*/); if (r) { dev_dbg_f(zd_usb_dev(usb), - "error in usb_interrupt_msg(). Error number %d\n", r); + "error in zd_ep_regs_out_msg(). Error number %d\n", r); goto out; } if (req_len != actual_req_len) { - dev_dbg_f(zd_usb_dev(usb), "error in usb_interrupt_msg()" + dev_dbg_f(zd_usb_dev(usb), "error in zd_ep_regs_out_msg()" " req_len %d != actual_req_len %d\n", req_len, actual_req_len); r = -EIO; -- cgit v0.10.2 From 1144181c1bc054dc5e001a6f10b4820167e6c883 Mon Sep 17 00:00:00 2001 From: Wey-Yi Guy Date: Mon, 30 May 2011 09:32:52 -0700 Subject: iwlagn: fix incorrect PCI subsystem id for 6150 devices For 6150 devices, modify the supported PCI subsystem ID. Cc: stable@kernel.org Signed-off-by: Wey-Yi Guy Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/iwlwifi/iwl-6000.c b/drivers/net/wireless/iwlwifi/iwl-6000.c index f8c710d..fda6fe0 100644 --- a/drivers/net/wireless/iwlwifi/iwl-6000.c +++ b/drivers/net/wireless/iwlwifi/iwl-6000.c @@ -603,19 +603,27 @@ struct iwl_cfg iwl6050_2abg_cfg = { IWL_DEVICE_6050, }; +#define IWL_DEVICE_6150 \ + .fw_name_pre = IWL6050_FW_PRE, \ + .ucode_api_max = IWL6050_UCODE_API_MAX, \ + .ucode_api_min = IWL6050_UCODE_API_MIN, \ + .ops = &iwl6150_ops, \ + .eeprom_ver = EEPROM_6150_EEPROM_VERSION, \ + .eeprom_calib_ver = EEPROM_6150_TX_POWER_VERSION, \ + .base_params = &iwl6050_base_params, \ + .need_dc_calib = true, \ + .led_mode = IWL_LED_BLINK, \ + .internal_wimax_coex = true + struct iwl_cfg iwl6150_bgn_cfg = { .name = "Intel(R) Centrino(R) Wireless-N + WiMAX 6150 BGN", - .fw_name_pre = IWL6050_FW_PRE, - .ucode_api_max = IWL6050_UCODE_API_MAX, - .ucode_api_min = IWL6050_UCODE_API_MIN, - .eeprom_ver = EEPROM_6150_EEPROM_VERSION, - .eeprom_calib_ver = EEPROM_6150_TX_POWER_VERSION, - .ops = &iwl6150_ops, - .base_params = &iwl6050_base_params, + IWL_DEVICE_6150, .ht_params = &iwl6000_ht_params, - .need_dc_calib = true, - .led_mode = IWL_LED_RF_STATE, - .internal_wimax_coex = true, +}; + +struct iwl_cfg iwl6150_bg_cfg = { + .name = "Intel(R) Centrino(R) Wireless-N + WiMAX 6150 BG", + IWL_DEVICE_6150, }; struct iwl_cfg iwl6000_3agn_cfg = { diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 11c6c11..a662adc 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -3831,11 +3831,11 @@ static DEFINE_PCI_DEVICE_TABLE(iwl_hw_card_ids) = { /* 6150 WiFi/WiMax Series */ {IWL_PCI_DEVICE(0x0885, 0x1305, iwl6150_bgn_cfg)}, - {IWL_PCI_DEVICE(0x0885, 0x1306, iwl6150_bgn_cfg)}, + {IWL_PCI_DEVICE(0x0885, 0x1307, iwl6150_bg_cfg)}, {IWL_PCI_DEVICE(0x0885, 0x1325, iwl6150_bgn_cfg)}, - {IWL_PCI_DEVICE(0x0885, 0x1326, iwl6150_bgn_cfg)}, + {IWL_PCI_DEVICE(0x0885, 0x1327, iwl6150_bg_cfg)}, {IWL_PCI_DEVICE(0x0886, 0x1315, iwl6150_bgn_cfg)}, - {IWL_PCI_DEVICE(0x0886, 0x1316, iwl6150_bgn_cfg)}, + {IWL_PCI_DEVICE(0x0886, 0x1317, iwl6150_bg_cfg)}, /* 1000 Series WiFi */ {IWL_PCI_DEVICE(0x0083, 0x1205, iwl1000_bgn_cfg)}, diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.h b/drivers/net/wireless/iwlwifi/iwl-agn.h index 2495fe7..d171684 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.h +++ b/drivers/net/wireless/iwlwifi/iwl-agn.h @@ -89,6 +89,7 @@ extern struct iwl_cfg iwl6000_3agn_cfg; extern struct iwl_cfg iwl6050_2agn_cfg; extern struct iwl_cfg iwl6050_2abg_cfg; extern struct iwl_cfg iwl6150_bgn_cfg; +extern struct iwl_cfg iwl6150_bg_cfg; extern struct iwl_cfg iwl1000_bgn_cfg; extern struct iwl_cfg iwl1000_bg_cfg; extern struct iwl_cfg iwl100_bgn_cfg; -- cgit v0.10.2 From 48bdf072c3f1f8f739f76d19c74f4c79605cac46 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Sun, 29 May 2011 10:55:44 +0000 Subject: ip_options_compile: properly handle unaligned pointer The current code takes an unaligned pointer and does htonl() on it to make it big-endian, then does a memcpy(). The problem is that the compiler decides that since the pointer is to a __be32, it is legal to optimize the copy into a processor word store. However, on an architecture that does not handled unaligned writes in kernel space, this produces an unaligned exception fault. The solution is to track the pointer as a "char *" (which removes a bunch of unpleasant casts in any case), and then just use put_unaligned_be32() to write the value to memory. Signed-off-by: Chris Metcalf Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index c3118e1..ec93335 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -350,7 +351,7 @@ int ip_options_compile(struct net *net, goto error; } if (optptr[2] <= optlen) { - __be32 *timeptr = NULL; + unsigned char *timeptr = NULL; if (optptr[2]+3 > optptr[1]) { pp_ptr = optptr + 2; goto error; @@ -359,7 +360,7 @@ int ip_options_compile(struct net *net, case IPOPT_TS_TSONLY: opt->ts = optptr - iph; if (skb) - timeptr = (__be32*)&optptr[optptr[2]-1]; + timeptr = &optptr[optptr[2]-1]; opt->ts_needtime = 1; optptr[2] += 4; break; @@ -371,7 +372,7 @@ int ip_options_compile(struct net *net, opt->ts = optptr - iph; if (rt) { memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4); - timeptr = (__be32*)&optptr[optptr[2]+3]; + timeptr = &optptr[optptr[2]+3]; } opt->ts_needaddr = 1; opt->ts_needtime = 1; @@ -389,7 +390,7 @@ int ip_options_compile(struct net *net, if (inet_addr_type(net, addr) == RTN_UNICAST) break; if (skb) - timeptr = (__be32*)&optptr[optptr[2]+3]; + timeptr = &optptr[optptr[2]+3]; } opt->ts_needtime = 1; optptr[2] += 8; @@ -403,10 +404,10 @@ int ip_options_compile(struct net *net, } if (timeptr) { struct timespec tv; - __be32 midtime; + u32 midtime; getnstimeofday(&tv); - midtime = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC); - memcpy(timeptr, &midtime, sizeof(__be32)); + midtime = (tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC; + put_unaligned_be32(midtime, timeptr); opt->is_changed = 1; } } else { -- cgit v0.10.2 From b10cec8a4e8167075b9e1ff3f05419769e7f381a Mon Sep 17 00:00:00 2001 From: Dennis Aberilla Date: Sun, 29 May 2011 11:46:54 +0000 Subject: drivers/net: ks8842 Fix crash on received packet when in PIO mode. This patch fixes a driver crash during packet reception due to not enough bytes allocated in the skb. Since the loop reads out 4 bytes at a time, we need to allow for up to 3 bytes of slack space. Signed-off-by: Dennis Aberilla Signed-off-by: David S. Miller diff --git a/drivers/net/ks8842.c b/drivers/net/ks8842.c index f0d8346..9bd0f55 100644 --- a/drivers/net/ks8842.c +++ b/drivers/net/ks8842.c @@ -662,7 +662,7 @@ static void ks8842_rx_frame(struct net_device *netdev, /* check the status */ if ((status & RXSR_VALID) && !(status & RXSR_ERROR)) { - struct sk_buff *skb = netdev_alloc_skb_ip_align(netdev, len); + struct sk_buff *skb = netdev_alloc_skb_ip_align(netdev, len + 3); if (skb) { -- cgit v0.10.2 From a000c01e60e40e15304ffe48fff051d17a7bea91 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 29 May 2011 23:23:36 +0000 Subject: sctp: stop pending timers and purge queues when peer restart asoc If the peer restart the asoc, we should not only fail any unsent/unacked data, but also stop the T3-rtx, SACK, T4-rto timers, and teardown ASCONF queues. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 2b44764..dd6847e 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -107,6 +107,7 @@ typedef enum { SCTP_CMD_UPDATE_INITTAG, /* Update peer inittag */ SCTP_CMD_SEND_MSG, /* Send the whole use message */ SCTP_CMD_SEND_NEXT_ASCONF, /* Send the next ASCONF after ACK */ + SCTP_CMD_PURGE_ASCONF_QUEUE, /* Purge all asconf queues.*/ SCTP_CMD_LAST } sctp_verb_t; diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 795f488..7df327a 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1993,7 +1993,7 @@ void sctp_assoc_clean_asconf_ack_cache(const struct sctp_association *asoc); struct sctp_chunk *sctp_assoc_lookup_asconf_ack( const struct sctp_association *asoc, __be32 serial); - +void sctp_asconf_queue_teardown(struct sctp_association *asoc); int sctp_cmp_addr_exact(const union sctp_addr *ss1, const union sctp_addr *ss2); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 525f97c..4a62888 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -444,15 +444,7 @@ void sctp_association_free(struct sctp_association *asoc) asoc->peer.transport_count = 0; - /* Free any cached ASCONF_ACK chunk. */ - sctp_assoc_free_asconf_acks(asoc); - - /* Free the ASCONF queue. */ - sctp_assoc_free_asconf_queue(asoc); - - /* Free any cached ASCONF chunk. */ - if (asoc->addip_last_asconf) - sctp_chunk_free(asoc->addip_last_asconf); + sctp_asconf_queue_teardown(asoc); /* AUTH - Free the endpoint shared keys */ sctp_auth_destroy_keys(&asoc->endpoint_shared_keys); @@ -1646,3 +1638,16 @@ struct sctp_chunk *sctp_assoc_lookup_asconf_ack( return NULL; } + +void sctp_asconf_queue_teardown(struct sctp_association *asoc) +{ + /* Free any cached ASCONF_ACK chunk. */ + sctp_assoc_free_asconf_acks(asoc); + + /* Free the ASCONF queue. */ + sctp_assoc_free_asconf_queue(asoc); + + /* Free any cached ASCONF chunk. */ + if (asoc->addip_last_asconf) + sctp_chunk_free(asoc->addip_last_asconf); +} diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index d612ca1..534c2e5 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1670,6 +1670,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_SEND_NEXT_ASCONF: sctp_cmd_send_asconf(asoc); break; + case SCTP_CMD_PURGE_ASCONF_QUEUE: + sctp_asconf_queue_teardown(asoc); + break; default: pr_warn("Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7f4a4f8..a297283 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1718,11 +1718,21 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, return SCTP_DISPOSITION_CONSUME; } - /* For now, fail any unsent/unacked data. Consider the optional - * choice of resending of this data. + /* For now, stop pending T3-rtx and SACK timers, fail any unsent/unacked + * data. Consider the optional choice of resending of this data. */ + sctp_add_cmd_sf(commands, SCTP_CMD_T3_RTX_TIMERS_STOP, SCTP_NULL()); + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, + SCTP_TO(SCTP_EVENT_TIMEOUT_SACK)); sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL()); + /* Stop pending T4-rto timer, teardown ASCONF queue, ASCONF-ACK queue + * and ASCONF-ACK cache. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, + SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); + sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_ASCONF_QUEUE, SCTP_NULL()); + repl = sctp_make_cookie_ack(new_asoc, chunk); if (!repl) goto nomem; -- cgit v0.10.2 From 930a6eac9f40e692bd9670d89bcd9ac0f4019356 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Mon, 30 May 2011 07:06:24 +0000 Subject: drivers/net/usb/catc.c: Fix potential deadlock in catc_ctrl_run() catc_ctrl_run() calls usb_submit_urb() with GFP_KERNEL, while it is called from catc_ctrl_async() and catc_ctrl_done() with catc->ctrl_lock spinlock held. The patch replaces GFP_KERNEL with GFP_ATOMIC. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index d7221c4..8056f8a 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -495,7 +495,7 @@ static void catc_ctrl_run(struct catc *catc) if (!q->dir && q->buf && q->len) memcpy(catc->ctrl_buf, q->buf, q->len); - if ((status = usb_submit_urb(catc->ctrl_urb, GFP_KERNEL))) + if ((status = usb_submit_urb(catc->ctrl_urb, GFP_ATOMIC))) err("submit(ctrl_urb) status %d", status); } -- cgit v0.10.2 From 948252cb9e01d65a89ecadf67be5018351eee15e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 31 May 2011 19:27:48 -0700 Subject: Revert "net: fix section mismatches" This reverts commit e5cb966c0838e4da43a3b0751bdcac7fe719f7b4. It causes new build regressions with gcc-4.2 which is pretty common on non-x86 platforms. Reported-by: James Bottomley Signed-off-by: David S. Miller diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c index 5f25889..44b28b2 100644 --- a/drivers/net/3c509.c +++ b/drivers/net/3c509.c @@ -185,7 +185,7 @@ static int max_interrupt_work = 10; static int nopnp; #endif -static int el3_common_init(struct net_device *dev); +static int __devinit el3_common_init(struct net_device *dev); static void el3_common_remove(struct net_device *dev); static ushort id_read_eeprom(int index); static ushort read_eeprom(int ioaddr, int index); @@ -395,7 +395,7 @@ static struct isa_driver el3_isa_driver = { static int isa_registered; #ifdef CONFIG_PNP -static const struct pnp_device_id el3_pnp_ids[] __devinitconst = { +static struct pnp_device_id el3_pnp_ids[] = { { .id = "TCM5090" }, /* 3Com Etherlink III (TP) */ { .id = "TCM5091" }, /* 3Com Etherlink III */ { .id = "TCM5094" }, /* 3Com Etherlink III (combo) */ @@ -478,7 +478,7 @@ static int pnp_registered; #endif /* CONFIG_PNP */ #ifdef CONFIG_EISA -static const struct eisa_device_id el3_eisa_ids[] __devinitconst = { +static struct eisa_device_id el3_eisa_ids[] = { { "TCM5090" }, { "TCM5091" }, { "TCM5092" }, @@ -508,7 +508,7 @@ static int eisa_registered; #ifdef CONFIG_MCA static int el3_mca_probe(struct device *dev); -static const short el3_mca_adapter_ids[] __devinitconst = { +static short el3_mca_adapter_ids[] __initdata = { 0x627c, 0x627d, 0x62db, @@ -517,7 +517,7 @@ static const short el3_mca_adapter_ids[] __devinitconst = { 0x0000 }; -static const char *const el3_mca_adapter_names[] __devinitconst = { +static char *el3_mca_adapter_names[] __initdata = { "3Com 3c529 EtherLink III (10base2)", "3Com 3c529 EtherLink III (10baseT)", "3Com 3c529 EtherLink III (test mode)", @@ -601,7 +601,7 @@ static void el3_common_remove (struct net_device *dev) } #ifdef CONFIG_MCA -static int __devinit el3_mca_probe(struct device *device) +static int __init el3_mca_probe(struct device *device) { /* Based on Erik Nygren's (nygren@mit.edu) 3c529 patch, * heavily modified by Chris Beauregard @@ -671,7 +671,7 @@ static int __devinit el3_mca_probe(struct device *device) #endif /* CONFIG_MCA */ #ifdef CONFIG_EISA -static int __devinit el3_eisa_probe (struct device *device) +static int __init el3_eisa_probe (struct device *device) { short i; int ioaddr, irq, if_port; diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index 99f43d2..8cc2256 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -901,14 +901,14 @@ static const struct dev_pm_ops vortex_pm_ops = { #endif /* !CONFIG_PM */ #ifdef CONFIG_EISA -static const struct eisa_device_id vortex_eisa_ids[] __devinitconst = { +static struct eisa_device_id vortex_eisa_ids[] = { { "TCM5920", CH_3C592 }, { "TCM5970", CH_3C597 }, { "" } }; MODULE_DEVICE_TABLE(eisa, vortex_eisa_ids); -static int __devinit vortex_eisa_probe(struct device *device) +static int __init vortex_eisa_probe(struct device *device) { void __iomem *ioaddr; struct eisa_device *edev; diff --git a/drivers/net/depca.c b/drivers/net/depca.c index 1765405..8b0084d 100644 --- a/drivers/net/depca.c +++ b/drivers/net/depca.c @@ -331,18 +331,18 @@ static struct { "DE422",\ ""} -static const char* const depca_signature[] __devinitconst = DEPCA_SIGNATURE; +static char* __initdata depca_signature[] = DEPCA_SIGNATURE; enum depca_type { DEPCA, de100, de101, de200, de201, de202, de210, de212, de422, unknown }; -static const char depca_string[] = "depca"; +static char depca_string[] = "depca"; static int depca_device_remove (struct device *device); #ifdef CONFIG_EISA -static const struct eisa_device_id depca_eisa_ids[] __devinitconst = { +static struct eisa_device_id depca_eisa_ids[] = { { "DEC4220", de422 }, { "" } }; @@ -367,19 +367,19 @@ static struct eisa_driver depca_eisa_driver = { #define DE210_ID 0x628d #define DE212_ID 0x6def -static const short depca_mca_adapter_ids[] __devinitconst = { +static short depca_mca_adapter_ids[] = { DE210_ID, DE212_ID, 0x0000 }; -static const char *depca_mca_adapter_name[] = { +static char *depca_mca_adapter_name[] = { "DEC EtherWORKS MC Adapter (DE210)", "DEC EtherWORKS MC Adapter (DE212)", NULL }; -static const enum depca_type depca_mca_adapter_type[] = { +static enum depca_type depca_mca_adapter_type[] = { de210, de212, 0 @@ -541,9 +541,10 @@ static void SetMulticastFilter(struct net_device *dev); static int load_packet(struct net_device *dev, struct sk_buff *skb); static void depca_dbg_open(struct net_device *dev); -static const u_char de1xx_irq[] __devinitconst = { 2, 3, 4, 5, 7, 9, 0 }; -static const u_char de2xx_irq[] __devinitconst = { 5, 9, 10, 11, 15, 0 }; -static const u_char de422_irq[] __devinitconst = { 5, 9, 10, 11, 0 }; +static u_char de1xx_irq[] __initdata = { 2, 3, 4, 5, 7, 9, 0 }; +static u_char de2xx_irq[] __initdata = { 5, 9, 10, 11, 15, 0 }; +static u_char de422_irq[] __initdata = { 5, 9, 10, 11, 0 }; +static u_char *depca_irq; static int irq; static int io; @@ -579,7 +580,7 @@ static const struct net_device_ops depca_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; -static int __devinit depca_hw_init (struct net_device *dev, struct device *device) +static int __init depca_hw_init (struct net_device *dev, struct device *device) { struct depca_private *lp; int i, j, offset, netRAM, mem_len, status = 0; @@ -747,7 +748,6 @@ static int __devinit depca_hw_init (struct net_device *dev, struct device *devic if (dev->irq < 2) { unsigned char irqnum; unsigned long irq_mask, delay; - const u_char *depca_irq; irq_mask = probe_irq_on(); @@ -770,7 +770,6 @@ static int __devinit depca_hw_init (struct net_device *dev, struct device *devic break; default: - depca_irq = NULL; break; /* Not reached */ } @@ -1303,7 +1302,7 @@ static void SetMulticastFilter(struct net_device *dev) } } -static int __devinit depca_common_init (u_long ioaddr, struct net_device **devp) +static int __init depca_common_init (u_long ioaddr, struct net_device **devp) { int status = 0; @@ -1334,7 +1333,7 @@ static int __devinit depca_common_init (u_long ioaddr, struct net_device **devp) /* ** Microchannel bus I/O device probe */ -static int __devinit depca_mca_probe(struct device *device) +static int __init depca_mca_probe(struct device *device) { unsigned char pos[2]; unsigned char where; @@ -1458,7 +1457,7 @@ static int __devinit depca_mca_probe(struct device *device) ** ISA bus I/O device probe */ -static void __devinit depca_platform_probe (void) +static void __init depca_platform_probe (void) { int i; struct platform_device *pldev; @@ -1498,7 +1497,7 @@ static void __devinit depca_platform_probe (void) } } -static enum depca_type __devinit depca_shmem_probe (ulong *mem_start) +static enum depca_type __init depca_shmem_probe (ulong *mem_start) { u_long mem_base[] = DEPCA_RAM_BASE_ADDRESSES; enum depca_type adapter = unknown; @@ -1559,7 +1558,7 @@ static int __devinit depca_isa_probe (struct platform_device *device) */ #ifdef CONFIG_EISA -static int __devinit depca_eisa_probe (struct device *device) +static int __init depca_eisa_probe (struct device *device) { enum depca_type adapter = unknown; struct eisa_device *edev; @@ -1630,7 +1629,7 @@ static int __devexit depca_device_remove (struct device *device) ** and Boot (readb) ROM. This will also give us a clue to the network RAM ** base address. */ -static int __devinit DepcaSignature(char *name, u_long base_addr) +static int __init DepcaSignature(char *name, u_long base_addr) { u_int i, j, k; void __iomem *ptr; diff --git a/drivers/net/hp100.c b/drivers/net/hp100.c index c52a1df..8e10d2f 100644 --- a/drivers/net/hp100.c +++ b/drivers/net/hp100.c @@ -188,14 +188,14 @@ struct hp100_private { * variables */ #ifdef CONFIG_ISA -static const char *const hp100_isa_tbl[] __devinitconst = { +static const char *hp100_isa_tbl[] = { "HWPF150", /* HP J2573 rev A */ "HWP1950", /* HP J2573 */ }; #endif #ifdef CONFIG_EISA -static const struct eisa_device_id hp100_eisa_tbl[] __devinitconst = { +static struct eisa_device_id hp100_eisa_tbl[] = { { "HWPF180" }, /* HP J2577 rev A */ { "HWP1920" }, /* HP 27248B */ { "HWP1940" }, /* HP J2577 */ @@ -336,7 +336,7 @@ static __devinit const char *hp100_read_id(int ioaddr) } #ifdef CONFIG_ISA -static __devinit int hp100_isa_probe1(struct net_device *dev, int ioaddr) +static __init int hp100_isa_probe1(struct net_device *dev, int ioaddr) { const char *sig; int i; @@ -372,7 +372,7 @@ static __devinit int hp100_isa_probe1(struct net_device *dev, int ioaddr) * EISA and PCI are handled by device infrastructure. */ -static int __devinit hp100_isa_probe(struct net_device *dev, int addr) +static int __init hp100_isa_probe(struct net_device *dev, int addr) { int err = -ENODEV; @@ -396,7 +396,7 @@ static int __devinit hp100_isa_probe(struct net_device *dev, int addr) #endif /* CONFIG_ISA */ #if !defined(MODULE) && defined(CONFIG_ISA) -struct net_device * __devinit hp100_probe(int unit) +struct net_device * __init hp100_probe(int unit) { struct net_device *dev = alloc_etherdev(sizeof(struct hp100_private)); int err; @@ -2843,7 +2843,7 @@ static void cleanup_dev(struct net_device *d) } #ifdef CONFIG_EISA -static int __devinit hp100_eisa_probe (struct device *gendev) +static int __init hp100_eisa_probe (struct device *gendev) { struct net_device *dev = alloc_etherdev(sizeof(struct hp100_private)); struct eisa_device *edev = to_eisa_device(gendev); diff --git a/drivers/net/ibmlana.c b/drivers/net/ibmlana.c index 136d754..a7d6cad 100644 --- a/drivers/net/ibmlana.c +++ b/drivers/net/ibmlana.c @@ -895,12 +895,12 @@ static int ibmlana_irq; static int ibmlana_io; static int startslot; /* counts through slots when probing multiple devices */ -static const short ibmlana_adapter_ids[] __devinitconst = { +static short ibmlana_adapter_ids[] __initdata = { IBM_LANA_ID, 0x0000 }; -static const char *const ibmlana_adapter_names[] __devinitconst = { +static char *ibmlana_adapter_names[] __devinitdata = { "IBM LAN Adapter/A", NULL }; diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c index 69b5707..8800e1f 100644 --- a/drivers/net/irda/smsc-ircc2.c +++ b/drivers/net/irda/smsc-ircc2.c @@ -222,19 +222,19 @@ static void smsc_ircc_set_transceiver_for_speed(struct smsc_ircc_cb *self, u32 s static void smsc_ircc_sir_wait_hw_transmitter_finish(struct smsc_ircc_cb *self); /* Probing */ -static int smsc_ircc_look_for_chips(void); -static const struct smsc_chip * smsc_ircc_probe(unsigned short cfg_base, u8 reg, const struct smsc_chip *chip, char *type); -static int smsc_superio_flat(const struct smsc_chip *chips, unsigned short cfg_base, char *type); -static int smsc_superio_paged(const struct smsc_chip *chips, unsigned short cfg_base, char *type); -static int smsc_superio_fdc(unsigned short cfg_base); -static int smsc_superio_lpc(unsigned short cfg_base); +static int __init smsc_ircc_look_for_chips(void); +static const struct smsc_chip * __init smsc_ircc_probe(unsigned short cfg_base, u8 reg, const struct smsc_chip *chip, char *type); +static int __init smsc_superio_flat(const struct smsc_chip *chips, unsigned short cfg_base, char *type); +static int __init smsc_superio_paged(const struct smsc_chip *chips, unsigned short cfg_base, char *type); +static int __init smsc_superio_fdc(unsigned short cfg_base); +static int __init smsc_superio_lpc(unsigned short cfg_base); #ifdef CONFIG_PCI -static int preconfigure_smsc_chip(struct smsc_ircc_subsystem_configuration *conf); -static int preconfigure_through_82801(struct pci_dev *dev, struct smsc_ircc_subsystem_configuration *conf); -static void preconfigure_ali_port(struct pci_dev *dev, +static int __init preconfigure_smsc_chip(struct smsc_ircc_subsystem_configuration *conf); +static int __init preconfigure_through_82801(struct pci_dev *dev, struct smsc_ircc_subsystem_configuration *conf); +static void __init preconfigure_ali_port(struct pci_dev *dev, unsigned short port); -static int preconfigure_through_ali(struct pci_dev *dev, struct smsc_ircc_subsystem_configuration *conf); -static int smsc_ircc_preconfigure_subsystems(unsigned short ircc_cfg, +static int __init preconfigure_through_ali(struct pci_dev *dev, struct smsc_ircc_subsystem_configuration *conf); +static int __init smsc_ircc_preconfigure_subsystems(unsigned short ircc_cfg, unsigned short ircc_fir, unsigned short ircc_sir, unsigned char ircc_dma, @@ -366,7 +366,7 @@ static inline void register_bank(int iobase, int bank) } /* PNP hotplug support */ -static const struct pnp_device_id smsc_ircc_pnp_table[] __devinitconst = { +static const struct pnp_device_id smsc_ircc_pnp_table[] = { { .id = "SMCf010", .driver_data = 0 }, /* and presumably others */ { } @@ -515,7 +515,7 @@ static const struct net_device_ops smsc_ircc_netdev_ops = { * Try to open driver instance * */ -static int __devinit smsc_ircc_open(unsigned int fir_base, unsigned int sir_base, u8 dma, u8 irq) +static int __init smsc_ircc_open(unsigned int fir_base, unsigned int sir_base, u8 dma, u8 irq) { struct smsc_ircc_cb *self; struct net_device *dev; @@ -2273,7 +2273,7 @@ static int __init smsc_superio_paged(const struct smsc_chip *chips, unsigned sho } -static int __devinit smsc_access(unsigned short cfg_base, unsigned char reg) +static int __init smsc_access(unsigned short cfg_base, unsigned char reg) { IRDA_DEBUG(1, "%s\n", __func__); @@ -2281,7 +2281,7 @@ static int __devinit smsc_access(unsigned short cfg_base, unsigned char reg) return inb(cfg_base) != reg ? -1 : 0; } -static const struct smsc_chip * __devinit smsc_ircc_probe(unsigned short cfg_base, u8 reg, const struct smsc_chip *chip, char *type) +static const struct smsc_chip * __init smsc_ircc_probe(unsigned short cfg_base, u8 reg, const struct smsc_chip *chip, char *type) { u8 devid, xdevid, rev; @@ -2406,7 +2406,7 @@ static int __init smsc_superio_lpc(unsigned short cfg_base) #ifdef CONFIG_PCI #define PCIID_VENDOR_INTEL 0x8086 #define PCIID_VENDOR_ALI 0x10b9 -static const struct smsc_ircc_subsystem_configuration subsystem_configurations[] __devinitconst = { +static struct smsc_ircc_subsystem_configuration subsystem_configurations[] __initdata = { /* * Subsystems needing entries: * 0x10b9:0x1533 0x103c:0x0850 HP nx9010 family @@ -2532,7 +2532,7 @@ static const struct smsc_ircc_subsystem_configuration subsystem_configurations[] * (FIR port, SIR port, FIR DMA, FIR IRQ) * through the chip configuration port. */ -static int __devinit preconfigure_smsc_chip(struct +static int __init preconfigure_smsc_chip(struct smsc_ircc_subsystem_configuration *conf) { @@ -2633,7 +2633,7 @@ static int __devinit preconfigure_smsc_chip(struct * or Intel 82801DB/DBL (ICH4/ICH4-L) LPC Interface Bridge. * They all work the same way! */ -static int __devinit preconfigure_through_82801(struct pci_dev *dev, +static int __init preconfigure_through_82801(struct pci_dev *dev, struct smsc_ircc_subsystem_configuration *conf) @@ -2786,7 +2786,7 @@ static int __devinit preconfigure_through_82801(struct pci_dev *dev, * This is based on reverse-engineering since ALi does not * provide any data sheet for the 1533 chip. */ -static void __devinit preconfigure_ali_port(struct pci_dev *dev, +static void __init preconfigure_ali_port(struct pci_dev *dev, unsigned short port) { unsigned char reg; @@ -2824,7 +2824,7 @@ static void __devinit preconfigure_ali_port(struct pci_dev *dev, IRDA_MESSAGE("Activated ALi 1533 ISA bridge port 0x%04x.\n", port); } -static int __devinit preconfigure_through_ali(struct pci_dev *dev, +static int __init preconfigure_through_ali(struct pci_dev *dev, struct smsc_ircc_subsystem_configuration *conf) @@ -2837,7 +2837,7 @@ static int __devinit preconfigure_through_ali(struct pci_dev *dev, return preconfigure_smsc_chip(conf); } -static int __devinit smsc_ircc_preconfigure_subsystems(unsigned short ircc_cfg, +static int __init smsc_ircc_preconfigure_subsystems(unsigned short ircc_cfg, unsigned short ircc_fir, unsigned short ircc_sir, unsigned char ircc_dma, @@ -2849,7 +2849,7 @@ static int __devinit smsc_ircc_preconfigure_subsystems(unsigned short ircc_cfg, int ret = 0; for_each_pci_dev(dev) { - const struct smsc_ircc_subsystem_configuration *conf; + struct smsc_ircc_subsystem_configuration *conf; /* * Cache the subsystem vendor/device: diff --git a/drivers/net/ne3210.c b/drivers/net/ne3210.c index e8984b0c..243ed2a 100644 --- a/drivers/net/ne3210.c +++ b/drivers/net/ne3210.c @@ -80,20 +80,17 @@ static void ne3210_block_output(struct net_device *dev, int count, const unsigne #define NE3210_DEBUG 0x0 -static const unsigned char irq_map[] __devinitconst = - { 15, 12, 11, 10, 9, 7, 5, 3 }; -static const unsigned int shmem_map[] __devinitconst = - { 0xff0, 0xfe0, 0xfff0, 0xd8, 0xffe0, 0xffc0, 0xd0, 0x0 }; -static const char *const ifmap[] __devinitconst = - { "UTP", "?", "BNC", "AUI" }; -static const int ifmap_val[] __devinitconst = { +static unsigned char irq_map[] __initdata = {15, 12, 11, 10, 9, 7, 5, 3}; +static unsigned int shmem_map[] __initdata = {0xff0, 0xfe0, 0xfff0, 0xd8, 0xffe0, 0xffc0, 0xd0, 0x0}; +static const char *ifmap[] __initdata = {"UTP", "?", "BNC", "AUI"}; +static int ifmap_val[] __initdata = { IF_PORT_10BASET, IF_PORT_UNKNOWN, IF_PORT_10BASE2, IF_PORT_AUI, }; -static int __devinit ne3210_eisa_probe (struct device *device) +static int __init ne3210_eisa_probe (struct device *device) { unsigned long ioaddr, phys_mem; int i, retval, port_index; @@ -316,7 +313,7 @@ static void ne3210_block_output(struct net_device *dev, int count, memcpy_toio(shmem, buf, count); } -static const struct eisa_device_id ne3210_ids[] __devinitconst = { +static struct eisa_device_id ne3210_ids[] = { { "EGL0101" }, { "NVL1801" }, { "" }, diff --git a/drivers/net/smc-mca.c b/drivers/net/smc-mca.c index 0f29f26..d07c39c 100644 --- a/drivers/net/smc-mca.c +++ b/drivers/net/smc-mca.c @@ -156,7 +156,7 @@ static const struct { { 14, 15 } }; -static const short smc_mca_adapter_ids[] __devinitconst = { +static short smc_mca_adapter_ids[] __initdata = { 0x61c8, 0x61c9, 0x6fc0, @@ -168,7 +168,7 @@ static const short smc_mca_adapter_ids[] __devinitconst = { 0x0000 }; -static const char *const smc_mca_adapter_names[] __devinitconst = { +static char *smc_mca_adapter_names[] __initdata = { "SMC Ethercard PLUS Elite/A BNC/AUI (WD8013EP/A)", "SMC Ethercard PLUS Elite/A UTP/AUI (WD8013WP/A)", "WD Ethercard PLUS/A (WD8003E/A or WD8003ET/A)", @@ -199,7 +199,7 @@ static const struct net_device_ops ultramca_netdev_ops = { #endif }; -static int __devinit ultramca_probe(struct device *gen_dev) +static int __init ultramca_probe(struct device *gen_dev) { unsigned short ioaddr; struct net_device *dev; diff --git a/drivers/net/tokenring/madgemc.c b/drivers/net/tokenring/madgemc.c index 1313aa1..2bedc0a 100644 --- a/drivers/net/tokenring/madgemc.c +++ b/drivers/net/tokenring/madgemc.c @@ -727,7 +727,7 @@ static int __devexit madgemc_remove(struct device *device) return 0; } -static const short madgemc_adapter_ids[] __devinitconst = { +static short madgemc_adapter_ids[] __initdata = { 0x002d, 0x0000 }; diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c index 45144d5..efaa1d6 100644 --- a/drivers/net/tulip/de4x5.c +++ b/drivers/net/tulip/de4x5.c @@ -1995,7 +1995,7 @@ SetMulticastFilter(struct net_device *dev) static u_char de4x5_irq[] = EISA_ALLOWED_IRQ_LIST; -static int __devinit de4x5_eisa_probe (struct device *gendev) +static int __init de4x5_eisa_probe (struct device *gendev) { struct eisa_device *edev; u_long iobase; @@ -2097,7 +2097,7 @@ static int __devexit de4x5_eisa_remove (struct device *device) return 0; } -static const struct eisa_device_id de4x5_eisa_ids[] __devinitconst = { +static struct eisa_device_id de4x5_eisa_ids[] = { { "DEC4250", 0 }, /* 0 is the board name index... */ { "" } }; -- cgit v0.10.2 From a5b2c5b2ad5853591a6cac6134cd0f599a720865 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 31 May 2011 11:31:41 -0700 Subject: AppArmor: fix oops in apparmor_setprocattr When invalid parameters are passed to apparmor_setprocattr a NULL deref oops occurs when it tries to record an audit message. This is because it is passing NULL for the profile parameter for aa_audit. But aa_audit now requires that the profile passed is not NULL. Fix this by passing the current profile on the task that is trying to setprocattr. Signed-off-by: Kees Cook Signed-off-by: John Johansen Cc: stable@kernel.org Signed-off-by: James Morris diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index ae3a698..ec1bcec 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -593,7 +593,8 @@ static int apparmor_setprocattr(struct task_struct *task, char *name, sa.aad.op = OP_SETPROCATTR; sa.aad.info = name; sa.aad.error = -EINVAL; - return aa_audit(AUDIT_APPARMOR_DENIED, NULL, GFP_KERNEL, + return aa_audit(AUDIT_APPARMOR_DENIED, + __aa_current_profile(), GFP_KERNEL, &sa, NULL); } } else if (strcmp(name, "exec") == 0) { -- cgit v0.10.2 From 4c49ff3fe128ca68dabd07537415c419ad7f82f9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 1 Jun 2011 08:27:41 +0200 Subject: block: blkdev_get() should access ->bd_disk only after success d4dc210f69 (block: don't block events on excl write for non-optical devices) added dereferencing of bdev->bd_disk to test GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; however, bdev->bd_disk can be %NULL if open failed which can lead to an oops. Test the flag after testing open was successful, not before. Signed-off-by: Tejun Heo Reported-by: David Miller Tested-by: David Miller Cc: stable@kernel.org Signed-off-by: Jens Axboe diff --git a/fs/block_dev.c b/fs/block_dev.c index 1f2b199..1a2421f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1272,8 +1272,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) * individual writeable reference is too fragile given the * way @mode is used in blkdev_get/put(). */ - if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) && - !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { + if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder && + (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { bdev->bd_write_holder = true; disk_block_events(disk); } -- cgit v0.10.2 From 603d04b2010976a52f62b7633f9999d104046900 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Sat, 28 May 2011 10:04:25 -0400 Subject: kgdbts: only use new asm-generic/ptrace.h api when needed The new instruction_pointer_set helper is defined for people who have converted to asm-generic/ptrace.h, so don't use it generally unless the arch needs it (in which case it has been converted). This should fix building of kgdb tests for arches not yet converted. Signed-off-by: Mike Frysinger Acked-by: Stephen Rothwell Cc: Jason Wessel Cc: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index b0c5631..8cebec5 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -304,7 +304,10 @@ static int check_and_rewind_pc(char *put_str, char *arg) return 1; } /* Readjust the instruction pointer if needed */ - instruction_pointer_set(&kgdbts_regs, ip + offset); + ip += offset; +#ifdef GDB_ADJUSTS_BREAK_OFFSET + instruction_pointer_set(&kgdbts_regs, ip); +#endif return 0; } -- cgit v0.10.2 From ab75950b11e74145ffe61376ac073d56645aab8a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 26 May 2011 06:51:48 +0300 Subject: UBIFS: supress false error messages Commit ab51afe05273741f72383529ef488aa1ea598ec6 was a good clean-up, but it introduced a regression - now UBIFS prints scary error messages during recovery on all corrupted nodes, even though the corruptions are expected (due to a power cut). This patch fixes the issue. Additionally fix a typo in a commentary introduced by the same commit. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 731d9e2..95e2418 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -635,7 +635,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, * Scan quietly until there is an error from which we cannot * recover */ - ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); if (ret == SCANNED_A_NODE) { /* A valid node, and not a padding node */ struct ubifs_ch *ch = buf; @@ -701,7 +701,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, * While we are in the middle of the same min. I/O unit keep dropping * nodes. So basically, what we want is to make sure that the last min. * I/O unit where we saw the corruption is dropped completely with all - * the uncorrupted node which may possibly sit there. + * the uncorrupted nodes which may possibly sit there. * * In other words, let's name the min. I/O unit where the corruption * starts B, and the previous min. I/O unit A. The below code tries to -- cgit v0.10.2 From 1a0b06997ceca96db9259e537eb935f9fe59a3de Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 26 May 2011 08:26:05 +0300 Subject: UBIFS: introduce a "grouped" journal head flag Journal heads are different in a way how UBIFS writes nodes there. All normal journal heads receive grouped nodes, while the GC journal heads receives ungrouped nodes. This patch adds a 'grouped' flag to 'struct ubifs_jhead' which describes this property. This patch is a preparation to a further recovery fix. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 1ab0d22..1e40db7 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -811,15 +811,18 @@ static int alloc_wbufs(struct ubifs_info *c) c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; c->jheads[i].wbuf.jhead = i; + c->jheads[i].grouped = 1; } c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; /* * Garbage Collector head likely contains long-term data and - * does not need to be synchronized by timer. + * does not need to be synchronized by timer. Also GC head nodes are + * not grouped. */ c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; c->jheads[GCHD].wbuf.no_timer = 1; + c->jheads[GCHD].grouped = 0; return 0; } diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index a70d7b4..adeca14 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -722,12 +722,14 @@ struct ubifs_bud { * struct ubifs_jhead - journal head. * @wbuf: head's write-buffer * @buds_list: list of bud LEBs belonging to this journal head + * @grouped: non-zero if UBIFS groups nodes when writing to this journal head * * Note, the @buds list is protected by the @c->buds_lock. */ struct ubifs_jhead { struct ubifs_wbuf wbuf; struct list_head buds_list; + unsigned int grouped:1; }; /** -- cgit v0.10.2 From efcfde54ca68091b164f9aec544c7233a9760aff Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 26 May 2011 08:36:52 +0300 Subject: UBIFS: amend ubifs_recover_leb interface Instead of passing "grouped" parameter to 'ubifs_recover_leb()' which tells whether the nodes are grouped in the LEB to recover, pass the journal head number and let 'ubifs_recover_leb()' look at the journal head's 'grouped' flag. This patch is a preparation to a further fix where we'll need to know the journal head number for other purposes. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index bd644bf..a5422ff 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -674,7 +674,7 @@ static int kill_orphans(struct ubifs_info *c) if (IS_ERR(sleb)) { if (PTR_ERR(sleb) == -EUCLEAN) sleb = ubifs_recover_leb(c, lnum, 0, - c->sbuf, 0); + c->sbuf, -1); if (IS_ERR(sleb)) { err = PTR_ERR(sleb); break; diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 95e2418..6adb532 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -604,7 +604,8 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) * @lnum: LEB number * @offs: offset * @sbuf: LEB-sized buffer to use - * @grouped: nodes may be grouped for recovery + * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not + * belong to any journal head) * * This function does a scan of a LEB, but caters for errors that might have * been caused by the unclean unmount from which we are attempting to recover. @@ -612,13 +613,14 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) * found, and a negative error code in case of failure. */ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, - int offs, void *sbuf, int grouped) + int offs, void *sbuf, int jhead) { int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; + int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped; struct ubifs_scan_leb *sleb; void *buf = sbuf + offs; - dbg_rcvry("%d:%d", lnum, offs); + dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped); sleb = ubifs_start_scan(c, lnum, offs, sbuf); if (IS_ERR(sleb)) @@ -881,7 +883,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, } ubifs_scan_destroy(sleb); } - return ubifs_recover_leb(c, lnum, offs, sbuf, 0); + return ubifs_recover_leb(c, lnum, offs, sbuf, -1); } /** diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 6617280..5e97161 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -557,8 +557,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b) * these LEBs could possibly be written to at the power cut * time. */ - sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, - b->bud->jhead != GCHD); + sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); else sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); if (IS_ERR(sleb)) diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index adeca14..f79983d 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1744,7 +1744,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); int ubifs_recover_master_node(struct ubifs_info *c); int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, - int offs, void *sbuf, int grouped); + int offs, void *sbuf, int jhead); struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf); int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); -- cgit v0.10.2 From da8b94ea61c5d80aae0cc7b7541f1e0fa7459391 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 26 May 2011 08:58:19 +0300 Subject: UBIFS: fix recovery broken by the previous recovery fix Unfortunately, the recovery fix d1606a59b6be4ea392eabd40d1250aa1eeb19efb (UBIFS: fix extremely rare mount failure) broke recovery. This commit make UBIFS drop the last min. I/O unit in all journal heads, but this is needed only for the GC head. And this does not work for non-GC heads. For example, if suppose we have min. I/O units A and B, and A contains a valid node X, which was fsynced, and then a group of nodes Y which spans the rest of A and B. In this case we'll drop not only Y, but also X, which is obviously incorrect. This patch fixes the issue and additionally makes recovery to drop last min. I/O unit only for the GC head, and leave things as they have been for ages for the other heads - this is safer. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 6adb532..783d8e0 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -564,19 +564,15 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, } /** - * drop_last_node - drop the last node or group of nodes. + * drop_last_group - drop the last group of nodes. * @sleb: scanned LEB information * @offs: offset of dropped nodes is returned here - * @grouped: non-zero if whole group of nodes have to be dropped * * This is a helper function for 'ubifs_recover_leb()' which drops the last - * node of the scanned LEB or the last group of nodes if @grouped is not zero. - * This function returns %1 if a node was dropped and %0 otherwise. + * group of nodes of the scanned LEB. */ -static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) +static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs) { - int dropped = 0; - while (!list_empty(&sleb->nodes)) { struct ubifs_scan_node *snod; struct ubifs_ch *ch; @@ -585,17 +581,40 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) list); ch = snod->node; if (ch->group_type != UBIFS_IN_NODE_GROUP) - return dropped; - dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); + break; + + dbg_rcvry("dropping grouped node at %d:%d", + sleb->lnum, snod->offs); + *offs = snod->offs; + list_del(&snod->list); + kfree(snod); + sleb->nodes_cnt -= 1; + } +} + +/** + * drop_last_node - drop the last node. + * @sleb: scanned LEB information + * @offs: offset of dropped nodes is returned here + * @grouped: non-zero if whole group of nodes have to be dropped + * + * This is a helper function for 'ubifs_recover_leb()' which drops the last + * node of the scanned LEB. + */ +static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs) +{ + struct ubifs_scan_node *snod; + + if (!list_empty(&sleb->nodes)) { + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, + list); + + dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs); *offs = snod->offs; list_del(&snod->list); kfree(snod); sleb->nodes_cnt -= 1; - dropped = 1; - if (!grouped) - break; } - return dropped; } /** @@ -697,59 +716,62 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, * If nodes are grouped, always drop the incomplete group at * the end. */ - drop_last_node(sleb, &offs, 1); + drop_last_group(sleb, &offs); - /* - * While we are in the middle of the same min. I/O unit keep dropping - * nodes. So basically, what we want is to make sure that the last min. - * I/O unit where we saw the corruption is dropped completely with all - * the uncorrupted nodes which may possibly sit there. - * - * In other words, let's name the min. I/O unit where the corruption - * starts B, and the previous min. I/O unit A. The below code tries to - * deal with a situation when half of B contains valid nodes or the end - * of a valid node, and the second half of B contains corrupted data or - * garbage. This means that UBIFS had been writing to B just before the - * power cut happened. I do not know how realistic is this scenario - * that half of the min. I/O unit had been written successfully and the - * other half not, but this is possible in our 'failure mode emulation' - * infrastructure at least. - * - * So what is the problem, why we need to drop those nodes? Whey can't - * we just clean-up the second half of B by putting a padding node - * there? We can, and this works fine with one exception which was - * reproduced with power cut emulation testing and happens extremely - * rarely. The description follows, but it is worth noting that that is - * only about the GC head, so we could do this trick only if the bud - * belongs to the GC head, but it does not seem to be worth an - * additional "if" statement. - * - * So, imagine the file-system is full, we run GC which is moving valid - * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head - * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X - * and will try to continue. Imagine that LEB X is currently the - * dirtiest LEB, and the amount of used space in LEB Y is exactly the - * same as amount of free space in LEB X. - * - * And a power cut happens when nodes are moved from LEB X to LEB Y. We - * are here trying to recover LEB Y which is the GC head LEB. We find - * the min. I/O unit B as described above. Then we clean-up LEB Y by - * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function - * fails, because it cannot find a dirty LEB which could be GC'd into - * LEB Y! Even LEB X does not match because the amount of valid nodes - * there does not fit the free space in LEB Y any more! And this is - * because of the padding node which we added to LEB Y. The - * user-visible effect of this which I once observed and analysed is - * that we cannot mount the file-system with -ENOSPC error. - * - * So obviously, to make sure that situation does not happen we should - * free min. I/O unit B in LEB Y completely and the last used min. I/O - * unit in LEB Y should be A. This is basically what the below code - * tries to do. - */ - while (min_io_unit == round_down(offs, c->min_io_size) && - min_io_unit != offs && - drop_last_node(sleb, &offs, grouped)); + if (jhead == GCHD) { + /* + * If this LEB belongs to the GC head then while we are in the + * middle of the same min. I/O unit keep dropping nodes. So + * basically, what we want is to make sure that the last min. + * I/O unit where we saw the corruption is dropped completely + * with all the uncorrupted nodes which may possibly sit there. + * + * In other words, let's name the min. I/O unit where the + * corruption starts B, and the previous min. I/O unit A. The + * below code tries to deal with a situation when half of B + * contains valid nodes or the end of a valid node, and the + * second half of B contains corrupted data or garbage. This + * means that UBIFS had been writing to B just before the power + * cut happened. I do not know how realistic is this scenario + * that half of the min. I/O unit had been written successfully + * and the other half not, but this is possible in our 'failure + * mode emulation' infrastructure at least. + * + * So what is the problem, why we need to drop those nodes? Why + * can't we just clean-up the second half of B by putting a + * padding node there? We can, and this works fine with one + * exception which was reproduced with power cut emulation + * testing and happens extremely rarely. + * + * Imagine the file-system is full, we run GC which starts + * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is + * the current GC head LEB). The @c->gc_lnum is -1, which means + * that GC will retain LEB X and will try to continue. Imagine + * that LEB X is currently the dirtiest LEB, and the amount of + * used space in LEB Y is exactly the same as amount of free + * space in LEB X. + * + * And a power cut happens when nodes are moved from LEB X to + * LEB Y. We are here trying to recover LEB Y which is the GC + * head LEB. We find the min. I/O unit B as described above. + * Then we clean-up LEB Y by padding min. I/O unit. And later + * 'ubifs_rcvry_gc_commit()' function fails, because it cannot + * find a dirty LEB which could be GC'd into LEB Y! Even LEB X + * does not match because the amount of valid nodes there does + * not fit the free space in LEB Y any more! And this is + * because of the padding node which we added to LEB Y. The + * user-visible effect of this which I once observed and + * analysed is that we cannot mount the file-system with + * -ENOSPC error. + * + * So obviously, to make sure that situation does not happen we + * should free min. I/O unit B in LEB Y completely and the last + * used min. I/O unit in LEB Y should be A. This is basically + * what the below code tries to do. + */ + while (offs > min_io_unit) + drop_last_node(sleb, &offs); + } buf = sbuf + offs; len = c->leb_size - offs; -- cgit v0.10.2 From 63da029015b5255915cd6d61f19ffc276ad4635d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 23 May 2011 11:37:09 -0700 Subject: mtd: fix physmap.h warnings Fix build warnings in physmap.h: include/linux/mtd/physmap.h:25: warning: 'struct platform_device' declared inside parameter list include/linux/mtd/physmap.h:25: warning: its scope is only this definition or declaration, which is probably not what you want include/linux/mtd/physmap.h:26: warning: 'struct platform_device' declared inside parameter list include/linux/mtd/physmap.h:27: warning: 'struct platform_device' declared inside parameter list Signed-off-by: Randy Dunlap Signed-off-by: David Woodhouse diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h index d40bfa1..e5f21d2 100644 --- a/include/linux/mtd/physmap.h +++ b/include/linux/mtd/physmap.h @@ -19,6 +19,7 @@ #include struct map_info; +struct platform_device; struct physmap_flash_data { unsigned int width; -- cgit v0.10.2 From 6dd9a7c73761a8a5f5475d5cfdc15368a0f4c06d Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Wed, 25 May 2011 19:13:49 +0100 Subject: intel-iommu: Enable super page (2MiB, 1GiB, etc.) support There are no externally-visible changes with this. In the loop in the internal __domain_mapping() function, we simply detect if we are mapping: - size >= 2MiB, and - virtual address aligned to 2MiB, and - physical address aligned to 2MiB, and - on hardware that supports superpages. (and likewise for larger superpages). We automatically use a superpage for such mappings. We never have to worry about *breaking* superpages, since we trust that we will always *unmap* the same range that was mapped. So all we need to do is ensure that dma_pte_clear_range() will also cope with superpages. Adjust pfn_to_dma_pte() to take a superpage 'level' as an argument, so it can return a PTE at the appropriate level rather than always extending the page tables all the way down to level 1. Again, this is simplified by the fact that we should never encounter existing small pages when we're creating a mapping; any old mapping that used the same virtual range will have been entirely removed and its obsolete page tables freed. Provide an 'intel_iommu=sp_off' argument on the command line as a chicken bit. Not that it should ever be required. == The original commit seen in the iommu-2.6.git was Youquan's implementation (and completion) of my own half-baked code which I'd typed into an email. Followed by half a dozen subsequent 'fixes'. I've taken the unusual step of rewriting history and collapsing the original commits in order to keep the main history simpler, and make life easier for the people who are going to have to backport this to older kernels. And also so I can give it a more coherent commit comment which (hopefully) gives a better explanation of what's going on. The original sequence of commits leading to identical code was: Youquan Song (3): intel-iommu: super page support intel-iommu: Fix superpage alignment calculation error intel-iommu: Fix superpage level calculation error in dma_pfn_level_pte() David Woodhouse (4): intel-iommu: Precalculate superpage support for dmar_domain intel-iommu: Fix hardware_largepage_caps() intel-iommu: Fix inappropriate use of superpages in __domain_mapping() intel-iommu: Fix phys_pfn in __domain_mapping for sglist pages Signed-off-by: Youquan Song Signed-off-by: David Woodhouse diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index cc85a92..d005487 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -999,7 +999,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. With this option on every unmap_single operation will result in a hardware IOTLB flush operation as opposed to batching them for performance. - + sp_off [Default Off] + By default, super page will be supported if Intel IOMMU + has the capability. With this option, super page will + not be supported. intremap= [X86-64, Intel-IOMMU] Format: { on (default) | off | nosid } on enable Interrupt Remapping (default) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 395f253..e6fe199 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -115,6 +115,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level) return (pfn + level_size(level) - 1) & level_mask(level); } +static inline unsigned long lvl_to_nr_pages(unsigned int lvl) +{ + return 1 << ((lvl - 1) * LEVEL_STRIDE); +} + /* VT-d pages must always be _smaller_ than MM pages. Otherwise things are never going to work. */ static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn) @@ -343,6 +348,9 @@ struct dmar_domain { int iommu_coherency;/* indicate coherency of iommu access */ int iommu_snooping; /* indicate snooping control feature*/ int iommu_count; /* reference count of iommu */ + int iommu_superpage;/* Level of superpages supported: + 0 == 4KiB (no superpages), 1 == 2MiB, + 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ spinlock_t iommu_lock; /* protect iommu set in domain */ u64 max_addr; /* maximum mapped address */ }; @@ -392,6 +400,7 @@ int dmar_disabled = 1; static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; +static int intel_iommu_superpage = 1; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -422,6 +431,10 @@ static int __init intel_iommu_setup(char *str) printk(KERN_INFO "Intel-IOMMU: disable batched IOTLB flush\n"); intel_iommu_strict = 1; + } else if (!strncmp(str, "sp_off", 6)) { + printk(KERN_INFO + "Intel-IOMMU: disable supported super page\n"); + intel_iommu_superpage = 0; } str += strcspn(str, ","); @@ -560,11 +573,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain) } } +static void domain_update_iommu_superpage(struct dmar_domain *domain) +{ + int i, mask = 0xf; + + if (!intel_iommu_superpage) { + domain->iommu_superpage = 0; + return; + } + + domain->iommu_superpage = 4; /* 1TiB */ + + for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) { + mask |= cap_super_page_val(g_iommus[i]->cap); + if (!mask) { + break; + } + } + domain->iommu_superpage = fls(mask); +} + /* Some capabilities may be different across iommus */ static void domain_update_iommu_cap(struct dmar_domain *domain) { domain_update_iommu_coherency(domain); domain_update_iommu_snooping(domain); + domain_update_iommu_superpage(domain); } static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) @@ -694,23 +728,31 @@ out: } static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, - unsigned long pfn) + unsigned long pfn, int large_level) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; struct dma_pte *parent, *pte = NULL; int level = agaw_to_level(domain->agaw); - int offset; + int offset, target_level; BUG_ON(!domain->pgd); BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); parent = domain->pgd; + /* Search pte */ + if (!large_level) + target_level = 1; + else + target_level = large_level; + while (level > 0) { void *tmp_page; offset = pfn_level_offset(pfn, level); pte = &parent[offset]; - if (level == 1) + if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE)) + break; + if (level == target_level) break; if (!dma_pte_present(pte)) { @@ -738,10 +780,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, return pte; } + /* return address's pte at specific level */ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, unsigned long pfn, - int level) + int level, int *large_page) { struct dma_pte *parent, *pte = NULL; int total = agaw_to_level(domain->agaw); @@ -754,8 +797,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, if (level == total) return pte; - if (!dma_pte_present(pte)) + if (!dma_pte_present(pte)) { + *large_page = total; break; + } + + if (pte->val & DMA_PTE_LARGE_PAGE) { + *large_page = total; + return pte; + } + parent = phys_to_virt(dma_pte_addr(pte)); total--; } @@ -768,6 +819,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain, unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + unsigned int large_page = 1; struct dma_pte *first_pte, *pte; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); @@ -776,14 +828,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain, /* we don't need lock here; nobody else touches the iova range */ do { - first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1); + large_page = 1; + first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page); if (!pte) { - start_pfn = align_to_level(start_pfn + 1, 2); + start_pfn = align_to_level(start_pfn + 1, large_page + 1); continue; } - do { + do { dma_clear_pte(pte); - start_pfn++; + start_pfn += lvl_to_nr_pages(large_page); pte++; } while (start_pfn <= last_pfn && !first_pte_in_page(pte)); @@ -803,6 +856,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, int total = agaw_to_level(domain->agaw); int level; unsigned long tmp; + int large_page = 2; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); @@ -818,7 +872,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, return; do { - first_pte = pte = dma_pfn_level_pte(domain, tmp, level); + large_page = level; + first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page); + if (large_page > level) + level = large_page + 1; if (!pte) { tmp = align_to_level(tmp + 1, level + 1); continue; @@ -1402,6 +1459,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width) else domain->iommu_snooping = 0; + domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); domain->iommu_count = 1; domain->nid = iommu->node; @@ -1657,6 +1715,34 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr, return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; } +/* Return largest possible superpage level for a given mapping */ +static inline int hardware_largepage_caps(struct dmar_domain *domain, + unsigned long iov_pfn, + unsigned long phy_pfn, + unsigned long pages) +{ + int support, level = 1; + unsigned long pfnmerge; + + support = domain->iommu_superpage; + + /* To use a large page, the virtual *and* physical addresses + must be aligned to 2MiB/1GiB/etc. Lower bits set in either + of them will mean we have to use smaller pages. So just + merge them and check both at once. */ + pfnmerge = iov_pfn | phy_pfn; + + while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) { + pages >>= VTD_STRIDE_SHIFT; + if (!pages) + break; + pfnmerge >>= VTD_STRIDE_SHIFT; + level++; + support--; + } + return level; +} + static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, struct scatterlist *sg, unsigned long phys_pfn, unsigned long nr_pages, int prot) @@ -1665,6 +1751,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, phys_addr_t uninitialized_var(pteval); int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; unsigned long sg_res; + unsigned int largepage_lvl = 0; + unsigned long lvl_pages = 0; BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); @@ -1680,7 +1768,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; } - while (nr_pages--) { + while (nr_pages > 0) { uint64_t tmp; if (!sg_res) { @@ -1688,11 +1776,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; sg->dma_length = sg->length; pteval = page_to_phys(sg_page(sg)) | prot; + phys_pfn = pteval >> VTD_PAGE_SHIFT; } + if (!pte) { - first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); + largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); + + first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl); if (!pte) return -ENOMEM; + /* It is large page*/ + if (largepage_lvl > 1) + pteval |= DMA_PTE_LARGE_PAGE; + else + pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; + } /* We don't need lock here, nobody else * touches the iova range @@ -1708,16 +1806,38 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, } WARN_ON(1); } + + lvl_pages = lvl_to_nr_pages(largepage_lvl); + + BUG_ON(nr_pages < lvl_pages); + BUG_ON(sg_res < lvl_pages); + + nr_pages -= lvl_pages; + iov_pfn += lvl_pages; + phys_pfn += lvl_pages; + pteval += lvl_pages * VTD_PAGE_SIZE; + sg_res -= lvl_pages; + + /* If the next PTE would be the first in a new page, then we + need to flush the cache on the entries we've just written. + And then we'll need to recalculate 'pte', so clear it and + let it get set again in the if (!pte) block above. + + If we're done (!nr_pages) we need to flush the cache too. + + Also if we've been setting superpages, we may need to + recalculate 'pte' and switch back to smaller pages for the + end of the mapping, if the trailing size is not enough to + use another superpage (i.e. sg_res < lvl_pages). */ pte++; - if (!nr_pages || first_pte_in_page(pte)) { + if (!nr_pages || first_pte_in_page(pte) || + (largepage_lvl > 1 && sg_res < lvl_pages)) { domain_flush_cache(domain, first_pte, (void *)pte - (void *)first_pte); pte = NULL; } - iov_pfn++; - pteval += VTD_PAGE_SIZE; - sg_res--; - if (!sg_res) + + if (!sg_res && nr_pages) sg = sg_next(sg); } return 0; @@ -3527,6 +3647,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_count = 0; domain->iommu_coherency = 0; domain->iommu_snooping = 0; + domain->iommu_superpage = 0; domain->max_addr = 0; domain->nid = -1; @@ -3742,7 +3863,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, struct dma_pte *pte; u64 phys = 0; - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT); + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0); if (pte) phys = dma_pte_addr(pte); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 5619f85..bbd8661 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -9,8 +9,12 @@ #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) +#define VTD_STRIDE_SHIFT (9) +#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) + #define DMA_PTE_READ (1) #define DMA_PTE_WRITE (2) +#define DMA_PTE_LARGE_PAGE (1 << 7) #define DMA_PTE_SNP (1 << 11) #define CONTEXT_TT_MULTI_LEVEL 0 -- cgit v0.10.2 From 9b4554b21ed07e8556405510638171f0c787742a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 24 May 2011 12:19:04 -0400 Subject: intel-iommu: Only unlink device domains from iommu Commit a97590e5 added unlinking domains from iommus to reciprocate the iommu from domains unlinking that was already done. We actually want to only do this for device domains and never for the static identity map domain or VM domains. The SI domain is special and never freed, while VM domain->id lives in their own special address space, separate from iommu->domain_ids. In the current code, a VM can get domain->id zero, then mark that domain unused when unbound from pci-stub. This leads to DMAR write faults when the device is re-bound to the host driver. Signed-off-by: Alex Williamson Cc: stable@kernel.org Signed-off-by: David Woodhouse diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index e6fe199..4eaec2f 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3561,10 +3561,13 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, domain_update_iommu_cap(domain); spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags); - spin_lock_irqsave(&iommu->lock, tmp_flags); - clear_bit(domain->id, iommu->domain_ids); - iommu->domains[domain->id] = NULL; - spin_unlock_irqrestore(&iommu->lock, tmp_flags); + if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && + !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) { + spin_lock_irqsave(&iommu->lock, tmp_flags); + clear_bit(domain->id, iommu->domain_ids); + iommu->domains[domain->id] = NULL; + spin_unlock_irqrestore(&iommu->lock, tmp_flags); + } } spin_unlock_irqrestore(&device_domain_lock, flags); -- cgit v0.10.2 From 8fcc5372fbac085199d84a880503ed67aba3fe49 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Sat, 28 May 2011 13:15:02 -0500 Subject: intel-iommu: Check for identity mapping candidate using system dma mask The identity mapping code appears to make the assumption that if the devices dma_mask is greater than 32bits the device can use identity mapping. But that is not true: take the case where we have a 40bit device in a 44bit architecture. The device can potentially receive a physical address that it will truncate and cause incorrect addresses to be used. Instead check to see if the device's dma_mask is large enough to address the system's dma_mask. Signed-off-by: Mike Travis Reviewed-by: Mike Habeck Cc: stable@kernel.org Signed-off-by: David Woodhouse diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 4eaec2f..dcf051d 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2316,8 +2316,19 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup) * Assume that they will -- if they turn out not to be, then we can * take them out of the 1:1 domain later. */ - if (!startup) - return pdev->dma_mask > DMA_BIT_MASK(32); + if (!startup) { + /* + * If the device's dma_mask is less than the system's memory + * size then this is not a candidate for identity mapping. + */ + u64 dma_mask = pdev->dma_mask; + + if (pdev->dev.coherent_dma_mask && + pdev->dev.coherent_dma_mask < dma_mask) + dma_mask = pdev->dev.coherent_dma_mask; + + return dma_mask >= dma_get_required_mask(&pdev->dev); + } return 1; } -- cgit v0.10.2 From cb452a4040bb051d92e85d6e7eb60c11734c1781 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 28 May 2011 13:15:03 -0500 Subject: intel-iommu: Speed up processing of the identity_mapping function When there are a large count of PCI devices, and the pass through option for iommu is set, much time is spent in the identity_mapping function hunting though the iommu domains to check if a specific device is "identity mapped". Speed up the function by checking the cached info to see if it's mapped to the static identity domain. Signed-off-by: Mike Travis Reviewed-by: Mike Habeck Cc: stable@kernel.org Signed-off-by: David Woodhouse diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index dcf051d..98be0b5 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2235,10 +2235,10 @@ static int identity_mapping(struct pci_dev *pdev) if (likely(!iommu_identity_mapping)) return 0; + info = pdev->dev.archdata.iommu; + if (info && info != DUMMY_DEVICE_DOMAIN_INFO) + return (info->domain == si_domain); - list_for_each_entry(info, &si_domain->devices, link) - if (info->dev == pdev) - return 1; return 0; } -- cgit v0.10.2 From 1c9fc3d11b84fbd0c4f4aa7855702c2a1f098ebb Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Sat, 28 May 2011 13:15:04 -0500 Subject: intel-iommu: Dont cache iova above 32bit Mike Travis and Mike Habeck reported an issue where iova allocation would return a range that was larger than a device's dma mask. https://lkml.org/lkml/2011/3/29/423 The dmar initialization code will reserve all PCI MMIO regions and copy those reservations into a domain specific iova tree. It is possible for one of those regions to be above the dma mask of a device. It is typical to allocate iovas with a 32bit mask (despite device's dma mask possibly being larger) and cache the result until it exhausts the lower 32bit address space. Freeing the iova range that is >= the last iova in the lower 32bit range when there is still an iova above the 32bit range will corrupt the cached iova by pointing it to a region that is above 32bit. If that region is also larger than the device's dma mask, a subsequent allocation will return an unusable iova and cause dma failure. Simply don't cache an iova that is above the 32bit caching boundary. Reported-by: Mike Travis Reported-by: Mike Habeck Cc: stable@kernel.org Acked-by: Mike Travis Tested-by: Mike Habeck Signed-off-by: Chris Wright Signed-off-by: David Woodhouse diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c index 9606e59..c5c274a 100644 --- a/drivers/pci/iova.c +++ b/drivers/pci/iova.c @@ -63,8 +63,16 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) curr = iovad->cached32_node; cached_iova = container_of(curr, struct iova, node); - if (free->pfn_lo >= cached_iova->pfn_lo) - iovad->cached32_node = rb_next(&free->node); + if (free->pfn_lo >= cached_iova->pfn_lo) { + struct rb_node *node = rb_next(&free->node); + struct iova *iova = container_of(node, struct iova, node); + + /* only cache if it's below 32bit pfn */ + if (node && iova->pfn_lo < iovad->dma_32bit_pfn) + iovad->cached32_node = node; + else + iovad->cached32_node = NULL; + } } /* Computes the padding size required, to make the -- cgit v0.10.2 From c681d0ba1252954208220ad32248a3e8e2fc98e4 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 28 May 2011 13:15:05 -0500 Subject: intel-iommu: Use coherent DMA mask when requested The __intel_map_single function is not honoring the passed in DMA mask. This results in not using the coherent DMA mask when called from intel_alloc_coherent(). Signed-off-by: Mike Travis Acked-by: Chris Wright Reviewed-by: Mike Habeck Cc: stable@kernel.org Signed-off-by: David Woodhouse diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 98be0b5..5cbab7f 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2732,8 +2732,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, iommu = domain_get_iommu(domain); size = aligned_nrpages(paddr, size); - iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), - pdev->dma_mask); + iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask); if (!iova) goto error; -- cgit v0.10.2 From 825507d6d059f1cbe2503e0e5a3926225b983aec Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 28 May 2011 13:15:06 -0500 Subject: intel-iommu: Remove Host Bridge devices from identity mapping When using the 1:1 (identity) PCI DMA remapping, PCI Host Bridge devices that do not use the IOMMU causes a kernel panic. Fix that by not inserting those devices into the si_domain. Signed-off-by: Mike Travis Reviewed-by: Mike Habeck Cc: stable@kernel.org Signed-off-by: David Woodhouse diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 5cbab7f..7f757ce 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -46,6 +46,8 @@ #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE +#define IS_BRIDGE_HOST_DEVICE(pdev) \ + ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST) #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) @@ -2343,6 +2345,9 @@ static int __init iommu_prepare_static_identity_mapping(int hw) return -EFAULT; for_each_pci_dev(pdev) { + /* Skip Host/PCI Bridge devices */ + if (IS_BRIDGE_HOST_DEVICE(pdev)) + continue; if (iommu_should_identity_map(pdev, 1)) { printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n", hw ? "hardware" : "software", pci_name(pdev)); -- cgit v0.10.2 From 8519dc4401ddf8a5399f979870bbeeadbc111186 Mon Sep 17 00:00:00 2001 From: Mike Habeck Date: Sat, 28 May 2011 13:15:07 -0500 Subject: intel-iommu: Add domain check in domain_remove_one_dev_info The comment in domain_remove_one_dev_info() states "No need to compare PCI domain; it has to be the same". But for the si_domain that isn't going to be true, as it consists of all the PCI devices that are identity mapped thus multiple PCI domains can be in si_domain. The code needs to validate the PCI domain too. Signed-off-by: Mike Habeck Signed-off-by: Mike Travis Cc: stable@kernel.org Signed-off-by: David Woodhouse diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 7f757ce..b0c96d3 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3537,8 +3537,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, spin_lock_irqsave(&device_domain_lock, flags); list_for_each_safe(entry, tmp, &domain->devices) { info = list_entry(entry, struct device_domain_info, link); - /* No need to compare PCI domain; it has to be the same */ - if (info->bus == pdev->bus->number && + if (info->segment == pci_domain_nr(pdev->bus) && + info->bus == pdev->bus->number && info->devfn == pdev->devfn) { list_del(&info->link); list_del(&info->global); -- cgit v0.10.2 From 70e535d1e5d1e4317e894d6228b762cf9c3fbc6a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 31 May 2011 00:22:52 +0100 Subject: intel-iommu: Fix off-by-one in RMRR setup We were mapping an extra byte (and hence usually an extra page): iommu_prepare_identity_map() expects to be given an 'end' argument which is the last byte to be mapped; not the first byte *not* to be mapped. Signed-off-by: David Woodhouse diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index b0c96d3..a8867bd 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2147,7 +2147,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) return 0; return iommu_prepare_identity_map(pdev, rmrr->base_address, - rmrr->end_address + 1); + rmrr->end_address); } #ifdef CONFIG_DMAR_FLOPPY_WA @@ -2161,7 +2161,7 @@ static inline void iommu_prepare_isa(void) return; printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); - ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); + ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1); if (ret) printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " -- cgit v0.10.2 From 6464920a6e30604cb71d0ecbaa20e35009bd76fb Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 25 May 2011 12:24:25 +0200 Subject: xen/blkback: don't call vbd_size() if bd_disk is NULL ...because vbd_size() dereferences bd_disk if bd_part is NULL. Signed-off-by: Laszlo Ersek Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 3457082..6cc0db1 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -357,14 +357,13 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, } vbd->bdev = bdev; - vbd->size = vbd_sz(vbd); - if (vbd->bdev->bd_disk == NULL) { DPRINTK("xen_vbd_create: device %08x doesn't exist.\n", vbd->pdevice); xen_vbd_free(vbd); return -ENOENT; } + vbd->size = vbd_sz(vbd); if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) vbd->type |= VDISK_CDROM; -- cgit v0.10.2 From 9b83c771214cf6a256ee875050e6eaf320cf7983 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 27 May 2011 09:27:16 +0300 Subject: xen/blkback: potential null dereference in error handling blkbk->pending_pages can be NULL here so I added a check for it. Signed-off-by: Dan Carpenter [v1: Redid the loop a bit] Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index c73910c..5cf2993 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -809,11 +809,13 @@ static int __init xen_blkif_init(void) failed_init: kfree(blkbk->pending_reqs); kfree(blkbk->pending_grant_handles); - for (i = 0; i < mmap_pages; i++) { - if (blkbk->pending_pages[i]) - __free_page(blkbk->pending_pages[i]); + if (blkbk->pending_pages) { + for (i = 0; i < mmap_pages; i++) { + if (blkbk->pending_pages[i]) + __free_page(blkbk->pending_pages[i]); + } + kfree(blkbk->pending_pages); } - kfree(blkbk->pending_pages); kfree(blkbk); blkbk = NULL; return rc; -- cgit v0.10.2 From 614198bbf8d74617381aea82521b261c7f9baaf6 Mon Sep 17 00:00:00 2001 From: Per Dalen Date: Tue, 31 May 2011 06:54:21 -0700 Subject: hwmon: (max6642) Rename temp_fault sysfs attribute to temp2_fault The temp_fault sysfs attribute is wrong, it should be temp2_fault instead. Reported-by: Guenter Roeck Signed-off-by: Per Dalen Acked-by: Jean Delvare Signed-off-by: Guenter Roeck diff --git a/drivers/hwmon/max6642.c b/drivers/hwmon/max6642.c index 0f9fc40..0f30e1b 100644 --- a/drivers/hwmon/max6642.c +++ b/drivers/hwmon/max6642.c @@ -246,7 +246,7 @@ static SENSOR_DEVICE_ATTR_2(temp1_max, S_IWUSR | S_IRUGO, show_temp_max, set_temp_max, 0, MAX6642_REG_W_LOCAL_HIGH); static SENSOR_DEVICE_ATTR_2(temp2_max, S_IWUSR | S_IRUGO, show_temp_max, set_temp_max, 1, MAX6642_REG_W_REMOTE_HIGH); -static SENSOR_DEVICE_ATTR(temp_fault, S_IRUGO, show_alarm, NULL, 2); +static SENSOR_DEVICE_ATTR(temp2_fault, S_IRUGO, show_alarm, NULL, 2); static SENSOR_DEVICE_ATTR(temp1_max_alarm, S_IRUGO, show_alarm, NULL, 6); static SENSOR_DEVICE_ATTR(temp2_max_alarm, S_IRUGO, show_alarm, NULL, 4); @@ -256,7 +256,7 @@ static struct attribute *max6642_attributes[] = { &sensor_dev_attr_temp1_max.dev_attr.attr, &sensor_dev_attr_temp2_max.dev_attr.attr, - &sensor_dev_attr_temp_fault.dev_attr.attr, + &sensor_dev_attr_temp2_fault.dev_attr.attr, &sensor_dev_attr_temp1_max_alarm.dev_attr.attr, &sensor_dev_attr_temp2_max_alarm.dev_attr.attr, NULL -- cgit v0.10.2 From 4c6e0f8101e62d8b2d01dc94b835a98b191a1454 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 31 May 2011 15:50:51 -0400 Subject: hwmon: (coretemp) Relax target temperature range check The current temperature range check of MSR_IA32_TEMPERATURE_TARGET seems too strict to me, some TjMax values documented in Documentation/hwmon/coretemp wouldn't pass. Relax the check so that all the documented values pass. Signed-off-by: Jean Delvare Cc: Carsten Emde Cc: Fenghua Yu Signed-off-by: Guenter Roeck diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index de3d246..0eeff46 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -296,7 +296,7 @@ static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) * If the TjMax is not plausible, an assumption * will be used */ - if (val > 80 && val < 120) { + if (val >= 70 && val <= 125) { dev_info(dev, "TjMax is %d C.\n", val); return val * 1000; } -- cgit v0.10.2 From 333ba7325213f0a09dfa5ceeddb056d6ad74b3b5 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Sun, 29 May 2011 15:53:20 +0300 Subject: cfg80211: don't drop p2p probe responses Commit 0a35d36 ("cfg80211: Use capability info to detect mesh beacons") assumed that probe response with both ESS and IBSS bits cleared means that the frame was sent by a mesh sta. However, these capabilities are also being used in the p2p_find phase, and the mesh-validation broke it. Rename the WLAN_CAPABILITY_IS_MBSS macro, and verify that mesh ies exist before assuming this frame was sent by a mesh sta. Signed-off-by: Eliad Peller Signed-off-by: John W. Linville diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b2eee58..bf56b6f 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1003,8 +1003,12 @@ struct ieee80211_ht_info { #define WLAN_CAPABILITY_ESS (1<<0) #define WLAN_CAPABILITY_IBSS (1<<1) -/* A mesh STA sets the ESS and IBSS capability bits to zero */ -#define WLAN_CAPABILITY_IS_MBSS(cap) \ +/* + * A mesh STA sets the ESS and IBSS capability bits to zero. + * however, this holds true for p2p probe responses (in the p2p_find + * phase) as well. + */ +#define WLAN_CAPABILITY_IS_STA_BSS(cap) \ (!((cap) & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS))) #define WLAN_CAPABILITY_CF_POLLABLE (1<<2) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 73a441d..7a6c676 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -267,13 +267,35 @@ static bool is_bss(struct cfg80211_bss *a, return memcmp(ssidie + 2, ssid, ssid_len) == 0; } +static bool is_mesh_bss(struct cfg80211_bss *a) +{ + const u8 *ie; + + if (!WLAN_CAPABILITY_IS_STA_BSS(a->capability)) + return false; + + ie = cfg80211_find_ie(WLAN_EID_MESH_ID, + a->information_elements, + a->len_information_elements); + if (!ie) + return false; + + ie = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, + a->information_elements, + a->len_information_elements); + if (!ie) + return false; + + return true; +} + static bool is_mesh(struct cfg80211_bss *a, const u8 *meshid, size_t meshidlen, const u8 *meshcfg) { const u8 *ie; - if (!WLAN_CAPABILITY_IS_MBSS(a->capability)) + if (!WLAN_CAPABILITY_IS_STA_BSS(a->capability)) return false; ie = cfg80211_find_ie(WLAN_EID_MESH_ID, @@ -311,7 +333,7 @@ static int cmp_bss(struct cfg80211_bss *a, if (a->channel != b->channel) return b->channel->center_freq - a->channel->center_freq; - if (WLAN_CAPABILITY_IS_MBSS(a->capability | b->capability)) { + if (is_mesh_bss(a) && is_mesh_bss(b)) { r = cmp_ies(WLAN_EID_MESH_ID, a->information_elements, a->len_information_elements, @@ -457,7 +479,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, struct cfg80211_internal_bss *res) { struct cfg80211_internal_bss *found = NULL; - const u8 *meshid, *meshcfg; /* * The reference to "res" is donated to this function. @@ -470,22 +491,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, res->ts = jiffies; - if (WLAN_CAPABILITY_IS_MBSS(res->pub.capability)) { - /* must be mesh, verify */ - meshid = cfg80211_find_ie(WLAN_EID_MESH_ID, - res->pub.information_elements, - res->pub.len_information_elements); - meshcfg = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, - res->pub.information_elements, - res->pub.len_information_elements); - if (!meshid || !meshcfg || - meshcfg[1] != sizeof(struct ieee80211_meshconf_ie)) { - /* bogus mesh */ - kref_put(&res->ref, bss_release); - return NULL; - } - } - spin_lock_bh(&dev->bss_lock); found = rb_find_bss(dev, res); -- cgit v0.10.2 From 21fdc87248d1d28492c775e05fa92b3c8c7bc8db Mon Sep 17 00:00:00 2001 From: Daniel Halperin Date: Tue, 31 May 2011 11:59:30 -0700 Subject: ath9k: fix two more bugs in tx power This is the same fix as commit 841051602e3fa18ea468fe5a177aa92b6eb44b56 Author: Matteo Croce Date: Fri Dec 3 02:25:08 2010 +0100 The ath9k driver subtracts 3 dBm to the txpower as with two radios the signal power is doubled. The resulting value is assigned in an u16 which overflows and makes the card work at full power. in two more places. I grepped the ath tree and didn't find any others. Cc: stable@kernel.org Signed-off-by: Daniel Halperin Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index 0ca7635..ff8150e 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -4645,10 +4645,16 @@ static void ar9003_hw_set_power_per_rate_table(struct ath_hw *ah, case 1: break; case 2: - scaledPower -= REDUCE_SCALED_POWER_BY_TWO_CHAIN; + if (scaledPower > REDUCE_SCALED_POWER_BY_TWO_CHAIN) + scaledPower -= REDUCE_SCALED_POWER_BY_TWO_CHAIN; + else + scaledPower = 0; break; case 3: - scaledPower -= REDUCE_SCALED_POWER_BY_THREE_CHAIN; + if (scaledPower > REDUCE_SCALED_POWER_BY_THREE_CHAIN) + scaledPower -= REDUCE_SCALED_POWER_BY_THREE_CHAIN; + else + scaledPower = 0; break; } diff --git a/drivers/net/wireless/ath/ath9k/eeprom_9287.c b/drivers/net/wireless/ath/ath9k/eeprom_9287.c index 7856f0d..343fc9f 100644 --- a/drivers/net/wireless/ath/ath9k/eeprom_9287.c +++ b/drivers/net/wireless/ath/ath9k/eeprom_9287.c @@ -524,10 +524,16 @@ static void ath9k_hw_set_ar9287_power_per_rate_table(struct ath_hw *ah, case 1: break; case 2: - scaledPower -= REDUCE_SCALED_POWER_BY_TWO_CHAIN; + if (scaledPower > REDUCE_SCALED_POWER_BY_TWO_CHAIN) + scaledPower -= REDUCE_SCALED_POWER_BY_TWO_CHAIN; + else + scaledPower = 0; break; case 3: - scaledPower -= REDUCE_SCALED_POWER_BY_THREE_CHAIN; + if (scaledPower > REDUCE_SCALED_POWER_BY_THREE_CHAIN) + scaledPower -= REDUCE_SCALED_POWER_BY_THREE_CHAIN; + else + scaledPower = 0; break; } scaledPower = max((u16)0, scaledPower); -- cgit v0.10.2 From a7567b2059020bf3fa96c389ec25eed8e28ad4ba Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jun 2011 08:29:54 +0200 Subject: bluetooth l2cap: fix locking in l2cap_global_chan_by_psm read_lock() ... read_unlock_bh() is clearly bogus. This was broken by commit 23691d75cdc69c3b285211b4d77746aa20a17d18 Author: Gustavo F. Padovan Date: Wed Apr 27 18:26:32 2011 -0300 Bluetooth: Remove l2cap_sk_list Signed-off-by: Johannes Berg Signed-off-by: John W. Linville diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a86f9ba..e64a1c2 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -906,7 +906,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, bdaddr if (c->psm == psm) { /* Exact match. */ if (!bacmp(&bt_sk(sk)->src, src)) { - read_unlock_bh(&chan_list_lock); + read_unlock(&chan_list_lock); return c; } -- cgit v0.10.2 From dfe21582ac5ebc460dda98c67e8589dd506d02cd Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 1 Jun 2011 17:17:57 +0200 Subject: iwl4965: correctly validate temperature value In some cases we can read wrong temperature value. If after that temperature value will not be updated to good one, we badly configure tx power parameters and device is unable to send a data. Resolves: https://bugzilla.kernel.org/show_bug.cgi?id=35932 Cc: stable@kernel.org # 2.6.39+ Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/iwlegacy/iwl-4965.c b/drivers/net/wireless/iwlegacy/iwl-4965.c index f5433c7..f9db25b 100644 --- a/drivers/net/wireless/iwlegacy/iwl-4965.c +++ b/drivers/net/wireless/iwlegacy/iwl-4965.c @@ -1543,7 +1543,7 @@ static void iwl4965_temperature_calib(struct iwl_priv *priv) s32 temp; temp = iwl4965_hw_get_temperature(priv); - if (temp < 0) + if (IWL_TX_POWER_TEMPERATURE_OUT_OF_RANGE(temp)) return; if (priv->temperature != temp) { -- cgit v0.10.2 From 3cc39b3f061e90f69cb1f65d72c005c56cddd6a6 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 1 Jun 2011 16:06:04 -0400 Subject: tile: enable CONFIG_BUGVERBOSE Trivial config change to enable backtraces on panic. Signed-off-by: Chris Metcalf diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 28afa4c..dd373c8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -697,7 +697,7 @@ config DEBUG_BUGVERBOSE bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EXPERT depends on BUG depends on ARM || AVR32 || M32R || M68K || SPARC32 || SPARC64 || \ - FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300 + FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300 || TILE default y help Say Y here to make BUG() panics output the file name and line number -- cgit v0.10.2 From 0f48f2600911d5de6393829e4a9986d4075558b3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 2 Jun 2011 05:29:19 +0900 Subject: block: fix mismerge of the DISK_EVENT_MEDIA_CHANGE removal Jens' back-merge commit 698567f3fa79 ("Merge commit 'v2.6.39' into for-2.6.40/core") was incorrectly done, and re-introduced the DISK_EVENT_MEDIA_CHANGE lines that had been removed earlier in commits - 9fd097b14918 ("block: unexport DISK_EVENT_MEDIA_CHANGE for legacy/fringe drivers") - 7eec77a1816a ("ide: unexport DISK_EVENT_MEDIA_CHANGE for ide-gd and ide-cd") because of conflicts with the "g->flags" updates near-by by commit d4dc210f69bc ("block: don't block events on excl write for non-optical devices") As a result, we re-introduced the hanging behavior due to infinite disk media change reports. Tssk, tssk, people! Don't do back-merges at all, and *definitely* don't do them to hide merge conflicts from me - especially as I'm likely better at merging them than you are, since I do so many merges. Reported-by: Steven Rostedt Cc: Jens Axboe Signed-off-by: Linus Torvalds diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index a0aabd9..46b8136 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -321,7 +321,6 @@ static void pcd_init_units(void) strcpy(disk->disk_name, cd->name); /* umm... */ disk->fops = &pcd_bdops; disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - disk->events = DISK_EVENT_MEDIA_CHANGE; } } diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index ae15a4d..7878da8 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -627,7 +627,6 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id) gendisk->fops = &viocd_fops; gendisk->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - gendisk->events = DISK_EVENT_MEDIA_CHANGE; set_capacity(gendisk, 0); gendisk->private_data = d; d->viocd_disk = gendisk; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 6e5123b..144d272 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1782,7 +1782,6 @@ static int ide_cd_probe(ide_drive_t *drive) ide_cd_read_toc(drive, &sense); g->fops = &idecd_ops; g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - g->events = DISK_EVENT_MEDIA_CHANGE; add_disk(g); return 0; -- cgit v0.10.2 From 1fa7b6a29c61358cc2ca6f64cef4aa0e1a7ca74c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 2 Jun 2011 06:11:24 +0900 Subject: Revert "mm: fail GFP_DMA allocations when ZONE_DMA is not configured" This reverts commit a197b59ae6e8bee56fcef37ea2482dc08414e2ac. As rmk says: "Commit a197b59ae6e8 (mm: fail GFP_DMA allocations when ZONE_DMA is not configured) is causing regressions on ARM with various drivers which use GFP_DMA. The behaviour up until now has been to silently ignore that flag when CONFIG_ZONE_DMA is not enabled, and to allocate from the normal zone. However, as a result of the above commit, such allocations now fail which causes drivers to fail. These are regressions compared to the previous kernel version." so just revert it. Requested-by: Russell King Acked-by: Andrew Morton Cc: David Rientjes Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a4e1db3..4e8985a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2247,10 +2247,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, if (should_fail_alloc_page(gfp_mask, order)) return NULL; -#ifndef CONFIG_ZONE_DMA - if (WARN_ON_ONCE(gfp_mask & __GFP_DMA)) - return NULL; -#endif /* * Check the zones suitable for the gfp_mask contain at least one -- cgit v0.10.2 From 4f5f71a7abe329bdad81ee6a8e4545054a7cc30a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 31 May 2011 06:54:21 -0700 Subject: hwmon: (coretemp) Fix TjMax detection for older CPUs Commit a321cedb12904114e2ba5041a3673ca24deb09c9 excludes CPU models 0xe, 0xf, 0x16, and 0x1a from TjMax temperature adjustment, even though several of those CPUs are known to have TiMax other than 100 degrees C, and even though the code in adjust_tjmax() explicitly handles those CPUs and points to a Web document listing several of the affected CPU IDs. Reinstate original TjMax adjustment if TjMax can not be determined using the IA32_TEMPERATURE_TARGET register. https://bugzilla.kernel.org/show_bug.cgi?id=32582 Signed-off-by: Guenter Roeck Cc: Huaxu Wan Cc: Carsten Emde Cc: Valdis Kletnieks Cc: Henrique de Moraes Holschuh Cc: Yong Wang Cc: Rudolf Marek Cc: Fenghua Yu Tested-by: Jean Delvare Acked-by: Jean Delvare Acked-by: Fenghua Yu Cc: # .35.x .36.x .37.x .38.x .39.x diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 0eeff46..1680977 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -304,24 +304,9 @@ static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) /* * An assumption is made for early CPUs and unreadable MSR. - * NOTE: the given value may not be correct. + * NOTE: the calculated value may not be correct. */ - - switch (c->x86_model) { - case 0xe: - case 0xf: - case 0x16: - case 0x1a: - dev_warn(dev, "TjMax is assumed as 100 C!\n"); - return 100000; - case 0x17: - case 0x1c: /* Atom CPUs */ - return adjust_tjmax(c, id, dev); - default: - dev_warn(dev, "CPU (model=0x%x) is not supported yet," - " using default TjMax of 100C.\n", c->x86_model); - return 100000; - } + return adjust_tjmax(c, id, dev); } static void __devinit get_ucode_rev_on_cpu(void *edx) -- cgit v0.10.2 From bb9973e4e73f43bd86698483d0c3f7a362ff94ce Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 1 Jun 2011 11:03:41 -0700 Subject: hwmon: (coretemp) Further relax temperature range checks Further relax temperature range checks after reading the IA32_TEMPERATURE_TARGET register. If the register returns a value other than 0 in bits 16..32, assume that the returned value is correct. This change applies to both packet and core temperature limits. Cc: Carsten Emde Cc: Fenghua Yu Cc: Jean Delvare Signed-off-by: Guenter Roeck Acked-by: Fenghua Yu diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 1680977..85e9379 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -296,7 +296,7 @@ static int get_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) * If the TjMax is not plausible, an assumption * will be used */ - if (val >= 70 && val <= 125) { + if (val) { dev_info(dev, "TjMax is %d C.\n", val); return val * 1000; } @@ -326,7 +326,7 @@ static int get_pkg_tjmax(unsigned int cpu, struct device *dev) err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx); if (!err) { val = (eax >> 16) & 0xff; - if (val > 80 && val < 120) + if (val) return val * 1000; } dev_warn(dev, "Unable to read Pkg-TjMax from CPU:%u\n", cpu); -- cgit v0.10.2 From d0733d2e29b652b2e7b1438ececa732e4eed98eb Mon Sep 17 00:00:00 2001 From: Marcus Meissner Date: Wed, 1 Jun 2011 21:05:22 -0700 Subject: net/ipv4: Check for mistakenly passed in non-IPv4 address Check against mistakenly passing in IPv6 addresses (which would result in an INADDR_ANY bind) or similar incompatible sockaddrs. Signed-off-by: Marcus Meissner Cc: Reinhard Max Signed-off-by: David S. Miller diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index cc14631..9c19260 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -465,6 +465,9 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in)) goto out; + if (addr->sin_family != AF_INET) + goto out; + chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); /* Not specified by any standard per-se, however it breaks too -- cgit v0.10.2 From 307f73df2b9829ee5a261d1ed432ff683c426cdf Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 31 May 2011 22:53:19 +0000 Subject: vlan: fix typo in vlan_dev_hard_start_xmit() commit 4af429d29b341bb1735f04c2fb960178ed5d52e7 (vlan: lockless transmit path) have a typo in vlan_dev_hard_start_xmit(), using u64_stats_update_begin() to end the stat update, it should be u64_stats_update_end(). Signed-off-by: Wei Yongjun Reviewed-by: WANG Cong Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index f247f5b..7ea5cf9 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -165,7 +165,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, u64_stats_update_begin(&stats->syncp); stats->tx_packets++; stats->tx_bytes += len; - u64_stats_update_begin(&stats->syncp); + u64_stats_update_end(&stats->syncp); } else { this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped); } -- cgit v0.10.2 From 85e3c65fa3a1d0542c181510a950a2be7733ff29 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 1 Jun 2011 02:01:41 +0000 Subject: usbnet/cdc_ncm: add missing .reset_resume hook This avoids messages like this after suspend: cdc_ncm 2-1.4:1.6: no reset_resume for driver cdc_ncm? cdc_ncm 2-1.4:1.7: no reset_resume for driver cdc_ncm? cdc_ncm 2-1.4:1.6: usb0: unregister 'cdc_ncm' usb-0000:00:1d.0-1.4, CDC NCM This is important for the Ericsson F5521gw GSM/UMTS modem. Otherwise modemmanager looses the fact that the cdc_ncm and cdc_acm devices belong together. The cdc_ether module does the same. Signed-off-by: Stefan Metzmacher Signed-off-by: David S. Miller diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index cdd3ae4..f33ca6a 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -54,7 +54,7 @@ #include #include -#define DRIVER_VERSION "24-May-2011" +#define DRIVER_VERSION "01-June-2011" /* CDC NCM subclass 3.2.1 */ #define USB_CDC_NCM_NDP16_LENGTH_MIN 0x10 @@ -1234,6 +1234,7 @@ static struct usb_driver cdc_ncm_driver = { .disconnect = cdc_ncm_disconnect, .suspend = usbnet_suspend, .resume = usbnet_resume, + .reset_resume = usbnet_resume, .supports_autosuspend = 1, }; -- cgit v0.10.2 From 41be5a4a3668810bf3687a76c2b017bd437039e0 Mon Sep 17 00:00:00 2001 From: "sjur.brandeland@stericsson.com" Date: Wed, 1 Jun 2011 00:55:37 +0000 Subject: caif: Fix race when conditionally taking rtnl lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take the RTNL lock unconditionally when calling dev_close. Taking the lock conditionally may cause race conditions. Signed-off-by: Sjur Brændeland Signed-off-by: David S. Miller diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 649ebac..adbb424 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -139,17 +139,14 @@ static void close_work(struct work_struct *work) struct chnl_net *dev = NULL; struct list_head *list_node; struct list_head *_tmp; - /* May be called with or without RTNL lock held */ - int islocked = rtnl_is_locked(); - if (!islocked) - rtnl_lock(); + + rtnl_lock(); list_for_each_safe(list_node, _tmp, &chnl_net_list) { dev = list_entry(list_node, struct chnl_net, list_field); if (dev->state == CAIF_SHUTDOWN) dev_close(dev->netdev); } - if (!islocked) - rtnl_unlock(); + rtnl_unlock(); } static DECLARE_WORK(close_worker, close_work); -- cgit v0.10.2 From a3bcc23e890a6d49d6763d9eb073d711de2e0469 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Wed, 1 Jun 2011 06:49:10 +0000 Subject: af-packet: Add flag to distinguish VID 0 from no-vlan. Currently, user-space cannot determine if a 0 tcp_vlan_tci means there is no VLAN tag or the VLAN ID was zero. Add flag to make this explicit. User-space can check for TP_STATUS_VLAN_VALID || tp_vlan_tci > 0, which will be backwards compatible. Older could would have just checked for tp_vlan_tci, so it will work no worse than before. Signed-off-by: Ben Greear Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 72bfa5a..6d66ce1 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -70,6 +70,7 @@ struct tpacket_auxdata { #define TP_STATUS_COPY 0x2 #define TP_STATUS_LOSING 0x4 #define TP_STATUS_CSUMNOTREADY 0x8 +#define TP_STATUS_VLAN_VALID 0x10 /* auxdata has valid tp_vlan_tci */ /* Tx ring - header status */ #define TP_STATUS_AVAILABLE 0x0 diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 925f715..ba248d9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -798,7 +798,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, getnstimeofday(&ts); h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; - h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); + if (vlan_tx_tag_present(skb)) { + h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); + status |= TP_STATUS_VLAN_VALID; + } else { + h.h2->tp_vlan_tci = 0; + } hdrlen = sizeof(*h.h2); break; default: @@ -1725,8 +1730,12 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); - aux.tp_vlan_tci = vlan_tx_tag_get(skb); - + if (vlan_tx_tag_present(skb)) { + aux.tp_vlan_tci = vlan_tx_tag_get(skb); + aux.tp_status |= TP_STATUS_VLAN_VALID; + } else { + aux.tp_vlan_tci = 0; + } put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); } -- cgit v0.10.2 From b722dbf176b67c75fe0f5a6b1b31f5ea8aa6117d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 1 Jun 2011 07:10:10 +0000 Subject: drivers/net/davinci_emac.c: add missing clk_put Go to existing error handling code at the end of the function that calls clk_put. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r exists@ expression e1,e2; statement S; @@ e1 = clk_get@p1(...); ... when != e1 = e2 when != clk_put(e1) when any if (...) { ... when != clk_put(e1) when != if (...) { ... clk_put(e1) ... } * return@p3 ...; } else S // Signed-off-by: Julia Lawall Acked-by: Kevin Hilman Signed-off-by: David S. Miller diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c index 29a4f06..dcc4a17 100644 --- a/drivers/net/davinci_emac.c +++ b/drivers/net/davinci_emac.c @@ -1781,8 +1781,8 @@ static int __devinit davinci_emac_probe(struct platform_device *pdev) ndev = alloc_etherdev(sizeof(struct emac_priv)); if (!ndev) { dev_err(&pdev->dev, "error allocating net_device\n"); - clk_put(emac_clk); - return -ENOMEM; + rc = -ENOMEM; + goto free_clk; } platform_set_drvdata(pdev, ndev); @@ -1796,7 +1796,8 @@ static int __devinit davinci_emac_probe(struct platform_device *pdev) pdata = pdev->dev.platform_data; if (!pdata) { dev_err(&pdev->dev, "no platform data\n"); - return -ENODEV; + rc = -ENODEV; + goto probe_quit; } /* MAC addr and PHY mask , RMII enable info from platform_data */ @@ -1929,8 +1930,9 @@ no_dma: iounmap(priv->remap_addr); probe_quit: - clk_put(emac_clk); free_netdev(ndev); +free_clk: + clk_put(emac_clk); return rc; } -- cgit v0.10.2 From 6979d5dd96a4a4975ce240982436e92a3da23315 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 1 Jun 2011 10:18:09 +0000 Subject: net: dm9000: Get the chip in a known good state before enabling interrupts Currently the DM9000 driver requests the primary interrupt before it resets the chip and puts it into a known good state. This means that if the chip is asserting interrupt for some reason we can end up with a screaming IRQ that the interrupt handler is unable to deal with. Avoid this by only requesting the interrupt after we've reset the chip so we know what state it's in. This started manifesting itself on one of my boards in the past month or so, I suspect as a result of some core infrastructure changes removing some form of mitigation against bad behaviour here, even when things boot it seems that the new code brings the interface up more quickly. Signed-off-by: Mark Brown Signed-off-by: David S. Miller diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c index fbaff35..ee597e6 100644 --- a/drivers/net/dm9000.c +++ b/drivers/net/dm9000.c @@ -1157,9 +1157,6 @@ dm9000_open(struct net_device *dev) irqflags |= IRQF_SHARED; - if (request_irq(dev->irq, dm9000_interrupt, irqflags, dev->name, dev)) - return -EAGAIN; - /* GPIO0 on pre-activate PHY, Reg 1F is not set by reset */ iow(db, DM9000_GPR, 0); /* REG_1F bit0 activate phyxcer */ mdelay(1); /* delay needs by DM9000B */ @@ -1168,6 +1165,9 @@ dm9000_open(struct net_device *dev) dm9000_reset(db); dm9000_init_dm9000(dev); + if (request_irq(dev->irq, dm9000_interrupt, irqflags, dev->name, dev)) + return -EAGAIN; + /* Init driver variable */ db->dbug_cnt = 0; -- cgit v0.10.2 From a1b2cc50679c1d2eed44e2885f6178ce907498b7 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Tue, 31 May 2011 09:25:16 +0000 Subject: dmaengine: shdma: fix a regression: initialise DMA channels for memcpy A recent patch has introduced a regression, where repeating a memcpy DMA test with shdma module unloading between them skips the DMA channel configuration. Fix this regression by always configuring the channel during its allocation. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index 727e76f..2a638f9 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -343,7 +343,7 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) dmae_set_dmars(sh_chan, cfg->mid_rid); dmae_set_chcr(sh_chan, cfg->chcr); - } else if ((sh_dmae_readl(sh_chan, CHCR) & 0xf00) != 0x400) { + } else { dmae_init(sh_chan); } -- cgit v0.10.2 From 816af7422f5bdbb74a0c9bb09735a9aeb9522c30 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Wed, 1 Jun 2011 07:32:07 +0000 Subject: ARM: mach-shmobile: add DMAC clock definitions on SH7372 These definitions are needed to let the runtime PM subsystem turn off DMAC clocks, when it is suspended by the driver. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Paul Mundt diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index d17eb66..c0800d8 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -509,6 +509,7 @@ enum { MSTP001, MSTP118, MSTP117, MSTP116, MSTP113, MSTP106, MSTP101, MSTP100, MSTP223, + MSTP218, MSTP217, MSTP216, MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200, MSTP329, MSTP328, MSTP323, MSTP322, MSTP314, MSTP313, MSTP312, MSTP423, MSTP415, MSTP413, MSTP411, MSTP410, MSTP406, MSTP403, @@ -534,6 +535,9 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP101] = MSTP(&div4_clks[DIV4_M1], SMSTPCR1, 1, 0), /* VPU */ [MSTP100] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 0, 0), /* LCDC0 */ [MSTP223] = MSTP(&div6_clks[DIV6_SPU], SMSTPCR2, 23, 0), /* SPU2 */ + [MSTP218] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 18, 0), /* DMAC1 */ + [MSTP217] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 17, 0), /* DMAC2 */ + [MSTP216] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 16, 0), /* DMAC3 */ [MSTP207] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 7, 0), /* SCIFA5 */ [MSTP206] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 6, 0), /* SCIFB */ [MSTP204] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 4, 0), /* SCIFA0 */ @@ -626,6 +630,9 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("sh_mobile_lcdc_fb.0", &mstp_clks[MSTP100]), /* LCDC0 */ CLKDEV_DEV_ID("uio_pdrv_genirq.6", &mstp_clks[MSTP223]), /* SPU2DSP0 */ CLKDEV_DEV_ID("uio_pdrv_genirq.7", &mstp_clks[MSTP223]), /* SPU2DSP1 */ + CLKDEV_DEV_ID("sh-dma-engine.0", &mstp_clks[MSTP218]), /* DMAC1 */ + CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[MSTP217]), /* DMAC2 */ + CLKDEV_DEV_ID("sh-dma-engine.2", &mstp_clks[MSTP216]), /* DMAC3 */ CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP207]), /* SCIFA5 */ CLKDEV_DEV_ID("sh-sci.6", &mstp_clks[MSTP206]), /* SCIFB */ CLKDEV_DEV_ID("sh-sci.0", &mstp_clks[MSTP204]), /* SCIFA0 */ -- cgit v0.10.2 From 2e4ceec4edaef6e903422792de4f7f37de98cec6 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 1 Jun 2011 19:48:50 +0000 Subject: drivers/net/can/flexcan.c: add missing clk_put The failed_get label is used after the call to clk_get has succeeded, so it should be moved up above the call to clk_put. The failed_req labels doesn't do anything different than failed_get, so delete it. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r exists@ expression e1,e2; statement S; @@ e1 = clk_get@p1(...); ... when != e1 = e2 when != clk_put(e1) when any if (...) { ... when != clk_put(e1) when != if (...) { ... clk_put(e1) ... } * return@p3 ...; } else S // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index d499056..1767811 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -923,7 +923,7 @@ static int __devinit flexcan_probe(struct platform_device *pdev) mem_size = resource_size(mem); if (!request_mem_region(mem->start, mem_size, pdev->name)) { err = -EBUSY; - goto failed_req; + goto failed_get; } base = ioremap(mem->start, mem_size); @@ -977,9 +977,8 @@ static int __devinit flexcan_probe(struct platform_device *pdev) iounmap(base); failed_map: release_mem_region(mem->start, mem_size); - failed_req: - clk_put(clk); failed_get: + clk_put(clk); failed_clock: return err; } -- cgit v0.10.2 From e73e079bf128d68284efedeba1fbbc18d78610f9 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 25 May 2011 15:52:14 -0500 Subject: [SCSI] Fix oops caused by queue refcounting failure In certain circumstances, we can get an oops from a torn down device. Most notably this is from CD roms trying to call scsi_ioctl. The root cause of the problem is the fact that after scsi_remove_device() has been called, the queue is fully torn down. This is actually wrong since the queue can be used until the sdev release function is called. Therefore, we add an extra reference to the queue which is released in sdev->release, so the queue always exists. Reported-by: Parag Warudkar Cc: stable@kernel.org Signed-off-by: James Bottomley diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 58584dc..44e8ca3 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -297,7 +297,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, kfree(sdev); goto out; } - + blk_get_queue(sdev->request_queue); sdev->request_queue->queuedata = sdev; scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index e639125..e0bd3f7 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -322,6 +322,7 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work) kfree(evt); } + blk_put_queue(sdev->request_queue); /* NULL queue means the device can't be used */ sdev->request_queue = NULL; -- cgit v0.10.2 From 28304f485c3627cc5e1665b92e26eb7fcfe98088 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Thu, 2 Jun 2011 13:05:02 +0200 Subject: cfq-iosched: Remove bogus check in queue_fail path queue_fail can only be reached if cic is NULL, so its check for cic must be bogus. Signed-off-by: Paul Bolle Signed-off-by: Jens Axboe diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 8a02c95..3c7b537 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3786,9 +3786,6 @@ new_queue: return 0; queue_fail: - if (cic) - put_io_context(cic->ioc); - cfq_schedule_dispatch(cfqd); spin_unlock_irqrestore(q->queue_lock, flags); cfq_log(cfqd, "set_request fail"); -- cgit v0.10.2 From e2bd9678fc0085acf540dc4cb48ff961cd4d88c0 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Thu, 2 Jun 2011 13:05:02 +0200 Subject: block: Use hlist_entry() for io_context.cic_list.first list_entry() and hlist_entry() are both simply aliases for container_of(), but since io_context.cic_list.first is an hlist_node one should at least use the correct alias. Signed-off-by: Paul Bolle Signed-off-by: Jens Axboe diff --git a/block/blk-ioc.c b/block/blk-ioc.c index c898049..342eae9 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -21,7 +21,7 @@ static void cfq_dtor(struct io_context *ioc) if (!hlist_empty(&ioc->cic_list)) { struct cfq_io_context *cic; - cic = list_entry(ioc->cic_list.first, struct cfq_io_context, + cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, cic_list); cic->dtor(ioc); } @@ -57,7 +57,7 @@ static void cfq_exit(struct io_context *ioc) if (!hlist_empty(&ioc->cic_list)) { struct cfq_io_context *cic; - cic = list_entry(ioc->cic_list.first, struct cfq_io_context, + cic = hlist_entry(ioc->cic_list.first, struct cfq_io_context, cic_list); cic->exit(ioc); } -- cgit v0.10.2 From 4c8cc55b3c0ebe989e727017933945b68b4327cd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 1 Jun 2011 23:22:30 -0400 Subject: ktest: Fix off-by-one in config bisect result Because in perl the array size returned by $#arr, is the last index and not the actually size of the array, we end the config bisect early, thinking there is only one config left when there are in fact two. Thus the result has a 50% chance of picking the correct config that caused the problem. Signed-off-by: Steven Rostedt diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 1fd29b2..8dc8c3c 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1638,7 +1638,7 @@ sub run_config_bisect { if (!$found) { # try the other half doprint "Top half produced no set configs, trying bottom half\n"; - @tophalf = @start_list[$half .. $#start_list]; + @tophalf = @start_list[$half + 1 .. $#start_list]; create_config @tophalf; read_current_config \%current_config; foreach my $config (@tophalf) { @@ -1690,7 +1690,7 @@ sub run_config_bisect { # remove half the configs we are looking at and see if # they are good. $half = int($#start_list / 2); - } while ($half > 0); + } while ($#start_list > 0); # we found a single config, try it again unless we are running manually -- cgit v0.10.2 From 4da46da2d295c0d9f4aaf28dd2b70a1ecb42d972 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 1 Jun 2011 23:25:13 -0400 Subject: ktest: Fix result of rebooting the kernel The command that is called that reboots the kernel may fail but the return code is not passed back to the ktest.pl script. This is because a ';' is used between the two commands and if the second command fails, only the first command's return code is returned. Using a '&&' between the two commands fixes this. Signed-off-by: Steven Rostedt diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 8dc8c3c..6c68259 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -788,7 +788,7 @@ sub wait_for_input sub reboot_to { if ($reboot_type eq "grub") { - run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch; reboot)'"; + run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch && reboot)'"; return; } -- cgit v0.10.2 From 9bf7174949aef2f43253956e1f3ab01698abbd79 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 1 Jun 2011 23:27:19 -0400 Subject: ktest: Ignore unset values of the minconfig in config_bisect By ignoring the unset values of the minconfig in deciding what to test in the config_bisect can cause the problem config from being tested too. Just do not test the configs that are set in the minconfig. Signed-off-by: Steven Rostedt diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 6c68259..cef28e6 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1480,7 +1480,7 @@ sub process_config_ignore { or dodie "Failed to read $config"; while () { - if (/^(.*?(CONFIG\S*)(=.*| is not set))/) { + if (/^((CONFIG\S*)=.*)/) { $config_ignore{$2} = $1; } } -- cgit v0.10.2 From bf0be0e951cf1c4c9ce38032195cd8095a16d828 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Mon, 30 May 2011 12:49:01 +0200 Subject: ALSA: 6fire: Don't leak firmware in error path One of the error paths in sound/usb/6fire/firmware.c::usb6fire_fw_ezusb_upload() neglects to free the memory allocated for the firmware before returning, thus leaking the memory. Signed-off-by: Jesper Juhl Signed-off-by: Takashi Iwai diff --git a/sound/usb/6fire/firmware.c b/sound/usb/6fire/firmware.c index d47beff..a91719d 100644 --- a/sound/usb/6fire/firmware.c +++ b/sound/usb/6fire/firmware.c @@ -227,6 +227,7 @@ static int usb6fire_fw_ezusb_upload( ret = usb6fire_fw_ihex_init(fw, rec); if (ret < 0) { kfree(rec); + release_firmware(fw); snd_printk(KERN_ERR PREFIX "error validating ezusb " "firmware %s.\n", fwname); return ret; -- cgit v0.10.2 From b36a968927b789b2dd8de0aaf7a72ef7c1f0d012 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 2 Jun 2011 14:21:45 -0400 Subject: asm-generic/unistd.h: support sendmmsg syscall Signed-off-by: Chris Metcalf Acked-by: Arnd Bergmann diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index ae90e0f..4f769593 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -683,9 +683,11 @@ __SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, compat_sys_clock_adjtime) __SYSCALL(__NR_syncfs, sys_syncfs) #define __NR_setns 268 __SYSCALL(__NR_setns, sys_setns) +#define __NR_sendmmsg 269 +__SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg) #undef __NR_syscalls -#define __NR_syscalls 269 +#define __NR_syscalls 270 /* * All syscalls below here should go away really, -- cgit v0.10.2 From ec764bf083a6ff396234351b51fd236f53c903bf Mon Sep 17 00:00:00 2001 From: Koki Sanagi Date: Mon, 30 May 2011 21:48:34 +0000 Subject: net: tracepoint of net_dev_xmit sees freed skb and causes panic Because there is a possibility that skb is kfree_skb()ed and zero cleared after ndo_start_xmit, we should not see the contents of skb like skb->len and skb->dev->name after ndo_start_xmit. But trace_net_dev_xmit does that and causes panic by NULL pointer dereference. This patch fixes trace_net_dev_xmit not to see the contents of skb directly. If you want to reproduce this panic, 1. Get tracepoint of net_dev_xmit on 2. Create 2 guests on KVM 2. Make 2 guests use virtio_net 4. Execute netperf from one to another for a long time as a network burden 5. host will panic(It takes about 30 minutes) Signed-off-by: Koki Sanagi Signed-off-by: David S. Miller diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 5f247f5..f99645d 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -12,22 +12,24 @@ TRACE_EVENT(net_dev_xmit, TP_PROTO(struct sk_buff *skb, - int rc), + int rc, + struct net_device *dev, + unsigned int skb_len), - TP_ARGS(skb, rc), + TP_ARGS(skb, rc, dev, skb_len), TP_STRUCT__entry( __field( void *, skbaddr ) __field( unsigned int, len ) __field( int, rc ) - __string( name, skb->dev->name ) + __string( name, dev->name ) ), TP_fast_assign( __entry->skbaddr = skb; - __entry->len = skb->len; + __entry->len = skb_len; __entry->rc = rc; - __assign_str(name, skb->dev->name); + __assign_str(name, dev->name); ), TP_printk("dev=%s skbaddr=%p len=%u rc=%d", diff --git a/net/core/dev.c b/net/core/dev.c index c7e305d..9393078 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2096,6 +2096,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, { const struct net_device_ops *ops = dev->netdev_ops; int rc = NETDEV_TX_OK; + unsigned int skb_len; if (likely(!skb->next)) { u32 features; @@ -2146,8 +2147,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, } } + skb_len = skb->len; rc = ops->ndo_start_xmit(skb, dev); - trace_net_dev_xmit(skb, rc); + trace_net_dev_xmit(skb, rc, dev, skb_len); if (rc == NETDEV_TX_OK) txq_trans_update(txq); return rc; @@ -2167,8 +2169,9 @@ gso: if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(nskb); + skb_len = nskb->len; rc = ops->ndo_start_xmit(nskb, dev); - trace_net_dev_xmit(nskb, rc); + trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) goto out_kfree_gso_skb; -- cgit v0.10.2 From 9a2e0fb0893ddf595d0a372e681f5b98017c6d90 Mon Sep 17 00:00:00 2001 From: Matt Carlson Date: Thu, 2 Jun 2011 13:01:39 +0000 Subject: tg3: Fix tg3_skb_error_unmap() This function attempts to free one fragment beyond the number of fragments that were actually mapped. This patch brings back the limit to the correct spot. Signed-off-by: Matt Carlson Tested-by: Alex Williamson Signed-off-by: David S. Miller diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index f4b01c6..a1f9f9e 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -5774,7 +5774,7 @@ static void tg3_skb_error_unmap(struct tg3_napi *tnapi, dma_unmap_addr(txb, mapping), skb_headlen(skb), PCI_DMA_TODEVICE); - for (i = 0; i <= last; i++) { + for (i = 0; i < last; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; entry = NEXT_TX(entry); -- cgit v0.10.2 From 4dffbe03d1e940aba878f9420e67feb8423cdd08 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 3 Jun 2011 10:05:02 +0200 Subject: ALSA: hda - Fix HP and Front pins of ad1988/ad1989 in ad198x_power_eapd() In ad198x_power_eapd(), wrong pin NIDs are used for controlling EAPD for HP and Front outputs of AD1988/AD1989. These are actually same with the ones for AD1984 & co, port-A is 0x11 and port-D 0x12. Reported-by: Raymond Yau Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 696ac25..82c4b2f 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -524,6 +524,10 @@ static void ad198x_power_eapd(struct hda_codec *codec) case 0x11d4184a: case 0x11d4194a: case 0x11d4194b: + case 0x11d41988: + case 0x11d4198b: + case 0x11d4989a: + case 0x11d4989b: ad198x_power_eapd_write(codec, 0x12, 0x11); break; case 0x11d41981: @@ -533,12 +537,6 @@ static void ad198x_power_eapd(struct hda_codec *codec) case 0x11d41986: ad198x_power_eapd_write(codec, 0x1b, 0x1a); break; - case 0x11d41988: - case 0x11d4198b: - case 0x11d4989a: - case 0x11d4989b: - ad198x_power_eapd_write(codec, 0x29, 0x22); - break; } } -- cgit v0.10.2 From a01ef051d584c12ede5cd5275b008b2ded57f3d9 Mon Sep 17 00:00:00 2001 From: Raymond Yau Date: Wed, 1 Jun 2011 15:09:48 +0800 Subject: ALSA: hda - Check pin support EAPD in ad198x_power_eapd_write Check whether the pin supports EAPD in ad198x_power_eapd_write. Signed-off-by: Raymond Yau Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 82c4b2f..d694e9d 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -506,9 +506,11 @@ static void ad198x_power_eapd_write(struct hda_codec *codec, hda_nid_t front, hda_nid_t hp) { struct ad198x_spec *spec = codec->spec; - snd_hda_codec_write(codec, front, 0, AC_VERB_SET_EAPD_BTLENABLE, + if (snd_hda_query_pin_caps(codec, front) & AC_PINCAP_EAPD) + snd_hda_codec_write(codec, front, 0, AC_VERB_SET_EAPD_BTLENABLE, !spec->inv_eapd ? 0x00 : 0x02); - snd_hda_codec_write(codec, hp, 0, AC_VERB_SET_EAPD_BTLENABLE, + if (snd_hda_query_pin_caps(codec, hp) & AC_PINCAP_EAPD) + snd_hda_codec_write(codec, hp, 0, AC_VERB_SET_EAPD_BTLENABLE, !spec->inv_eapd ? 0x00 : 0x02); } -- cgit v0.10.2 From 9676001559fce06e37c7dc230ab275f605556176 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 26 May 2011 11:47:35 +0300 Subject: ALSA: fm801: add error handling if auto-detect fails In the original code if auto detect failed and tea575x_tuner == 4 then we copy bogus information to chip->tea.card. I've changed the autodetect code to cleanup and return -ENODEV on error instead. Signed-off-by: Dan Carpenter Signed-off-by: Takashi Iwai diff --git a/sound/pci/fm801.c b/sound/pci/fm801.c index eacd490..a7ec703 100644 --- a/sound/pci/fm801.c +++ b/sound/pci/fm801.c @@ -1234,9 +1234,12 @@ static int __devinit snd_fm801_create(struct snd_card *card, sprintf(chip->tea.bus_info, "PCI:%s", pci_name(pci)); if ((tea575x_tuner & TUNER_TYPE_MASK) > 0 && (tea575x_tuner & TUNER_TYPE_MASK) < 4) { - if (snd_tea575x_init(&chip->tea)) + if (snd_tea575x_init(&chip->tea)) { snd_printk(KERN_ERR "TEA575x radio not found\n"); - } else if ((tea575x_tuner & TUNER_TYPE_MASK) == 0) + snd_fm801_free(chip); + return -ENODEV; + } + } else if ((tea575x_tuner & TUNER_TYPE_MASK) == 0) { /* autodetect tuner connection */ for (tea575x_tuner = 1; tea575x_tuner <= 3; tea575x_tuner++) { chip->tea575x_tuner = tea575x_tuner; @@ -1246,6 +1249,12 @@ static int __devinit snd_fm801_create(struct snd_card *card, break; } } + if (tea575x_tuner == 4) { + snd_printk(KERN_ERR "TEA575x radio not found\n"); + snd_fm801_free(chip); + return -ENODEV; + } + } strlcpy(chip->tea.card, snd_fm801_tea575x_gpios[(tea575x_tuner & TUNER_TYPE_MASK) - 1].name, sizeof(chip->tea.card)); #endif -- cgit v0.10.2 From d50a2fb63643dce8506520dab5ffb8f49cc45cb2 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 3 Jun 2011 02:28:49 -0700 Subject: ALSA: asihpi: Use angle brackets for system includes Use the normal include style. Signed-off-by: Joe Perches Signed-off-by: Takashi Iwai diff --git a/sound/pci/asihpi/hpidspcd.c b/sound/pci/asihpi/hpidspcd.c index fb311d8..5c6ea11 100644 --- a/sound/pci/asihpi/hpidspcd.c +++ b/sound/pci/asihpi/hpidspcd.c @@ -60,7 +60,7 @@ struct code_header { HPI_VER_MINOR(HPI_VER) * 100 + HPI_VER_RELEASE(HPI_VER))) /***********************************************************************/ -#include "linux/pci.h" +#include /*-------------------------------------------------------------------*/ short hpi_dsp_code_open(u32 adapter, struct dsp_code *ps_dsp_code, u32 *pos_error_code) -- cgit v0.10.2 From 5ff6197f828d5ea051b3abf77cb61f8a34480e8d Mon Sep 17 00:00:00 2001 From: Steven Miao Date: Wed, 1 Jun 2011 15:52:41 +0800 Subject: Blackfin: strncpy: fix handling of zero lengths The jump to 4f will cause the NUL padding loop to run at least one time, so if string length is zero just jump to the end. Otherwise we wrongly write one NUL byte when size==0. Signed-off-by: Steven Miao Signed-off-by: Mike Frysinger diff --git a/arch/blackfin/lib/strncpy.S b/arch/blackfin/lib/strncpy.S index f3931d5..2c07ddd 100644 --- a/arch/blackfin/lib/strncpy.S +++ b/arch/blackfin/lib/strncpy.S @@ -25,7 +25,7 @@ ENTRY(_strncpy) CC = R2 == 0; - if CC JUMP 4f; + if CC JUMP 6f; P2 = R2 ; /* size */ P0 = R0 ; /* dst*/ -- cgit v0.10.2 From cf610bf4199770420629d3bc273494bd27ad6c1d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 31 May 2011 07:03:21 +0300 Subject: UBIFS: fix shrinker object count reports Sometimes VM asks the shrinker to return amount of objects it can shrink, and we return the ubifs_clean_zn_cnt in that case. However, it is possible that this counter is negative for a short period of time, due to the way UBIFS TNC code updates it. And I can observe the following warnings sometimes: shrink_slab: ubifs_shrinker+0x0/0x2b7 [ubifs] negative objects to delete nr=-8541616642706119788 This patch makes sure UBIFS never returns negative count of objects. Signed-off-by: Artem Bityutskiy Cc: stable@kernel.org diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index ca953a9..9e1d056 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -284,7 +284,11 @@ int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); if (nr == 0) - return clean_zn_cnt; + /* + * Due to the way UBIFS updates the clean znode counter it may + * temporarily be negative. + */ + return clean_zn_cnt >= 0 ? clean_zn_cnt : 1; if (!clean_zn_cnt) { /* -- cgit v0.10.2 From 812eb258311f89bcd664a34a620f249d54a2cd83 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 31 May 2011 08:40:40 +0300 Subject: UBIFS: fix memory leak on error path UBIFS leaks memory on error path in 'ubifs_jnl_update()' in case of write failure because it forgets to free the 'struct ubifs_dent_node *dent' object. Although the object is small, the alignment can make it large - e.g., 2KiB if the min. I/O unit is 2KiB. Signed-off-by: Artem Bityutskiy Cc: stable@kernel.org diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 34b1679..cef0460 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -669,6 +669,7 @@ out_free: out_release: release_head(c, BASEHD); + kfree(dent); out_ro: ubifs_ro_mode(c, err); if (last_reference) -- cgit v0.10.2 From 837072377034d0a0b18b851d1ab95676b245cc0a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 31 May 2011 14:26:07 +0300 Subject: UBIFS: fix clean znode counter corruption in error cases UBIFS maintains per-filesystem and global clean znode counters ('c->clean_zn_cnt' and 'ubifs_clean_zn_cnt'). It is important to maintain correct values there since the shrinker relies on 'ubifs_clean_zn_cnt'. However, in case of failures during commit the counters were corrupted. E.g., if a failure happens in the middle of 'write_index()', then some nodes in the commit list ('c->cnext') are marked as clean, and some are marked as dirty. And the 'ubifs_destroy_tnc_subtree()' frees does not retrun correct count, and we end up with non-zero 'c->clean_zn_cnt' when unmounting. This means that if we have 2 file-sytem and one of them fails, and we unmount it, 'ubifs_clean_zn_cnt' stays incorrect and confuses the shrinker. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 8119b1f..91b4213 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -2876,12 +2876,13 @@ static void tnc_destroy_cnext(struct ubifs_info *c) */ void ubifs_tnc_close(struct ubifs_info *c) { - long clean_freed; - tnc_destroy_cnext(c); if (c->zroot.znode) { - clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); - atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); + long n; + + ubifs_destroy_tnc_subtree(c->zroot.znode); + n = atomic_long_read(&c->clean_zn_cnt); + atomic_long_sub(n, &ubifs_clean_zn_cnt); } kfree(c->gap_lebs); kfree(c->ilebs); -- cgit v0.10.2 From 4f1ab9b01d34eac9fc958f7150d3bf266dcc1685 Mon Sep 17 00:00:00 2001 From: Ben Gardiner Date: Mon, 30 May 2011 14:56:14 -0400 Subject: UBIFS: assert no fixup when writing a node The current free space fixup can result in some writing to the UBI volume when the space_fixup flag is set. To catch instances where UBIFS is writing to the NAND while the space_fixup flag is set, add an assert to ubifs_write_node(). Artem: tweaked the patch, added similar assertion to the write buffer write path. Signed-off-by: Ben Gardiner Reviewed-by: Matthew L. Creech Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 166951e..3be645e 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -581,6 +581,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) ubifs_assert(wbuf->size % c->min_io_size == 0); ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); ubifs_assert(!c->ro_media && !c->ro_mount); + ubifs_assert(!c->space_fixup); if (c->leb_size - wbuf->offs >= c->max_write_size) ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); @@ -759,6 +760,7 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); ubifs_assert(!c->ro_media && !c->ro_mount); + ubifs_assert(!c->space_fixup); if (c->ro_error) return -EROFS; -- cgit v0.10.2 From 781c5717a95a74b294beb38b8276943b0f8b5bb4 Mon Sep 17 00:00:00 2001 From: Ben Gardiner Date: Mon, 30 May 2011 14:56:15 -0400 Subject: UBIFS: intialize LPT earlier The current 'mount_ubifs()' implementation does not initialize the LPT until the the master node is marked dirty. Move the LPT initialization to before marking the master node dirty. This is a preparation for the next patch which will move the free-space-fixup check to before marking the master node dirty, because we have to fix-up the free space before doing any writes. Artem: massaged the patch and commit message. Signed-off-by: Ben Gardiner Reviewed-by: Matthew L. Creech Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 1e40db7..6d357fd 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1282,17 +1282,24 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_master; - init_constants_master(c); - if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { ubifs_msg("recovery needed"); c->need_recovery = 1; - if (!c->ro_mount) { - err = ubifs_recover_inl_heads(c, c->sbuf); - if (err) - goto out_master; - } - } else if (!c->ro_mount) { + } + + init_constants_master(c); + + if (c->need_recovery && !c->ro_mount) { + err = ubifs_recover_inl_heads(c, c->sbuf); + if (err) + goto out_master; + } + + err = ubifs_lpt_init(c, 1, !c->ro_mount); + if (err) + goto out_master; + + if (!c->ro_mount) { /* * Set the "dirty" flag so that if we reboot uncleanly we * will notice this immediately on the next mount. @@ -1300,13 +1307,9 @@ static int mount_ubifs(struct ubifs_info *c) c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); err = ubifs_write_master(c); if (err) - goto out_master; + goto out_lpt; } - err = ubifs_lpt_init(c, 1, !c->ro_mount); - if (err) - goto out_lpt; - err = dbg_check_idx_size(c, c->bi.old_idx_sz); if (err) goto out_lpt; -- cgit v0.10.2 From 098011940a2549ae7182db4bf101c3e3d2b4e6df Mon Sep 17 00:00:00 2001 From: Ben Gardiner Date: Mon, 30 May 2011 14:56:16 -0400 Subject: UBIFS: fix-up free space earlier The free space fixup is currently initiated during mount after the call to ubifs_write_master() which results in a write to PEBs; this has been observed with the patch 'assert no fixup when writing a node' applied: Move the free space fixup on mount to before the calls to ubifs_recover_inl_heads() and ubifs_write_master(). This results in no assertions with the previously mentioned patch applied. Artem: tweaked the patch a bit Signed-off-by: Ben Gardiner Reviewed-by: Matthew L. Creech Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 6d357fd..b5aeb5a 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1282,13 +1282,13 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_master; + init_constants_master(c); + if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { ubifs_msg("recovery needed"); c->need_recovery = 1; } - init_constants_master(c); - if (c->need_recovery && !c->ro_mount) { err = ubifs_recover_inl_heads(c, c->sbuf); if (err) @@ -1299,6 +1299,12 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_master; + if (!c->ro_mount && c->space_fixup) { + err = ubifs_fixup_free_space(c); + if (err) + goto out_master; + } + if (!c->ro_mount) { /* * Set the "dirty" flag so that if we reboot uncleanly we @@ -1402,12 +1408,6 @@ static int mount_ubifs(struct ubifs_info *c) } else ubifs_assert(c->lst.taken_empty_lebs > 0); - if (!c->ro_mount && c->space_fixup) { - err = ubifs_fixup_free_space(c); - if (err) - goto out_infos; - } - err = dbg_check_filesystem(c); if (err) goto out_infos; -- cgit v0.10.2 From 157186bc185cdf588fecba0fc5d7466e3e5d49d7 Mon Sep 17 00:00:00 2001 From: Eric Lammerts Date: Fri, 27 May 2011 18:16:52 -0400 Subject: ALSA: usb - turn off de-emphasis in s/pdif for cm6206 CM6206: Turn off de-emphasis channel status bit in S/PDIF output. Signed-off-by: Eric Lammerts Signed-off-by: Takashi Iwai diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 2e969cb..090e193 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -403,7 +403,7 @@ static int snd_usb_cm106_boot_quirk(struct usb_device *dev) static int snd_usb_cm6206_boot_quirk(struct usb_device *dev) { int err, reg; - int val[] = {0x200c, 0x3000, 0xf800, 0x143f, 0x0000, 0x3000}; + int val[] = {0x2004, 0x3000, 0xf800, 0x143f, 0x0000, 0x3000}; for (reg = 0; reg < ARRAY_SIZE(val); reg++) { err = snd_usb_cm106_write_int_reg(dev, reg, val[reg]); -- cgit v0.10.2 From 55db4c64eddf37e31279ec15fe90314713bc9cfa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 4 Jun 2011 06:33:24 +0900 Subject: Revert "tty: make receive_buf() return the amout of bytes received" This reverts commit b1c43f82c5aa265442f82dba31ce985ebb7aa71c. It was broken in so many ways, and results in random odd pty issues. It re-introduced the buggy schedule_work() in flush_to_ldisc() that can cause endless work-loops (see commit a5660b41af6a: "tty: fix endless work loop when the buffer fills up"). It also used an "unsigned int" return value fo the ->receive_buf() function, but then made multiple functions return a negative error code, and didn't actually check for the error in the caller. And it didn't actually work at all. BenH bisected down odd tty behavior to it: "It looks like the patch is causing some major malfunctions of the X server for me, possibly related to PTYs. For example, cat'ing a large file in a gnome terminal hangs the kernel for -minutes- in a loop of what looks like flush_to_ldisc/workqueue code, (some ftrace data in the quoted bits further down). ... Some more data: It -looks- like what happens is that the flush_to_ldisc work queue entry constantly re-queues itself (because the PTY is full ?) and the workqueue thread will basically loop forver calling it without ever scheduling, thus starving the consumer process that could have emptied the PTY." which is pretty much exactly the problem we fixed in a5660b41af6a. Milton Miller pointed out the 'unsigned int' issue. Reported-by: Benjamin Herrenschmidt Reported-by: Milton Miller Cc: Stefan Bigler Cc: Toby Gray Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: Alan Cox Signed-off-by: Linus Torvalds diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index b3f0199..48ad2a7 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -355,29 +355,24 @@ static void hci_uart_tty_wakeup(struct tty_struct *tty) * flags pointer to flags for data * count count of received data in bytes * - * Return Value: Number of bytes received + * Return Value: None */ -static unsigned int hci_uart_tty_receive(struct tty_struct *tty, - const u8 *data, char *flags, int count) +static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, char *flags, int count) { struct hci_uart *hu = (void *)tty->disc_data; - int received; if (!hu || tty != hu->tty) - return -ENODEV; + return; if (!test_bit(HCI_UART_PROTO_SET, &hu->flags)) - return -EINVAL; + return; spin_lock(&hu->rx_lock); - received = hu->proto->recv(hu, (void *) data, count); - if (received > 0) - hu->hdev->stat.byte_rx += received; + hu->proto->recv(hu, (void *) data, count); + hu->hdev->stat.byte_rx += count; spin_unlock(&hu->rx_lock); tty_unthrottle(tty); - - return received; } static int hci_uart_register_dev(struct hci_uart *hu) diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c index f369896..8755f5f 100644 --- a/drivers/input/serio/serport.c +++ b/drivers/input/serio/serport.c @@ -120,21 +120,17 @@ static void serport_ldisc_close(struct tty_struct *tty) * 'interrupt' routine. */ -static unsigned int serport_ldisc_receive(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void serport_ldisc_receive(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct serport *serport = (struct serport*) tty->disc_data; unsigned long flags; unsigned int ch_flags; - int ret = 0; int i; spin_lock_irqsave(&serport->lock, flags); - if (!test_bit(SERPORT_ACTIVE, &serport->flags)) { - ret = -EINVAL; + if (!test_bit(SERPORT_ACTIVE, &serport->flags)) goto out; - } for (i = 0; i < count; i++) { switch (fp[i]) { @@ -156,8 +152,6 @@ static unsigned int serport_ldisc_receive(struct tty_struct *tty, out: spin_unlock_irqrestore(&serport->lock, flags); - - return ret == 0 ? count : ret; } /* diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index 1d44d47..86a5c4f 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -674,7 +674,7 @@ gigaset_tty_ioctl(struct tty_struct *tty, struct file *file, * cflags buffer containing error flags for received characters (ignored) * count number of received characters */ -static unsigned int +static void gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -683,12 +683,12 @@ gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, struct inbuf_t *inbuf; if (!cs) - return -ENODEV; + return; inbuf = cs->inbuf; if (!inbuf) { dev_err(cs->dev, "%s: no inbuf\n", __func__); cs_put(cs); - return -EINVAL; + return; } tail = inbuf->tail; @@ -725,8 +725,6 @@ gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, gig_dbg(DEBUG_INTR, "%s-->BH", __func__); gigaset_schedule_event(cs); cs_put(cs); - - return count; } /* diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index 1a05fe0..f91f82e 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -747,8 +747,8 @@ static void st_tty_close(struct tty_struct *tty) pr_debug("%s: done ", __func__); } -static unsigned int st_tty_receive(struct tty_struct *tty, - const unsigned char *data, char *tty_flags, int count) +static void st_tty_receive(struct tty_struct *tty, const unsigned char *data, + char *tty_flags, int count) { #ifdef VERBOSE print_hex_dump(KERN_DEBUG, ">in>", DUMP_PREFIX_NONE, @@ -761,8 +761,6 @@ static unsigned int st_tty_receive(struct tty_struct *tty, */ st_recv(tty->disc_data, data, count); pr_debug("done %s", __func__); - - return count; } /* wake-up function called in from the TTY layer diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index 73c7e03..3df0c0f 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -167,8 +167,8 @@ static inline void debugfs_tx(struct ser_device *ser, const u8 *data, int size) #endif -static unsigned int ldisc_receive(struct tty_struct *tty, - const u8 *data, char *flags, int count) +static void ldisc_receive(struct tty_struct *tty, const u8 *data, + char *flags, int count) { struct sk_buff *skb = NULL; struct ser_device *ser; @@ -215,8 +215,6 @@ static unsigned int ldisc_receive(struct tty_struct *tty, } else ++ser->dev->stats.rx_dropped; update_tty_status(ser); - - return count; } static int handle_tx(struct ser_device *ser) diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index 75622d5..1b49df6 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -425,17 +425,16 @@ static void slc_setup(struct net_device *dev) * in parallel */ -static unsigned int slcan_receive_buf(struct tty_struct *tty, +static void slcan_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct slcan *sl = (struct slcan *) tty->disc_data; - int bytes = count; if (!sl || sl->magic != SLCAN_MAGIC || !netif_running(sl->dev)) - return -ENODEV; + return; /* Read the characters out of the buffer */ - while (bytes--) { + while (count--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -444,8 +443,6 @@ static unsigned int slcan_receive_buf(struct tty_struct *tty, } slcan_unesc(sl, *cp++); } - - return count; } /************************************ diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 9920896..3e5d0b6 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -456,7 +456,7 @@ out: * a block of 6pack data has been received, which can now be decapsulated * and sent on to some IP layer for further processing. */ -static unsigned int sixpack_receive_buf(struct tty_struct *tty, +static void sixpack_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct sixpack *sp; @@ -464,11 +464,11 @@ static unsigned int sixpack_receive_buf(struct tty_struct *tty, int count1; if (!count) - return 0; + return; sp = sp_get(tty); if (!sp) - return -ENODEV; + return; memcpy(buf, cp, count < sizeof(buf) ? count : sizeof(buf)); @@ -487,8 +487,6 @@ static unsigned int sixpack_receive_buf(struct tty_struct *tty, sp_put(sp); tty_unthrottle(tty); - - return count1; } /* diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 0e4f235..4c62839 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -923,14 +923,13 @@ static long mkiss_compat_ioctl(struct tty_struct *tty, struct file *file, * a block of data has been received, which can now be decapsulated * and sent on to the AX.25 layer for further processing. */ -static unsigned int mkiss_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void mkiss_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct mkiss *ax = mkiss_get(tty); - int bytes = count; if (!ax) - return -ENODEV; + return; /* * Argh! mtu change time! - costs us the packet part received @@ -940,7 +939,7 @@ static unsigned int mkiss_receive_buf(struct tty_struct *tty, ax_changedmtu(ax); /* Read the characters out of the buffer */ - while (bytes--) { + while (count--) { if (fp != NULL && *fp++) { if (!test_and_set_bit(AXF_ERROR, &ax->flags)) ax->dev->stats.rx_errors++; @@ -953,8 +952,6 @@ static unsigned int mkiss_receive_buf(struct tty_struct *tty, mkiss_put(ax); tty_unthrottle(tty); - - return count; } /* diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index 035861d..3352b24 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -216,23 +216,23 @@ static int irtty_do_write(struct sir_dev *dev, const unsigned char *ptr, size_t * usbserial: urb-complete-interrupt / softint */ -static unsigned int irtty_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void irtty_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct sir_dev *dev; struct sirtty_cb *priv = tty->disc_data; int i; - IRDA_ASSERT(priv != NULL, return -ENODEV;); - IRDA_ASSERT(priv->magic == IRTTY_MAGIC, return -EINVAL;); + IRDA_ASSERT(priv != NULL, return;); + IRDA_ASSERT(priv->magic == IRTTY_MAGIC, return;); if (unlikely(count==0)) /* yes, this happens */ - return 0; + return; dev = priv->dev; if (!dev) { IRDA_WARNING("%s(), not ready yet!\n", __func__); - return -ENODEV; + return; } for (i = 0; i < count; i++) { @@ -242,13 +242,11 @@ static unsigned int irtty_receive_buf(struct tty_struct *tty, if (fp && *fp++) { IRDA_DEBUG(0, "Framing or parity error!\n"); sirdev_receive(dev, NULL, 0); /* notify sir_dev (updating stats) */ - return -EINVAL; + return; } } sirdev_receive(dev, cp, count); - - return count; } /* diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index 53872d7..a1b82c9 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -340,7 +340,7 @@ ppp_asynctty_poll(struct tty_struct *tty, struct file *file, poll_table *wait) } /* May sleep, don't call from interrupt level or with interrupts disabled */ -static unsigned int +static void ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -348,7 +348,7 @@ ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, unsigned long flags; if (!ap) - return -ENODEV; + return; spin_lock_irqsave(&ap->recv_lock, flags); ppp_async_input(ap, buf, cflags, count); spin_unlock_irqrestore(&ap->recv_lock, flags); @@ -356,8 +356,6 @@ ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, tasklet_schedule(&ap->tsk); ap_put(ap); tty_unthrottle(tty); - - return count; } static void diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c index 0815790..2573f52 100644 --- a/drivers/net/ppp_synctty.c +++ b/drivers/net/ppp_synctty.c @@ -381,7 +381,7 @@ ppp_sync_poll(struct tty_struct *tty, struct file *file, poll_table *wait) } /* May sleep, don't call from interrupt level or with interrupts disabled */ -static unsigned int +static void ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -389,7 +389,7 @@ ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, unsigned long flags; if (!ap) - return -ENODEV; + return; spin_lock_irqsave(&ap->recv_lock, flags); ppp_sync_input(ap, buf, cflags, count); spin_unlock_irqrestore(&ap->recv_lock, flags); @@ -397,8 +397,6 @@ ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, tasklet_schedule(&ap->tsk); sp_put(ap); tty_unthrottle(tty); - - return count; } static void diff --git a/drivers/net/slip.c b/drivers/net/slip.c index 584809c..8ec1a9a 100644 --- a/drivers/net/slip.c +++ b/drivers/net/slip.c @@ -670,17 +670,16 @@ static void sl_setup(struct net_device *dev) * in parallel */ -static unsigned int slip_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void slip_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct slip *sl = tty->disc_data; - int bytes = count; if (!sl || sl->magic != SLIP_MAGIC || !netif_running(sl->dev)) - return -ENODEV; + return; /* Read the characters out of the buffer */ - while (bytes--) { + while (count--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -694,8 +693,6 @@ static unsigned int slip_receive_buf(struct tty_struct *tty, #endif slip_unesc(sl, *cp++); } - - return count; } /************************************ diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 40398bf..24297b2 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -517,18 +517,17 @@ static int x25_asy_close(struct net_device *dev) * and sent on to some IP layer for further processing. */ -static unsigned int x25_asy_receive_buf(struct tty_struct *tty, +static void x25_asy_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct x25_asy *sl = tty->disc_data; - int bytes = count; if (!sl || sl->magic != X25_ASY_MAGIC || !netif_running(sl->dev)) return; /* Read the characters out of the buffer */ - while (bytes--) { + while (count--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -537,8 +536,6 @@ static unsigned int x25_asy_receive_buf(struct tty_struct *tty, } x25_asy_unesc(sl, *cp++); } - - return count; } /* diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index a4c42a7..09e8c7d 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2128,8 +2128,8 @@ static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) gsm->tty = NULL; } -static unsigned int gsmld_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void gsmld_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct gsm_mux *gsm = tty->disc_data; const unsigned char *dp; @@ -2162,8 +2162,6 @@ static unsigned int gsmld_receive_buf(struct tty_struct *tty, } /* FASYNC if needed ? */ /* If clogged call tty_throttle(tty); */ - - return count; } /** diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index cac6663..cea5603 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -188,8 +188,8 @@ static unsigned int n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp, poll_table *wait); static int n_hdlc_tty_open(struct tty_struct *tty); static void n_hdlc_tty_close(struct tty_struct *tty); -static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, - const __u8 *cp, char *fp, int count); +static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *cp, + char *fp, int count); static void n_hdlc_tty_wakeup(struct tty_struct *tty); #define bset(p,b) ((p)[(b) >> 5] |= (1 << ((b) & 0x1f))) @@ -509,8 +509,8 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty) * Called by tty low level driver when receive data is available. Data is * interpreted as one HDLC frame. */ -static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, - const __u8 *data, char *flags, int count) +static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *data, + char *flags, int count) { register struct n_hdlc *n_hdlc = tty2n_hdlc (tty); register struct n_hdlc_buf *buf; @@ -521,20 +521,20 @@ static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, /* This can happen if stuff comes in on the backup tty */ if (!n_hdlc || tty != n_hdlc->tty) - return -ENODEV; + return; /* verify line is using HDLC discipline */ if (n_hdlc->magic != HDLC_MAGIC) { printk("%s(%d) line not using HDLC discipline\n", __FILE__,__LINE__); - return -EINVAL; + return; } if ( count>maxframe ) { if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d) rx count>maxframesize, data discarded\n", __FILE__,__LINE__); - return -EINVAL; + return; } /* get a free HDLC buffer */ @@ -550,7 +550,7 @@ static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d) no more rx buffers, data discarded\n", __FILE__,__LINE__); - return -EINVAL; + return; } /* copy received data to HDLC buffer */ @@ -565,8 +565,6 @@ static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, if (n_hdlc->tty->fasync != NULL) kill_fasync (&n_hdlc->tty->fasync, SIGIO, POLL_IN); - return count; - } /* end of n_hdlc_tty_receive() */ /** diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c index a4bc39c..5c6c314 100644 --- a/drivers/tty/n_r3964.c +++ b/drivers/tty/n_r3964.c @@ -139,8 +139,8 @@ static int r3964_ioctl(struct tty_struct *tty, struct file *file, static void r3964_set_termios(struct tty_struct *tty, struct ktermios *old); static unsigned int r3964_poll(struct tty_struct *tty, struct file *file, struct poll_table_struct *wait); -static unsigned int r3964_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count); +static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count); static struct tty_ldisc_ops tty_ldisc_N_R3964 = { .owner = THIS_MODULE, @@ -1239,8 +1239,8 @@ static unsigned int r3964_poll(struct tty_struct *tty, struct file *file, return result; } -static unsigned int r3964_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct r3964_info *pInfo = tty->disc_data; const unsigned char *p; @@ -1257,8 +1257,6 @@ static unsigned int r3964_receive_buf(struct tty_struct *tty, } } - - return count; } MODULE_LICENSE("GPL"); diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 95d0a9c..0ad3288 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -81,6 +81,38 @@ static inline int tty_put_user(struct tty_struct *tty, unsigned char x, return put_user(x, ptr); } +/** + * n_tty_set__room - receive space + * @tty: terminal + * + * Called by the driver to find out how much data it is + * permitted to feed to the line discipline without any being lost + * and thus to manage flow control. Not serialized. Answers for the + * "instant". + */ + +static void n_tty_set_room(struct tty_struct *tty) +{ + /* tty->read_cnt is not read locked ? */ + int left = N_TTY_BUF_SIZE - tty->read_cnt - 1; + int old_left; + + /* + * If we are doing input canonicalization, and there are no + * pending newlines, let characters through without limit, so + * that erase characters will be handled. Other excess + * characters will be beeped. + */ + if (left <= 0) + left = tty->icanon && !tty->canon_data; + old_left = tty->receive_room; + tty->receive_room = left; + + /* Did this open up the receive buffer? We may need to flip */ + if (left && !old_left) + schedule_work(&tty->buf.work); +} + static void put_tty_queue_nolock(unsigned char c, struct tty_struct *tty) { if (tty->read_cnt < N_TTY_BUF_SIZE) { @@ -152,6 +184,7 @@ static void reset_buffer_flags(struct tty_struct *tty) tty->canon_head = tty->canon_data = tty->erasing = 0; memset(&tty->read_flags, 0, sizeof tty->read_flags); + n_tty_set_room(tty); check_unthrottle(tty); } @@ -1327,19 +1360,17 @@ static void n_tty_write_wakeup(struct tty_struct *tty) * calls one at a time and in order (or using flush_to_ldisc) */ -static unsigned int n_tty_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { const unsigned char *p; char *f, flags = TTY_NORMAL; int i; char buf[64]; unsigned long cpuflags; - int left; - int ret = 0; if (!tty->read_buf) - return 0; + return; if (tty->real_raw) { spin_lock_irqsave(&tty->read_lock, cpuflags); @@ -1349,7 +1380,6 @@ static unsigned int n_tty_receive_buf(struct tty_struct *tty, memcpy(tty->read_buf + tty->read_head, cp, i); tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1); tty->read_cnt += i; - ret += i; cp += i; count -= i; @@ -1359,10 +1389,8 @@ static unsigned int n_tty_receive_buf(struct tty_struct *tty, memcpy(tty->read_buf + tty->read_head, cp, i); tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1); tty->read_cnt += i; - ret += i; spin_unlock_irqrestore(&tty->read_lock, cpuflags); } else { - ret = count; for (i = count, p = cp, f = fp; i; i--, p++) { if (f) flags = *f++; @@ -1390,6 +1418,8 @@ static unsigned int n_tty_receive_buf(struct tty_struct *tty, tty->ops->flush_chars(tty); } + n_tty_set_room(tty); + if ((!tty->icanon && (tty->read_cnt >= tty->minimum_to_wake)) || L_EXTPROC(tty)) { kill_fasync(&tty->fasync, SIGIO, POLL_IN); @@ -1402,12 +1432,8 @@ static unsigned int n_tty_receive_buf(struct tty_struct *tty, * mode. We don't want to throttle the driver if we're in * canonical mode and don't have a newline yet! */ - left = N_TTY_BUF_SIZE - tty->read_cnt - 1; - - if (left < TTY_THRESHOLD_THROTTLE) + if (tty->receive_room < TTY_THRESHOLD_THROTTLE) tty_throttle(tty); - - return ret; } int is_ignored(int sig) @@ -1451,6 +1477,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) if (test_bit(TTY_HW_COOK_IN, &tty->flags)) { tty->raw = 1; tty->real_raw = 1; + n_tty_set_room(tty); return; } if (I_ISTRIP(tty) || I_IUCLC(tty) || I_IGNCR(tty) || @@ -1503,6 +1530,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) else tty->real_raw = 0; } + n_tty_set_room(tty); /* The termios change make the tty ready for I/O */ wake_up_interruptible(&tty->write_wait); wake_up_interruptible(&tty->read_wait); @@ -1784,6 +1812,8 @@ do_it_again: retval = -ERESTARTSYS; break; } + /* FIXME: does n_tty_set_room need locking ? */ + n_tty_set_room(tty); timeout = schedule_timeout(timeout); continue; } @@ -1855,8 +1885,10 @@ do_it_again: * longer than TTY_THRESHOLD_UNTHROTTLE in canonical mode, * we won't get any more characters. */ - if (n_tty_chars_in_buffer(tty) <= TTY_THRESHOLD_UNTHROTTLE) + if (n_tty_chars_in_buffer(tty) <= TTY_THRESHOLD_UNTHROTTLE) { + n_tty_set_room(tty); check_unthrottle(tty); + } if (b - buf >= minimum) break; @@ -1878,6 +1910,7 @@ do_it_again: } else if (test_and_clear_bit(TTY_PUSH, &tty->flags)) goto do_it_again; + n_tty_set_room(tty); return retval; } diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 46de2e0..f1a7918 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -416,7 +416,6 @@ static void flush_to_ldisc(struct work_struct *work) struct tty_buffer *head, *tail = tty->buf.tail; int seen_tail = 0; while ((head = tty->buf.head) != NULL) { - int copied; int count; char *char_buf; unsigned char *flag_buf; @@ -443,19 +442,17 @@ static void flush_to_ldisc(struct work_struct *work) line discipline as we want to empty the queue */ if (test_bit(TTY_FLUSHPENDING, &tty->flags)) break; + if (!tty->receive_room || seen_tail) + break; + if (count > tty->receive_room) + count = tty->receive_room; char_buf = head->char_buf_ptr + head->read; flag_buf = head->flag_buf_ptr + head->read; + head->read += count; spin_unlock_irqrestore(&tty->buf.lock, flags); - copied = disc->ops->receive_buf(tty, char_buf, + disc->ops->receive_buf(tty, char_buf, flag_buf, count); spin_lock_irqsave(&tty->buf.lock, flags); - - head->read += copied; - - if (copied == 0 || seen_tail) { - schedule_work(&tty->buf.work); - break; - } } clear_bit(TTY_FLUSHING, &tty->flags); } diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 67b1d0d..fb864e7 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -332,7 +332,8 @@ int paste_selection(struct tty_struct *tty) continue; } count = sel_buffer_lth - pasted; - count = tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted, + count = min(count, tty->receive_room); + tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted, NULL, count); pasted += count; } diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index 5b07792..ff7dc08 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -76,7 +76,7 @@ * tty device. It is solely the responsibility of the line * discipline to handle poll requests. * - * unsigned int (*receive_buf)(struct tty_struct *, const unsigned char *cp, + * void (*receive_buf)(struct tty_struct *, const unsigned char *cp, * char *fp, int count); * * This function is called by the low-level tty driver to send @@ -84,8 +84,7 @@ * processing. is a pointer to the buffer of input * character received by the device. is a pointer to a * pointer of flag bytes which indicate whether a character was - * received with a parity error, etc. Returns the amount of bytes - * received. + * received with a parity error, etc. * * void (*write_wakeup)(struct tty_struct *); * @@ -141,8 +140,8 @@ struct tty_ldisc_ops { /* * The following routines are called from below. */ - unsigned int (*receive_buf)(struct tty_struct *, - const unsigned char *cp, char *fp, int count); + void (*receive_buf)(struct tty_struct *, const unsigned char *cp, + char *fp, int count); void (*write_wakeup)(struct tty_struct *); void (*dcd_change)(struct tty_struct *, unsigned int, struct pps_event_time *); -- cgit v0.10.2 From bb3d6bf1919a20c56b3257b4ec09e67a9226cfb2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 4 Jun 2011 07:00:50 +0900 Subject: Revert "ASoC: Update cx20442 for TTY API change" This reverts commit ed0bd2333cffc3d856db9beb829543c1dfc00982. Since we reverted the TTY API change, we should revert the ASoC update to it too. Cc: Mark Brown Cc: Liam Girdwood Cc: Greg Kroah-Hartman Signed-off-by: Linus Torvalds diff --git a/sound/soc/codecs/cx20442.c b/sound/soc/codecs/cx20442.c index f8c663d..d68ea53 100644 --- a/sound/soc/codecs/cx20442.c +++ b/sound/soc/codecs/cx20442.c @@ -262,14 +262,14 @@ static int v253_hangup(struct tty_struct *tty) } /* Line discipline .receive_buf() */ -static unsigned int v253_receive(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void v253_receive(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { struct snd_soc_codec *codec = tty->disc_data; struct cx20442_priv *cx20442; if (!codec) - return count; + return; cx20442 = snd_soc_codec_get_drvdata(codec); @@ -281,8 +281,6 @@ static unsigned int v253_receive(struct tty_struct *tty, codec->hw_write = (hw_write_t)tty->ops->write; codec->card->pop_time = 1; } - - return count; } /* Line discipline .write_wakeup() */ -- cgit v0.10.2 From 1bc8779349d6278e2713a1ff94418c2a6746a791 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Sat, 28 May 2011 21:57:55 +0200 Subject: btrfs: scrub: don't reuse bios and pages The current scrub implementation reuses bios and pages as often as possible, allocating them only on start and releasing them when finished. This leads to more problems with the block layer than it's worth. The elevator gets confused when there are more pages added to the bio than bi_size suggests. This patch completely rips out the reuse of bios and pages and allocates them freshly for each submit. Signed-off-by: Arne Jansen Signed-off-by: Chris Maosn diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 6dfed0c..2d1f890 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -117,33 +117,37 @@ static void scrub_free_csums(struct scrub_dev *sdev) } } +static void scrub_free_bio(struct bio *bio) +{ + int i; + struct page *last_page = NULL; + + if (!bio) + return; + + for (i = 0; i < bio->bi_vcnt; ++i) { + if (bio->bi_io_vec[i].bv_page == last_page) + continue; + last_page = bio->bi_io_vec[i].bv_page; + __free_page(last_page); + } + bio_put(bio); +} + static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) { int i; - int j; - struct page *last_page; if (!sdev) return; for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { struct scrub_bio *sbio = sdev->bios[i]; - struct bio *bio; if (!sbio) break; - bio = sbio->bio; - if (bio) { - last_page = NULL; - for (j = 0; j < bio->bi_vcnt; ++j) { - if (bio->bi_io_vec[j].bv_page == last_page) - continue; - last_page = bio->bi_io_vec[j].bv_page; - __free_page(last_page); - } - bio_put(bio); - } + scrub_free_bio(sbio->bio); kfree(sbio); } @@ -156,8 +160,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) { struct scrub_dev *sdev; int i; - int j; - int ret; struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; sdev = kzalloc(sizeof(*sdev), GFP_NOFS); @@ -165,7 +167,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) goto nomem; sdev->dev = dev; for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { - struct bio *bio; struct scrub_bio *sbio; sbio = kzalloc(sizeof(*sbio), GFP_NOFS); @@ -173,32 +174,10 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) goto nomem; sdev->bios[i] = sbio; - bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO); - if (!bio) - goto nomem; - sbio->index = i; sbio->sdev = sdev; - sbio->bio = bio; sbio->count = 0; sbio->work.func = scrub_checksum; - bio->bi_private = sdev->bios[i]; - bio->bi_end_io = scrub_bio_end_io; - bio->bi_sector = 0; - bio->bi_bdev = dev->bdev; - bio->bi_size = 0; - - for (j = 0; j < SCRUB_PAGES_PER_BIO; ++j) { - struct page *page; - page = alloc_page(GFP_NOFS); - if (!page) - goto nomem; - - ret = bio_add_page(bio, page, PAGE_SIZE, 0); - if (!ret) - goto nomem; - } - WARN_ON(bio->bi_vcnt != SCRUB_PAGES_PER_BIO); if (i != SCRUB_BIOS_PER_DEV-1) sdev->bios[i]->next_free = i + 1; @@ -394,6 +373,7 @@ static void scrub_bio_end_io(struct bio *bio, int err) struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; sbio->err = err; + sbio->bio = bio; btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); } @@ -453,6 +433,8 @@ static void scrub_checksum(struct btrfs_work *work) } out: + scrub_free_bio(sbio->bio); + sbio->bio = NULL; spin_lock(&sdev->list_lock); sbio->next_free = sdev->first_free; sdev->first_free = sbio->index; @@ -583,25 +565,50 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer) static int scrub_submit(struct scrub_dev *sdev) { struct scrub_bio *sbio; + struct bio *bio; + int i; if (sdev->curr == -1) return 0; sbio = sdev->bios[sdev->curr]; - sbio->bio->bi_sector = sbio->physical >> 9; - sbio->bio->bi_size = sbio->count * PAGE_SIZE; - sbio->bio->bi_next = NULL; - sbio->bio->bi_flags |= 1 << BIO_UPTODATE; - sbio->bio->bi_comp_cpu = -1; - sbio->bio->bi_bdev = sdev->dev->bdev; + bio = bio_alloc(GFP_NOFS, sbio->count); + if (!bio) + goto nomem; + + bio->bi_private = sbio; + bio->bi_end_io = scrub_bio_end_io; + bio->bi_bdev = sdev->dev->bdev; + bio->bi_sector = sbio->physical >> 9; + + for (i = 0; i < sbio->count; ++i) { + struct page *page; + int ret; + + page = alloc_page(GFP_NOFS); + if (!page) + goto nomem; + + ret = bio_add_page(bio, page, PAGE_SIZE, 0); + if (!ret) { + __free_page(page); + goto nomem; + } + } + sbio->err = 0; sdev->curr = -1; atomic_inc(&sdev->in_flight); - submit_bio(0, sbio->bio); + submit_bio(READ, bio); return 0; + +nomem: + scrub_free_bio(bio); + + return -ENOMEM; } static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, @@ -633,7 +640,11 @@ again: sbio->logical = logical; } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || sbio->logical + sbio->count * PAGE_SIZE != logical) { - scrub_submit(sdev); + int ret; + + ret = scrub_submit(sdev); + if (ret) + return ret; goto again; } sbio->spag[sbio->count].flags = flags; @@ -645,8 +656,13 @@ again: memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); } ++sbio->count; - if (sbio->count == SCRUB_PAGES_PER_BIO || force) - scrub_submit(sdev); + if (sbio->count == SCRUB_PAGES_PER_BIO || force) { + int ret; + + ret = scrub_submit(sdev); + if (ret) + return ret; + } return 0; } -- cgit v0.10.2 From 17aca1c987cff89dc4279371857035da902c8854 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 3 Jun 2011 01:13:45 -0400 Subject: Btrfs: fix uninit variable in the delayed inode code The nitems counter needs to start at zero Signed-off-by: Chris Mason diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index b46d94d..c61c32c 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -678,6 +678,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans, INIT_LIST_HEAD(&head); next = item; + nitems = 0; /* * count the number of the continuous items that we can insert in batch -- cgit v0.10.2 From 211f96c24f117fcc6e9e2431e40d92f4de22625e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 3 Jun 2011 01:26:53 -0400 Subject: Btrfs: make sure we don't overflow the free space cache crc page The free space cache uses only one page for crcs right now, which means we can't have a cache file bigger than the crcs we can fit in the first page. This adds a check to enforce that restriction. Signed-off-by: Chris Mason diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index dd38d4c..1cb7239 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -590,10 +590,25 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + /* Since the first page has all of our checksums and our generation we + * need to calculate the offset into the page that we can start writing + * our entries. + */ + first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); + filemap_write_and_wait(inode->i_mapping); btrfs_wait_ordered_range(inode, inode->i_size & ~(root->sectorsize - 1), (u64)-1); + /* make sure we don't overflow that first page */ + if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) { + /* this is really the same as running out of space, where we also return 0 */ + printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n"); + ret = 0; + goto out_update; + } + /* We need a checksum per page. */ crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); if (!crc) @@ -605,12 +620,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, return -1; } - /* Since the first page has all of our checksums and our generation we - * need to calculate the offset into the page that we can start writing - * our entries. - */ - first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); - /* Get the cluster for this block_group if it exists */ if (block_group && !list_empty(&block_group->cluster_list)) cluster = list_entry(block_group->cluster_list.next, @@ -872,12 +881,14 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ret = 1; out_free: + kfree(checksums); + kfree(pages); + +out_update: if (ret != 1) { invalidate_inode_pages2_range(inode->i_mapping, 0, index); BTRFS_I(inode)->generation = 0; } - kfree(checksums); - kfree(pages); btrfs_update_inode(trans, root, inode); return ret; } -- cgit v0.10.2 From ca456ae280c0646e1e571c3b9a3834c55e90adfe Mon Sep 17 00:00:00 2001 From: liubo Date: Wed, 1 Jun 2011 09:42:49 +0000 Subject: Btrfs: don't save the inode cache in non-FS roots This adds extra checks to make sure the inode map we are caching really belongs to a FS root instead of a special relocation tree. It prevents crashes during balancing operations. Signed-off-by: Liu Bo Signed-off-by: Chris Mason diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 3262cd1..04f7199 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -388,6 +388,12 @@ int btrfs_save_ino_cache(struct btrfs_root *root, int prealloc; bool retry = false; + /* only fs tree and subvol/snap needs ino cache */ + if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID && + (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID || + root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID)) + return 0; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; -- cgit v0.10.2 From 5f3f302a6f4cb74906c05fad1d03fc5e95c7e5af Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Mon, 30 May 2011 08:36:16 +0000 Subject: btrfs: false BUG_ON when degraded In degraded mode the struct btrfs_device of missing devs don't have device->name set. A kstrdup of NULL correctly returns NULL. Don't BUG in this case. Signed-off-by: Arne Jansen Signed-off-by: Chris Mason diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c48214e..da541df 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -504,7 +504,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) BUG_ON(!new_device); memcpy(new_device, device, sizeof(*new_device)); new_device->name = kstrdup(device->name, GFP_NOFS); - BUG_ON(!new_device->name); + BUG_ON(device->name && !new_device->name); new_device->bdev = NULL; new_device->writeable = 0; new_device->in_fs_metadata = 0; -- cgit v0.10.2 From d132a538d258f8f52fd0cd8b5017755f4e915386 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 31 May 2011 19:33:33 +0000 Subject: Btrfs: don't save the inode cache if we are deleting this root With xfstest 254 I can panic the box every time with the inode number caching stuff on. This is because we clean the inodes out when we delete the subvolume, but then we write out the inode cache which adds an inode to the subvolume inode tree, and then when it gets evicted again the root gets added back on the dead roots list and is deleted again, so we have a double free. To stop this from happening just return 0 if refs is 0 (and we're not the tree root since tree root always has refs of 0). With this fix 254 no longer panics. Thanks, Signed-off-by: Josef Bacik Tested-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 04f7199..2d0d500 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -394,6 +394,11 @@ int btrfs_save_ino_cache(struct btrfs_root *root, root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID)) return 0; + /* Don't save inode cache if we are deleting this root */ + if (btrfs_root_refs(&root->root_item) == 0 && + root != root->fs_info->tree_root) + return 0; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; -- cgit v0.10.2 From a4689d2bd3b00dcf5c4320f06e0ab88810fbff9c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 31 May 2011 17:08:14 +0000 Subject: btrfs: use btrfs_ino to access inode number commit 4cb5300bc ("Btrfs: add mount -o auto_defrag") accesses inode number directly while it should use the helper with the new inode number allocator. Signed-off-by: David Sterba Signed-off-by: Chris Mason diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e3a1b0c..982b5ea 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -144,7 +144,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, if (!defrag) return -ENOMEM; - defrag->ino = inode->i_ino; + defrag->ino = btrfs_ino(inode); defrag->transid = transid; defrag->root = root->root_key.objectid; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 74c8059..ac37040 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -706,16 +706,17 @@ static int find_new_extents(struct btrfs_root *root, struct btrfs_file_extent_item *extent; int type; int ret; + u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); if (!path) return -ENOMEM; - min_key.objectid = inode->i_ino; + min_key.objectid = ino; min_key.type = BTRFS_EXTENT_DATA_KEY; min_key.offset = *off; - max_key.objectid = inode->i_ino; + max_key.objectid = ino; max_key.type = (u8)-1; max_key.offset = (u64)-1; @@ -726,7 +727,7 @@ static int find_new_extents(struct btrfs_root *root, path, 0, newer_than); if (ret != 0) goto none; - if (min_key.objectid != inode->i_ino) + if (min_key.objectid != ino) goto none; if (min_key.type != BTRFS_EXTENT_DATA_KEY) goto none; -- cgit v0.10.2 From e7786c3ae517b2c433edc91714e86be770e9f1ce Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Sat, 28 May 2011 20:58:38 +0000 Subject: btrfs: scrub: add explicit plugging With the removal of the implicit plugging scrub ends up doing more and smaller I/O than necessary. This patch adds explicit plugging per chunk. Signed-off-by: Arne Jansen Signed-off-by: Chris Mason diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2d1f890..1204eab 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -348,9 +348,6 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, int ret; DECLARE_COMPLETION_ONSTACK(complete); - /* we are going to wait on this IO */ - rw |= REQ_SYNC; - bio = bio_alloc(GFP_NOFS, 1); bio->bi_bdev = bdev; bio->bi_sector = sector; @@ -359,6 +356,7 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, bio->bi_private = &complete; submit_bio(rw, bio); + /* this will also unplug the queue */ wait_for_completion(&complete); ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); @@ -743,6 +741,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, struct btrfs_root *root = fs_info->extent_root; struct btrfs_root *csum_root = fs_info->csum_root; struct btrfs_extent_item *extent; + struct blk_plug plug; u64 flags; int ret; int slot; @@ -847,6 +846,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, * the scrub. This might currently (crc32) end up to be about 1MB */ start_stripe = 0; + blk_start_plug(&plug); again: logical = base + offset + start_stripe * increment; for (i = start_stripe; i < nstripes; ++i) { @@ -988,6 +988,7 @@ next: scrub_submit(sdev); out: + blk_finish_plug(&plug); btrfs_free_path(path); return ret < 0 ? ret : 0; } -- cgit v0.10.2 From 4b9465cb9e3859186eefa1ca3b990a5849386320 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 3 Jun 2011 09:36:29 -0400 Subject: Btrfs: add mount -o inode_cache This makes the inode map cache default to off until we fix the overflow problem when the free space crcs don't fit inside a single page. Signed-off-by: Chris Mason diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8f98c20..4958ef5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1340,6 +1340,7 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) #define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) #define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) +#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 1cb7239..bffa5c4 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2536,6 +2536,9 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) int ret = 0; u64 root_gen = btrfs_root_generation(&root->root_item); + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return 0; + /* * If we're unmounting then just return, since this does a search on the * normal root and not the commit root and we could deadlock. @@ -2575,6 +2578,9 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, struct inode *inode; int ret; + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return 0; + inode = lookup_free_ino_inode(root, path); if (IS_ERR(inode)) return 0; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 2d0d500..cb79b89 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -38,6 +38,9 @@ static int caching_kthread(void *data) int slot; int ret; + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return 0; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -141,6 +144,9 @@ static void start_caching(struct btrfs_root *root) int ret; u64 objectid; + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return; + spin_lock(&root->cache_lock); if (root->cached != BTRFS_CACHE_NO) { spin_unlock(&root->cache_lock); @@ -178,6 +184,9 @@ static void start_caching(struct btrfs_root *root) int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) { + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return btrfs_find_free_objectid(root, objectid); + again: *objectid = btrfs_find_ino_for_alloc(root); @@ -201,6 +210,10 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid) { struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; + + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return; + again: if (root->cached == BTRFS_CACHE_FINISHED) { __btrfs_add_free_space(ctl, objectid, 1); @@ -250,6 +263,9 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) struct rb_node *n; u64 count; + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return; + while (1) { n = rb_first(rbroot); if (!n) @@ -399,9 +415,13 @@ int btrfs_save_ino_cache(struct btrfs_root *root, root != root->fs_info->tree_root) return 0; + if (!btrfs_test_opt(root, INODE_MAP_CACHE)) + return 0; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; + again: inode = lookup_free_ino_inode(root, path); if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 28e3cb2..3559d0b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -160,7 +160,8 @@ enum { Opt_compress_type, Opt_compress_force, Opt_compress_force_type, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, - Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_err, + Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, + Opt_inode_cache, Opt_err, }; static match_table_t tokens = { @@ -192,6 +193,7 @@ static match_table_t tokens = { {Opt_enospc_debug, "enospc_debug"}, {Opt_subvolrootid, "subvolrootid=%d"}, {Opt_defrag, "autodefrag"}, + {Opt_inode_cache, "inode_cache"}, {Opt_err, NULL}, }; @@ -360,6 +362,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) printk(KERN_INFO "btrfs: enabling disk space caching\n"); btrfs_set_opt(info->mount_opt, SPACE_CACHE); break; + case Opt_inode_cache: + printk(KERN_INFO "btrfs: enabling inode map caching\n"); + btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE); + break; case Opt_clear_cache: printk(KERN_INFO "btrfs: force clearing of disk cache\n"); btrfs_set_opt(info->mount_opt, CLEAR_CACHE); -- cgit v0.10.2 From 7841cb2898f66a73062c64d0ef5733dde7279e46 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 31 May 2011 18:07:27 +0200 Subject: btrfs: add helper for fs_info->closing wrap checking of filesystem 'closing' flag and fix a few missing memory barriers. Signed-off-by: David Sterba diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4958ef5..8490ee0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2354,6 +2354,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, struct extent_buffer *parent); +static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) +{ + /* + * Get synced with close_ctree() + */ + smp_mb(); + return fs_info->closing; +} + /* root-item.c */ int btrfs_find_root_ref(struct btrfs_root *tree_root, struct btrfs_path *path, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c9173a7..5b9b6b6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -366,8 +366,7 @@ again: nritems = btrfs_header_nritems(leaf); while (1) { - smp_mb(); - if (fs_info->closing > 1) { + if (btrfs_fs_closing(fs_info) > 1) { last = (u64)-1; break; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 982b5ea..fa4ef18 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -129,7 +129,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, if (!btrfs_test_opt(root, AUTO_DEFRAG)) return 0; - if (root->fs_info->closing) + if (btrfs_fs_closing(root->fs_info)) return 0; if (BTRFS_I(inode)->in_defrag) @@ -229,7 +229,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) first_ino = defrag->ino + 1; rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); - if (fs_info->closing) + if (btrfs_fs_closing(fs_info)) goto next_free; spin_unlock(&fs_info->defrag_inodes_lock); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index bffa5c4..ad14473 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -98,7 +98,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, return inode; spin_lock(&block_group->lock); - if (!root->fs_info->closing) { + if (!btrfs_fs_closing(root->fs_info)) { block_group->inode = igrab(inode); block_group->iref = 1; } @@ -493,8 +493,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, * If we're unmounting then just return, since this does a search on the * normal root and not the commit root and we could deadlock. */ - smp_mb(); - if (fs_info->closing) + if (btrfs_fs_closing(fs_info)) return 0; /* @@ -2513,7 +2512,7 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root, return inode; spin_lock(&root->cache_lock); - if (!root->fs_info->closing) + if (!btrfs_fs_closing(root->fs_info)) root->cache_inode = igrab(inode); spin_unlock(&root->cache_lock); @@ -2543,8 +2542,7 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) * If we're unmounting then just return, since this does a search on the * normal root and not the commit root and we could deadlock. */ - smp_mb(); - if (fs_info->closing) + if (btrfs_fs_closing(fs_info)) return 0; path = btrfs_alloc_path(); diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index cb79b89..b4087e0 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -62,8 +62,7 @@ again: goto out; while (1) { - smp_mb(); - if (fs_info->closing) + if (btrfs_fs_closing(fs_info)) goto out; leaf = path->nodes[0]; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a83e44b..02ff4a1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4266,8 +4266,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) if (BTRFS_I(inode)->dummy_inode) return 0; - smp_mb(); - if (root->fs_info->closing && is_free_space_inode(root, inode)) + if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode)) nolock = true; if (wbc->sync_mode == WB_SYNC_ALL) { diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 1204eab..df50fd1 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1183,7 +1183,7 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, int ret; struct btrfs_device *dev; - if (root->fs_info->closing) + if (btrfs_fs_closing(root->fs_info)) return -EINVAL; /* diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2d5c6d2..dd71966 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -817,7 +817,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) btrfs_btree_balance_dirty(info->tree_root, nr); cond_resched(); - if (root->fs_info->closing || ret != -EAGAIN) + if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN) break; } root->defrag_running = 0; -- cgit v0.10.2 From aa0467d8d2a00e75b2bb6a56a4ee6d70c5d1928f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 3 Jun 2011 16:29:08 +0200 Subject: btrfs: fix uninitialized variable warning With Linus' tree, today's linux-next build (powercp ppc64_defconfig) produced this warning: fs/btrfs/delayed-inode.c: In function 'btrfs_delayed_update_inode': fs/btrfs/delayed-inode.c:1598:6: warning: 'ret' may be used uninitialized in this function Introduced by commit 16cdcec736cd ("btrfs: implement delayed inode items operation"). This fixes a bug in btrfs_update_inode(): if the returned value from btrfs_delayed_update_inode is a nonzero garbage, inode stat data are not updated and several call paths may hit a BUG_ON or fail with strange code. Reported-by: Stephen Rothwell Signed-off-by: David Sterba diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index c61c32c..6462c29 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1595,7 +1595,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) { struct btrfs_delayed_node *delayed_node; - int ret; + int ret = 0; delayed_node = btrfs_get_or_create_delayed_node(inode); if (IS_ERR(delayed_node)) -- cgit v0.10.2 From 942c1a927bf296fd64fd49f04c5a8f66bb14446b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Per=20Dal=C3=A9n?= Date: Thu, 26 May 2011 09:08:53 -0400 Subject: hwmon: (max6642): Better chip detection schema Improve detection of MAX6642 by reading non existing registers (0x04, 0x06 and 0xff). Reading those registers returns the previously read value. Signed-off-by: Per Dalen [guenter.roeck@ericsson.com: added second set of register reads] Signed-off-by: Guenter Roeck diff --git a/drivers/hwmon/max6642.c b/drivers/hwmon/max6642.c index 0f30e1b..e855d3b 100644 --- a/drivers/hwmon/max6642.c +++ b/drivers/hwmon/max6642.c @@ -136,15 +136,29 @@ static int max6642_detect(struct i2c_client *client, if (man_id != 0x4D) return -ENODEV; + /* sanity check */ + if (i2c_smbus_read_byte_data(client, 0x04) != 0x4D + || i2c_smbus_read_byte_data(client, 0x06) != 0x4D + || i2c_smbus_read_byte_data(client, 0xff) != 0x4D) + return -ENODEV; + /* * We read the config and status register, the 4 lower bits in the * config register should be zero and bit 5, 3, 1 and 0 should be * zero in the status register. */ reg_config = i2c_smbus_read_byte_data(client, MAX6642_REG_R_CONFIG); + if ((reg_config & 0x0f) != 0x00) + return -ENODEV; + + /* in between, another round of sanity checks */ + if (i2c_smbus_read_byte_data(client, 0x04) != reg_config + || i2c_smbus_read_byte_data(client, 0x06) != reg_config + || i2c_smbus_read_byte_data(client, 0xff) != reg_config) + return -ENODEV; + reg_status = i2c_smbus_read_byte_data(client, MAX6642_REG_R_STATUS); - if (((reg_config & 0x0f) != 0x00) || - ((reg_status & 0x2b) != 0x00)) + if ((reg_status & 0x2b) != 0x00) return -ENODEV; strlcpy(info->type, "max6642", I2C_NAME_SIZE); -- cgit v0.10.2 From f2a4d8ae4d40f6f5482d207f47fd4d53b3ae0ed4 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Tue, 31 May 2011 15:14:07 -0600 Subject: ARM: Tegra: Harmony: Fix conflicting GPIO numbering Currently, both the WM8903 and TPS6586x chips attempt to register with gpiolib using the same GPIO numbers. This causes the audio driver to fail to initialize. To solve this, add a define to board-harmony.h for the TPS6586x, and make board-harmony-power.c use this define, instead of directly referencing TEGRA_NR_GPIOS. This fixes a regression introduced by commit 6f168f2fa60f87e85e0df25e87e2372f22f5eb7c. ARM: tegra: harmony: initialize the TPS65862 PMIC Signed-off-by: Stephen Warren Signed-off-by: Colin Cross diff --git a/arch/arm/mach-tegra/board-harmony-power.c b/arch/arm/mach-tegra/board-harmony-power.c index c84442c..5ad8b2f 100644 --- a/arch/arm/mach-tegra/board-harmony-power.c +++ b/arch/arm/mach-tegra/board-harmony-power.c @@ -24,6 +24,8 @@ #include +#include "board-harmony.h" + #define PMC_CTRL 0x0 #define PMC_CTRL_INTR_LOW (1 << 17) @@ -98,7 +100,7 @@ static struct tps6586x_platform_data tps_platform = { .irq_base = TEGRA_NR_IRQS, .num_subdevs = ARRAY_SIZE(tps_devs), .subdevs = tps_devs, - .gpio_base = TEGRA_NR_GPIOS, + .gpio_base = HARMONY_GPIO_TPS6586X(0), }; static struct i2c_board_info __initdata harmony_regulators[] = { diff --git a/arch/arm/mach-tegra/board-harmony.h b/arch/arm/mach-tegra/board-harmony.h index 1e57b07..d85142e 100644 --- a/arch/arm/mach-tegra/board-harmony.h +++ b/arch/arm/mach-tegra/board-harmony.h @@ -17,7 +17,8 @@ #ifndef _MACH_TEGRA_BOARD_HARMONY_H #define _MACH_TEGRA_BOARD_HARMONY_H -#define HARMONY_GPIO_WM8903(_x_) (TEGRA_NR_GPIOS + (_x_)) +#define HARMONY_GPIO_TPS6586X(_x_) (TEGRA_NR_GPIOS + (_x_)) +#define HARMONY_GPIO_WM8903(_x_) (HARMONY_GPIO_TPS6586X(4) + (_x_)) #define TEGRA_GPIO_SD2_CD TEGRA_GPIO_PI5 #define TEGRA_GPIO_SD2_WP TEGRA_GPIO_PH1 -- cgit v0.10.2 From e0dcd8a05be438b3d2e49ef61441ea3a463663f8 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 5 Jun 2011 22:03:13 -0700 Subject: mm: fix ENOSPC returned by handle_mm_fault() Al Viro observes that in the hugetlb case, handle_mm_fault() may return a value of the kind ENOSPC when its caller is expecting a value of the kind VM_FAULT_SIGBUS: fix alloc_huge_page()'s failure returns. Signed-off-by: Hugh Dickins Acked-by: Al Viro Cc: stable@kernel.org Signed-off-by: Linus Torvalds diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f33bb31..6402458 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1033,10 +1033,10 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, */ chg = vma_needs_reservation(h, vma, addr); if (chg < 0) - return ERR_PTR(chg); + return ERR_PTR(-VM_FAULT_OOM); if (chg) if (hugetlb_get_quota(inode->i_mapping, chg)) - return ERR_PTR(-ENOSPC); + return ERR_PTR(-VM_FAULT_SIGBUS); spin_lock(&hugetlb_lock); page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve); -- cgit v0.10.2 From 59c5f46fbe01a00eedf54a23789634438bb80603 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 6 Jun 2011 18:06:33 +0900 Subject: Linux 3.0-rc2 diff --git a/Makefile b/Makefile index afb8e0d..0f1db8d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Sneaky Weasel # *DOCUMENTATION* -- cgit v0.10.2 From d52b31deffe1956ac62d0b81b915c9b52cffb814 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 27 May 2011 13:56:12 -0700 Subject: GPIO: OMAP: fix section mismatch warnings WARNING: arch/arm/plat-omap/built-in.o(.devinit.text+0x46c): Section mismatch in reference from the function omap_gpio_probe() to the function .init.text:omap_gpio_chip_init() The function __devinit omap_gpio_probe() references a function __init omap_gpio_chip_init(). If omap_gpio_chip_init is only used by omap_gpio_probe then annotate omap_gpio_chip_init with a matching annotation. Signed-off-by: Russell King Signed-off-by: Kevin Hilman diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 6c51191..76709b0 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1524,7 +1524,7 @@ static void omap_gpio_mod_init(struct gpio_bank *bank, int id) } } -static void __init omap_gpio_chip_init(struct gpio_bank *bank) +static void __devinit omap_gpio_chip_init(struct gpio_bank *bank) { int j; static int gpio; -- cgit v0.10.2 From 0622b25bf071fd83c6eef6b61fb5f3f12a418528 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 Jun 2011 13:38:17 -0700 Subject: GPIO: OMAP: fix setting IRQWAKEN bits for OMAP4 Setting the IRQWAKEN bit was overwriting previous IRQWAKEN bits, causing only the last bit set to take effect, resulting in lost wakeups when the GPIO controller is in idle. Replace direct writes to IRQWAKEN with MOD_REG_BIT calls to perform a read-modify-write on the register. Signed-off-by: Colin Cross Signed-off-by: Kevin Hilman diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 76709b0..5ad827a 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -432,7 +432,6 @@ static inline void set_24xx_gpio_triggering(struct gpio_bank *bank, int gpio, { void __iomem *base = bank->base; u32 gpio_bit = 1 << gpio; - u32 val; if (cpu_is_omap44xx()) { MOD_REG_BIT(OMAP4_GPIO_LEVELDETECT0, gpio_bit, @@ -455,15 +454,8 @@ static inline void set_24xx_gpio_triggering(struct gpio_bank *bank, int gpio, } if (likely(!(bank->non_wakeup_gpios & gpio_bit))) { if (cpu_is_omap44xx()) { - if (trigger != 0) - __raw_writel(1 << gpio, bank->base+ - OMAP4_GPIO_IRQWAKEN0); - else { - val = __raw_readl(bank->base + - OMAP4_GPIO_IRQWAKEN0); - __raw_writel(val & (~(1 << gpio)), bank->base + - OMAP4_GPIO_IRQWAKEN0); - } + MOD_REG_BIT(OMAP4_GPIO_IRQWAKEN0, gpio_bit, + trigger != 0); } else { /* * GPIO wakeup request can only be generated on edge -- cgit v0.10.2 From 85ec7b970553369e0c956fab1d7a6022f2a99369 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Mon, 6 Jun 2011 13:38:18 -0700 Subject: GPIO: OMAP: add locking around calls to _set_gpio_triggering _set_gpio_triggering uses read-modify-write on bank registers, lock bank->lock around all calls to it to prevent register corruption if two cpus access gpios in the same bank at the same time. Signed-off-by: Colin Cross Signed-off-by: Kevin Hilman diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 5ad827a..01f74a8 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1126,8 +1126,11 @@ static void gpio_irq_shutdown(struct irq_data *d) { unsigned int gpio = d->irq - IH_GPIO_BASE; struct gpio_bank *bank = irq_data_get_irq_chip_data(d); + unsigned long flags; + spin_lock_irqsave(&bank->lock, flags); _reset_gpio(bank, gpio); + spin_unlock_irqrestore(&bank->lock, flags); } static void gpio_ack_irq(struct irq_data *d) @@ -1142,9 +1145,12 @@ static void gpio_mask_irq(struct irq_data *d) { unsigned int gpio = d->irq - IH_GPIO_BASE; struct gpio_bank *bank = irq_data_get_irq_chip_data(d); + unsigned long flags; + spin_lock_irqsave(&bank->lock, flags); _set_gpio_irqenable(bank, gpio, 0); _set_gpio_triggering(bank, get_gpio_index(gpio), IRQ_TYPE_NONE); + spin_unlock_irqrestore(&bank->lock, flags); } static void gpio_unmask_irq(struct irq_data *d) @@ -1153,7 +1159,9 @@ static void gpio_unmask_irq(struct irq_data *d) struct gpio_bank *bank = irq_data_get_irq_chip_data(d); unsigned int irq_mask = 1 << get_gpio_index(gpio); u32 trigger = irqd_get_trigger_type(d); + unsigned long flags; + spin_lock_irqsave(&bank->lock, flags); if (trigger) _set_gpio_triggering(bank, get_gpio_index(gpio), trigger); @@ -1165,6 +1173,7 @@ static void gpio_unmask_irq(struct irq_data *d) } _set_gpio_irqenable(bank, gpio, 1); + spin_unlock_irqrestore(&bank->lock, flags); } static struct irq_chip gpio_irq_chip = { -- cgit v0.10.2