diff options
Diffstat (limited to 'fs')
101 files changed, 582 insertions, 1189 deletions
@@ -1027,9 +1027,9 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent) spin_unlock(&info->ring_lock); out: + kunmap_atomic(ring); dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret, (unsigned long)ring->head, (unsigned long)ring->tail); - kunmap_atomic(ring); return ret; } diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index b0f12d7..b785e77 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -34,7 +34,6 @@ #include <linux/sched.h> #include <linux/mount.h> #include <linux/namei.h> -#include <linux/delay.h> #include <asm/current.h> #include <asm/uaccess.h> diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index b2f1903..01443ce 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -61,6 +61,15 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) /* This is an autofs submount, we can't expire it */ if (autofs_type_indirect(sbi->type)) goto done; + + /* + * Otherwise it's an offset mount and we need to check + * if we can umount its mount, if there is one. + */ + if (!d_mountpoint(path.dentry)) { + status = 0; + goto done; + } } /* Update the expiry counter if fs is busy */ @@ -157,7 +166,7 @@ again: parent = p->d_parent; if (!spin_trylock(&parent->d_lock)) { spin_unlock(&p->d_lock); - cpu_chill(); + cpu_relax(); goto relock; } spin_unlock(&p->d_lock); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 5843a47..0c42cdb 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1132,7 +1132,6 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, goto whole; if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE)) goto whole; - return 0; } /* Do not dump I/O mapped devices or special mappings */ diff --git a/fs/block_dev.c b/fs/block_dev.c index 883dc49..172f849 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -551,7 +551,6 @@ struct block_device *bdgrab(struct block_device *bdev) ihold(bdev->bd_inode); return bdev; } -EXPORT_SYMBOL(bdgrab); long nr_blockdev_pages(void) { @@ -995,7 +994,6 @@ int revalidate_disk(struct gendisk *disk) mutex_lock(&bdev->bd_mutex); check_disk_size_change(disk, bdev); - bdev->bd_invalidated = 0; mutex_unlock(&bdev->bd_mutex); bdput(bdev); return ret; @@ -1034,9 +1032,7 @@ void bd_set_size(struct block_device *bdev, loff_t size) { unsigned bsize = bdev_logical_block_size(bdev); - mutex_lock(&bdev->bd_inode->i_mutex); - i_size_write(bdev->bd_inode, size); - mutex_unlock(&bdev->bd_inode->i_mutex); + bdev->bd_inode->i_size = size; while (bsize < PAGE_CACHE_SIZE) { if (size & bsize) break; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index ce1c169..eea5da7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -651,8 +651,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, if (tree_mod_dont_log(fs_info, NULL)) return 0; - __tree_mod_log_free_eb(fs_info, old_root); - ret = tree_mod_alloc(fs_info, flags, &tm); if (ret < 0) goto out; @@ -738,7 +736,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) static noinline void tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, struct extent_buffer *src, unsigned long dst_offset, - unsigned long src_offset, int nr_items, int log_removal) + unsigned long src_offset, int nr_items) { int ret; int i; @@ -752,12 +750,10 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, } for (i = 0; i < nr_items; i++) { - if (log_removal) { - ret = tree_mod_log_insert_key_locked(fs_info, src, - i + src_offset, - MOD_LOG_KEY_REMOVE); - BUG_ON(ret < 0); - } + ret = tree_mod_log_insert_key_locked(fs_info, src, + i + src_offset, + MOD_LOG_KEY_REMOVE); + BUG_ON(ret < 0); ret = tree_mod_log_insert_key_locked(fs_info, dst, i + dst_offset, MOD_LOG_KEY_ADD); @@ -931,6 +927,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, ret = btrfs_dec_ref(trans, root, buf, 1, 1); BUG_ON(ret); /* -ENOMEM */ } + tree_mod_log_free_eb(root->fs_info, buf); clean_tree_block(trans, root, buf); *last_ref = 1; } @@ -1049,7 +1046,6 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_set_node_ptr_generation(parent, parent_slot, trans->transid); btrfs_mark_buffer_dirty(parent); - tree_mod_log_free_eb(root->fs_info, buf); btrfs_free_tree_block(trans, root, buf, parent_start, last_ref); } @@ -1759,6 +1755,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, goto enospc; } + tree_mod_log_free_eb(root->fs_info, root->node); tree_mod_log_set_root_pointer(root, child); rcu_assign_pointer(root->node, child); @@ -3003,7 +3000,7 @@ static int push_node_left(struct btrfs_trans_handle *trans, push_items = min(src_nritems - 8, push_items); tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, - push_items, 1); + push_items); copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(dst_nritems), btrfs_node_key_ptr_offset(0), @@ -3074,7 +3071,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, sizeof(struct btrfs_key_ptr)); tree_mod_log_eb_copy(root->fs_info, dst, src, 0, - src_nritems - push_items, push_items, 1); + src_nritems - push_items, push_items); copy_extent_buffer(dst, src, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(src_nritems - push_items), @@ -3226,18 +3223,12 @@ static noinline int split_node(struct btrfs_trans_handle *trans, int mid; int ret; u32 c_nritems; - int tree_mod_log_removal = 1; c = path->nodes[level]; WARN_ON(btrfs_header_generation(c) != trans->transid); if (c == root->node) { /* trying to split the root, lets make a new one */ ret = insert_new_root(trans, root, path, level + 1); - /* - * removal of root nodes has been logged by - * tree_mod_log_set_root_pointer due to locking - */ - tree_mod_log_removal = 0; if (ret) return ret; } else { @@ -3275,8 +3266,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, (unsigned long)btrfs_header_chunk_tree_uuid(split), BTRFS_UUID_SIZE); - tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid, - tree_mod_log_removal); + tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); copy_extent_buffer(split, c, btrfs_node_key_ptr_offset(0), btrfs_node_key_ptr_offset(mid), diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 105b265..ae94117 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -36,19 +36,16 @@ * compare two delayed tree backrefs with same bytenr and type */ static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2, - struct btrfs_delayed_tree_ref *ref1, int type) + struct btrfs_delayed_tree_ref *ref1) { - if (type == BTRFS_TREE_BLOCK_REF_KEY) { - if (ref1->root < ref2->root) - return -1; - if (ref1->root > ref2->root) - return 1; - } else { - if (ref1->parent < ref2->parent) - return -1; - if (ref1->parent > ref2->parent) - return 1; - } + if (ref1->root < ref2->root) + return -1; + if (ref1->root > ref2->root) + return 1; + if (ref1->parent < ref2->parent) + return -1; + if (ref1->parent > ref2->parent) + return 1; return 0; } @@ -112,8 +109,7 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2, if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), - btrfs_delayed_node_to_tree_ref(ref1), - ref1->type); + btrfs_delayed_node_to_tree_ref(ref1)); } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY || ref1->type == BTRFS_SHARED_DATA_REF_KEY) { return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2), diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d170412..5a3327b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4308,7 +4308,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) spin_lock(&sinfo->lock); spin_lock(&block_rsv->lock); - block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024); + block_rsv->size = num_bytes; num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + sinfo->bytes_reserved + sinfo->bytes_readonly + @@ -4601,49 +4601,14 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) * If the inodes csum_bytes is the same as the original * csum_bytes then we know we haven't raced with any free()ers * so we can just reduce our inodes csum bytes and carry on. + * Otherwise we have to do the normal free thing to account for + * the case that the free side didn't free up its reserve + * because of this outstanding reservation. */ - if (BTRFS_I(inode)->csum_bytes == csum_bytes) { + if (BTRFS_I(inode)->csum_bytes == csum_bytes) calc_csum_metadata_size(inode, num_bytes, 0); - } else { - u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes; - u64 bytes; - - /* - * This is tricky, but first we need to figure out how much we - * free'd from any free-ers that occured during this - * reservation, so we reset ->csum_bytes to the csum_bytes - * before we dropped our lock, and then call the free for the - * number of bytes that were freed while we were trying our - * reservation. - */ - bytes = csum_bytes - BTRFS_I(inode)->csum_bytes; - BTRFS_I(inode)->csum_bytes = csum_bytes; - to_free = calc_csum_metadata_size(inode, bytes, 0); - - - /* - * Now we need to see how much we would have freed had we not - * been making this reservation and our ->csum_bytes were not - * artificially inflated. - */ - BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes; - bytes = csum_bytes - orig_csum_bytes; - bytes = calc_csum_metadata_size(inode, bytes, 0); - - /* - * Now reset ->csum_bytes to what it should be. If bytes is - * more than to_free then we would have free'd more space had we - * not had an artificially high ->csum_bytes, so we need to free - * the remainder. If bytes is the same or less then we don't - * need to do anything, the other free-ers did the correct - * thing. - */ - BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes; - if (bytes > to_free) - to_free = bytes - to_free; - else - to_free = 0; - } + else + to_free = calc_csum_metadata_size(inode, num_bytes, 0); spin_unlock(&BTRFS_I(inode)->lock); if (dropped) to_free += btrfs_calc_trans_metadata_size(root, dropped); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 125397e..1b319df 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1258,39 +1258,6 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end) GFP_NOFS); } -int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(inode->i_mapping, index); - BUG_ON(!page); /* Pages should be in the extent_io_tree */ - clear_page_dirty_for_io(page); - page_cache_release(page); - index++; - } - return 0; -} - -int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(inode->i_mapping, index); - BUG_ON(!page); /* Pages should be in the extent_io_tree */ - account_page_redirty(page); - __set_page_dirty_nobuffers(page); - page_cache_release(page); - index++; - } - return 0; -} - /* * helper function to set both pages and extents in the tree writeback */ diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 715b474..2eacfab 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -329,8 +329,6 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long *map_len); int extent_range_uptodate(struct extent_io_tree *tree, u64 start, u64 end); -int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); -int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); int extent_clear_unlock_delalloc(struct inode *inode, struct extent_io_tree *tree, u64 start, u64 end, struct page *locked_page, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4b5398c..cc93b23 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -265,7 +265,6 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, return 1; } - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); btrfs_delalloc_release_metadata(inode, end + 1 - start); btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); return 0; @@ -352,7 +351,6 @@ static noinline int compress_file_range(struct inode *inode, int i; int will_compress; int compress_type = root->fs_info->compress_type; - int redirty = 0; /* if this is a small write inside eof, kick off a defrag */ if ((end - start + 1) < 16 * 1024 && @@ -415,17 +413,6 @@ again: if (BTRFS_I(inode)->force_compress) compress_type = BTRFS_I(inode)->force_compress; - /* - * we need to call clear_page_dirty_for_io on each - * page in the range. Otherwise applications with the file - * mmap'd can wander in and change the page contents while - * we are compressing them. - * - * If the compression fails for any reason, we set the pages - * dirty again later on. - */ - extent_range_clear_dirty_for_io(inode, start, end); - redirty = 1; ret = btrfs_compress_pages(compress_type, inode->i_mapping, start, total_compressed, pages, @@ -567,8 +554,6 @@ cleanup_and_bail_uncompressed: __set_page_dirty_nobuffers(locked_page); /* unlocked later on in the async handlers */ } - if (redirty) - extent_range_redirty_for_io(inode, start, end); add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0, BTRFS_COMPRESS_NONE); *num_added += 1; @@ -2484,7 +2469,6 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) */ set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &BTRFS_I(inode)->runtime_flags); - atomic_inc(&root->orphan_inodes); /* if we have links, this was a truncate, lets do that */ if (inode->i_nlink) { @@ -2507,8 +2491,6 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) goto out; ret = btrfs_truncate(inode); - if (ret) - btrfs_orphan_del(NULL, inode); } else { nr_unlink++; } @@ -5794,9 +5776,7 @@ out: * block must be cow'd */ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, - struct inode *inode, u64 offset, u64 *len, - u64 *orig_start, u64 *orig_block_len, - u64 *ram_bytes) + struct inode *inode, u64 offset, u64 len) { struct btrfs_path *path; int ret; @@ -5853,12 +5833,8 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); backref_offset = btrfs_file_extent_offset(leaf, fi); - *orig_start = key.offset - backref_offset; - *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi); - *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); - extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); - if (extent_end < offset + *len) { + if (extent_end < offset + len) { /* extent doesn't include our full range, must cow */ goto out; } @@ -5882,14 +5858,13 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, */ disk_bytenr += backref_offset; disk_bytenr += offset - key.offset; - num_bytes = min(offset + *len, extent_end) - offset; + num_bytes = min(offset + len, extent_end) - offset; if (csum_exist_in_range(root, disk_bytenr, num_bytes)) goto out; /* * all of the above have passed, it is safe to overwrite this extent * without cow */ - *len = num_bytes; ret = 1; out: btrfs_free_path(path); @@ -6099,7 +6074,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, em->block_start != EXTENT_MAP_HOLE)) { int type; int ret; - u64 block_start, orig_start, orig_block_len, ram_bytes; + u64 block_start; if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) type = BTRFS_ORDERED_PREALLOC; @@ -6117,8 +6092,10 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, if (IS_ERR(trans)) goto must_cow; - if (can_nocow_odirect(trans, inode, start, &len, &orig_start, - &orig_block_len, &ram_bytes) == 1) { + if (can_nocow_odirect(trans, inode, start, len) == 1) { + u64 orig_start = em->orig_start; + u64 orig_block_len = em->orig_block_len; + if (type == BTRFS_ORDERED_PREALLOC) { free_extent_map(em); em = create_pinned_em(inode, start, len, diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 48761b6..67783e0 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -541,6 +541,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) eb = path->nodes[0]; ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); item_size = btrfs_item_size_nr(eb, path->slots[0]); + btrfs_release_path(path); if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { do { @@ -556,9 +557,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) ret < 0 ? -1 : ref_level, ret < 0 ? -1 : ref_root); } while (ret != 1); - btrfs_release_path(path); } else { - btrfs_release_path(path); swarn.path = path; swarn.dev = dev; iterate_extent_inodes(fs_info, found_key.objectid, diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8a00e2f..9027bb1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -318,7 +318,6 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, unsigned long src_ptr; unsigned long dst_ptr; int overwrite_root = 0; - bool inode_item = key->type == BTRFS_INODE_ITEM_KEY; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) overwrite_root = 1; @@ -328,9 +327,6 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, /* look for the key in the destination tree */ ret = btrfs_search_slot(NULL, root, key, path, 0, 0); - if (ret < 0) - return ret; - if (ret == 0) { char *src_copy; char *dst_copy; @@ -372,30 +368,6 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, return 0; } - /* - * We need to load the old nbytes into the inode so when we - * replay the extents we've logged we get the right nbytes. - */ - if (inode_item) { - struct btrfs_inode_item *item; - u64 nbytes; - - item = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_inode_item); - nbytes = btrfs_inode_nbytes(path->nodes[0], item); - item = btrfs_item_ptr(eb, slot, - struct btrfs_inode_item); - btrfs_set_inode_nbytes(eb, item, nbytes); - } - } else if (inode_item) { - struct btrfs_inode_item *item; - - /* - * New inode, set nbytes to 0 so that the nbytes comes out - * properly when we replay the extents. - */ - item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); - btrfs_set_inode_nbytes(eb, item, 0); } insert: btrfs_release_path(path); @@ -516,7 +488,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, u64 mask = root->sectorsize - 1; u64 extent_end; u64 start = key->offset; - u64 nbytes = 0; + u64 saved_nbytes; struct btrfs_file_extent_item *item; struct inode *inode = NULL; unsigned long size; @@ -526,19 +498,10 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, found_type = btrfs_file_extent_type(eb, item); if (found_type == BTRFS_FILE_EXTENT_REG || - found_type == BTRFS_FILE_EXTENT_PREALLOC) { - nbytes = btrfs_file_extent_num_bytes(eb, item); - extent_end = start + nbytes; - - /* - * We don't add to the inodes nbytes if we are prealloc or a - * hole. - */ - if (btrfs_file_extent_disk_bytenr(eb, item) == 0) - nbytes = 0; - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + found_type == BTRFS_FILE_EXTENT_PREALLOC) + extent_end = start + btrfs_file_extent_num_bytes(eb, item); + else if (found_type == BTRFS_FILE_EXTENT_INLINE) { size = btrfs_file_extent_inline_len(eb, item); - nbytes = btrfs_file_extent_ram_bytes(eb, item); extent_end = (start + size + mask) & ~mask; } else { ret = 0; @@ -587,6 +550,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, } btrfs_release_path(path); + saved_nbytes = inode_get_bytes(inode); /* drop any overlapping extents */ ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1); BUG_ON(ret); @@ -673,7 +637,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); } - inode_add_bytes(inode, nbytes); + inode_set_bytes(inode, saved_nbytes); ret = btrfs_update_inode(trans, root, inode); out: if (inode) @@ -1420,10 +1384,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, btrfs_release_path(path); if (ret == 0) { - if (!inode->i_nlink) - set_nlink(inode, 1); - else - btrfs_inc_nlink(inode); + btrfs_inc_nlink(inode); ret = btrfs_update_inode(trans, root, inode); } else if (ret == -EEXIST) { ret = 0; @@ -3320,7 +3281,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans, int ret; bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; -insert: INIT_LIST_HEAD(&ordered_sums); btrfs_init_map_token(&token); key.objectid = btrfs_ino(inode); @@ -3336,23 +3296,6 @@ insert: leaf = path->nodes[0]; fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - - /* - * If we are overwriting an inline extent with a real one then we need - * to just delete the inline extent as it may not be large enough to - * have the entire file_extent_item. - */ - if (ret && btrfs_token_file_extent_type(leaf, fi, &token) == - BTRFS_FILE_EXTENT_INLINE) { - ret = btrfs_del_item(trans, log, path); - btrfs_release_path(path); - if (ret) { - path->really_keep_locks = 0; - return ret; - } - goto insert; - } - btrfs_set_token_file_extent_generation(leaf, fi, em->generation, &token); if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1fd234a..5cbb7f4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -647,7 +647,6 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) new_device->writeable = 0; new_device->in_fs_metadata = 0; new_device->can_discard = 0; - spin_lock_init(&new_device->io_lock); list_replace_rcu(&device->dev_list, &new_device->dev_list); call_rcu(&device->rcu, free_device); @@ -681,12 +680,6 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) __btrfs_close_devices(fs_devices); free_fs_devices(fs_devices); } - /* - * Wait for rcu kworkers under __btrfs_close_devices - * to finish all blkdev_puts so device is really - * free when umount is done. - */ - rcu_barrier(); return ret; } diff --git a/fs/buffer.c b/fs/buffer.c index 8863f45..7a75c3e 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -280,7 +280,8 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) * decide that the page is now completely done. */ first = page_buffers(page); - flags = bh_uptodate_lock_irqsave(first); + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &first->b_state); clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; @@ -293,7 +294,8 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) } tmp = tmp->b_this_page; } while (tmp != bh); - bh_uptodate_unlock_irqrestore(first, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); /* * If none of the buffers had errors and they are all @@ -305,7 +307,9 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) return; still_busy: - bh_uptodate_unlock_irqrestore(first, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); + return; } /* @@ -339,7 +343,8 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) } first = page_buffers(page); - flags = bh_uptodate_lock_irqsave(first); + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &first->b_state); clear_buffer_async_write(bh); unlock_buffer(bh); @@ -351,12 +356,15 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) } tmp = tmp->b_this_page; } - bh_uptodate_unlock_irqrestore(first, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); end_page_writeback(page); return; still_busy: - bh_uptodate_unlock_irqrestore(first, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); + return; } EXPORT_SYMBOL(end_buffer_async_write); @@ -3248,7 +3256,6 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); if (ret) { INIT_LIST_HEAD(&ret->b_assoc_buffers); - buffer_head_init_locks(ret); preempt_disable(); __this_cpu_inc(bh_accounting.nr); recalc_bh_state(); diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index 1d36db1..cfd1ce3 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -614,10 +614,53 @@ decode_negTokenInit(unsigned char *security_blob, int length, } } - /* - * We currently ignore anything at the end of the SPNEGO blob after - * the mechTypes have been parsed, since none of that info is - * used at the moment. - */ + /* mechlistMIC */ + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + /* Check if we have reached the end of the blob, but with + no mechListMic (e.g. NTLMSSP instead of KRB5) */ + if (ctx.error == ASN1_ERR_DEC_EMPTY) + goto decode_negtoken_exit; + cFYI(1, "Error decoding last part negTokenInit exit3"); + return 0; + } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { + /* tag = 3 indicating mechListMIC */ + cFYI(1, "Exit 4 cls = %d con = %d tag = %d end = %p (%d)", + cls, con, tag, end, *end); + return 0; + } + + /* sequence */ + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, "Error decoding last part negTokenInit exit5"); + return 0; + } else if ((cls != ASN1_UNI) || (con != ASN1_CON) + || (tag != ASN1_SEQ)) { + cFYI(1, "cls = %d con = %d tag = %d end = %p (%d)", + cls, con, tag, end, *end); + } + + /* sequence of */ + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, "Error decoding last part negTokenInit exit 7"); + return 0; + } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { + cFYI(1, "Exit 8 cls = %d con = %d tag = %d end = %p (%d)", + cls, con, tag, end, *end); + return 0; + } + + /* general string */ + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, "Error decoding last part negTokenInit exit9"); + return 0; + } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) + || (tag != ASN1_GENSTR)) { + cFYI(1, "Exit10 cls = %d con = %d tag = %d end = %p (%d)", + cls, con, tag, end, *end); + return 0; + } + cFYI(1, "Need to call asn1_octets_decode() function for %s", + ctx.pointer); /* is this UTF-8 or ASCII? */ +decode_negtoken_exit: return 1; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index b9db388..de7f9168 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -91,30 +91,6 @@ struct workqueue_struct *cifsiod_wq; __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE]; #endif -/* - * Bumps refcount for cifs super block. - * Note that it should be only called if a referece to VFS super block is - * already held, e.g. in open-type syscalls context. Otherwise it can race with - * atomic_dec_and_test in deactivate_locked_super. - */ -void -cifs_sb_active(struct super_block *sb) -{ - struct cifs_sb_info *server = CIFS_SB(sb); - - if (atomic_inc_return(&server->active) == 1) - atomic_inc(&sb->s_active); -} - -void -cifs_sb_deactive(struct super_block *sb) -{ - struct cifs_sb_info *server = CIFS_SB(sb); - - if (atomic_dec_and_test(&server->active)) - deactivate_super(sb); -} - static int cifs_read_super(struct super_block *sb) { @@ -582,11 +558,6 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) dentry = ERR_PTR(-ENOENT); break; } - if (!S_ISDIR(dir->i_mode)) { - dput(dentry); - dentry = ERR_PTR(-ENOTDIR); - break; - } /* skip separators */ while (*s == sep) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 0e32c34..7163419 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -41,10 +41,6 @@ extern struct file_system_type cifs_fs_type; extern const struct address_space_operations cifs_addr_ops; extern const struct address_space_operations cifs_addr_ops_smallbuf; -/* Functions related to super block operations */ -extern void cifs_sb_active(struct super_block *sb); -extern void cifs_sb_deactive(struct super_block *sb); - /* Functions related to inodes */ extern const struct inode_operations cifs_dir_inode_ops; extern struct inode *cifs_root_iget(struct super_block *); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f7199b9..12b3da3 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1546,24 +1546,14 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, } break; case Opt_blank_pass: + vol->password = NULL; + break; + case Opt_pass: /* passwords have to be handled differently * to allow the character used for deliminator * to be passed within them */ - /* - * Check if this is a case where the password - * starts with a delimiter - */ - tmp_end = strchr(data, '='); - tmp_end++; - if (!(tmp_end < end && tmp_end[1] == delim)) { - /* No it is not. Set the password to NULL */ - vol->password = NULL; - break; - } - /* Yes it is. Drop down to Opt_pass below.*/ - case Opt_pass: /* Obtain the value string */ value = strchr(data, '='); value++; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 1d93ee8..8ea6ca5 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -294,8 +294,6 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, INIT_WORK(&cfile->oplock_break, cifs_oplock_break); mutex_init(&cfile->fh_mutex); - cifs_sb_active(inode->i_sb); - /* * If the server returned a read oplock and we have mandatory brlocks, * set oplock level to None. @@ -345,8 +343,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); struct TCP_Server_Info *server = tcon->ses->server; struct cifsInodeInfo *cifsi = CIFS_I(inode); - struct super_block *sb = inode->i_sb; - struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsLockInfo *li, *tmp; struct cifs_fid fid; struct cifs_pending_open open; @@ -411,7 +408,6 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) cifs_put_tlink(cifs_file->tlink); dput(cifs_file->dentry); - cifs_sb_deactive(sb); kfree(cifs_file); } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index bceffe7..c9c7aa7 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -744,5 +744,4 @@ struct smb_version_values smb30_values = { .cap_unix = 0, .cap_nt_find = SMB2_NT_FIND, .cap_large_files = SMB2_LARGE_FILES, - .oplock_read = SMB2_OPLOCK_LEVEL_II, }; diff --git a/fs/compat.c b/fs/compat.c index a06dcbc..015e1e1 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -558,10 +558,6 @@ ssize_t compat_rw_copy_check_uvector(int type, } *ret_pointer = iov; - ret = -EFAULT; - if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) - goto out; - /* * Single unix specification: * We should -EINVAL if an element length is not >= 0 and fitting an @@ -1084,12 +1080,17 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, if (!file->f_op) goto out; - ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, + ret = -EFAULT; + if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) + goto out; + + tot_len = compat_rw_copy_check_uvector(type, uvector, nr_segs, UIO_FASTIOV, iovstack, &iov); - if (ret <= 0) + if (tot_len == 0) { + ret = 0; goto out; + } - tot_len = ret; ret = rw_verify_area(type, file, pos, tot_len); if (ret < 0) goto out; diff --git a/fs/dcache.c b/fs/dcache.c index 67f4681..19153a0 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -37,7 +37,6 @@ #include <linux/rculist_bl.h> #include <linux/prefetch.h> #include <linux/ratelimit.h> -#include <linux/delay.h> #include "internal.h" #include "mount.h" @@ -471,7 +470,7 @@ static inline struct dentry *dentry_kill(struct dentry *dentry, int ref) if (inode && !spin_trylock(&inode->i_lock)) { relock: spin_unlock(&dentry->d_lock); - cpu_chill(); + cpu_relax(); return dentry; /* try again with same dentry */ } if (IS_ROOT(dentry)) @@ -853,7 +852,7 @@ relock: if (!spin_trylock(&dentry->d_lock)) { spin_unlock(&dcache_lru_lock); - cpu_chill(); + cpu_relax(); goto relock; } @@ -1233,10 +1232,8 @@ void shrink_dcache_parent(struct dentry * parent) LIST_HEAD(dispose); int found; - while ((found = select_parent(parent, &dispose)) != 0) { + while ((found = select_parent(parent, &dispose)) != 0) shrink_dentry_list(&dispose); - cond_resched(); - } } EXPORT_SYMBOL(shrink_dcache_parent); @@ -2087,7 +2084,7 @@ again: if (dentry->d_count == 1) { if (!spin_trylock(&inode->i_lock)) { spin_unlock(&dentry->d_lock); - cpu_chill(); + cpu_relax(); goto again; } dentry->d_flags &= ~DCACHE_CANT_MOUNT; @@ -2555,6 +2552,7 @@ static int prepend_path(const struct path *path, bool slash = false; int error = 0; + br_read_lock(&vfsmount_lock); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; @@ -2584,6 +2582,8 @@ static int prepend_path(const struct path *path, if (!error && !slash) error = prepend(buffer, buflen, "/", 1); +out: + br_read_unlock(&vfsmount_lock); return error; global_root: @@ -2600,7 +2600,7 @@ global_root: error = prepend(buffer, buflen, "/", 1); if (!error) error = is_mounted(vfsmnt) ? 1 : 2; - return error; + goto out; } /** @@ -2627,11 +2627,9 @@ char *__d_path(const struct path *path, int error; prepend(&res, &buflen, "\0", 1); - br_read_lock(&vfsmount_lock); write_seqlock(&rename_lock); error = prepend_path(path, root, &res, &buflen); write_sequnlock(&rename_lock); - br_read_unlock(&vfsmount_lock); if (error < 0) return ERR_PTR(error); @@ -2648,11 +2646,9 @@ char *d_absolute_path(const struct path *path, int error; prepend(&res, &buflen, "\0", 1); - br_read_lock(&vfsmount_lock); write_seqlock(&rename_lock); error = prepend_path(path, &root, &res, &buflen); write_sequnlock(&rename_lock); - br_read_unlock(&vfsmount_lock); if (error > 1) error = -EINVAL; @@ -2716,13 +2712,11 @@ char *d_path(const struct path *path, char *buf, int buflen) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); get_fs_root(current->fs, &root); - br_read_lock(&vfsmount_lock); write_seqlock(&rename_lock); error = path_with_deleted(path, &root, &res, &buflen); - write_sequnlock(&rename_lock); - br_read_unlock(&vfsmount_lock); if (error < 0) res = ERR_PTR(error); + write_sequnlock(&rename_lock); path_put(&root); return res; } @@ -2877,7 +2871,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) get_fs_root_and_pwd(current->fs, &root, &pwd); error = -ENOENT; - br_read_lock(&vfsmount_lock); write_seqlock(&rename_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; @@ -2887,7 +2880,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &root, &cwd, &buflen); write_sequnlock(&rename_lock); - br_read_unlock(&vfsmount_lock); if (error < 0) goto out; @@ -2908,7 +2900,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) } } else { write_sequnlock(&rename_lock); - br_read_unlock(&vfsmount_lock); } out: diff --git a/fs/direct-io.c b/fs/direct-io.c index f853263..cf5b44b 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -261,9 +261,9 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is dio->end_io(dio->iocb, offset, transferred, dio->private, ret, is_async); } else { - inode_dio_done(dio->inode); if (is_async) aio_complete(dio->iocb, ret, 0); + inode_dio_done(dio->inode); } return ret; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 343e14a..9fec183 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -497,12 +497,12 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests) */ static void ep_poll_safewake(wait_queue_head_t *wq) { - int this_cpu = get_cpu_light(); + int this_cpu = get_cpu(); ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS, ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu); - put_cpu_light(); + put_cpu(); } static void ep_remove_wait_queue(struct eppoll_entry *pwq) @@ -613,7 +613,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * when the old and new regions overlap clear from new_end. */ free_pgd_range(&tlb, new_end, old_end, new_end, - vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); + vma->vm_next ? vma->vm_next->vm_start : 0); } else { /* * otherwise, clean from old_start; this is done to not touch @@ -622,7 +622,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * for the others its just a little faster. */ free_pgd_range(&tlb, old_start, old_end, new_end, - vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); + vma->vm_next ? vma->vm_next->vm_start : 0); } tlb_finish_mmu(&tlb, new_end, old_end); @@ -827,12 +827,10 @@ static int exec_mmap(struct mm_struct *mm) } } task_lock(tsk); - preempt_disable_rt(); active_mm = tsk->active_mm; tsk->mm = mm; tsk->active_mm = mm; activate_mm(active_mm, mm); - preempt_enable_rt(); task_unlock(tsk); arch_pick_mmap_layout(mm); if (old_mm) { @@ -900,13 +898,11 @@ static int de_thread(struct task_struct *tsk) sig->notify_count = -1; /* for exit_notify() */ for (;;) { - threadgroup_change_begin(tsk); write_lock_irq(&tasklist_lock); if (likely(leader->exit_state)) break; __set_current_state(TASK_KILLABLE); write_unlock_irq(&tasklist_lock); - threadgroup_change_end(tsk); schedule(); if (unlikely(__fatal_signal_pending(tsk))) goto killed; @@ -964,7 +960,6 @@ static int de_thread(struct task_struct *tsk) if (unlikely(leader->ptrace)) __wake_up_parent(leader, leader->parent); write_unlock_irq(&tasklist_lock); - threadgroup_change_end(tsk); release_task(leader); } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 0a7f2d0..6e50223 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -353,7 +353,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb) return bdev; fail: - ext3_msg(sb, KERN_ERR, "error: failed to open journal device %s: %ld", + ext3_msg(sb, "error: failed to open journal device %s: %ld", __bdevname(dev, b), PTR_ERR(bdev)); return NULL; @@ -887,7 +887,7 @@ static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb) /*todo: use simple_strtoll with >32bit ext3 */ sb_block = simple_strtoul(options, &options, 0); if (*options && *options != ',') { - ext3_msg(sb, KERN_ERR, "error: invalid sb specification: %s", + ext3_msg(sb, "error: invalid sb specification: %s", (char *) *data); return 1; } diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index efea5d5..9873587 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -71,5 +71,4 @@ config EXT4_DEBUG Enables run-time debugging support for the ext4 filesystem. If you select Y here, then you will be able to turn on debugging - with a command such as: - echo 1 > /sys/module/ext4/parameters/mballoc_debug + with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug" diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 92e68b3..cf18217 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -358,7 +358,7 @@ void ext4_validate_block_bitmap(struct super_block *sb, } /** - * ext4_read_block_bitmap_nowait() + * ext4_read_block_bitmap() * @sb: super block * @block_group: given block group * @@ -457,8 +457,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) struct buffer_head *bh; bh = ext4_read_block_bitmap_nowait(sb, block_group); - if (!bh) - return NULL; if (ext4_wait_block_bitmap(sb, block_group, bh)) { put_bh(bh); return NULL; @@ -484,16 +482,11 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi, free_clusters = percpu_counter_read_positive(fcc); dirty_clusters = percpu_counter_read_positive(dcc); - - /* - * r_blocks_count should always be multiple of the cluster ratio so - * we are safe to do a plane bit shift only. - */ - root_clusters = ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits; + root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es)); if (free_clusters - (nclusters + root_clusters + dirty_clusters) < EXT4_FREECLUSTERS_WATERMARK) { - free_clusters = percpu_counter_sum_positive(fcc); + free_clusters = EXT4_C2B(sbi, percpu_counter_sum_positive(fcc)); dirty_clusters = percpu_counter_sum_positive(dcc); } /* Check whether we have space after accounting for current @@ -635,7 +628,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) brelse(bitmap_bh); printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu" ", computed = %llu, %llu\n", - EXT4_NUM_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)), + EXT4_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)), desc_count, bitmap_count); return bitmap_count; #else diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bbcd6a0..8462eb3 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -338,9 +338,9 @@ struct ext4_group_desc */ struct flex_groups { - atomic64_t free_clusters; - atomic_t free_inodes; - atomic_t used_dirs; + atomic_t free_inodes; + atomic_t free_clusters; + atomic_t used_dirs; }; #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index dbd9ae1..7177f9b 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -170,20 +170,16 @@ static inline void ext4_journal_callback_add(handle_t *handle, * ext4_journal_callback_del: delete a registered callback * @handle: active journal transaction handle on which callback was registered * @jce: registered journal callback entry to unregister - * Return true if object was sucessfully removed */ -static inline bool ext4_journal_callback_try_del(handle_t *handle, +static inline void ext4_journal_callback_del(handle_t *handle, struct ext4_journal_cb_entry *jce) { - bool deleted; struct ext4_sb_info *sbi = EXT4_SB(handle->h_transaction->t_journal->j_private); spin_lock(&sbi->s_md_lock); - deleted = !list_empty(&jce->jce_list); list_del_init(&jce->jce_list); spin_unlock(&sbi->s_md_lock); - return deleted; } int diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4d315a0..5ae1674 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -725,7 +725,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, struct ext4_extent_header *eh; struct buffer_head *bh; short int depth, i, ppos = 0, alloc = 0; - int ret; eh = ext_inode_hdr(inode); depth = ext_depth(inode); @@ -753,15 +752,12 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, path[ppos].p_ext = NULL; bh = sb_getblk(inode->i_sb, path[ppos].p_block); - if (unlikely(!bh)) { - ret = -ENOMEM; + if (unlikely(!bh)) goto err; - } if (!bh_uptodate_or_lock(bh)) { trace_ext4_ext_load_extent(inode, block, path[ppos].p_block); - ret = bh_submit_read(bh); - if (ret < 0) { + if (bh_submit_read(bh) < 0) { put_bh(bh); goto err; } @@ -772,15 +768,13 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, put_bh(bh); EXT4_ERROR_INODE(inode, "ppos %d > depth %d", ppos, depth); - ret = -EIO; goto err; } path[ppos].p_bh = bh; path[ppos].p_hdr = eh; i--; - ret = ext4_ext_check_block(inode, eh, i, bh); - if (ret < 0) + if (ext4_ext_check_block(inode, eh, i, bh)) goto err; } @@ -802,7 +796,7 @@ err: ext4_ext_drop_refs(path); if (alloc) kfree(path); - return ERR_PTR(ret); + return ERR_PTR(-EIO); } /* @@ -957,7 +951,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, } bh = sb_getblk(inode->i_sb, newblock); if (!bh) { - err = -ENOMEM; + err = -EIO; goto cleanup; } lock_buffer(bh); @@ -1030,7 +1024,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, newblock = ablocks[--a]; bh = sb_getblk(inode->i_sb, newblock); if (!bh) { - err = -ENOMEM; + err = -EIO; goto cleanup; } lock_buffer(bh); @@ -1142,8 +1136,11 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, return err; bh = sb_getblk(inode->i_sb, newblock); - if (!bh) - return -ENOMEM; + if (!bh) { + err = -EIO; + ext4_std_error(inode->i_sb, err); + return err; + } lock_buffer(bh); err = ext4_journal_get_create_access(handle, bh); @@ -3089,7 +3086,6 @@ static int ext4_split_extent(handle_t *handle, int err = 0; int uninitialized; int split_flag1, flags1; - int allocated = map->m_len; depth = ext_depth(inode); ex = path[depth].p_ext; @@ -3109,8 +3105,6 @@ static int ext4_split_extent(handle_t *handle, map->m_lblk + map->m_len, split_flag1, flags1); if (err) goto out; - } else { - allocated = ee_len - (map->m_lblk - ee_block); } ext4_ext_drop_refs(path); @@ -3133,7 +3127,7 @@ static int ext4_split_extent(handle_t *handle, ext4_ext_show_leaf(inode, path); out: - return err ? err : allocated; + return err ? err : map->m_len; } /* @@ -3278,7 +3272,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, if (EXT4_EXT_MAY_ZEROOUT & split_flag) max_zeroout = sbi->s_extent_max_zeroout_kb >> - (inode->i_sb->s_blocksize_bits - 10); + inode->i_sb->s_blocksize_bits; /* If extent is less than s_max_zeroout_kb, zeroout directly */ if (max_zeroout && (ee_len <= max_zeroout)) { @@ -3723,7 +3717,6 @@ out: allocated - map->m_len); allocated = map->m_len; } - map->m_len = allocated; /* * If we have done fallocate with the offset that is already diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index e0ba8a4..3278e64 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -166,7 +166,8 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (journal->j_flags & JBD2_BARRIER && !jbd2_trans_will_send_data_barrier(journal, commit_tid)) needs_barrier = true; - ret = jbd2_complete_transaction(journal, commit_tid); + jbd2_log_start_commit(journal, commit_tid); + ret = jbd2_log_wait_commit(journal, commit_tid); if (needs_barrier) { err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); if (!ret) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index ec2909e..3f32c80 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -324,8 +324,8 @@ error_return: } struct orlov_stats { - __u64 free_clusters; __u32 free_inodes; + __u32 free_clusters; __u32 used_dirs; }; @@ -342,7 +342,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, if (flex_size > 1) { stats->free_inodes = atomic_read(&flex_group[g].free_inodes); - stats->free_clusters = atomic64_read(&flex_group[g].free_clusters); + stats->free_clusters = atomic_read(&flex_group[g].free_clusters); stats->used_dirs = atomic_read(&flex_group[g].used_dirs); return; } diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 8d83d1e..20862f9 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -146,7 +146,6 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, struct super_block *sb = inode->i_sb; Indirect *p = chain; struct buffer_head *bh; - int ret = -EIO; *err = 0; /* i_data is not going away, no lock needed */ @@ -155,10 +154,8 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, goto no_block; while (--depth) { bh = sb_getblk(sb, le32_to_cpu(p->key)); - if (unlikely(!bh)) { - ret = -ENOMEM; + if (unlikely(!bh)) goto failure; - } if (!bh_uptodate_or_lock(bh)) { if (bh_submit_read(bh) < 0) { @@ -180,7 +177,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, return NULL; failure: - *err = ret; + *err = -EIO; no_block: return p; } @@ -474,7 +471,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, */ bh = sb_getblk(inode->i_sb, new_blocks[n-1]); if (unlikely(!bh)) { - err = -ENOMEM; + err = -EIO; goto failed; } diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 93a3408..387c47c 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1188,7 +1188,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle, data_bh = sb_getblk(inode->i_sb, map.m_pblk); if (!data_bh) { - error = -ENOMEM; + error = -EIO; goto out_restore; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c0fbd96..cbfe13b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -55,21 +55,21 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, __u16 csum_hi = 0; __u32 csum; - csum_lo = le16_to_cpu(raw->i_checksum_lo); + csum_lo = raw->i_checksum_lo; raw->i_checksum_lo = 0; if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) { - csum_hi = le16_to_cpu(raw->i_checksum_hi); + csum_hi = raw->i_checksum_hi; raw->i_checksum_hi = 0; } csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, EXT4_INODE_SIZE(inode->i_sb)); - raw->i_checksum_lo = cpu_to_le16(csum_lo); + raw->i_checksum_lo = csum_lo; if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) - raw->i_checksum_hi = cpu_to_le16(csum_hi); + raw->i_checksum_hi = csum_hi; return csum; } @@ -211,12 +211,12 @@ void ext4_evict_inode(struct inode *inode) * don't use page cache. */ if (ext4_should_journal_data(inode) && - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && - inode->i_ino != EXT4_JOURNAL_INO) { + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; - jbd2_complete_transaction(journal, commit_tid); + jbd2_log_start_commit(journal, commit_tid); + jbd2_log_wait_commit(journal, commit_tid); filemap_write_and_wait(&inode->i_data); } truncate_inode_pages(&inode->i_data, 0); @@ -714,7 +714,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, bh = sb_getblk(inode->i_sb, map.m_pblk); if (!bh) { - *errp = -ENOMEM; + *errp = -EIO; return NULL; } if (map.m_flags & EXT4_MAP_NEW) { @@ -2977,9 +2977,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { ext4_free_io_end(io_end); out: - inode_dio_done(inode); if (is_async) aio_complete(iocb, ret, 0); + inode_dio_done(inode); return; } @@ -3660,8 +3660,11 @@ static int __ext4_get_inode_loc(struct inode *inode, iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); bh = sb_getblk(sb, block); - if (!bh) - return -ENOMEM; + if (!bh) { + EXT4_ERROR_INODE_BLOCK(inode, block, + "unable to read itable block"); + return -EIO; + } if (!buffer_uptodate(bh)) { lock_buffer(bh); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b443e62..1bf6fe7 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2829,8 +2829,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, ac->ac_b_ex.fe_group); - atomic64_sub(ac->ac_b_ex.fe_len, - &sbi->s_flex_groups[flex_group].free_clusters); + atomic_sub(ac->ac_b_ex.fe_len, + &sbi->s_flex_groups[flex_group].free_clusters); } err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -3444,7 +3444,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) win = offs; ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - - EXT4_NUM_B2C(sbi, win); + EXT4_B2C(sbi, win); BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); } @@ -4136,7 +4136,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) /* The max size of hash table is PREALLOC_TB_SIZE */ order = PREALLOC_TB_SIZE - 1; /* Add the prealloc space to lg */ - spin_lock(&lg->lg_prealloc_lock); + rcu_read_lock(); list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], pa_inode_list) { spin_lock(&tmp_pa->pa_lock); @@ -4160,12 +4160,12 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) if (!added) list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list[order]); - spin_unlock(&lg->lg_prealloc_lock); + rcu_read_unlock(); /* Now trim the list to be not more than 8 elements */ if (lg_prealloc_count > 8) { ext4_mb_discard_lg_preallocations(sb, lg, - order, lg_prealloc_count); + order, lg_prealloc_count); return; } return ; @@ -4449,11 +4449,11 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, node = rb_prev(new_node); if (node) { entry = rb_entry(node, struct ext4_free_data, efd_node); - if (can_merge(entry, new_entry) && - ext4_journal_callback_try_del(handle, &entry->efd_jce)) { + if (can_merge(entry, new_entry)) { new_entry->efd_start_cluster = entry->efd_start_cluster; new_entry->efd_count += entry->efd_count; rb_erase(node, &(db->bb_free_root)); + ext4_journal_callback_del(handle, &entry->efd_jce); kmem_cache_free(ext4_free_data_cachep, entry); } } @@ -4461,10 +4461,10 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, node = rb_next(new_node); if (node) { entry = rb_entry(node, struct ext4_free_data, efd_node); - if (can_merge(new_entry, entry) && - ext4_journal_callback_try_del(handle, &entry->efd_jce)) { + if (can_merge(new_entry, entry)) { new_entry->efd_count += entry->efd_count; rb_erase(node, &(db->bb_free_root)); + ext4_journal_callback_del(handle, &entry->efd_jce); kmem_cache_free(ext4_free_data_cachep, entry); } } @@ -4590,7 +4590,7 @@ do_more: EXT4_BLOCKS_PER_GROUP(sb); count -= overflow; } - count_clusters = EXT4_NUM_B2C(sbi, count); + count_clusters = EXT4_B2C(sbi, count); bitmap_bh = ext4_read_block_bitmap(sb, block_group); if (!bitmap_bh) { err = -EIO; @@ -4691,8 +4691,8 @@ do_more: if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic64_add(count_clusters, - &sbi->s_flex_groups[flex_group].free_clusters); + atomic_add(count_clusters, + &sbi->s_flex_groups[flex_group].free_clusters); } ext4_mb_unload_buddy(&e4b); @@ -4832,12 +4832,12 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, ext4_group_desc_csum_set(sb, block_group, desc); ext4_unlock_group(sb, block_group); percpu_counter_add(&sbi->s_freeclusters_counter, - EXT4_NUM_B2C(sbi, blocks_freed)); + EXT4_B2C(sbi, blocks_freed)); if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed), - &sbi->s_flex_groups[flex_group].free_clusters); + atomic_add(EXT4_B2C(sbi, blocks_freed), + &sbi->s_flex_groups[flex_group].free_clusters); } ext4_mb_unload_buddy(&e4b); diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index fe201c6..fe7c63f 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -7,7 +7,7 @@ #include "ext4.h" /* Checksumming functions */ -static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) +static __u32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) { struct ext4_sb_info *sbi = EXT4_SB(sb); int offset = offsetof(struct mmp_struct, mmp_checksum); @@ -80,8 +80,6 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, * is not blocked in the elevator. */ if (!*bh) *bh = sb_getblk(sb, mmp_block); - if (!*bh) - return -ENOMEM; if (*bh) { get_bh(*bh); lock_buffer(*bh); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index b42d04f..0016fbc 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -103,13 +103,14 @@ static int ext4_end_io(ext4_io_end_t *io) "(inode %lu, offset %llu, size %zd, error %d)", inode->i_ino, offset, size, ret); } + if (io->iocb) + aio_complete(io->iocb, io->result, 0); + + if (io->flag & EXT4_IO_END_DIRECT) + inode_dio_done(inode); /* Wake up anyone waiting on unwritten extent conversion */ if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) wake_up_all(ext4_ioend_wq(inode)); - if (io->flag & EXT4_IO_END_DIRECT) - inode_dio_done(inode); - if (io->iocb) - aio_complete(io->iocb, io->result, 0); return ret; } diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 0cfa2f4..d99387b 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -334,7 +334,7 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, bh = sb_getblk(sb, blk); if (!bh) - return ERR_PTR(-ENOMEM); + return ERR_PTR(-EIO); if ((err = ext4_journal_get_write_access(handle, bh))) { brelse(bh); bh = ERR_PTR(err); @@ -411,7 +411,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap); if (!bh) - return -ENOMEM; + return -EIO; err = ext4_journal_get_write_access(handle, bh); if (err) @@ -501,7 +501,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb, gdb = sb_getblk(sb, block); if (!gdb) { - err = -ENOMEM; + err = -EIO; goto out; } @@ -1065,7 +1065,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data, bh = sb_getblk(sb, backup_block); if (!bh) { - err = -ENOMEM; + err = -EIO; break; } ext4_debug("update metadata backup %llu(+%llu)\n", @@ -1247,7 +1247,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, ext4_inode_table_set(sb, gdp, group_data->inode_table); ext4_free_group_clusters_set(sb, gdp, - EXT4_NUM_B2C(sbi, group_data->free_blocks_count)); + EXT4_B2C(sbi, group_data->free_blocks_count)); ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); if (ext4_has_group_desc_csum(sb)) ext4_itable_unused_set(sb, gdp, @@ -1341,8 +1341,6 @@ static void ext4_update_super(struct super_block *sb, /* Update the global fs size fields */ sbi->s_groups_count += flex_gd->count; - sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, - (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); /* Update the reserved block counts only once the new group is * active. */ @@ -1351,7 +1349,7 @@ static void ext4_update_super(struct super_block *sb, /* Update the free space counts */ percpu_counter_add(&sbi->s_freeclusters_counter, - EXT4_NUM_B2C(sbi, free_blocks)); + EXT4_B2C(sbi, free_blocks)); percpu_counter_add(&sbi->s_freeinodes_counter, EXT4_INODES_PER_GROUP(sb) * flex_gd->count); @@ -1362,8 +1360,8 @@ static void ext4_update_super(struct super_block *sb, sbi->s_log_groups_per_flex) { ext4_group_t flex_group; flex_group = ext4_flex_group(sbi, group_data[0].group); - atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), - &sbi->s_flex_groups[flex_group].free_clusters); + atomic_add(EXT4_B2C(sbi, free_blocks), + &sbi->s_flex_groups[flex_group].free_clusters); atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, &sbi->s_flex_groups[flex_group].free_inodes); } @@ -1880,10 +1878,6 @@ retry: return 0; ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); - if (n_group > (0xFFFFFFFFUL / EXT4_INODES_PER_GROUP(sb))) { - ext4_warning(sb, "resize would cause inodes_count overflow"); - return -EINVAL; - } ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); n_desc_blocks = num_desc_blocks(sb, n_group + 1); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 5575a45..3d4fb81 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -452,13 +452,10 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) struct super_block *sb = journal->j_private; struct ext4_sb_info *sbi = EXT4_SB(sb); int error = is_journal_aborted(journal); - struct ext4_journal_cb_entry *jce; + struct ext4_journal_cb_entry *jce, *tmp; - BUG_ON(txn->t_state == T_FINISHED); spin_lock(&sbi->s_md_lock); - while (!list_empty(&txn->t_private_list)) { - jce = list_entry(txn->t_private_list.next, - struct ext4_journal_cb_entry, jce_list); + list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) { list_del_init(&jce->jce_list); spin_unlock(&sbi->s_md_lock); jce->jce_func(sb, jce, error); @@ -1982,8 +1979,8 @@ static int ext4_fill_flex_info(struct super_block *sb) flex_group = ext4_flex_group(sbi, i); atomic_add(ext4_free_inodes_count(sb, gdp), &sbi->s_flex_groups[flex_group].free_inodes); - atomic64_add(ext4_free_group_clusters(sb, gdp), - &sbi->s_flex_groups[flex_group].free_clusters); + atomic_add(ext4_free_group_clusters(sb, gdp), + &sbi->s_flex_groups[flex_group].free_clusters); atomic_add(ext4_used_dirs_count(sb, gdp), &sbi->s_flex_groups[flex_group].used_dirs); } @@ -3238,7 +3235,7 @@ int ext4_calculate_overhead(struct super_block *sb) } /* Add the journal blocks as well */ if (sbi->s_journal) - overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); + overhead += EXT4_B2C(sbi, sbi->s_journal->j_maxlen); sbi->s_overhead = overhead; smp_wmb(); @@ -4011,7 +4008,7 @@ no_journal: !(sb->s_flags & MS_RDONLY)) { err = ext4_enable_quotas(sb); if (err) - goto failed_mount8; + goto failed_mount7; } #endif /* CONFIG_QUOTA */ @@ -4038,10 +4035,6 @@ cantfind_ext4: ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); goto failed_mount; -#ifdef CONFIG_QUOTA -failed_mount8: - kobject_del(&sbi->s_kobj); -#endif failed_mount7: ext4_unregister_li_request(sb); failed_mount6: @@ -5012,9 +5005,9 @@ static int ext4_enable_quotas(struct super_block *sb) DQUOT_USAGE_ENABLED); if (err) { ext4_warning(sb, - "Failed to enable quota tracking " - "(type=%d, err=%d). Please run " - "e2fsck to fix.", type, err); + "Failed to enable quota (type=%d) " + "tracking. Please run e2fsck to fix.", + type); return err; } } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index b93846b..3a91ebc 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -549,7 +549,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, error = ext4_handle_dirty_xattr_block(handle, inode, bh); if (IS_SYNC(inode)) ext4_handle_sync(handle); - dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1)); + dquot_free_block(inode, 1); ea_bdebug(bh, "refcount now=%d; releasing", le32_to_cpu(BHDR(bh)->h_refcount)); } @@ -832,8 +832,7 @@ inserted: else { /* The old block is released after updating the inode. */ - error = dquot_alloc_block(inode, - EXT4_C2B(EXT4_SB(sb), 1)); + error = dquot_alloc_block(inode, 1); if (error) goto cleanup; error = ext4_journal_get_write_access(handle, @@ -888,17 +887,16 @@ inserted: new_bh = sb_getblk(sb, block); if (!new_bh) { - error = -ENOMEM; getblk_failed: ext4_free_blocks(handle, inode, NULL, block, 1, EXT4_FREE_BLOCKS_METADATA); + error = -EIO; goto cleanup; } lock_buffer(new_bh); error = ext4_journal_get_create_access(handle, new_bh); if (error) { unlock_buffer(new_bh); - error = -EIO; goto getblk_failed; } memcpy(new_bh->b_data, s->base, new_bh->b_size); @@ -930,7 +928,7 @@ cleanup: return error; cleanup_dquot: - dquot_free_block(inode, EXT4_C2B(EXT4_SB(sb), 1)); + dquot_free_block(inode, 1); goto cleanup; bad_block: @@ -98,14 +98,14 @@ static void free_fdtable_rcu(struct rcu_head *rcu) kfree(fdt->open_fds); kfree(fdt); } else { - fddef = &per_cpu(fdtable_defer_list, get_cpu_light()); + fddef = &get_cpu_var(fdtable_defer_list); spin_lock(&fddef->lock); fdt->next = fddef->next; fddef->next = fdt; /* vmallocs are handled from the workqueue context */ schedule_work(&fddef->wq); spin_unlock(&fddef->lock); - put_cpu_light(); + put_cpu_var(fdtable_defer_list); } } @@ -516,7 +516,7 @@ struct files_struct init_files = { .close_on_exec = init_files.close_on_exec_init, .open_fds = init_files.open_fds_init, }, - .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock), + .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), }; /* diff --git a/fs/fscache/page.c b/fs/fscache/page.c index c84696c..ff000e5 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -796,13 +796,11 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie) _enter(""); - spin_lock(&cookie->stores_lock); - while (1) { - n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, - ARRAY_SIZE(results), - FSCACHE_COOKIE_PENDING_TAG); - if (n == 0) - break; + while (spin_lock(&cookie->stores_lock), + n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, + ARRAY_SIZE(results), + FSCACHE_COOKIE_PENDING_TAG), + n > 0) { for (i = n - 1; i >= 0; i--) { page = results[i]; radix_tree_delete(&cookie->stores, page->index); @@ -812,7 +810,6 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie) for (i = n - 1; i >= 0; i--) page_cache_release(results[i]); - spin_lock(&cookie->stores_lock); } spin_unlock(&cookie->stores_lock); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 40d13c7..8179e8b 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -287,5 +287,5 @@ const struct file_operations fscache_stats_fops = { .open = fscache_stats_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = seq_release, }; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 315e1f8..b7c09f9 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -682,14 +682,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) spin_lock(&fc->lock); fi->attr_version = ++fc->attr_version; - /* - * If i_nlink == 0 then unlink doesn't make sense, yet this can - * happen if userspace filesystem is careless. It would be - * difficult to enforce correct nlink usage so just ignore this - * condition here - */ - if (inode->i_nlink > 0) - drop_nlink(inode); + drop_nlink(inode); spin_unlock(&fc->lock); fuse_invalidate_attr(inode); fuse_invalidate_attr(dir); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 9a3945a..a68e91b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1286,10 +1286,6 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) inode_dio_wait(inode); - ret = gfs2_rs_alloc(GFS2_I(inode)); - if (ret) - return ret; - oldsize = inode->i_size; if (newsize >= oldsize) return do_grow(inode, newsize); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 7af426b..991ab2d 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -924,11 +924,8 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) cmd = F_SETLK; fl->fl_type = F_UNLCK; } - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { - if (fl->fl_type == F_UNLCK) - posix_lock_file_wait(file, fl); + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) return -EIO; - } if (IS_GETLK(cmd)) return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); else if (fl->fl_type == F_UNLCK) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9afba3d6..b7eff07 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -576,7 +576,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) RB_CLEAR_NODE(&ip->i_res->rs_node); out: up_write(&ip->i_rw_mutex); - return error; + return 0; } static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index fc8ddc1..eba76ea 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -533,7 +533,7 @@ void hfsplus_file_truncate(struct inode *inode) struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; - loff_t size = inode->i_size; + u32 size = inode->i_size; res = pagecache_write_begin(NULL, mapping, size, 0, AOP_FLAG_UNINTERRUPTIBLE, diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index d0de769..78bde32 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -110,7 +110,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) * way when do_mmap_pgoff unwinds (may be important on powerpc * and ia64). */ - vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; + vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_ops = &hugetlb_vm_ops; if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) @@ -908,19 +908,19 @@ static int can_do_hugetlb_shm(void) static int get_hstate_idx(int page_size_log) { - struct hstate *h = hstate_sizelog(page_size_log); + struct hstate *h; + if (!page_size_log) + return default_hstate_idx; + h = size_to_hstate(1 << page_size_log); if (!h) return -1; return h - hstates; } -/* - * Note that size should be aligned to proper hugepage size in caller side, - * otherwise hugetlb_reserve_pages reserves one less hugepages than intended. - */ -struct file *hugetlb_file_setup(const char *name, size_t size, - vm_flags_t acctflag, struct user_struct **user, +struct file *hugetlb_file_setup(const char *name, unsigned long addr, + size_t size, vm_flags_t acctflag, + struct user_struct **user, int creat_flags, int page_size_log) { int error = -ENOMEM; @@ -929,6 +929,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size, struct path path; struct dentry *root; struct qstr quick_string; + struct hstate *hstate; + unsigned long num_pages; int hstate_idx; hstate_idx = get_hstate_idx(page_size_log); @@ -967,10 +969,12 @@ struct file *hugetlb_file_setup(const char *name, size_t size, if (!inode) goto out_dentry; + hstate = hstate_inode(inode); + size += addr & ~huge_page_mask(hstate); + num_pages = ALIGN(size, huge_page_size(hstate)) >> + huge_page_shift(hstate); error = -ENOMEM; - if (hugetlb_reserve_pages(inode, 0, - size >> huge_page_shift(hstate_inode(inode)), NULL, - acctflag)) + if (hugetlb_reserve_pages(inode, 0, num_pages, NULL, acctflag)) goto out_inode; d_instantiate(path.dentry, inode); @@ -725,7 +725,7 @@ void prune_icache_sb(struct super_block *sb, int nr_to_scan) * inode to the back of the list so we don't spin on it. */ if (!spin_trylock(&inode->i_lock)) { - list_move(&inode->i_lru, &sb->s_inode_lru); + list_move_tail(&inode->i_lru, &sb->s_inode_lru); continue; } diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 95debd7..08c0304 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -129,8 +129,6 @@ void __log_wait_for_space(journal_t *journal) if (journal->j_flags & JFS_ABORT) return; spin_unlock(&journal->j_state_lock); - if (current->plug) - io_schedule(); mutex_lock(&journal->j_checkpoint_mutex); /* diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 069bf58..3091d42 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -382,7 +382,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) int space_left = 0; int first_tag = 0; int tag_flag; - int i; + int i, to_free = 0; int tag_bytes = journal_tag_bytes(journal); struct buffer_head *cbh = NULL; /* For transactional checksums */ __u32 crc32_sum = ~0; @@ -1126,7 +1126,7 @@ restart_loop: journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged; spin_unlock(&journal->j_history_lock); - commit_transaction->t_state = T_COMMIT_CALLBACK; + commit_transaction->t_state = T_FINISHED; J_ASSERT(commit_transaction == journal->j_committing_transaction); journal->j_commit_sequence = commit_transaction->t_tid; journal->j_committing_transaction = NULL; @@ -1141,44 +1141,38 @@ restart_loop: journal->j_average_commit_time*3) / 4; else journal->j_average_commit_time = commit_time; - write_unlock(&journal->j_state_lock); - if (journal->j_checkpoint_transactions == NULL) { - journal->j_checkpoint_transactions = commit_transaction; - commit_transaction->t_cpnext = commit_transaction; - commit_transaction->t_cpprev = commit_transaction; + if (commit_transaction->t_checkpoint_list == NULL && + commit_transaction->t_checkpoint_io_list == NULL) { + __jbd2_journal_drop_transaction(journal, commit_transaction); + to_free = 1; } else { - commit_transaction->t_cpnext = - journal->j_checkpoint_transactions; - commit_transaction->t_cpprev = - commit_transaction->t_cpnext->t_cpprev; - commit_transaction->t_cpnext->t_cpprev = - commit_transaction; - commit_transaction->t_cpprev->t_cpnext = + if (journal->j_checkpoint_transactions == NULL) { + journal->j_checkpoint_transactions = commit_transaction; + commit_transaction->t_cpnext = commit_transaction; + commit_transaction->t_cpprev = commit_transaction; + } else { + commit_transaction->t_cpnext = + journal->j_checkpoint_transactions; + commit_transaction->t_cpprev = + commit_transaction->t_cpnext->t_cpprev; + commit_transaction->t_cpnext->t_cpprev = + commit_transaction; + commit_transaction->t_cpprev->t_cpnext = commit_transaction; + } } spin_unlock(&journal->j_list_lock); - /* Drop all spin_locks because commit_callback may be block. - * __journal_remove_checkpoint() can not destroy transaction - * under us because it is not marked as T_FINISHED yet */ + if (journal->j_commit_callback) journal->j_commit_callback(journal, commit_transaction); trace_jbd2_end_commit(journal, commit_transaction); jbd_debug(1, "JBD2: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); - - write_lock(&journal->j_state_lock); - spin_lock(&journal->j_list_lock); - commit_transaction->t_state = T_FINISHED; - /* Recheck checkpoint lists after j_list_lock was dropped */ - if (commit_transaction->t_checkpoint_list == NULL && - commit_transaction->t_checkpoint_io_list == NULL) { - __jbd2_journal_drop_transaction(journal, commit_transaction); + if (to_free) jbd2_journal_free_transaction(commit_transaction); - } - spin_unlock(&journal->j_list_lock); - write_unlock(&journal->j_state_lock); + wake_up(&journal->j_wait_done_commit); } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 42f8cf6c..dbf41f9 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -698,37 +698,6 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) } /* - * When this function returns the transaction corresponding to tid - * will be completed. If the transaction has currently running, start - * committing that transaction before waiting for it to complete. If - * the transaction id is stale, it is by definition already completed, - * so just return SUCCESS. - */ -int jbd2_complete_transaction(journal_t *journal, tid_t tid) -{ - int need_to_wait = 1; - - read_lock(&journal->j_state_lock); - if (journal->j_running_transaction && - journal->j_running_transaction->t_tid == tid) { - if (journal->j_commit_request != tid) { - /* transaction not yet started, so request it */ - read_unlock(&journal->j_state_lock); - jbd2_log_start_commit(journal, tid); - goto wait_commit; - } - } else if (!(journal->j_committing_transaction && - journal->j_committing_transaction->t_tid == tid)) - need_to_wait = 0; - read_unlock(&journal->j_state_lock); - if (!need_to_wait) - return 0; -wait_commit: - return jbd2_log_wait_commit(journal, tid); -} -EXPORT_SYMBOL(jbd2_complete_transaction); - -/* * Log buffer allocation routines: */ diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 73b9253..df9f297 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1047,12 +1047,9 @@ out: void jbd2_journal_set_triggers(struct buffer_head *bh, struct jbd2_buffer_trigger_type *type) { - struct journal_head *jh = jbd2_journal_grab_journal_head(bh); + struct journal_head *jh = bh2jh(bh); - if (WARN_ON(!jh)) - return; jh->b_triggers = type; - jbd2_journal_put_journal_head(jh); } void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, @@ -1104,18 +1101,17 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; - struct journal_head *jh; + struct journal_head *jh = bh2jh(bh); int ret = 0; + jbd_debug(5, "journal_head %p\n", jh); + JBUFFER_TRACE(jh, "entry"); if (is_handle_aborted(handle)) goto out; - jh = jbd2_journal_grab_journal_head(bh); - if (!jh) { + if (!buffer_jbd(bh)) { ret = -EUCLEAN; goto out; } - jbd_debug(5, "journal_head %p\n", jh); - JBUFFER_TRACE(jh, "entry"); jbd_lock_bh_state(bh); @@ -1206,7 +1202,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) spin_unlock(&journal->j_list_lock); out_unlock_bh: jbd_unlock_bh_state(bh); - jbd2_journal_put_journal_head(jh); out: JBUFFER_TRACE(jh, "exit"); WARN_ON(ret); /* All errors are bugs, so dump the stack */ diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 193f04c..ca0a080 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -144,9 +144,6 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) timeout); if (ret < 0) return -ERESTARTSYS; - /* Reset the lock status after a server reboot so we resend */ - if (block->b_status == nlm_lck_denied_grace_period) - block->b_status = nlm_lck_blocked; req->a_res.status = block->b_status; return 0; } @@ -693,6 +693,8 @@ void nd_jump_link(struct nameidata *nd, struct path *path) nd->path = *path; nd->inode = nd->path.dentry->d_inode; nd->flags |= LOOKUP_JUMPED; + + BUG_ON(nd->inode->i_op->follow_link); } static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) diff --git a/fs/namespace.c b/fs/namespace.c index 859a026..55605c5 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -22,7 +22,6 @@ #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ #include <linux/uaccess.h> #include <linux/proc_fs.h> -#include <linux/delay.h> #include "pnode.h" #include "internal.h" @@ -314,11 +313,8 @@ int __mnt_want_write(struct vfsmount *m) * incremented count after it has set MNT_WRITE_HOLD. */ smp_mb(); - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { - preempt_enable(); - cpu_chill(); - preempt_disable(); - } + while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) + cpu_relax(); /* * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will * be set to match its requirements. So we must not load that until @@ -802,10 +798,6 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, } mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; - /* Don't allow unprivileged users to change mount flags */ - if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) - mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; - atomic_inc(&sb->s_active); mnt->mnt.mnt_sb = sb; mnt->mnt.mnt_root = dget(root); @@ -1246,14 +1238,6 @@ static int do_umount(struct mount *mnt, int flags) } /* - * Is the caller allowed to modify his namespace? - */ -static inline bool may_mount(void) -{ - return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN); -} - -/* * Now umount can handle mount points as well as block devices. * This is important for filesystems which use unnamed block devices. * @@ -1271,9 +1255,6 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) return -EINVAL; - if (!may_mount()) - return -EPERM; - if (!(flags & UMOUNT_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; @@ -1287,6 +1268,10 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) if (!check_mnt(mnt)) goto dput_and_out; + retval = -EPERM; + if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) + goto dput_and_out; + retval = do_umount(mnt, flags); dput_and_out: /* we mustn't call path_put() as that would clear mnt_expiry_mark */ @@ -1310,7 +1295,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name) static int mount_is_safe(struct path *path) { - if (may_mount()) + if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN)) return 0; return -EPERM; #ifdef notyet @@ -1648,7 +1633,7 @@ static int do_change_type(struct path *path, int flag) int type; int err = 0; - if (!may_mount()) + if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; if (path->dentry != path->mnt->mnt_root) @@ -1744,9 +1729,6 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags) if (readonly_request == __mnt_is_readonly(mnt)) return 0; - if (mnt->mnt_flags & MNT_LOCK_READONLY) - return -EPERM; - if (readonly_request) error = mnt_make_readonly(real_mount(mnt)); else @@ -1815,7 +1797,7 @@ static int do_move_mount(struct path *path, const char *old_name) struct mount *p; struct mount *old; int err = 0; - if (!may_mount()) + if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; if (!old_name || !*old_name) return -EINVAL; @@ -1951,14 +1933,16 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, int mnt_flags, const char *name, void *data) { struct file_system_type *type; - struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; + struct user_namespace *user_ns; struct vfsmount *mnt; int err; if (!fstype) return -EINVAL; - if (!may_mount()) + /* we need capabilities... */ + user_ns = real_mount(path->mnt)->mnt_ns->user_ns; + if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return -EPERM; type = get_fs_type(fstype); @@ -2376,7 +2360,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, /* First pass: copy the tree topology */ copy_flags = CL_COPY_ALL | CL_EXPIRE; if (user_ns != mnt_ns->user_ns) - copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; + copy_flags |= CL_SHARED_TO_SLAVE; new = copy_tree(old, old->mnt.mnt_root, copy_flags); if (IS_ERR(new)) { up_write(&namespace_sem); @@ -2583,7 +2567,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, struct mount *new_mnt, *root_mnt; int error; - if (!may_mount()) + if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; error = user_path_dir(new_root, &new); @@ -2769,51 +2753,6 @@ bool our_mnt(struct vfsmount *mnt) return check_mnt(real_mount(mnt)); } -bool current_chrooted(void) -{ - /* Does the current process have a non-standard root */ - struct path ns_root; - struct path fs_root; - bool chrooted; - - /* Find the namespace root */ - ns_root.mnt = ¤t->nsproxy->mnt_ns->root->mnt; - ns_root.dentry = ns_root.mnt->mnt_root; - path_get(&ns_root); - while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root)) - ; - - get_fs_root(current->fs, &fs_root); - - chrooted = !path_equal(&fs_root, &ns_root); - - path_put(&fs_root); - path_put(&ns_root); - - return chrooted; -} - -void update_mnt_policy(struct user_namespace *userns) -{ - struct mnt_namespace *ns = current->nsproxy->mnt_ns; - struct mount *mnt; - - down_read(&namespace_sem); - list_for_each_entry(mnt, &ns->list, mnt_list) { - switch (mnt->mnt.mnt_sb->s_magic) { - case SYSFS_MAGIC: - userns->may_mount_sysfs = true; - break; - case PROC_SUPER_MAGIC: - userns->may_mount_proc = true; - break; - } - if (userns->may_mount_sysfs && userns->may_mount_proc) - break; - } - up_read(&namespace_sem); -} - static void *mntns_get(struct task_struct *task) { struct mnt_namespace *ns = NULL; diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 434b93e..4fa788c 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -1273,7 +1273,6 @@ static const struct nfs_pageio_ops bl_pg_write_ops = { static struct pnfs_layoutdriver_type blocklayout_type = { .id = LAYOUT_BLOCK_VOLUME, .name = "LAYOUT_BLOCK_VOLUME", - .owner = THIS_MODULE, .read_pagelist = bl_read_pagelist, .write_pagelist = bl_write_pagelist, .alloc_layout_hdr = bl_alloc_layout_hdr, diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c index 6fc7b5c..737d839 100644 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ b/fs/nfs/blocklayout/blocklayoutdm.c @@ -55,8 +55,7 @@ static void dev_remove(struct net *net, dev_t dev) bl_pipe_msg.bl_wq = &nn->bl_wq; memset(msg, 0, sizeof(*msg)); - msg->len = sizeof(bl_msg) + bl_msg.totallen; - msg->data = kzalloc(msg->len, GFP_NOFS); + msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); if (!msg->data) goto out; @@ -67,6 +66,7 @@ static void dev_remove(struct net *net, dev_t dev) memcpy(msg->data, &bl_msg, sizeof(bl_msg)); dataptr = (uint8_t *) msg->data; memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); + msg->len = sizeof(bl_msg) + bl_msg.totallen; add_wait_queue(&nn->bl_wq, &wq); if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 2960512..264d1aa 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -183,15 +183,60 @@ static u32 initiate_file_draining(struct nfs_client *clp, static u32 initiate_bulk_draining(struct nfs_client *clp, struct cb_layoutrecallargs *args) { - int stat; + struct nfs_server *server; + struct pnfs_layout_hdr *lo; + struct inode *ino; + u32 rv = NFS4ERR_NOMATCHING_LAYOUT; + struct pnfs_layout_hdr *tmp; + LIST_HEAD(recall_list); + LIST_HEAD(free_me_list); + struct pnfs_layout_range range = { + .iomode = IOMODE_ANY, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; - if (args->cbl_recall_type == RETURN_FSID) - stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true); - else - stat = pnfs_destroy_layouts_byclid(clp, true); - if (stat != 0) - return NFS4ERR_DELAY; - return NFS4ERR_NOMATCHING_LAYOUT; + spin_lock(&clp->cl_lock); + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + if ((args->cbl_recall_type == RETURN_FSID) && + memcmp(&server->fsid, &args->cbl_fsid, + sizeof(struct nfs_fsid))) + continue; + + list_for_each_entry(lo, &server->layouts, plh_layouts) { + ino = igrab(lo->plh_inode); + if (!ino) + continue; + spin_lock(&ino->i_lock); + /* Is this layout in the process of being freed? */ + if (NFS_I(ino)->layout != lo) { + spin_unlock(&ino->i_lock); + iput(ino); + continue; + } + pnfs_get_layout_hdr(lo); + spin_unlock(&ino->i_lock); + list_add(&lo->plh_bulk_recall, &recall_list); + } + } + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); + + list_for_each_entry_safe(lo, tmp, + &recall_list, plh_bulk_recall) { + ino = lo->plh_inode; + spin_lock(&ino->i_lock); + set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); + if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range)) + rv = NFS4ERR_DELAY; + list_del_init(&lo->plh_bulk_recall); + spin_unlock(&ino->i_lock); + pnfs_free_lseg_list(&free_me_list); + pnfs_put_layout_hdr(lo); + iput(ino); + } + return rv; } static u32 do_callback_layoutrecall(struct nfs_client *clp, diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index cf4ed87..bc3968fa 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -725,9 +725,9 @@ out1: return ret; } -static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen) +static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data) { - return key_instantiate_and_link(key, data, datalen, + return key_instantiate_and_link(key, data, strlen(data) + 1, id_resolver_cache->thread_keyring, authkey); } @@ -737,7 +737,6 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, struct key *key, struct key *authkey) { char id_str[NFS_UINT_MAXLEN]; - size_t len; int ret = -ENOKEY; /* ret = -ENOKEY */ @@ -747,15 +746,13 @@ static int nfs_idmap_read_and_verify_message(struct idmap_msg *im, case IDMAP_CONV_NAMETOID: if (strcmp(upcall->im_name, im->im_name) != 0) break; - /* Note: here we store the NUL terminator too */ - len = sprintf(id_str, "%d", im->im_id) + 1; - ret = nfs_idmap_instantiate(key, authkey, id_str, len); + sprintf(id_str, "%d", im->im_id); + ret = nfs_idmap_instantiate(key, authkey, id_str); break; case IDMAP_CONV_IDTONAME: if (upcall->im_id != im->im_id) break; - len = strlen(im->im_name); - ret = nfs_idmap_instantiate(key, authkey, im->im_name, len); + ret = nfs_idmap_instantiate(key, authkey, im->im_name); break; default: ret = -EINVAL; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index c53189d..2e9779b 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -300,7 +300,7 @@ int nfs40_walk_client_list(struct nfs_client *new, struct rpc_cred *cred) { struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); - struct nfs_client *pos, *prev = NULL; + struct nfs_client *pos, *n, *prev = NULL; struct nfs4_setclientid_res clid = { .clientid = new->cl_clientid, .confirm = new->cl_confirm, @@ -308,23 +308,10 @@ int nfs40_walk_client_list(struct nfs_client *new, int status = -NFS4ERR_STALE_CLIENTID; spin_lock(&nn->nfs_client_lock); - list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { + list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { /* If "pos" isn't marked ready, we can't trust the * remaining fields in "pos" */ - if (pos->cl_cons_state > NFS_CS_READY) { - atomic_inc(&pos->cl_count); - spin_unlock(&nn->nfs_client_lock); - - if (prev) - nfs_put_client(prev); - prev = pos; - - status = nfs_wait_client_init_complete(pos); - spin_lock(&nn->nfs_client_lock); - if (status < 0) - continue; - } - if (pos->cl_cons_state != NFS_CS_READY) + if (pos->cl_cons_state < NFS_CS_READY) continue; if (pos->rpc_ops != new->rpc_ops) @@ -436,16 +423,16 @@ int nfs41_walk_client_list(struct nfs_client *new, struct rpc_cred *cred) { struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); - struct nfs_client *pos, *prev = NULL; + struct nfs_client *pos, *n, *prev = NULL; int status = -NFS4ERR_STALE_CLIENTID; spin_lock(&nn->nfs_client_lock); - list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { + list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { /* If "pos" isn't marked ready, we can't trust the * remaining fields in "pos", especially the client * ID and serverowner fields. Wait for CREATE_SESSION * to finish. */ - if (pos->cl_cons_state > NFS_CS_READY) { + if (pos->cl_cons_state < NFS_CS_READY) { atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); @@ -453,17 +440,18 @@ int nfs41_walk_client_list(struct nfs_client *new, nfs_put_client(prev); prev = pos; + nfs4_schedule_lease_recovery(pos); status = nfs_wait_client_init_complete(pos); - if (status == 0) { - nfs4_schedule_lease_recovery(pos); - status = nfs4_wait_clnt_recover(pos); + if (status < 0) { + nfs_put_client(pos); + spin_lock(&nn->nfs_client_lock); + continue; } + status = pos->cl_cons_state; spin_lock(&nn->nfs_client_lock); if (status < 0) continue; } - if (pos->cl_cons_state != NFS_CS_READY) - continue; if (pos->rpc_ops != new->rpc_ops) continue; @@ -481,18 +469,17 @@ int nfs41_walk_client_list(struct nfs_client *new, continue; atomic_inc(&pos->cl_count); - *result = pos; - status = 0; + spin_unlock(&nn->nfs_client_lock); dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", __func__, pos, atomic_read(&pos->cl_count)); - break; + + *result = pos; + return 0; } /* No matching nfs_client found. */ spin_unlock(&nn->nfs_client_lock); dprintk("NFS: <-- %s status = %d\n", __func__, status); - if (prev) - nfs_put_client(prev); return status; } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 4fb234d..194c484 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -99,8 +99,7 @@ static void filelayout_reset_write(struct nfs_write_data *data) task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages, - hdr->completion_ops, - hdr->dreq); + hdr->completion_ops); } } @@ -120,8 +119,7 @@ static void filelayout_reset_read(struct nfs_read_data *data) task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages, - hdr->completion_ops, - hdr->dreq); + hdr->completion_ops); } } @@ -129,6 +127,7 @@ static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo) { if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) return; + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); pnfs_return_layout(inode); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e3c6121..cf747ef 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1374,12 +1374,6 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state case -ENOMEM: err = 0; goto out; - case -NFS4ERR_DELAY: - case -NFS4ERR_GRACE: - set_bit(NFS_DELEGATED_STATE, &state->flags); - ssleep(1); - err = -EAGAIN; - goto out; } err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); @@ -1469,7 +1463,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) struct nfs4_state_owner *sp = data->owner; if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) - goto out_wait; + return; /* * Check if we still need to send an OPEN call, or if we can use * a delegation instead. @@ -1504,7 +1498,6 @@ unlock_no_action: rcu_read_unlock(); out_no_action: task->tk_action = NULL; -out_wait: nfs4_sequence_done(task, &data->o_res.seq_res); } @@ -2157,7 +2150,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) dprintk("%s: begin!\n", __func__); if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) - goto out_wait; + return; task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; calldata->arg.fmode = FMODE_READ|FMODE_WRITE; @@ -2179,14 +2172,16 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (!call_close) { /* Note: exit _without_ calling nfs4_close_done */ - goto out_no_action; + task->tk_action = NULL; + nfs4_sequence_done(task, &calldata->res.seq_res); + goto out; } if (calldata->arg.fmode == 0) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; if (calldata->roc && pnfs_roc_drain(inode, &calldata->roc_barrier, task)) - goto out_wait; + goto out; } nfs_fattr_init(calldata->res.fattr); @@ -2196,12 +2191,8 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) &calldata->res.seq_res, task) != 0) nfs_release_seqid(calldata->arg.seqid); +out: dprintk("%s: done!\n", __func__); - return; -out_no_action: - task->tk_action = NULL; -out_wait: - nfs4_sequence_done(task, &calldata->res.seq_res); } static const struct rpc_call_ops nfs4_close_ops = { @@ -4432,10 +4423,12 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) struct nfs4_unlockdata *calldata = data; if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) - goto out_wait; + return; if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) { /* Note: exit _without_ running nfs4_locku_done */ - goto out_no_action; + task->tk_action = NULL; + nfs4_sequence_done(task, &calldata->res.seq_res); + return; } calldata->timestamp = jiffies; if (nfs4_setup_sequence(calldata->server, @@ -4443,11 +4436,6 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) &calldata->res.seq_res, task) != 0) nfs_release_seqid(calldata->arg.seqid); - return; -out_no_action: - task->tk_action = NULL; -out_wait: - nfs4_sequence_done(task, &calldata->res.seq_res); } static const struct rpc_call_ops nfs4_locku_ops = { @@ -4513,9 +4501,9 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * if (status != 0) goto out; /* Is this a delegated lock? */ - lsp = request->fl_u.nfs4_fl.owner; - if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) == 0) + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) goto out; + lsp = request->fl_u.nfs4_fl.owner; seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL); status = -ENOMEM; if (seqid == NULL) @@ -4588,7 +4576,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) dprintk("%s: begin!\n", __func__); if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) - goto out_wait; + return; /* Do we need to do an open_to_lock_owner? */ if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { @@ -4608,8 +4596,6 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) nfs_release_seqid(data->arg.open_seqid); out_release_lock_seqid: nfs_release_seqid(data->arg.lock_seqid); -out_wait: - nfs4_sequence_done(task, &data->res.seq_res); dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); } @@ -6093,13 +6079,11 @@ static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) static void nfs4_layoutget_release(void *calldata) { struct nfs4_layoutget *lgp = calldata; - struct inode *inode = lgp->args.inode; - struct nfs_server *server = NFS_SERVER(inode); + struct nfs_server *server = NFS_SERVER(lgp->args.inode); size_t max_pages = max_response_pages(server); dprintk("--> %s\n", __func__); nfs4_free_pages(lgp->args.layout.pages, max_pages); - pnfs_put_layout_hdr(NFS_I(inode)->layout); put_nfs_open_context(lgp->args.ctx); kfree(calldata); dprintk("<-- %s\n", __func__); @@ -6114,8 +6098,7 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { struct pnfs_layout_segment * nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) { - struct inode *inode = lgp->args.inode; - struct nfs_server *server = NFS_SERVER(inode); + struct nfs_server *server = NFS_SERVER(lgp->args.inode); size_t max_pages = max_response_pages(server); struct rpc_task *task; struct rpc_message msg = { @@ -6145,18 +6128,13 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) lgp->res.layoutp = &lgp->args.layout; lgp->res.seq_res.sr_slot = NULL; nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); - - /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ - pnfs_get_layout_hdr(NFS_I(inode)->layout); - task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return ERR_CAST(task); status = nfs4_wait_for_completion_rpc_task(task); if (status == 0) status = task->tk_status; - /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ - if (status == 0 && lgp->res.layoutp->len) + if (status == 0) lseg = pnfs_layout_process(lgp); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); @@ -6372,8 +6350,22 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) static void nfs4_layoutcommit_release(void *calldata) { struct nfs4_layoutcommit_data *data = calldata; + struct pnfs_layout_segment *lseg, *tmp; + unsigned long *bitlock = &NFS_I(data->args.inode)->flags; pnfs_cleanup_layoutcommit(data); + /* Matched by references in pnfs_set_layoutcommit */ + list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) { + list_del_init(&lseg->pls_lc_list); + if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, + &lseg->pls_flags)) + pnfs_put_lseg(lseg); + } + + clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); + smp_mb__after_clear_bit(); + wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); + put_rpccred(data->cred); kfree(data); } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 994fbe2..e61f68d 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1877,13 +1877,7 @@ again: status = PTR_ERR(clnt); break; } - /* Note: this is safe because we haven't yet marked the - * client as ready, so we are the only user of - * clp->cl_rpcclient - */ - clnt = xchg(&clp->cl_rpcclient, clnt); - rpc_shutdown_client(clnt); - clnt = clp->cl_rpcclient; + clp->cl_rpcclient = clnt; goto again; case -NFS4ERR_MINOR_VERS_MISMATCH: diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 88f9611..c6f9906 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -647,7 +647,6 @@ static struct pnfs_layoutdriver_type objlayout_type = { .flags = PNFS_LAYOUTRET_ON_SETATTR | PNFS_LAYOUTRET_ON_ERROR, - .owner = THIS_MODULE, .alloc_layout_hdr = objlayout_alloc_layout_hdr, .free_layout_hdr = objlayout_free_layout_hdr, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3b71623..d00260b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -417,16 +417,6 @@ should_free_lseg(struct pnfs_layout_range *lseg_range, lo_seg_intersecting(lseg_range, recall_range); } -static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, - struct list_head *tmp_list) -{ - if (!atomic_dec_and_test(&lseg->pls_refcount)) - return false; - pnfs_layout_remove_lseg(lseg->pls_layout, lseg); - list_add(&lseg->pls_list, tmp_list); - return true; -} - /* Returns 1 if lseg is removed from list, 0 otherwise */ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, struct list_head *tmp_list) @@ -440,8 +430,11 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, */ dprintk("%s: lseg %p ref %d\n", __func__, lseg, atomic_read(&lseg->pls_refcount)); - if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list)) + if (atomic_dec_and_test(&lseg->pls_refcount)) { + pnfs_layout_remove_lseg(lseg->pls_layout, lseg); + list_add(&lseg->pls_list, tmp_list); rv = 1; + } } return rv; } @@ -512,147 +505,37 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) } EXPORT_SYMBOL_GPL(pnfs_destroy_layout); -static bool -pnfs_layout_add_bulk_destroy_list(struct inode *inode, - struct list_head *layout_list) -{ - struct pnfs_layout_hdr *lo; - bool ret = false; - - spin_lock(&inode->i_lock); - lo = NFS_I(inode)->layout; - if (lo != NULL && list_empty(&lo->plh_bulk_destroy)) { - pnfs_get_layout_hdr(lo); - list_add(&lo->plh_bulk_destroy, layout_list); - ret = true; - } - spin_unlock(&inode->i_lock); - return ret; -} - -/* Caller must hold rcu_read_lock and clp->cl_lock */ -static int -pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, - struct nfs_server *server, - struct list_head *layout_list) -{ - struct pnfs_layout_hdr *lo, *next; - struct inode *inode; - - list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) { - inode = igrab(lo->plh_inode); - if (inode == NULL) - continue; - list_del_init(&lo->plh_layouts); - if (pnfs_layout_add_bulk_destroy_list(inode, layout_list)) - continue; - rcu_read_unlock(); - spin_unlock(&clp->cl_lock); - iput(inode); - spin_lock(&clp->cl_lock); - rcu_read_lock(); - return -EAGAIN; - } - return 0; -} - -static int -pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, - bool is_bulk_recall) +/* + * Called by the state manger to remove all layouts established under an + * expired lease. + */ +void +pnfs_destroy_all_layouts(struct nfs_client *clp) { + struct nfs_server *server; struct pnfs_layout_hdr *lo; - struct inode *inode; - struct pnfs_layout_range range = { - .iomode = IOMODE_ANY, - .offset = 0, - .length = NFS4_MAX_UINT64, - }; - LIST_HEAD(lseg_list); - int ret = 0; - - while (!list_empty(layout_list)) { - lo = list_entry(layout_list->next, struct pnfs_layout_hdr, - plh_bulk_destroy); - dprintk("%s freeing layout for inode %lu\n", __func__, - lo->plh_inode->i_ino); - inode = lo->plh_inode; - spin_lock(&inode->i_lock); - list_del_init(&lo->plh_bulk_destroy); - lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ - if (is_bulk_recall) - set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); - if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range)) - ret = -EAGAIN; - spin_unlock(&inode->i_lock); - pnfs_free_lseg_list(&lseg_list); - pnfs_put_layout_hdr(lo); - iput(inode); - } - return ret; -} + LIST_HEAD(tmp_list); -int -pnfs_destroy_layouts_byfsid(struct nfs_client *clp, - struct nfs_fsid *fsid, - bool is_recall) -{ - struct nfs_server *server; - LIST_HEAD(layout_list); + nfs4_deviceid_mark_client_invalid(clp); + nfs4_deviceid_purge_client(clp); spin_lock(&clp->cl_lock); rcu_read_lock(); -restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - if (memcmp(&server->fsid, fsid, sizeof(*fsid)) != 0) - continue; - if (pnfs_layout_bulk_destroy_byserver_locked(clp, - server, - &layout_list) != 0) - goto restart; + if (!list_empty(&server->layouts)) + list_splice_init(&server->layouts, &tmp_list); } rcu_read_unlock(); spin_unlock(&clp->cl_lock); - if (list_empty(&layout_list)) - return 0; - return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); -} - -int -pnfs_destroy_layouts_byclid(struct nfs_client *clp, - bool is_recall) -{ - struct nfs_server *server; - LIST_HEAD(layout_list); - - spin_lock(&clp->cl_lock); - rcu_read_lock(); -restart: - list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - if (pnfs_layout_bulk_destroy_byserver_locked(clp, - server, - &layout_list) != 0) - goto restart; + while (!list_empty(&tmp_list)) { + lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, + plh_layouts); + dprintk("%s freeing layout for inode %lu\n", __func__, + lo->plh_inode->i_ino); + list_del_init(&lo->plh_layouts); + pnfs_destroy_layout(NFS_I(lo->plh_inode)); } - rcu_read_unlock(); - spin_unlock(&clp->cl_lock); - - if (list_empty(&layout_list)) - return 0; - return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall); -} - -/* - * Called by the state manger to remove all layouts established under an - * expired lease. - */ -void -pnfs_destroy_all_layouts(struct nfs_client *clp) -{ - nfs4_deviceid_mark_client_invalid(clp); - nfs4_deviceid_purge_client(clp); - - pnfs_destroy_layouts_byclid(clp, false); } /* @@ -784,21 +667,6 @@ send_layoutget(struct pnfs_layout_hdr *lo, return lseg; } -static void pnfs_clear_layoutcommit(struct inode *inode, - struct list_head *head) -{ - struct nfs_inode *nfsi = NFS_I(inode); - struct pnfs_layout_segment *lseg, *tmp; - - if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) - return; - list_for_each_entry_safe(lseg, tmp, &nfsi->layout->plh_segs, pls_list) { - if (!test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) - continue; - pnfs_lseg_dec_and_remove_zero(lseg, head); - } -} - /* * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr * when the layout segment list is empty. @@ -830,7 +698,6 @@ _pnfs_return_layout(struct inode *ino) /* Reference matched in nfs4_layoutreturn_release */ pnfs_get_layout_hdr(lo); empty = list_empty(&lo->plh_segs); - pnfs_clear_layoutcommit(ino, &tmp_list); pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL); /* Don't send a LAYOUTRETURN if list was initially empty */ if (empty) { @@ -843,6 +710,8 @@ _pnfs_return_layout(struct inode *ino) spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); + WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)); + lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); if (unlikely(lrp == NULL)) { status = -ENOMEM; @@ -1019,7 +888,7 @@ alloc_init_layout_hdr(struct inode *ino, atomic_set(&lo->plh_refcount, 1); INIT_LIST_HEAD(&lo->plh_layouts); INIT_LIST_HEAD(&lo->plh_segs); - INIT_LIST_HEAD(&lo->plh_bulk_destroy); + INIT_LIST_HEAD(&lo->plh_bulk_recall); lo->plh_inode = ino; lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred); return lo; @@ -1443,15 +1312,13 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq) + const struct nfs_pgio_completion_ops *compl_ops) { struct nfs_pageio_descriptor pgio; LIST_HEAD(failed); /* Resend all requests through the MDS */ nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops); - pgio.pg_dreq = dreq; while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); @@ -1480,13 +1347,13 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data) dprintk("pnfs write error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages, - hdr->completion_ops, - hdr->dreq); + hdr->completion_ops); } /* @@ -1601,15 +1468,13 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq) + const struct nfs_pgio_completion_ops *compl_ops) { struct nfs_pageio_descriptor pgio; LIST_HEAD(failed); /* Resend all requests through the MDS */ nfs_pageio_init_read(&pgio, inode, compl_ops); - pgio.pg_dreq = dreq; while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); @@ -1634,13 +1499,13 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) dprintk("pnfs read error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages, - hdr->completion_ops, - hdr->dreq); + hdr->completion_ops); } /* @@ -1766,27 +1631,11 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { if (lseg->pls_range.iomode == IOMODE_RW && - test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) + test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) list_add(&lseg->pls_lc_list, listp); } } -static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp) -{ - struct pnfs_layout_segment *lseg, *tmp; - unsigned long *bitlock = &NFS_I(inode)->flags; - - /* Matched by references in pnfs_set_layoutcommit */ - list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) { - list_del_init(&lseg->pls_lc_list); - pnfs_put_lseg(lseg); - } - - clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); - smp_mb__after_clear_bit(); - wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); -} - void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) { pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode); @@ -1831,7 +1680,6 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) if (nfss->pnfs_curr_ld->cleanup_layoutcommit) nfss->pnfs_curr_ld->cleanup_layoutcommit(data); - pnfs_list_write_lseg_done(data->args.inode, &data->lseg_list); } /* diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 94ba804..dbf7bba 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -132,7 +132,7 @@ struct pnfs_layoutdriver_type { struct pnfs_layout_hdr { atomic_t plh_refcount; struct list_head plh_layouts; /* other client layouts */ - struct list_head plh_bulk_destroy; + struct list_head plh_bulk_recall; /* clnt list of bulk recalls */ struct list_head plh_segs; /* layout segments list */ nfs4_stateid plh_stateid; atomic_t plh_outstanding; /* number of RPCs out */ @@ -196,11 +196,6 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); void pnfs_destroy_all_layouts(struct nfs_client *); -int pnfs_destroy_layouts_byfsid(struct nfs_client *clp, - struct nfs_fsid *fsid, - bool is_recall); -int pnfs_destroy_layouts_byclid(struct nfs_client *clp, - bool is_recall); void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, @@ -230,11 +225,9 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq); + const struct nfs_pgio_completion_ops *compl_ops); int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq); + const struct nfs_pgio_completion_ops *compl_ops); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); /* nfs4_deviceid_flags */ diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 6edc807..3f79c77 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -336,14 +336,20 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) struct inode *old_dir = data->old_dir; struct inode *new_dir = data->new_dir; struct dentry *old_dentry = data->old_dentry; + struct dentry *new_dentry = data->new_dentry; if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { rpc_restart_call_prepare(task); return; } - if (task->tk_status != 0) + if (task->tk_status != 0) { nfs_cancel_async_unlink(old_dentry); + return; + } + + d_drop(old_dentry); + d_drop(new_dentry); } /** @@ -544,18 +550,6 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) error = rpc_wait_for_completion_task(task); if (error == 0) error = task->tk_status; - switch (error) { - case 0: - /* The rename succeeded */ - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - d_move(dentry, sdentry); - break; - case -ERESTARTSYS: - /* The result of the rename is unknown. Play it safe by - * forcing a new lookup */ - d_drop(dentry); - d_drop(sdentry); - } rpc_put_task(task); out_dput: dput(sdentry); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ec668e1..9d1c5db 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -931,14 +931,14 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate, stateid, WR_STATE, &filp); + if (filp) + get_file(filp); + nfs4_unlock_state(); + if (status) { - nfs4_unlock_state(); dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); return status; } - if (filp) - get_file(filp); - nfs4_unlock_state(); cnt = write->wr_buflen; write->wr_how_written = write->wr_stable_how; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 53a7c64..ac8ed96 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -210,7 +210,13 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) { if (atomic_dec_and_test(&fp->fi_access[oflag])) { nfs4_file_put_fd(fp, oflag); - if (atomic_read(&fp->fi_access[1 - oflag]) == 0) + /* + * It's also safe to get rid of the RDWR open *if* + * we no longer have need of the other kind of access + * or if we already have the other kind of open: + */ + if (fp->fi_fds[1-oflag] + || atomic_read(&fp->fi_access[1 - oflag]) == 0) nfs4_file_put_fd(fp, O_RDWR); } } @@ -1054,8 +1060,6 @@ free_client(struct nfs4_client *clp) } free_svc_cred(&clp->cl_cred); kfree(clp->cl_name.data); - idr_remove_all(&clp->cl_stateids); - idr_destroy(&clp->cl_stateids); kfree(clp); } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index cd5e6c1..0dc1158 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -264,7 +264,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, iattr->ia_valid |= ATTR_SIZE; } if (bmval[0] & FATTR4_WORD0_ACL) { - u32 nace; + int nace; struct nfs4_ace *ace; READ_BUF(4); len += 4; @@ -344,7 +344,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ64(iattr->ia_atime.tv_sec); + READ32(dummy32); + if (dummy32) + return nfserr_inval; + READ32(iattr->ia_atime.tv_sec); READ32(iattr->ia_atime.tv_nsec); if (iattr->ia_atime.tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -367,7 +370,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ64(iattr->ia_mtime.tv_sec); + READ32(dummy32); + if (dummy32) + return nfserr_inval; + READ32(iattr->ia_mtime.tv_sec); READ32(iattr->ia_mtime.tv_nsec); if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -2380,7 +2386,8 @@ out_acl: if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { if ((buflen -= 12) < 0) goto out_resource; - WRITE64((s64)stat.atime.tv_sec); + WRITE32(0); + WRITE32(stat.atime.tv_sec); WRITE32(stat.atime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { @@ -2393,13 +2400,15 @@ out_acl: if (bmval1 & FATTR4_WORD1_TIME_METADATA) { if ((buflen -= 12) < 0) goto out_resource; - WRITE64((s64)stat.ctime.tv_sec); + WRITE32(0); + WRITE32(stat.ctime.tv_sec); WRITE32(stat.ctime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { if ((buflen -= 12) < 0) goto out_resource; - WRITE64((s64)stat.mtime.tv_sec); + WRITE32(0); + WRITE32(stat.mtime.tv_sec); WRITE32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 69c6413..d586117 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1013,7 +1013,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, int host_err; int stable = *stablep; int use_wgather; - loff_t pos = offset; dentry = file->f_path.dentry; inode = dentry->d_inode; @@ -1026,7 +1025,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, /* Write the data. */ oldfs = get_fs(); set_fs(KERNEL_DS); - host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos); + host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); set_fs(oldfs); if (host_err < 0) goto out_nfserr; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 595343e..228a2c2 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -574,7 +574,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, int add = (arg & IN_MASK_ADD); int ret; + /* don't allow invalid bits: we don't want flags set */ mask = inotify_arg_to_mask(arg); + if (unlikely(!(mask & IN_ALL_EVENTS))) + return -EINVAL; fsn_mark = fsnotify_find_inode_mark(group, inode); if (!fsn_mark) @@ -624,7 +627,10 @@ static int inotify_new_watch(struct fsnotify_group *group, struct idr *idr = &group->inotify_data.idr; spinlock_t *idr_lock = &group->inotify_data.idr_lock; + /* don't allow invalid bits: we don't want flags set */ mask = inotify_arg_to_mask(arg); + if (unlikely(!(mask & IN_ALL_EVENTS))) + return -EINVAL; tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); if (unlikely(!tmp_i_mark)) @@ -751,10 +757,6 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, int ret; unsigned flags = 0; - /* don't allow invalid bits: we don't want flags set */ - if (unlikely(!(mask & ALL_INOTIFY_BITS))) - return -EINVAL; - f = fdget(fd); if (unlikely(!f.file)) return -EBADF; diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index f5d4565..fa9c05f 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -108,7 +108,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) "0x%llx.", (unsigned long long)bh->b_blocknr); } first = page_buffers(page); - flags = bh_uptodate_lock_irqsave(first); + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &first->b_state); clear_buffer_async_read(bh); unlock_buffer(bh); tmp = bh; @@ -123,7 +124,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) } tmp = tmp->b_this_page; } while (tmp != bh); - bh_uptodate_unlock_irqrestore(first, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); /* * If none of the buffers had errors then we can set the page uptodate, * but we first have to perform the post read mst fixups, if the @@ -144,13 +146,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) recs = PAGE_CACHE_SIZE / rec_size; /* Should have been verified before we got here... */ BUG_ON(!recs); - local_irq_save_nort(flags); + local_irq_save(flags); kaddr = kmap_atomic(page); for (i = 0; i < recs; i++) post_read_mst_fixup((NTFS_RECORD*)(kaddr + i * rec_size), rec_size); kunmap_atomic(kaddr); - local_irq_restore_nort(flags); + local_irq_restore(flags); flush_dcache_page(page); if (likely(page_uptodate && !PageError(page))) SetPageUptodate(page); @@ -158,7 +160,9 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) unlock_page(page); return; still_busy: - bh_uptodate_unlock_irqrestore(first, flags); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); + return; } /** diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 340bd02..6577432 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -593,9 +593,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, level = ocfs2_iocb_rw_locked_level(iocb); ocfs2_rw_unlock(inode, level); - inode_dio_done(inode); if (is_async) aio_complete(iocb, ret, 0); + inode_dio_done(inode); } /* diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 88577eb..4f7795f 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2545,7 +2545,6 @@ int ocfs2_super_lock(struct ocfs2_super *osb, * everything is up to the caller :) */ status = ocfs2_should_refresh_lock_res(lockres); if (status < 0) { - ocfs2_cluster_unlock(osb, lockres, level); mlog_errno(status); goto bail; } @@ -2554,10 +2553,8 @@ int ocfs2_super_lock(struct ocfs2_super *osb, ocfs2_complete_lock_res_refresh(lockres, status); - if (status < 0) { - ocfs2_cluster_unlock(osb, lockres, level); + if (status < 0) mlog_errno(status); - } ocfs2_track_lock_refresh(lockres); } bail: diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index b7e74b5..f169da4 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -642,7 +642,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle, * cluster groups will be staying in cache for the duration of * this operation. */ - ac->ac_disable_chain_relink = 1; + ac->ac_allow_chain_relink = 0; /* Claim the first region */ status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits, @@ -1823,7 +1823,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, * Do this *after* figuring out how many bits we're taking out * of our target group. */ - if (!ac->ac_disable_chain_relink && + if (ac->ac_allow_chain_relink && (prev_group_bh) && (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) { status = ocfs2_relink_block_group(handle, alloc_inode, @@ -1928,6 +1928,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, victim = ocfs2_find_victim_chain(cl); ac->ac_chain = victim; + ac->ac_allow_chain_relink = 1; status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); @@ -1946,7 +1947,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, * searching each chain in order. Don't allow chain relinking * because we only calculate enough journal credits for one * relink per alloc. */ - ac->ac_disable_chain_relink = 1; + ac->ac_allow_chain_relink = 0; for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { if (i == victim) continue; diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index a36d0aa..b8afabf 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -49,7 +49,7 @@ struct ocfs2_alloc_context { /* these are used by the chain search */ u16 ac_chain; - int ac_disable_chain_relink; + int ac_allow_chain_relink; group_search_t *ac_group_search; u64 ac_last_group; diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 2e3ea30..0ba9ea1 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -7189,7 +7189,7 @@ int ocfs2_init_security_and_acl(struct inode *dir, struct buffer_head *dir_bh = NULL; ret = ocfs2_init_security_get(inode, dir, qstr, NULL); - if (ret) { + if (!ret) { mlog_errno(ret); goto leave; } @@ -863,9 +863,6 @@ pipe_rdwr_open(struct inode *inode, struct file *filp) { int ret = -ENOENT; - if (!(filp->f_mode & (FMODE_READ|FMODE_WRITE))) - return -EINVAL; - mutex_lock(&inode->i_mutex); if (inode->i_pipe) { @@ -9,7 +9,6 @@ #include <linux/mnt_namespace.h> #include <linux/mount.h> #include <linux/fs.h> -#include <linux/nsproxy.h> #include "internal.h" #include "pnode.h" @@ -221,7 +220,6 @@ static struct mount *get_source(struct mount *dest, int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, struct mount *source_mnt, struct list_head *tree_list) { - struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; struct mount *m, *child; int ret = 0; struct mount *prev_dest_mnt = dest_mnt; @@ -239,10 +237,6 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); - /* Notice when we are propagating across user namespaces */ - if (m->mnt_ns->user_ns != user_ns) - type |= CL_UNPRIVILEGED; - child = copy_tree(source, source->mnt.mnt_root, type); if (IS_ERR(child)) { ret = PTR_ERR(child); @@ -23,7 +23,6 @@ #define CL_MAKE_SHARED 0x08 #define CL_PRIVATE 0x10 #define CL_SHARED_TO_SLAVE 0x20 -#define CL_UNPRIVILEGED 0x40 static inline void set_mnt_shared(struct mount *mnt) { diff --git a/fs/proc/array.c b/fs/proc/array.c index be3c22f..6a91e6f 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -143,7 +143,6 @@ static const char * const task_state_array[] = { "x (dead)", /* 64 */ "K (wakekill)", /* 128 */ "W (waking)", /* 256 */ - "P (parked)", /* 512 */ }; static inline const char *get_task_state(struct task_struct *tsk) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index b796da2..76ddae8 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -412,7 +412,8 @@ static const struct dentry_operations proc_dentry_operations = struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, struct dentry *dentry) { - struct inode *inode; + struct inode *inode = NULL; + int error = -ENOENT; spin_lock(&proc_subdir_lock); for (de = de->subdir; de ; de = de->next) { @@ -421,16 +422,22 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { pde_get(de); spin_unlock(&proc_subdir_lock); + error = -ENOMEM; inode = proc_get_inode(dir->i_sb, de); - if (!inode) - return ERR_PTR(-ENOMEM); - d_set_d_op(dentry, &proc_dentry_operations); - d_add(dentry, inode); - return NULL; + goto out_unlock; } } spin_unlock(&proc_subdir_lock); - return ERR_PTR(-ENOENT); +out_unlock: + + if (inode) { + d_set_d_op(dentry, &proc_dentry_operations); + d_add(dentry, inode); + return NULL; + } + if (de) + pde_put(de); + return ERR_PTR(error); } struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 0ac1e1b..439ae688 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -445,10 +445,12 @@ static const struct file_operations proc_reg_file_ops_no_compat = { struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) { - struct inode *inode = new_inode_pseudo(sb); + struct inode * inode; - if (inode) { - inode->i_ino = de->low_ino; + inode = iget_locked(sb, de->low_ino); + if (!inode) + return NULL; + if (inode->i_state & I_NEW) { inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; PROC_I(inode)->pde = de; @@ -476,10 +478,11 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) inode->i_fop = de->proc_fops; } } + unlock_new_inode(inode); } else pde_put(de); return inode; -} +} int proc_fill_super(struct super_block *s) { @@ -496,5 +499,6 @@ int proc_fill_super(struct super_block *s) return 0; printk("proc_read_super: get root inode failed\n"); + pde_put(&proc_root); return -ENOMEM; } diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 66b51c0..b7a4719 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -118,7 +118,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) struct super_block *sb = inode->i_sb; struct proc_inode *ei = PROC_I(inode); struct task_struct *task; - struct path ns_path; + struct dentry *ns_dentry; void *error = ERR_PTR(-EACCES); task = get_proc_task(inode); @@ -128,14 +128,14 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto out_put_task; - ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns_ops); - if (IS_ERR(ns_path.dentry)) { - error = ERR_CAST(ns_path.dentry); + ns_dentry = proc_ns_get_dentry(sb, task, ei->ns_ops); + if (IS_ERR(ns_dentry)) { + error = ERR_CAST(ns_dentry); goto out_put_task; } - ns_path.mnt = mntget(nd->path.mnt); - nd_jump_link(nd, &ns_path); + dput(nd->path.dentry); + nd->path.dentry = ns_dentry; error = NULL; out_put_task: diff --git a/fs/proc/root.c b/fs/proc/root.c index 9c7fab1..c6e9fac 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -16,7 +16,6 @@ #include <linux/sched.h> #include <linux/module.h> #include <linux/bitops.h> -#include <linux/user_namespace.h> #include <linux/mount.h> #include <linux/pid_namespace.h> #include <linux/parser.h> @@ -109,9 +108,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, } else { ns = task_active_pid_ns(current); options = data; - - if (!current_user_ns()->may_mount_proc) - return ERR_PTR(-EPERM); } sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 86d1038..5ea2e77 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -96,27 +96,6 @@ static const char *get_reason_str(enum kmsg_dump_reason reason) } } -bool pstore_cannot_block_path(enum kmsg_dump_reason reason) -{ - /* - * In case of NMI path, pstore shouldn't be blocked - * regardless of reason. - */ - if (in_nmi()) - return true; - - switch (reason) { - /* In panic case, other cpus are stopped by smp_send_stop(). */ - case KMSG_DUMP_PANIC: - /* Emergency restart shouldn't be blocked by spin lock. */ - case KMSG_DUMP_EMERG: - return true; - default: - return false; - } -} -EXPORT_SYMBOL_GPL(pstore_cannot_block_path); - /* * callback from kmsg_dump. (s2,l2) has the most recently * written bytes, older bytes are in (s1,l1). Save as much @@ -135,12 +114,10 @@ static void pstore_dump(struct kmsg_dumper *dumper, why = get_reason_str(reason); - if (pstore_cannot_block_path(reason)) { - is_locked = spin_trylock_irqsave(&psinfo->buf_lock, flags); - if (!is_locked) { - pr_err("pstore dump routine blocked in %s path, may corrupt error record\n" - , in_nmi() ? "NMI" : why); - } + if (in_nmi()) { + is_locked = spin_trylock(&psinfo->buf_lock); + if (!is_locked) + pr_err("pstore dump routine blocked in NMI, may corrupt error record\n"); } else spin_lock_irqsave(&psinfo->buf_lock, flags); oopscount++; @@ -166,9 +143,9 @@ static void pstore_dump(struct kmsg_dumper *dumper, total += hsize + len; part++; } - if (pstore_cannot_block_path(reason)) { + if (in_nmi()) { if (is_locked) - spin_unlock_irqrestore(&psinfo->buf_lock, flags); + spin_unlock(&psinfo->buf_lock); } else spin_unlock_irqrestore(&psinfo->buf_lock, flags); } diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 4cce1d9..c196369 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -187,8 +187,8 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset, if (dbuf->count == ARRAY_SIZE(dbuf->dentries)) return -ENOSPC; - if (name[0] == '.' && (namelen < 2 || - (namelen == 2 && name[1] == '.'))) + if (name[0] == '.' && (name[1] == '\0' || + (name[1] == '.' && name[2] == '\0'))) return 0; dentry = lookup_one_len(name, dbuf->xadir, namelen); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index d924812..2fbdff6 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -1012,7 +1012,6 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) enum kobj_ns_type type; const void *ns; ino_t ino; - loff_t off; type = sysfs_ns_type(parent_sd); ns = sysfs_info(dentry->d_sb)->ns[type]; @@ -1021,8 +1020,6 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) ino = parent_sd->s_ino; if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) filp->f_pos++; - else - return 0; } if (filp->f_pos == 1) { if (parent_sd->s_parent) @@ -1031,11 +1028,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) ino = parent_sd->s_ino; if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0) filp->f_pos++; - else - return 0; } mutex_lock(&sysfs_mutex); - off = filp->f_pos; for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos); pos; pos = sysfs_dir_next_pos(ns, parent_sd, filp->f_pos, pos)) { @@ -1047,43 +1041,27 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) len = strlen(name); ino = pos->s_ino; type = dt_type(pos); - off = filp->f_pos = pos->s_hash; + filp->f_pos = pos->s_hash; filp->private_data = sysfs_get(pos); mutex_unlock(&sysfs_mutex); - ret = filldir(dirent, name, len, off, ino, type); + ret = filldir(dirent, name, len, filp->f_pos, ino, type); mutex_lock(&sysfs_mutex); if (ret < 0) break; } mutex_unlock(&sysfs_mutex); - - /* don't reference last entry if its refcount is dropped */ - if (!pos) { + if ((filp->f_pos > 1) && !pos) { /* EOF */ + filp->f_pos = INT_MAX; filp->private_data = NULL; - - /* EOF and not changed as 0 or 1 in read/write path */ - if (off == filp->f_pos && off > 1) - filp->f_pos = INT_MAX; } return 0; } -static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence) -{ - struct inode *inode = file->f_path.dentry->d_inode; - loff_t ret; - - mutex_lock(&inode->i_mutex); - ret = generic_file_llseek(file, offset, whence); - mutex_unlock(&inode->i_mutex); - - return ret; -} const struct file_operations sysfs_dir_operations = { .read = generic_read_dir, .readdir = sysfs_readdir, .release = sysfs_dir_release, - .llseek = sysfs_dir_llseek, + .llseek = generic_file_llseek, }; diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index fb328d1..db940a9 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -19,7 +19,6 @@ #include <linux/module.h> #include <linux/magic.h> #include <linux/slab.h> -#include <linux/user_namespace.h> #include "sysfs.h" @@ -112,9 +111,6 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, struct super_block *sb; int error; - if (!(flags & MS_KERNMOUNT) && !current_user_ns()->may_mount_sysfs) - return ERR_PTR(-EPERM); - info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return ERR_PTR(-ENOMEM); diff --git a/fs/timerfd.c b/fs/timerfd.c index 522aeb8..d03822b 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -311,7 +311,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, if (hrtimer_try_to_cancel(&ctx->tmr) >= 0) break; spin_unlock_irq(&ctx->wqh.lock); - hrtimer_wait_for_timer(&ctx->tmr); + cpu_relax(); } /* diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index ba32da3..769701c 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -126,14 +126,13 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum) else if (inum > o->inum) p = p->rb_right; else { - if (o->del) { + if (o->dnext) { spin_unlock(&c->orphan_lock); dbg_gen("deleted twice ino %lu", (unsigned long)inum); return; } - if (o->cmt) { - o->del = 1; + if (o->cnext) { o->dnext = c->orph_dnext; c->orph_dnext = o; spin_unlock(&c->orphan_lock); @@ -173,9 +172,7 @@ int ubifs_orphan_start_commit(struct ubifs_info *c) last = &c->orph_cnext; list_for_each_entry(orphan, &c->orph_new, new_list) { ubifs_assert(orphan->new); - ubifs_assert(!orphan->cmt); orphan->new = 0; - orphan->cmt = 1; *last = orphan; last = &orphan->cnext; } @@ -302,9 +299,7 @@ static int write_orph_node(struct ubifs_info *c, int atomic) cnext = c->orph_cnext; for (i = 0; i < cnt; i++) { orphan = cnext; - ubifs_assert(orphan->cmt); orph->inos[i] = cpu_to_le64(orphan->inum); - orphan->cmt = 0; cnext = orphan->cnext; orphan->cnext = NULL; } @@ -383,7 +378,6 @@ static int consolidate(struct ubifs_info *c) list_for_each_entry(orphan, &c->orph_list, list) { if (orphan->new) continue; - orphan->cmt = 1; *last = orphan; last = &orphan->cnext; cnt += 1; @@ -448,7 +442,6 @@ static void erase_deleted(struct ubifs_info *c) orphan = dnext; dnext = orphan->dnext; ubifs_assert(!orphan->new); - ubifs_assert(orphan->del); rb_erase(&orphan->rb, &c->orph_tree); list_del(&orphan->list); c->tot_orphans -= 1; @@ -538,7 +531,6 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum) rb_link_node(&orphan->rb, parent, p); rb_insert_color(&orphan->rb, &c->orph_tree); list_add_tail(&orphan->list, &c->orph_list); - orphan->del = 1; orphan->dnext = c->orph_dnext; c->orph_dnext = orphan; dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum, diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 97f6875..ddc0f6a 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1568,12 +1568,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) c->remounting_rw = 1; c->ro_mount = 0; - if (c->space_fixup) { - err = ubifs_fixup_free_space(c); - if (err) - return err; - } - err = check_free_space(c); if (err) goto out; @@ -1690,6 +1684,12 @@ static int ubifs_remount_rw(struct ubifs_info *c) err = dbg_check_space_info(c); } + if (c->space_fixup) { + err = ubifs_fixup_free_space(c); + if (err) + goto out; + } + mutex_unlock(&c->umount_mutex); return err; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index b2babce..d133c27 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -904,8 +904,6 @@ struct ubifs_budget_req { * @dnext: next orphan to delete * @inum: inode number * @new: %1 => added since the last commit, otherwise %0 - * @cmt: %1 => commit pending, otherwise %0 - * @del: %1 => delete pending, otherwise %0 */ struct ubifs_orphan { struct rb_node rb; @@ -914,9 +912,7 @@ struct ubifs_orphan { struct ubifs_orphan *cnext; struct ubifs_orphan *dnext; ino_t inum; - unsigned new:1; - unsigned cmt:1; - unsigned del:1; + int new; }; /** diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 8d1c9d4..5f02722 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -82,7 +82,7 @@ struct udf_virtual_data { struct udf_bitmap { __u32 s_extLength; __u32 s_extPosition; - int s_nr_groups; + __u16 s_nr_groups; struct buffer_head **s_block_bitmap; }; diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 572a858..cdb2d33 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -147,10 +147,7 @@ xfs_bmap_local_to_extents( xfs_fsblock_t *firstblock, /* first block allocated in xaction */ xfs_extlen_t total, /* total blocks needed by transaction */ int *logflagsp, /* inode logging flags */ - int whichfork, /* data or attr fork */ - void (*init_fn)(struct xfs_buf *bp, - struct xfs_inode *ip, - struct xfs_ifork *ifp)); + int whichfork); /* data or attr fork */ /* * Search the extents list for the inode, for the extent containing bno. @@ -360,42 +357,7 @@ xfs_bmap_add_attrfork_extents( } /* - * Block initialisation functions for local to extent format conversion. - * As these get more complex, they will be moved to the relevant files, - * but for now they are too simple to worry about. - */ -STATIC void -xfs_bmap_local_to_extents_init_fn( - struct xfs_buf *bp, - struct xfs_inode *ip, - struct xfs_ifork *ifp) -{ - bp->b_ops = &xfs_bmbt_buf_ops; - memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); -} - -STATIC void -xfs_symlink_local_to_remote( - struct xfs_buf *bp, - struct xfs_inode *ip, - struct xfs_ifork *ifp) -{ - /* remote symlink blocks are not verifiable until CRCs come along */ - bp->b_ops = NULL; - memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); -} - -/* - * Called from xfs_bmap_add_attrfork to handle local format files. Each - * different data fork content type needs a different callout to do the - * conversion. Some are basic and only require special block initialisation - * callouts for the data formating, others (directories) are so specialised they - * handle everything themselves. - * - * XXX (dgc): investigate whether directory conversion can use the generic - * formatting callout. It should be possible - it's just a very complex - * formatter. it would also require passing the transaction through to the init - * function. + * Called from xfs_bmap_add_attrfork to handle local format files. */ STATIC int /* error */ xfs_bmap_add_attrfork_local( @@ -406,29 +368,25 @@ xfs_bmap_add_attrfork_local( int *flags) /* inode logging flags */ { xfs_da_args_t dargs; /* args for dir/attr code */ + int error; /* error return value */ + xfs_mount_t *mp; /* mount structure pointer */ if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) return 0; - if (S_ISDIR(ip->i_d.di_mode)) { + mp = ip->i_mount; memset(&dargs, 0, sizeof(dargs)); dargs.dp = ip; dargs.firstblock = firstblock; dargs.flist = flist; - dargs.total = ip->i_mount->m_dirblkfsbs; + dargs.total = mp->m_dirblkfsbs; dargs.whichfork = XFS_DATA_FORK; dargs.trans = tp; - return xfs_dir2_sf_to_block(&dargs); - } - - if (S_ISLNK(ip->i_d.di_mode)) - return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, - flags, XFS_DATA_FORK, - xfs_symlink_local_to_remote); - - return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, - XFS_DATA_FORK, - xfs_bmap_local_to_extents_init_fn); + error = xfs_dir2_sf_to_block(&dargs); + } else + error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, + XFS_DATA_FORK); + return error; } /* @@ -3263,10 +3221,7 @@ xfs_bmap_local_to_extents( xfs_fsblock_t *firstblock, /* first block allocated in xaction */ xfs_extlen_t total, /* total blocks needed by transaction */ int *logflagsp, /* inode logging flags */ - int whichfork, - void (*init_fn)(struct xfs_buf *bp, - struct xfs_inode *ip, - struct xfs_ifork *ifp)) + int whichfork) /* data or attr fork */ { int error; /* error return value */ int flags; /* logging flags returned */ @@ -3286,12 +3241,12 @@ xfs_bmap_local_to_extents( xfs_buf_t *bp; /* buffer for extent block */ xfs_bmbt_rec_host_t *ep;/* extent record pointer */ - ASSERT((ifp->if_flags & - (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE); memset(&args, 0, sizeof(args)); args.tp = tp; args.mp = ip->i_mount; args.firstblock = *firstblock; + ASSERT((ifp->if_flags & + (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE); /* * Allocate a block. We know we need only one, since the * file currently fits in an inode. @@ -3307,20 +3262,17 @@ xfs_bmap_local_to_extents( args.mod = args.minleft = args.alignment = args.wasdel = args.isfl = args.minalignslop = 0; args.minlen = args.maxlen = args.prod = 1; - error = xfs_alloc_vextent(&args); - if (error) + if ((error = xfs_alloc_vextent(&args))) goto done; - - /* Can't fail, the space was reserved. */ + /* + * Can't fail, the space was reserved. + */ ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(args.len == 1); *firstblock = args.fsbno; bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); - - /* initialise the block and copy the data */ - init_fn(bp, ip, ifp); - - /* account for the change in fork size and log everything */ + bp->b_ops = &xfs_bmbt_buf_ops; + memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); xfs_bmap_forkoff_reset(args.mp, ip, whichfork); xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); @@ -4967,32 +4919,8 @@ xfs_bmapi_write( XFS_STATS_INC(xs_blk_mapw); if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { - /* - * XXX (dgc): This assumes we are only called for inodes that - * contain content neutral data in local format. Anything that - * contains caller-specific data in local format that needs - * transformation to move to a block format needs to do the - * conversion to extent format itself. - * - * Directory data forks and attribute forks handle this - * themselves, but with the addition of metadata verifiers every - * data fork in local format now contains caller specific data - * and as such conversion through this function is likely to be - * broken. - * - * The only likely user of this branch is for remote symlinks, - * but we cannot overwrite the data fork contents of the symlink - * (EEXIST occurs higher up the stack) and so it will never go - * from local format to extent format here. Hence I don't think - * this branch is ever executed intentionally and we should - * consider removing it and asserting that xfs_bmapi_write() - * cannot be called directly on local format forks. i.e. callers - * are completely responsible for local to extent format - * conversion, not xfs_bmapi_write(). - */ error = xfs_bmap_local_to_extents(tp, ip, firstblock, total, - &bma.logflags, whichfork, - xfs_bmap_local_to_extents_init_fn); + &bma.logflags, whichfork); if (error) goto error0; } |