diff options
Diffstat (limited to 'fs')
102 files changed, 1814 insertions, 1314 deletions
diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 210acaf..3ff8bdd 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -432,7 +432,6 @@ vfs_rejected_lock: list_del_init(&fl->fl_u.afs.link); if (list_empty(&vnode->granted_locks)) afs_defer_unlock(vnode, key); - spin_unlock(&vnode->lock); goto abort_attempt; } @@ -485,6 +485,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) { assert_spin_locked(&ctx->ctx_lock); + if (req->ki_eventfd != NULL) + eventfd_ctx_put(req->ki_eventfd); if (req->ki_dtor) req->ki_dtor(req); if (req->ki_iovec != &req->ki_inline_vec) @@ -509,8 +511,6 @@ static void aio_fput_routine(struct work_struct *data) /* Complete the fput(s) */ if (req->ki_filp != NULL) __fput(req->ki_filp); - if (req->ki_eventfd != NULL) - __fput(req->ki_eventfd); /* Link the iocb into the context's free list */ spin_lock_irq(&ctx->ctx_lock); @@ -528,8 +528,6 @@ static void aio_fput_routine(struct work_struct *data) */ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) { - int schedule_putreq = 0; - dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n", req, atomic_long_read(&req->ki_filp->f_count)); @@ -549,24 +547,16 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) * we would not be holding the last reference to the file*, so * this function will be executed w/out any aio kthread wakeup. */ - if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) - schedule_putreq++; - else - req->ki_filp = NULL; - if (req->ki_eventfd != NULL) { - if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count))) - schedule_putreq++; - else - req->ki_eventfd = NULL; - } - if (unlikely(schedule_putreq)) { + if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) { get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); spin_unlock(&fput_lock); queue_work(aio_wq, &fput_work); - } else + } else { + req->ki_filp = NULL; really_put_req(ctx, req); + } return 1; } @@ -1622,7 +1612,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, * an eventfd() fd, and will be signaled for each completed * event using the eventfd_signal() function. */ - req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); + req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd); if (IS_ERR(req->ki_eventfd)) { ret = PTR_ERR(req->ki_eventfd); req->ki_eventfd = NULL; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 9fa212b0..b7c1603 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1522,11 +1522,11 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, info->thread = NULL; psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL); - fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); - if (psinfo == NULL) return 0; + fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); + /* * Figure out how many notes we're going to need for each thread. */ @@ -1929,7 +1929,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un elf = kmalloc(sizeof(*elf), GFP_KERNEL); if (!elf) goto out; - + /* + * The number of segs are recored into ELF header as 16bit value. + * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. + */ segs = current->mm->map_count; #ifdef ELF_CORE_EXTRA_PHDRS segs += ELF_CORE_EXTRA_PHDRS; diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 31c46a2..49a34e7 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -1,7 +1,7 @@ /* * bio-integrity.c - bio data integrity extensions * - * Copyright (C) 2007, 2008 Oracle Corporation + * Copyright (C) 2007, 2008, 2009 Oracle Corporation * Written by: Martin K. Petersen <martin.petersen@oracle.com> * * This program is free software; you can redistribute it and/or @@ -25,63 +25,121 @@ #include <linux/bio.h> #include <linux/workqueue.h> -static struct kmem_cache *bio_integrity_slab __read_mostly; -static mempool_t *bio_integrity_pool; -static struct bio_set *integrity_bio_set; +struct integrity_slab { + struct kmem_cache *slab; + unsigned short nr_vecs; + char name[8]; +}; + +#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) } +struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = { + IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES), +}; +#undef IS + static struct workqueue_struct *kintegrityd_wq; +static inline unsigned int vecs_to_idx(unsigned int nr) +{ + switch (nr) { + case 1: + return 0; + case 2 ... 4: + return 1; + case 5 ... 16: + return 2; + case 17 ... 64: + return 3; + case 65 ... 128: + return 4; + case 129 ... BIO_MAX_PAGES: + return 5; + default: + BUG(); + } +} + +static inline int use_bip_pool(unsigned int idx) +{ + if (idx == BIOVEC_NR_POOLS) + return 1; + + return 0; +} + /** - * bio_integrity_alloc - Allocate integrity payload and attach it to bio + * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio * @bio: bio to attach integrity metadata to * @gfp_mask: Memory allocation mask * @nr_vecs: Number of integrity metadata scatter-gather elements + * @bs: bio_set to allocate from * * Description: This function prepares a bio for attaching integrity * metadata. nr_vecs specifies the maximum number of pages containing * integrity metadata that can be attached. */ -struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, - gfp_t gfp_mask, - unsigned int nr_vecs) +struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, + gfp_t gfp_mask, + unsigned int nr_vecs, + struct bio_set *bs) { struct bio_integrity_payload *bip; - struct bio_vec *iv; - unsigned long idx; + unsigned int idx = vecs_to_idx(nr_vecs); BUG_ON(bio == NULL); + bip = NULL; - bip = mempool_alloc(bio_integrity_pool, gfp_mask); - if (unlikely(bip == NULL)) { - printk(KERN_ERR "%s: could not alloc bip\n", __func__); - return NULL; - } + /* Lower order allocations come straight from slab */ + if (!use_bip_pool(idx)) + bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask); - memset(bip, 0, sizeof(*bip)); + /* Use mempool if lower order alloc failed or max vecs were requested */ + if (bip == NULL) { + bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); - iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, integrity_bio_set); - if (unlikely(iv == NULL)) { - printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__); - mempool_free(bip, bio_integrity_pool); - return NULL; + if (unlikely(bip == NULL)) { + printk(KERN_ERR "%s: could not alloc bip\n", __func__); + return NULL; + } } - bip->bip_pool = idx; - bip->bip_vec = iv; + memset(bip, 0, sizeof(*bip)); + + bip->bip_slab = idx; bip->bip_bio = bio; bio->bi_integrity = bip; return bip; } +EXPORT_SYMBOL(bio_integrity_alloc_bioset); + +/** + * bio_integrity_alloc - Allocate integrity payload and attach it to bio + * @bio: bio to attach integrity metadata to + * @gfp_mask: Memory allocation mask + * @nr_vecs: Number of integrity metadata scatter-gather elements + * + * Description: This function prepares a bio for attaching integrity + * metadata. nr_vecs specifies the maximum number of pages containing + * integrity metadata that can be attached. + */ +struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, + gfp_t gfp_mask, + unsigned int nr_vecs) +{ + return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set); +} EXPORT_SYMBOL(bio_integrity_alloc); /** * bio_integrity_free - Free bio integrity payload * @bio: bio containing bip to be freed + * @bs: bio_set this bio was allocated from * * Description: Used to free the integrity portion of a bio. Usually * called from bio_free(). */ -void bio_integrity_free(struct bio *bio) +void bio_integrity_free(struct bio *bio, struct bio_set *bs) { struct bio_integrity_payload *bip = bio->bi_integrity; @@ -92,8 +150,10 @@ void bio_integrity_free(struct bio *bio) && bip->bip_buf != NULL) kfree(bip->bip_buf); - bvec_free_bs(integrity_bio_set, bip->bip_vec, bip->bip_pool); - mempool_free(bip, bio_integrity_pool); + if (use_bip_pool(bip->bip_slab)) + mempool_free(bip, bs->bio_integrity_pool); + else + kmem_cache_free(bip_slab[bip->bip_slab].slab, bip); bio->bi_integrity = NULL; } @@ -114,7 +174,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_vec *iv; - if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) { + if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) { printk(KERN_ERR "%s: bip_vec full\n", __func__); return 0; } @@ -647,8 +707,8 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors) bp->iv1 = bip->bip_vec[0]; bp->iv2 = bip->bip_vec[0]; - bp->bip1.bip_vec = &bp->iv1; - bp->bip2.bip_vec = &bp->iv2; + bp->bip1.bip_vec[0] = bp->iv1; + bp->bip2.bip_vec[0] = bp->iv2; bp->iv1.bv_len = sectors * bi->tuple_size; bp->iv2.bv_offset += sectors * bi->tuple_size; @@ -667,17 +727,19 @@ EXPORT_SYMBOL(bio_integrity_split); * @bio: New bio * @bio_src: Original bio * @gfp_mask: Memory allocation mask + * @bs: bio_set to allocate bip from * * Description: Called to allocate a bip when cloning a bio */ -int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) +int bio_integrity_clone(struct bio *bio, struct bio *bio_src, + gfp_t gfp_mask, struct bio_set *bs) { struct bio_integrity_payload *bip_src = bio_src->bi_integrity; struct bio_integrity_payload *bip; BUG_ON(bip_src == NULL); - bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt); + bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs); if (bip == NULL) return -EIO; @@ -693,25 +755,43 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) } EXPORT_SYMBOL(bio_integrity_clone); -static int __init bio_integrity_init(void) +int bioset_integrity_create(struct bio_set *bs, int pool_size) { - kintegrityd_wq = create_workqueue("kintegrityd"); + unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES); + + bs->bio_integrity_pool = + mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab); + if (!bs->bio_integrity_pool) + return -1; + + return 0; +} +EXPORT_SYMBOL(bioset_integrity_create); + +void bioset_integrity_free(struct bio_set *bs) +{ + if (bs->bio_integrity_pool) + mempool_destroy(bs->bio_integrity_pool); +} +EXPORT_SYMBOL(bioset_integrity_free); + +void __init bio_integrity_init(void) +{ + unsigned int i; + + kintegrityd_wq = create_workqueue("kintegrityd"); if (!kintegrityd_wq) panic("Failed to create kintegrityd\n"); - bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, - SLAB_HWCACHE_ALIGN|SLAB_PANIC); + for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) { + unsigned int size; - bio_integrity_pool = mempool_create_slab_pool(BIO_POOL_SIZE, - bio_integrity_slab); - if (!bio_integrity_pool) - panic("bio_integrity: can't allocate bip pool\n"); + size = sizeof(struct bio_integrity_payload) + + bip_slab[i].nr_vecs * sizeof(struct bio_vec); - integrity_bio_set = bioset_create(BIO_POOL_SIZE, 0); - if (!integrity_bio_set) - panic("bio_integrity: can't allocate bio_set\n"); - - return 0; + bip_slab[i].slab = + kmem_cache_create(bip_slab[i].name, size, 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + } } -subsys_initcall(bio_integrity_init); @@ -238,7 +238,7 @@ void bio_free(struct bio *bio, struct bio_set *bs) bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); if (bio_integrity(bio)) - bio_integrity_free(bio); + bio_integrity_free(bio, bs); /* * If we have front padding, adjust the bio pointer before freeing @@ -341,7 +341,7 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) static void bio_kmalloc_destructor(struct bio *bio) { if (bio_integrity(bio)) - bio_integrity_free(bio); + bio_integrity_free(bio, fs_bio_set); kfree(bio); } @@ -472,7 +472,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) if (bio_integrity(bio)) { int ret; - ret = bio_integrity_clone(b, bio, gfp_mask); + ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set); if (ret < 0) { bio_put(b); @@ -1539,6 +1539,7 @@ void bioset_free(struct bio_set *bs) if (bs->bio_pool) mempool_destroy(bs->bio_pool); + bioset_integrity_free(bs); biovec_free_pools(bs); bio_put_slab(bs); @@ -1579,6 +1580,9 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) if (!bs->bio_pool) goto bad; + if (bioset_integrity_create(bs, pool_size)) + goto bad; + if (!biovec_create_pools(bs, pool_size)) return bs; @@ -1616,6 +1620,7 @@ static int __init init_bio(void) if (!bio_slabs) panic("bio: can't allocate bios\n"); + bio_integrity_init(); biovec_init_slabs(); fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 6039725..f128427 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -29,51 +29,28 @@ #ifdef CONFIG_FS_POSIX_ACL -static void btrfs_update_cached_acl(struct inode *inode, - struct posix_acl **p_acl, - struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED) - posix_acl_release(*p_acl); - *p_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) { int size; const char *name; char *value = NULL; - struct posix_acl *acl = NULL, **p_acl; + struct posix_acl *acl; + + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; switch (type) { case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; - p_acl = &BTRFS_I(inode)->i_acl; break; case ACL_TYPE_DEFAULT: name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &BTRFS_I(inode)->i_default_acl; break; default: - return ERR_PTR(-EINVAL); + BUG(); } - /* Handle the cached NULL acl case without locking */ - acl = ACCESS_ONCE(*p_acl); - if (!acl) - return acl; - - spin_lock(&inode->i_lock); - acl = *p_acl; - if (acl != BTRFS_ACL_NOT_CACHED) - acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); - - if (acl != BTRFS_ACL_NOT_CACHED) - return acl; - size = __btrfs_getxattr(inode, name, "", 0); if (size > 0) { value = kzalloc(size, GFP_NOFS); @@ -82,13 +59,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) size = __btrfs_getxattr(inode, name, value, size); if (size > 0) { acl = posix_acl_from_xattr(value, size); - btrfs_update_cached_acl(inode, p_acl, acl); + set_cached_acl(inode, type, acl); } kfree(value); } else if (size == -ENOENT || size == -ENODATA || size == 0) { /* FIXME, who returns -ENOENT? I think nobody */ acl = NULL; - btrfs_update_cached_acl(inode, p_acl, acl); + set_cached_acl(inode, type, acl); } else { acl = ERR_PTR(-EIO); } @@ -121,7 +98,6 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { int ret, size = 0; const char *name; - struct posix_acl **p_acl; char *value = NULL; mode_t mode; @@ -141,13 +117,11 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) ret = 0; inode->i_mode = mode; name = POSIX_ACL_XATTR_ACCESS; - p_acl = &BTRFS_I(inode)->i_acl; break; case ACL_TYPE_DEFAULT: if (!S_ISDIR(inode->i_mode)) return acl ? -EINVAL : 0; name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &BTRFS_I(inode)->i_default_acl; break; default: return -EINVAL; @@ -172,7 +146,7 @@ out: kfree(value); if (!ret) - btrfs_update_cached_acl(inode, p_acl, acl); + set_cached_acl(inode, type, acl); return ret; } diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 7f88628..6e4f6c5 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -299,8 +299,8 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) "btrfs-%s-%d", workers->name, workers->num_workers + i); if (IS_ERR(worker->task)) { - kfree(worker); ret = PTR_ERR(worker->task); + kfree(worker); goto fail; } diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index acb4f35..ea1ea0a 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -53,10 +53,6 @@ struct btrfs_inode { /* used to order data wrt metadata */ struct btrfs_ordered_inode_tree ordered_tree; - /* standard acl pointers */ - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; - /* for keeping track of orphaned inodes */ struct list_head i_orphan; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 03441a9..98a8738 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -41,8 +41,6 @@ struct btrfs_ordered_sum; #define BTRFS_MAGIC "_BHRfS_M" -#define BTRFS_ACL_NOT_CACHED ((void *)-1) - #define BTRFS_MAX_LEVEL 8 #define BTRFS_COMPAT_EXTENT_TREE_V0 @@ -2076,8 +2074,7 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); -int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root); +int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref); int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index edc7d20..a5aca39 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -990,15 +990,13 @@ static inline int extent_ref_type(u64 parent, u64 owner) return type; } -static int find_next_key(struct btrfs_path *path, struct btrfs_key *key) +static int find_next_key(struct btrfs_path *path, int level, + struct btrfs_key *key) { - int level; - BUG_ON(!path->keep_locks); - for (level = 0; level < BTRFS_MAX_LEVEL; level++) { + for (; level < BTRFS_MAX_LEVEL; level++) { if (!path->nodes[level]) break; - btrfs_assert_tree_locked(path->nodes[level]); if (path->slots[level] + 1 >= btrfs_header_nritems(path->nodes[level])) continue; @@ -1158,7 +1156,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, * For simplicity, we just do not add new inline back * ref if there is any kind of item for this block */ - if (find_next_key(path, &key) == 0 && key.objectid == bytenr && + if (find_next_key(path, 0, &key) == 0 && + key.objectid == bytenr && key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { err = -EAGAIN; goto out; @@ -2697,7 +2696,7 @@ again: printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" ", %llu bytes_used, %llu bytes_reserved, " - "%llu bytes_pinned, %llu bytes_readonly, %llu may use" + "%llu bytes_pinned, %llu bytes_readonly, %llu may use " "%llu total\n", (unsigned long long)bytes, (unsigned long long)data_sinfo->bytes_delalloc, (unsigned long long)data_sinfo->bytes_used, @@ -4128,6 +4127,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return buf; } +#if 0 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *leaf) { @@ -4171,8 +4171,6 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } -#if 0 - static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_leaf_ref *ref) @@ -4553,262 +4551,471 @@ out: } #endif +struct walk_control { + u64 refs[BTRFS_MAX_LEVEL]; + u64 flags[BTRFS_MAX_LEVEL]; + struct btrfs_key update_progress; + int stage; + int level; + int shared_level; + int update_ref; + int keep_locks; +}; + +#define DROP_REFERENCE 1 +#define UPDATE_BACKREF 2 + /* - * helper function for drop_subtree, this function is similar to - * walk_down_tree. The main difference is that it checks reference - * counts while tree blocks are locked. + * hepler to process tree block while walking down the tree. + * + * when wc->stage == DROP_REFERENCE, this function checks + * reference count of the block. if the block is shared and + * we need update back refs for the subtree rooted at the + * block, this function changes wc->stage to UPDATE_BACKREF + * + * when wc->stage == UPDATE_BACKREF, this function updates + * back refs for pointers in the block. + * + * NOTE: return value 1 means we should stop walking down. */ -static noinline int walk_down_tree(struct btrfs_trans_handle *trans, +static noinline int walk_down_proc(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, int *level) + struct btrfs_path *path, + struct walk_control *wc) { - struct extent_buffer *next; - struct extent_buffer *cur; - struct extent_buffer *parent; - u64 bytenr; - u64 ptr_gen; - u64 refs; - u64 flags; - u32 blocksize; + int level = wc->level; + struct extent_buffer *eb = path->nodes[level]; + struct btrfs_key key; + u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; int ret; - cur = path->nodes[*level]; - ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len, - &refs, &flags); - BUG_ON(ret); - if (refs > 1) - goto out; + if (wc->stage == UPDATE_BACKREF && + btrfs_header_owner(eb) != root->root_key.objectid) + return 1; - BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); + /* + * when reference count of tree block is 1, it won't increase + * again. once full backref flag is set, we never clear it. + */ + if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || + (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { + BUG_ON(!path->locks[level]); + ret = btrfs_lookup_extent_info(trans, root, + eb->start, eb->len, + &wc->refs[level], + &wc->flags[level]); + BUG_ON(ret); + BUG_ON(wc->refs[level] == 0); + } - while (*level >= 0) { - cur = path->nodes[*level]; - if (*level == 0) { - ret = btrfs_drop_leaf_ref(trans, root, cur); - BUG_ON(ret); - clean_tree_block(trans, root, cur); - break; - } - if (path->slots[*level] >= btrfs_header_nritems(cur)) { - clean_tree_block(trans, root, cur); - break; + if (wc->stage == DROP_REFERENCE && + wc->update_ref && wc->refs[level] > 1) { + BUG_ON(eb == root->node); + BUG_ON(path->slots[level] > 0); + if (level == 0) + btrfs_item_key_to_cpu(eb, &key, path->slots[level]); + else + btrfs_node_key_to_cpu(eb, &key, path->slots[level]); + if (btrfs_header_owner(eb) == root->root_key.objectid && + btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { + wc->stage = UPDATE_BACKREF; + wc->shared_level = level; } + } - bytenr = btrfs_node_blockptr(cur, path->slots[*level]); - blocksize = btrfs_level_size(root, *level - 1); - ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); + if (wc->stage == DROP_REFERENCE) { + if (wc->refs[level] > 1) + return 1; - next = read_tree_block(root, bytenr, blocksize, ptr_gen); - btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); + if (path->locks[level] && !wc->keep_locks) { + btrfs_tree_unlock(eb); + path->locks[level] = 0; + } + return 0; + } - ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, - &refs, &flags); + /* wc->stage == UPDATE_BACKREF */ + if (!(wc->flags[level] & flag)) { + BUG_ON(!path->locks[level]); + ret = btrfs_inc_ref(trans, root, eb, 1); BUG_ON(ret); - if (refs > 1) { - parent = path->nodes[*level]; - ret = btrfs_free_extent(trans, root, bytenr, - blocksize, parent->start, - btrfs_header_owner(parent), - *level - 1, 0); + ret = btrfs_dec_ref(trans, root, eb, 0); + BUG_ON(ret); + ret = btrfs_set_disk_extent_flags(trans, root, eb->start, + eb->len, flag, 0); + BUG_ON(ret); + wc->flags[level] |= flag; + } + + /* + * the block is shared by multiple trees, so it's not good to + * keep the tree lock + */ + if (path->locks[level] && level > 0) { + btrfs_tree_unlock(eb); + path->locks[level] = 0; + } + return 0; +} + +/* + * hepler to process tree block while walking up the tree. + * + * when wc->stage == DROP_REFERENCE, this function drops + * reference count on the block. + * + * when wc->stage == UPDATE_BACKREF, this function changes + * wc->stage back to DROP_REFERENCE if we changed wc->stage + * to UPDATE_BACKREF previously while processing the block. + * + * NOTE: return value 1 means we should stop walking up. + */ +static noinline int walk_up_proc(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct walk_control *wc) +{ + int ret = 0; + int level = wc->level; + struct extent_buffer *eb = path->nodes[level]; + u64 parent = 0; + + if (wc->stage == UPDATE_BACKREF) { + BUG_ON(wc->shared_level < level); + if (level < wc->shared_level) + goto out; + + BUG_ON(wc->refs[level] <= 1); + ret = find_next_key(path, level + 1, &wc->update_progress); + if (ret > 0) + wc->update_ref = 0; + + wc->stage = DROP_REFERENCE; + wc->shared_level = -1; + path->slots[level] = 0; + + /* + * check reference count again if the block isn't locked. + * we should start walking down the tree again if reference + * count is one. + */ + if (!path->locks[level]) { + BUG_ON(level == 0); + btrfs_tree_lock(eb); + btrfs_set_lock_blocking(eb); + path->locks[level] = 1; + + ret = btrfs_lookup_extent_info(trans, root, + eb->start, eb->len, + &wc->refs[level], + &wc->flags[level]); BUG_ON(ret); - path->slots[*level]++; - btrfs_tree_unlock(next); - free_extent_buffer(next); - continue; + BUG_ON(wc->refs[level] == 0); + if (wc->refs[level] == 1) { + btrfs_tree_unlock(eb); + path->locks[level] = 0; + return 1; + } + } else { + BUG_ON(level != 0); } + } - BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); + /* wc->stage == DROP_REFERENCE */ + BUG_ON(wc->refs[level] > 1 && !path->locks[level]); - *level = btrfs_header_level(next); - path->nodes[*level] = next; - path->slots[*level] = 0; - path->locks[*level] = 1; - cond_resched(); + if (wc->refs[level] == 1) { + if (level == 0) { + if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) + ret = btrfs_dec_ref(trans, root, eb, 1); + else + ret = btrfs_dec_ref(trans, root, eb, 0); + BUG_ON(ret); + } + /* make block locked assertion in clean_tree_block happy */ + if (!path->locks[level] && + btrfs_header_generation(eb) == trans->transid) { + btrfs_tree_lock(eb); + btrfs_set_lock_blocking(eb); + path->locks[level] = 1; + } + clean_tree_block(trans, root, eb); + } + + if (eb == root->node) { + if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) + parent = eb->start; + else + BUG_ON(root->root_key.objectid != + btrfs_header_owner(eb)); + } else { + if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) + parent = path->nodes[level + 1]->start; + else + BUG_ON(root->root_key.objectid != + btrfs_header_owner(path->nodes[level + 1])); } -out: - if (path->nodes[*level] == root->node) - parent = path->nodes[*level]; - else - parent = path->nodes[*level + 1]; - bytenr = path->nodes[*level]->start; - blocksize = path->nodes[*level]->len; - ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, - btrfs_header_owner(parent), *level, 0); + ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, + root->root_key.objectid, level, 0); BUG_ON(ret); +out: + wc->refs[level] = 0; + wc->flags[level] = 0; + return ret; +} + +static noinline int walk_down_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct walk_control *wc) +{ + struct extent_buffer *next; + struct extent_buffer *cur; + u64 bytenr; + u64 ptr_gen; + u32 blocksize; + int level = wc->level; + int ret; + + while (level >= 0) { + cur = path->nodes[level]; + BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); - if (path->locks[*level]) { - btrfs_tree_unlock(path->nodes[*level]); - path->locks[*level] = 0; + ret = walk_down_proc(trans, root, path, wc); + if (ret > 0) + break; + + if (level == 0) + break; + + bytenr = btrfs_node_blockptr(cur, path->slots[level]); + blocksize = btrfs_level_size(root, level - 1); + ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); + + next = read_tree_block(root, bytenr, blocksize, ptr_gen); + btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); + + level--; + BUG_ON(level != btrfs_header_level(next)); + path->nodes[level] = next; + path->slots[level] = 0; + path->locks[level] = 1; + wc->level = level; } - free_extent_buffer(path->nodes[*level]); - path->nodes[*level] = NULL; - *level += 1; - cond_resched(); return 0; } -/* - * helper for dropping snapshots. This walks back up the tree in the path - * to find the first node higher up where we haven't yet gone through - * all the slots - */ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - int *level, int max_level) + struct walk_control *wc, int max_level) { - struct btrfs_root_item *root_item = &root->root_item; - int i; - int slot; + int level = wc->level; int ret; - for (i = *level; i < max_level && path->nodes[i]; i++) { - slot = path->slots[i]; - if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { - /* - * there is more work to do in this level. - * Update the drop_progress marker to reflect - * the work we've done so far, and then bump - * the slot number - */ - path->slots[i]++; - WARN_ON(*level == 0); - if (max_level == BTRFS_MAX_LEVEL) { - btrfs_node_key(path->nodes[i], - &root_item->drop_progress, - path->slots[i]); - root_item->drop_level = i; - } - *level = i; + path->slots[level] = btrfs_header_nritems(path->nodes[level]); + while (level < max_level && path->nodes[level]) { + wc->level = level; + if (path->slots[level] + 1 < + btrfs_header_nritems(path->nodes[level])) { + path->slots[level]++; return 0; } else { - struct extent_buffer *parent; - - /* - * this whole node is done, free our reference - * on it and go up one level - */ - if (path->nodes[*level] == root->node) - parent = path->nodes[*level]; - else - parent = path->nodes[*level + 1]; + ret = walk_up_proc(trans, root, path, wc); + if (ret > 0) + return 0; - clean_tree_block(trans, root, path->nodes[i]); - ret = btrfs_free_extent(trans, root, - path->nodes[i]->start, - path->nodes[i]->len, - parent->start, - btrfs_header_owner(parent), - *level, 0); - BUG_ON(ret); - if (path->locks[*level]) { - btrfs_tree_unlock(path->nodes[i]); - path->locks[i] = 0; + if (path->locks[level]) { + btrfs_tree_unlock(path->nodes[level]); + path->locks[level] = 0; } - free_extent_buffer(path->nodes[i]); - path->nodes[i] = NULL; - *level = i + 1; + free_extent_buffer(path->nodes[level]); + path->nodes[level] = NULL; + level++; } } return 1; } /* - * drop the reference count on the tree rooted at 'snap'. This traverses - * the tree freeing any blocks that have a ref count of zero after being - * decremented. + * drop a subvolume tree. + * + * this function traverses the tree freeing any blocks that only + * referenced by the tree. + * + * when a shared tree block is found. this function decreases its + * reference count by one. if update_ref is true, this function + * also make sure backrefs for the shared block and all lower level + * blocks are properly updated. */ -int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root) +int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) { - int ret = 0; - int wret; - int level; struct btrfs_path *path; - int update_count; + struct btrfs_trans_handle *trans; + struct btrfs_root *tree_root = root->fs_info->tree_root; struct btrfs_root_item *root_item = &root->root_item; + struct walk_control *wc; + struct btrfs_key key; + int err = 0; + int ret; + int level; path = btrfs_alloc_path(); BUG_ON(!path); - level = btrfs_header_level(root->node); + wc = kzalloc(sizeof(*wc), GFP_NOFS); + BUG_ON(!wc); + + trans = btrfs_start_transaction(tree_root, 1); + if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { + level = btrfs_header_level(root->node); path->nodes[level] = btrfs_lock_root_node(root); btrfs_set_lock_blocking(path->nodes[level]); path->slots[level] = 0; path->locks[level] = 1; + memset(&wc->update_progress, 0, + sizeof(wc->update_progress)); } else { - struct btrfs_key key; - struct btrfs_disk_key found_key; - struct extent_buffer *node; - btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); + memcpy(&wc->update_progress, &key, + sizeof(wc->update_progress)); + level = root_item->drop_level; + BUG_ON(level == 0); path->lowest_level = level; - wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (wret < 0) { - ret = wret; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + path->lowest_level = 0; + if (ret < 0) { + err = ret; goto out; } - node = path->nodes[level]; - btrfs_node_key(node, &found_key, path->slots[level]); - WARN_ON(memcmp(&found_key, &root_item->drop_progress, - sizeof(found_key))); + btrfs_node_key_to_cpu(path->nodes[level], &key, + path->slots[level]); + WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); + /* * unlock our path, this is safe because only this * function is allowed to delete this snapshot */ btrfs_unlock_up_safe(path, 0); + + level = btrfs_header_level(root->node); + while (1) { + btrfs_tree_lock(path->nodes[level]); + btrfs_set_lock_blocking(path->nodes[level]); + + ret = btrfs_lookup_extent_info(trans, root, + path->nodes[level]->start, + path->nodes[level]->len, + &wc->refs[level], + &wc->flags[level]); + BUG_ON(ret); + BUG_ON(wc->refs[level] == 0); + + if (level == root_item->drop_level) + break; + + btrfs_tree_unlock(path->nodes[level]); + WARN_ON(wc->refs[level] != 1); + level--; + } } + + wc->level = level; + wc->shared_level = -1; + wc->stage = DROP_REFERENCE; + wc->update_ref = update_ref; + wc->keep_locks = 0; + while (1) { - unsigned long update; - wret = walk_down_tree(trans, root, path, &level); - if (wret > 0) + ret = walk_down_tree(trans, root, path, wc); + if (ret < 0) { + err = ret; break; - if (wret < 0) - ret = wret; + } - wret = walk_up_tree(trans, root, path, &level, - BTRFS_MAX_LEVEL); - if (wret > 0) + ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); + if (ret < 0) { + err = ret; break; - if (wret < 0) - ret = wret; - if (trans->transaction->in_commit || - trans->transaction->delayed_refs.flushing) { - ret = -EAGAIN; + } + + if (ret > 0) { + BUG_ON(wc->stage != DROP_REFERENCE); break; } - for (update_count = 0; update_count < 16; update_count++) { + + if (wc->stage == DROP_REFERENCE) { + level = wc->level; + btrfs_node_key(path->nodes[level], + &root_item->drop_progress, + path->slots[level]); + root_item->drop_level = level; + } + + BUG_ON(wc->level == 0); + if (trans->transaction->in_commit || + trans->transaction->delayed_refs.flushing) { + ret = btrfs_update_root(trans, tree_root, + &root->root_key, + root_item); + BUG_ON(ret); + + btrfs_end_transaction(trans, tree_root); + trans = btrfs_start_transaction(tree_root, 1); + } else { + unsigned long update; update = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; if (update) - btrfs_run_delayed_refs(trans, root, update); - else - break; + btrfs_run_delayed_refs(trans, tree_root, + update); } } + btrfs_release_path(root, path); + BUG_ON(err); + + ret = btrfs_del_root(trans, tree_root, &root->root_key); + BUG_ON(ret); + + free_extent_buffer(root->node); + free_extent_buffer(root->commit_root); + kfree(root); out: + btrfs_end_transaction(trans, tree_root); + kfree(wc); btrfs_free_path(path); - return ret; + return err; } +/* + * drop subtree rooted at tree block 'node'. + * + * NOTE: this function will unlock and release tree block 'node' + */ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, struct extent_buffer *parent) { struct btrfs_path *path; + struct walk_control *wc; int level; int parent_level; int ret = 0; int wret; + BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); + path = btrfs_alloc_path(); BUG_ON(!path); + wc = kzalloc(sizeof(*wc), GFP_NOFS); + BUG_ON(!wc); + btrfs_assert_tree_locked(parent); parent_level = btrfs_header_level(parent); extent_buffer_get(parent); @@ -4817,24 +5024,33 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, btrfs_assert_tree_locked(node); level = btrfs_header_level(node); - extent_buffer_get(node); path->nodes[level] = node; path->slots[level] = 0; + path->locks[level] = 1; + + wc->refs[parent_level] = 1; + wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; + wc->level = level; + wc->shared_level = -1; + wc->stage = DROP_REFERENCE; + wc->update_ref = 0; + wc->keep_locks = 1; while (1) { - wret = walk_down_tree(trans, root, path, &level); - if (wret < 0) + wret = walk_down_tree(trans, root, path, wc); + if (wret < 0) { ret = wret; - if (wret != 0) break; + } - wret = walk_up_tree(trans, root, path, &level, parent_level); + wret = walk_up_tree(trans, root, path, wc, parent_level); if (wret < 0) ret = wret; if (wret != 0) break; } + kfree(wc); btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 126477e..7c3cd24 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -151,7 +151,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, } if (end_pos > isize) { i_size_write(inode, end_pos); - btrfs_update_inode(trans, root, inode); + /* we've only changed i_size in ram, and we haven't updated + * the disk i_size. There is no need to log the inode + * at this time. + */ } err = btrfs_end_transaction(trans, root); out_unlock: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8612b3a..7ffa3d3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2122,10 +2122,8 @@ static void btrfs_read_locked_inode(struct inode *inode) * any xattrs or acls */ maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); - if (!maybe_acls) { - BTRFS_I(inode)->i_acl = NULL; - BTRFS_I(inode)->i_default_acl = NULL; - } + if (!maybe_acls) + cache_no_acl(inode); BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, alloc_group_block, 0); @@ -3141,9 +3139,6 @@ static noinline void init_btrfs_i(struct inode *inode) { struct btrfs_inode *bi = BTRFS_I(inode); - bi->i_acl = BTRFS_ACL_NOT_CACHED; - bi->i_default_acl = BTRFS_ACL_NOT_CACHED; - bi->generation = 0; bi->sequence = 0; bi->last_trans = 0; @@ -3585,12 +3580,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, owner = 1; BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, alloc_hint, owner); - if ((mode & S_IFREG)) { - if (btrfs_test_opt(root, NODATASUM)) - BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; - if (btrfs_test_opt(root, NODATACOW)) - BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; - } key[0].objectid = objectid; btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); @@ -3645,6 +3634,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_inherit_iflags(inode, dir); + if ((mode & S_IFREG)) { + if (btrfs_test_opt(root, NODATASUM)) + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; + if (btrfs_test_opt(root, NODATACOW)) + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; + } + insert_inode_hash(inode); inode_tree_add(inode); return inode; @@ -4640,8 +4636,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->last_trans = 0; ei->logged_trans = 0; btrfs_ordered_inode_tree_init(&ei->ordered_tree); - ei->i_acl = BTRFS_ACL_NOT_CACHED; - ei->i_default_acl = BTRFS_ACL_NOT_CACHED; INIT_LIST_HEAD(&ei->i_orphan); INIT_LIST_HEAD(&ei->ordered_operations); return &ei->vfs_inode; @@ -4655,13 +4649,6 @@ void btrfs_destroy_inode(struct inode *inode) WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); - if (BTRFS_I(inode)->i_acl && - BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED) - posix_acl_release(BTRFS_I(inode)->i_acl); - if (BTRFS_I(inode)->i_default_acl && - BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) - posix_acl_release(BTRFS_I(inode)->i_default_acl); - /* * Make sure we're properly removed from the ordered operation * lists. @@ -5096,6 +5083,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, u64 mask = BTRFS_I(inode)->root->sectorsize - 1; struct extent_map *em; struct btrfs_trans_handle *trans; + struct btrfs_root *root; int ret; alloc_start = offset & ~mask; @@ -5114,6 +5102,13 @@ static long btrfs_fallocate(struct inode *inode, int mode, goto out; } + root = BTRFS_I(inode)->root; + + ret = btrfs_check_data_free_space(root, inode, + alloc_end - alloc_start); + if (ret) + goto out; + locked_end = alloc_end - 1; while (1) { struct btrfs_ordered_extent *ordered; @@ -5121,7 +5116,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); if (!trans) { ret = -EIO; - goto out; + goto out_free; } /* the extent lock is ordered inside the running @@ -5182,6 +5177,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, GFP_NOFS); btrfs_end_transaction(trans, BTRFS_I(inode)->root); +out_free: + btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start); out: mutex_unlock(&inode->i_mutex); return ret; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index eff18f5..9f4db84 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1028,7 +1028,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, struct btrfs_file_extent_item); comp = btrfs_file_extent_compression(leaf, extent); type = btrfs_file_extent_type(leaf, extent); - if (type == BTRFS_FILE_EXTENT_REG) { + if (type == BTRFS_FILE_EXTENT_REG || + type == BTRFS_FILE_EXTENT_PREALLOC) { disko = btrfs_file_extent_disk_bytenr(leaf, extent); diskl = btrfs_file_extent_disk_num_bytes(leaf, @@ -1051,7 +1052,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, new_key.objectid = inode->i_ino; new_key.offset = key.offset + destoff - off; - if (type == BTRFS_FILE_EXTENT_REG) { + if (type == BTRFS_FILE_EXTENT_REG || + type == BTRFS_FILE_EXTENT_PREALLOC) { ret = btrfs_insert_empty_item(trans, root, path, &new_key, size); if (ret) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b23dc20..0083979 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1788,7 +1788,7 @@ static void merge_func(struct btrfs_work *work) btrfs_end_transaction(trans, root); } - btrfs_drop_dead_root(reloc_root); + btrfs_drop_snapshot(reloc_root, 0); if (atomic_dec_and_test(async->num_pending)) complete(async->done); @@ -2075,9 +2075,6 @@ static int do_relocation(struct btrfs_trans_handle *trans, ret = btrfs_drop_subtree(trans, root, eb, upper->eb); BUG_ON(ret); - - btrfs_tree_unlock(eb); - free_extent_buffer(eb); } if (!lowest) { btrfs_tree_unlock(upper->eb); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4e83457..2dbf1c1 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -593,6 +593,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) return 0; } +#if 0 /* * when dropping snapshots, we generate a ton of delayed refs, and it makes * sense not to join the transaction while it is trying to flush the current @@ -681,6 +682,7 @@ int btrfs_drop_dead_root(struct btrfs_root *root) btrfs_btree_balance_dirty(tree_root, nr); return ret; } +#endif /* * new snapshots need to be created at a very specific time in the @@ -1081,7 +1083,7 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) while (!list_empty(&list)) { root = list_entry(list.next, struct btrfs_root, root_list); list_del_init(&root->root_list); - btrfs_drop_dead_root(root); + btrfs_drop_snapshot(root, 0); } return 0; } diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index b486898..3a9b7a5 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -5,7 +5,7 @@ client generated ones by default (mount option "serverino" turned on by default if server supports it). Add forceuid and forcegid mount options (so that when negotiating unix extensions specifying which uid mounted does not immediately force the server's reported -uids to be overridden). +uids to be overridden). Add support for scope moutn parm. Version 1.58 ------------ diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index 1b09f16..20692fb 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -49,6 +49,7 @@ #define ASN1_OJI 6 /* Object Identifier */ #define ASN1_OJD 7 /* Object Description */ #define ASN1_EXT 8 /* External */ +#define ASN1_ENUM 10 /* Enumerated */ #define ASN1_SEQ 16 /* Sequence */ #define ASN1_SET 17 /* Set */ #define ASN1_NUMSTR 18 /* Numerical String */ @@ -78,10 +79,12 @@ #define SPNEGO_OID_LEN 7 #define NTLMSSP_OID_LEN 10 #define KRB5_OID_LEN 7 +#define KRB5U2U_OID_LEN 8 #define MSKRB5_OID_LEN 7 static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 }; static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 }; static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 }; +static unsigned long KRB5U2U_OID[8] = { 1, 2, 840, 113554, 1, 2, 2, 3 }; static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 }; /* @@ -122,6 +125,28 @@ asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch) return 1; } +#if 0 /* will be needed later by spnego decoding/encoding of ntlmssp */ +static unsigned char +asn1_enum_decode(struct asn1_ctx *ctx, __le32 *val) +{ + unsigned char ch; + + if (ctx->pointer >= ctx->end) { + ctx->error = ASN1_ERR_DEC_EMPTY; + return 0; + } + + ch = *(ctx->pointer)++; /* ch has 0xa, ptr points to lenght octet */ + if ((ch) == ASN1_ENUM) /* if ch value is ENUM, 0xa */ + *val = *(++(ctx->pointer)); /* value has enum value */ + else + return 0; + + ctx->pointer++; + return 1; +} +#endif + static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag) { @@ -476,10 +501,9 @@ decode_negTokenInit(unsigned char *security_blob, int length, unsigned int cls, con, tag, oidlen, rc; bool use_ntlmssp = false; bool use_kerberos = false; + bool use_kerberosu2u = false; bool use_mskerberos = false; - *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ - /* cifs_dump_mem(" Received SecBlob ", security_blob, length); */ asn1_open(&ctx, security_blob, length); @@ -515,6 +539,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } + /* SPNEGO */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding negTokenInit")); return 0; @@ -526,6 +551,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } + /* negTokenInit */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding negTokenInit")); return 0; @@ -537,6 +563,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } + /* sequence */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding 2nd part of negTokenInit")); return 0; @@ -548,6 +575,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } + /* sequence of */ if (asn1_header_decode (&ctx, &sequence_end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding 2nd part of negTokenInit")); @@ -560,6 +588,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } + /* list of security mechanisms */ while (!asn1_eoc_decode(&ctx, sequence_end)) { rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); if (!rc) { @@ -576,11 +605,15 @@ decode_negTokenInit(unsigned char *security_blob, int length, if (compare_oid(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN) && - !use_kerberos) + !use_mskerberos) use_mskerberos = true; + else if (compare_oid(oid, oidlen, KRB5U2U_OID, + KRB5U2U_OID_LEN) && + !use_kerberosu2u) + use_kerberosu2u = true; else if (compare_oid(oid, oidlen, KRB5_OID, KRB5_OID_LEN) && - !use_mskerberos) + !use_kerberos) use_kerberos = true; else if (compare_oid(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN)) @@ -593,7 +626,12 @@ decode_negTokenInit(unsigned char *security_blob, int length, } } + /* mechlistMIC */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + /* Check if we have reached the end of the blob, but with + no mechListMic (e.g. NTLMSSP instead of KRB5) */ + if (ctx.error == ASN1_ERR_DEC_EMPTY) + goto decode_negtoken_exit; cFYI(1, ("Error decoding last part negTokenInit exit3")); return 0; } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { @@ -602,6 +640,8 @@ decode_negTokenInit(unsigned char *security_blob, int length, cls, con, tag, end, *end)); return 0; } + + /* sequence */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding last part negTokenInit exit5")); return 0; @@ -611,6 +651,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, cls, con, tag, end, *end)); } + /* sequence of */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding last part negTokenInit exit 7")); return 0; @@ -619,6 +660,8 @@ decode_negTokenInit(unsigned char *security_blob, int length, cls, con, tag, end, *end)); return 0; } + + /* general string */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding last part negTokenInit exit9")); return 0; @@ -630,13 +673,13 @@ decode_negTokenInit(unsigned char *security_blob, int length, } cFYI(1, ("Need to call asn1_octets_decode() function for %s", ctx.pointer)); /* is this UTF-8 or ASCII? */ - +decode_negtoken_exit: if (use_kerberos) *secType = Kerberos; else if (use_mskerberos) *secType = MSKerberos; else if (use_ntlmssp) - *secType = NTLMSSP; + *secType = RawNTLMSSP; return 1; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 0d92114..9f669f9 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -333,6 +333,27 @@ cifs_destroy_inode(struct inode *inode) kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); } +static void +cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) +{ + seq_printf(s, ",addr="); + + switch (server->addr.sockAddr.sin_family) { + case AF_INET: + seq_printf(s, "%pI4", &server->addr.sockAddr.sin_addr.s_addr); + break; + case AF_INET6: + seq_printf(s, "%pI6", + &server->addr.sockAddr6.sin6_addr.s6_addr); + if (server->addr.sockAddr6.sin6_scope_id) + seq_printf(s, "%%%u", + server->addr.sockAddr6.sin6_scope_id); + break; + default: + seq_printf(s, "(unknown)"); + } +} + /* * cifs_show_options() is for displaying mount options in /proc/mounts. * Not all settable options are displayed but most of the important @@ -343,83 +364,64 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m) { struct cifs_sb_info *cifs_sb; struct cifsTconInfo *tcon; - struct TCP_Server_Info *server; cifs_sb = CIFS_SB(m->mnt_sb); + tcon = cifs_sb->tcon; - if (cifs_sb) { - tcon = cifs_sb->tcon; - if (tcon) { - seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName); - if (tcon->ses) { - if (tcon->ses->userName) - seq_printf(s, ",username=%s", - tcon->ses->userName); - if (tcon->ses->domainName) - seq_printf(s, ",domain=%s", - tcon->ses->domainName); - server = tcon->ses->server; - if (server) { - seq_printf(s, ",addr="); - switch (server->addr.sockAddr6. - sin6_family) { - case AF_INET6: - seq_printf(s, "%pI6", - &server->addr.sockAddr6.sin6_addr); - break; - case AF_INET: - seq_printf(s, "%pI4", - &server->addr.sockAddr.sin_addr.s_addr); - break; - } - } - } - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) || - !(tcon->unix_ext)) - seq_printf(s, ",uid=%d", cifs_sb->mnt_uid); - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) || - !(tcon->unix_ext)) - seq_printf(s, ",gid=%d", cifs_sb->mnt_gid); - if (!tcon->unix_ext) { - seq_printf(s, ",file_mode=0%o,dir_mode=0%o", + seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName); + if (tcon->ses->userName) + seq_printf(s, ",username=%s", tcon->ses->userName); + if (tcon->ses->domainName) + seq_printf(s, ",domain=%s", tcon->ses->domainName); + + seq_printf(s, ",uid=%d", cifs_sb->mnt_uid); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) + seq_printf(s, ",forceuid"); + + seq_printf(s, ",gid=%d", cifs_sb->mnt_gid); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) + seq_printf(s, ",forcegid"); + + cifs_show_address(s, tcon->ses->server); + + if (!tcon->unix_ext) + seq_printf(s, ",file_mode=0%o,dir_mode=0%o", cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); - } - if (tcon->seal) - seq_printf(s, ",seal"); - if (tcon->nocase) - seq_printf(s, ",nocase"); - if (tcon->retry) - seq_printf(s, ",hard"); - } - if (cifs_sb->prepath) - seq_printf(s, ",prepath=%s", cifs_sb->prepath); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) - seq_printf(s, ",posixpaths"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) - seq_printf(s, ",setuids"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) - seq_printf(s, ",serverino"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) - seq_printf(s, ",directio"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) - seq_printf(s, ",nouser_xattr"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) - seq_printf(s, ",mapchars"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) - seq_printf(s, ",sfu"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) - seq_printf(s, ",nobrl"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) - seq_printf(s, ",cifsacl"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) - seq_printf(s, ",dynperm"); - if (m->mnt_sb->s_flags & MS_POSIXACL) - seq_printf(s, ",acl"); - - seq_printf(s, ",rsize=%d", cifs_sb->rsize); - seq_printf(s, ",wsize=%d", cifs_sb->wsize); - } + if (tcon->seal) + seq_printf(s, ",seal"); + if (tcon->nocase) + seq_printf(s, ",nocase"); + if (tcon->retry) + seq_printf(s, ",hard"); + if (cifs_sb->prepath) + seq_printf(s, ",prepath=%s", cifs_sb->prepath); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) + seq_printf(s, ",posixpaths"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) + seq_printf(s, ",setuids"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) + seq_printf(s, ",serverino"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) + seq_printf(s, ",directio"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + seq_printf(s, ",nouser_xattr"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) + seq_printf(s, ",mapchars"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) + seq_printf(s, ",sfu"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + seq_printf(s, ",nobrl"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) + seq_printf(s, ",cifsacl"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) + seq_printf(s, ",dynperm"); + if (m->mnt_sb->s_flags & MS_POSIXACL) + seq_printf(s, ",acl"); + + seq_printf(s, ",rsize=%d", cifs_sb->rsize); + seq_printf(s, ",wsize=%d", cifs_sb->wsize); + return 0; } @@ -535,9 +537,14 @@ static void cifs_umount_begin(struct super_block *sb) if (tcon == NULL) return; - lock_kernel(); read_lock(&cifs_tcp_ses_lock); - if (tcon->tc_count == 1) + if ((tcon->tc_count > 1) || (tcon->tidStatus == CifsExiting)) { + /* we have other mounts to same share or we have + already tried to force umount this and woken up + all waiting network requests, nothing to do */ + read_unlock(&cifs_tcp_ses_lock); + return; + } else if (tcon->tc_count == 1) tcon->tidStatus = CifsExiting; read_unlock(&cifs_tcp_ses_lock); @@ -552,9 +559,7 @@ static void cifs_umount_begin(struct super_block *sb) wake_up_all(&tcon->ses->server->response_q); msleep(1); } -/* BB FIXME - finish add checks for tidStatus BB */ - unlock_kernel(); return; } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index a61ab77..e1225e6 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -83,7 +83,7 @@ enum securityEnum { NTLM, /* Legacy NTLM012 auth with NTLM hash */ NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ - NTLMSSP, /* NTLMSSP via SPNEGO, NTLMv2 hash */ +/* NTLMSSP, */ /* can use rawNTLMSSP instead of NTLMSSP via SPNEGO */ Kerberos, /* Kerberos via SPNEGO */ MSKerberos, /* MS Kerberos via SPNEGO */ }; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index f945232..c419416 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -74,7 +74,7 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr); extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); extern int decode_negTokenInit(unsigned char *security_blob, int length, enum securityEnum *secType); -extern int cifs_inet_pton(const int, const char *source, void *dst); +extern int cifs_convert_address(char *src, void *dst); extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); extern void header_assemble(struct smb_hdr *, char /* command */ , const struct cifsTconInfo *, int /* length of diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index b84c61d..61007c6 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -594,7 +594,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) else if (secFlags & CIFSSEC_MAY_KRB5) server->secType = Kerberos; else if (secFlags & CIFSSEC_MAY_NTLMSSP) - server->secType = NTLMSSP; + server->secType = RawNTLMSSP; else if (secFlags & CIFSSEC_MAY_LANMAN) server->secType = LANMAN; /* #ifdef CONFIG_CIFS_EXPERIMENTAL @@ -729,7 +729,7 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon) * the tcon is no longer on the list, so no need to take lock before * checking this. */ - if (tcon->need_reconnect) + if ((tcon->need_reconnect) || (tcon->ses->need_reconnect)) return 0; rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 97f4311..e16d759 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -70,7 +70,6 @@ struct smb_vol { mode_t file_mode; mode_t dir_mode; unsigned secFlg; - bool rw:1; bool retry:1; bool intr:1; bool setuids:1; @@ -832,7 +831,6 @@ cifs_parse_mount_options(char *options, const char *devname, vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR; /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ - vol->rw = true; /* default is always to request posix paths. */ vol->posix_paths = 1; /* default to using server inode numbers where available */ @@ -1199,7 +1197,9 @@ cifs_parse_mount_options(char *options, const char *devname, } else if (strnicmp(data, "guest", 5) == 0) { /* ignore */ } else if (strnicmp(data, "rw", 2) == 0) { - vol->rw = true; + /* ignore */ + } else if (strnicmp(data, "ro", 2) == 0) { + /* ignore */ } else if (strnicmp(data, "noblocksend", 11) == 0) { vol->noblocksnd = 1; } else if (strnicmp(data, "noautotune", 10) == 0) { @@ -1218,8 +1218,6 @@ cifs_parse_mount_options(char *options, const char *devname, parse these options again and set anything and it is ok to just ignore them */ continue; - } else if (strnicmp(data, "ro", 2) == 0) { - vol->rw = false; } else if (strnicmp(data, "hard", 4) == 0) { vol->retry = 1; } else if (strnicmp(data, "soft", 4) == 0) { @@ -1386,8 +1384,10 @@ cifs_find_tcp_session(struct sockaddr_storage *addr) server->addr.sockAddr.sin_addr.s_addr)) continue; else if (addr->ss_family == AF_INET6 && - !ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr, - &addr6->sin6_addr)) + (!ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr, + &addr6->sin6_addr) || + server->addr.sockAddr6.sin6_scope_id != + addr6->sin6_scope_id)) continue; ++server->srv_count; @@ -1433,28 +1433,15 @@ cifs_get_tcp_session(struct smb_vol *volume_info) memset(&addr, 0, sizeof(struct sockaddr_storage)); - if (volume_info->UNCip && volume_info->UNC) { - rc = cifs_inet_pton(AF_INET, volume_info->UNCip, - &sin_server->sin_addr.s_addr); - - if (rc <= 0) { - /* not ipv4 address, try ipv6 */ - rc = cifs_inet_pton(AF_INET6, volume_info->UNCip, - &sin_server6->sin6_addr.in6_u); - if (rc > 0) - addr.ss_family = AF_INET6; - } else { - addr.ss_family = AF_INET; - } + cFYI(1, ("UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip)); - if (rc <= 0) { + if (volume_info->UNCip && volume_info->UNC) { + rc = cifs_convert_address(volume_info->UNCip, &addr); + if (!rc) { /* we failed translating address */ rc = -EINVAL; goto out_err; } - - cFYI(1, ("UNC: %s ip: %s", volume_info->UNC, - volume_info->UNCip)); } else if (volume_info->UNCip) { /* BB using ip addr as tcp_ses name to connect to the DFS root below */ @@ -1513,14 +1500,14 @@ cifs_get_tcp_session(struct smb_vol *volume_info) cFYI(1, ("attempting ipv6 connect")); /* BB should we allow ipv6 on port 139? */ /* other OS never observed in Wild doing 139 with v6 */ + sin_server6->sin6_port = htons(volume_info->port); memcpy(&tcp_ses->addr.sockAddr6, sin_server6, sizeof(struct sockaddr_in6)); - sin_server6->sin6_port = htons(volume_info->port); rc = ipv6_connect(tcp_ses); } else { + sin_server->sin_port = htons(volume_info->port); memcpy(&tcp_ses->addr.sockAddr, sin_server, sizeof(struct sockaddr_in)); - sin_server->sin_port = htons(volume_info->port); rc = ipv4_connect(tcp_ses); } if (rc < 0) { diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 3758965d..7dc6b74 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -307,8 +307,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } if (oplockEnabled) @@ -540,8 +541,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); if (buf == NULL) { kfree(full_path); + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } rc = CIFSSMBOpen(xid, pTcon, full_path, diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index df4a306..8794814 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -35,26 +35,11 @@ * 0 - name is not IP */ static int -is_ip(const char *name) +is_ip(char *name) { - int rc; - struct sockaddr_in sin_server; - struct sockaddr_in6 sin_server6; - - rc = cifs_inet_pton(AF_INET, name, - &sin_server.sin_addr.s_addr); - - if (rc <= 0) { - /* not ipv4 address, try ipv6 */ - rc = cifs_inet_pton(AF_INET6, name, - &sin_server6.sin6_addr.in6_u); - if (rc > 0) - return 1; - } else { - return 1; - } - /* we failed translating address */ - return 0; + struct sockaddr_storage ss; + + return cifs_convert_address(name, &ss); } static int @@ -72,7 +57,7 @@ dns_resolver_instantiate(struct key *key, const void *data, ip[datalen] = '\0'; /* make sure this looks like an address */ - if (!is_ip((const char *) ip)) { + if (!is_ip(ip)) { kfree(ip); return -EINVAL; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0686684..97ce4bf 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -300,14 +300,16 @@ int cifs_open(struct inode *inode, struct file *file) pCifsInode = CIFS_I(file->f_path.dentry->d_inode); pCifsFile = cifs_fill_filedata(file); if (pCifsFile) { + rc = 0; FreeXid(xid); - return 0; + return rc; } full_path = build_path_from_dentry(file->f_path.dentry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } cFYI(1, ("inode = 0x%p file flags are 0x%x for %s", @@ -491,11 +493,12 @@ static int cifs_reopen_file(struct file *file, bool can_flush) return -EBADF; xid = GetXid(); - mutex_unlock(&pCifsFile->fh_mutex); + mutex_lock(&pCifsFile->fh_mutex); if (!pCifsFile->invalidHandle) { - mutex_lock(&pCifsFile->fh_mutex); + mutex_unlock(&pCifsFile->fh_mutex); + rc = 0; FreeXid(xid); - return 0; + return rc; } if (file->f_path.dentry == NULL) { @@ -524,7 +527,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush) if (full_path == NULL) { rc = -ENOMEM; reopen_error_exit: - mutex_lock(&pCifsFile->fh_mutex); + mutex_unlock(&pCifsFile->fh_mutex); FreeXid(xid); return rc; } @@ -566,14 +569,14 @@ reopen_error_exit: cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc) { - mutex_lock(&pCifsFile->fh_mutex); + mutex_unlock(&pCifsFile->fh_mutex); cFYI(1, ("cifs_open returned 0x%x", rc)); cFYI(1, ("oplock: %d", oplock)); } else { reopen_success: pCifsFile->netfid = netfid; pCifsFile->invalidHandle = false; - mutex_lock(&pCifsFile->fh_mutex); + mutex_unlock(&pCifsFile->fh_mutex); pCifsInode = CIFS_I(inode); if (pCifsInode) { if (can_flush) { @@ -845,8 +848,9 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) tcon = cifs_sb->tcon; if (file->private_data == NULL) { + rc = -EBADF; FreeXid(xid); - return -EBADF; + return rc; } netfid = ((struct cifsFileInfo *)file->private_data)->netfid; @@ -1805,8 +1809,9 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data, pTcon = cifs_sb->tcon; if (file->private_data == NULL) { + rc = -EBADF; FreeXid(xid); - return -EBADF; + return rc; } open_file = (struct cifsFileInfo *)file->private_data; @@ -1885,8 +1890,9 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, pTcon = cifs_sb->tcon; if (file->private_data == NULL) { + rc = -EBADF; FreeXid(xid); - return -EBADF; + return rc; } open_file = (struct cifsFileInfo *)file->private_data; @@ -2019,8 +2025,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, xid = GetXid(); if (file->private_data == NULL) { + rc = -EBADF; FreeXid(xid); - return -EBADF; + return rc; } open_file = (struct cifsFileInfo *)file->private_data; cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); @@ -2185,8 +2192,9 @@ static int cifs_readpage(struct file *file, struct page *page) xid = GetXid(); if (file->private_data == NULL) { + rc = -EBADF; FreeXid(xid); - return -EBADF; + return rc; } cFYI(1, ("readpage %p at offset %d 0x%x\n", diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index fad882b..155c9e7 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -988,8 +988,9 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) * sb->s_vfs_rename_mutex here */ full_path = build_path_from_dentry(dentry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } if ((tcon->ses->capabilities & CAP_UNIX) && @@ -1118,8 +1119,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } if ((pTcon->ses->capabilities & CAP_UNIX) && @@ -1303,8 +1305,9 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } rc = CIFSSMBRmDir(xid, pTcon, full_path, cifs_sb->local_nls, @@ -1508,8 +1511,9 @@ int cifs_revalidate(struct dentry *direntry) since that would deadlock */ full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } cFYI(1, ("Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld " "jiffies %ld", full_path, direntry->d_inode, @@ -1911,8 +1915,9 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } /* diff --git a/fs/cifs/link.c b/fs/cifs/link.c index cd83c53..fc1e048 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -172,8 +172,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } cFYI(1, ("Full path: %s", full_path)); diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 32d6baa..bd6d689 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -133,10 +133,12 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = { {0, 0} }; -/* Convert string containing dotted ip address to binary form */ -/* returns 0 if invalid address */ - -int +/* + * Convert a string containing text IPv4 or IPv6 address to binary form. + * + * Returns 0 on failure. + */ +static int cifs_inet_pton(const int address_family, const char *cp, void *dst) { int ret = 0; @@ -153,6 +155,52 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst) return ret; } +/* + * Try to convert a string to an IPv4 address and then attempt to convert + * it to an IPv6 address if that fails. Set the family field if either + * succeeds. If it's an IPv6 address and it has a '%' sign in it, try to + * treat the part following it as a numeric sin6_scope_id. + * + * Returns 0 on failure. + */ +int +cifs_convert_address(char *src, void *dst) +{ + int rc; + char *pct, *endp; + struct sockaddr_in *s4 = (struct sockaddr_in *) dst; + struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst; + + /* IPv4 address */ + if (cifs_inet_pton(AF_INET, src, &s4->sin_addr.s_addr)) { + s4->sin_family = AF_INET; + return 1; + } + + /* temporarily terminate string */ + pct = strchr(src, '%'); + if (pct) + *pct = '\0'; + + rc = cifs_inet_pton(AF_INET6, src, &s6->sin6_addr.s6_addr); + + /* repair temp termination (if any) and make pct point to scopeid */ + if (pct) + *pct++ = '%'; + + if (!rc) + return rc; + + s6->sin6_family = AF_INET6; + if (pct) { + s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0); + if (!*pct || *endp) + return 0; + } + + return rc; +} + /***************************************************************************** convert a NT status code to a dos class/code *****************************************************************************/ diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 897a052..7085a62 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -802,7 +802,7 @@ ssetup_ntlmssp_authenticate: #endif /* CONFIG_CIFS_UPCALL */ } else { #ifdef CONFIG_CIFS_EXPERIMENTAL - if ((experimEnabled > 1) && (type == RawNTLMSSP)) { + if (type == RawNTLMSSP) { if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { cERROR(1, ("NTLMSSP requires Unicode support")); rc = -ENOSYS; diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index e9527ee..a75afa3 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -64,8 +64,9 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } if (ea_name == NULL) { cFYI(1, ("Null xattr names not supported")); @@ -118,8 +119,9 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } /* return dos attributes as pseudo xattr */ /* return alt name if available as pseudo attr */ @@ -225,8 +227,9 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } /* return dos attributes as pseudo xattr */ /* return alt name if available as pseudo attr */ @@ -351,8 +354,9 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } /* return dos attributes as pseudo xattr */ /* return alt name if available as pseudo attr */ diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c135202..626c748 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -31,6 +31,7 @@ #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/vt.h> +#include <linux/falloc.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/ppp_defs.h> @@ -1779,6 +1780,41 @@ lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg) return sys_ioctl(fd, cmd, (unsigned long)tn); } +/* on ia32 l_start is on a 32-bit boundary */ +#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) +struct space_resv_32 { + __s16 l_type; + __s16 l_whence; + __s64 l_start __attribute__((packed)); + /* len == 0 means until end of file */ + __s64 l_len __attribute__((packed)); + __s32 l_sysid; + __u32 l_pid; + __s32 l_pad[4]; /* reserve area */ +}; + +#define FS_IOC_RESVSP_32 _IOW ('X', 40, struct space_resv_32) +#define FS_IOC_RESVSP64_32 _IOW ('X', 42, struct space_resv_32) + +/* just account for different alignment */ +static int compat_ioctl_preallocate(struct file *file, unsigned long arg) +{ + struct space_resv_32 __user *p32 = (void __user *)arg; + struct space_resv __user *p = compat_alloc_user_space(sizeof(*p)); + + if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) || + copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) || + copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) || + copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) || + copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) || + copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) || + copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32))) + return -EFAULT; + + return ioctl_preallocate(file, p); +} +#endif + typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int, unsigned long, struct file *); @@ -2756,6 +2792,18 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, case FIOQSIZE: break; +#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) + case FS_IOC_RESVSP_32: + case FS_IOC_RESVSP64_32: + error = compat_ioctl_preallocate(filp, arg); + goto out_fput; +#else + case FS_IOC_RESVSP: + case FS_IOC_RESVSP64: + error = ioctl_preallocate(filp, (void __user *)arg); + goto out_fput; +#endif + case FIBMAP: case FIGETBSZ: case FIONREAD: diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 9b1d285..75efb02 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -423,7 +423,6 @@ static void devpts_kill_sb(struct super_block *sb) } static struct file_system_type devpts_fs_type = { - .owner = THIS_MODULE, .name = "devpts", .get_sb = devpts_get_sb, .kill_sb = devpts_kill_sb, @@ -564,13 +563,4 @@ static int __init init_devpts_fs(void) } return err; } - -static void __exit exit_devpts_fs(void) -{ - unregister_filesystem(&devpts_fs_type); - mntput(devpts_mnt); -} - module_init(init_devpts_fs) -module_exit(exit_devpts_fs) -MODULE_LICENSE("GPL"); diff --git a/fs/eventfd.c b/fs/eventfd.c index 3f0e197..31d12de8 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -14,35 +14,44 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/anon_inodes.h> -#include <linux/eventfd.h> #include <linux/syscalls.h> #include <linux/module.h> +#include <linux/kref.h> +#include <linux/eventfd.h> struct eventfd_ctx { + struct kref kref; wait_queue_head_t wqh; /* * Every time that a write(2) is performed on an eventfd, the * value of the __u64 being written is added to "count" and a * wakeup is performed on "wqh". A read(2) will return the "count" * value to userspace, and will reset "count" to zero. The kernel - * size eventfd_signal() also, adds to the "count" counter and + * side eventfd_signal() also, adds to the "count" counter and * issue a wakeup. */ __u64 count; unsigned int flags; }; -/* - * Adds "n" to the eventfd counter "count". Returns "n" in case of - * success, or a value lower then "n" in case of coutner overflow. - * This function is supposed to be called by the kernel in paths - * that do not allow sleeping. In this function we allow the counter - * to reach the ULLONG_MAX value, and we signal this as overflow - * condition by returining a POLLERR to poll(2). +/** + * eventfd_signal - Adds @n to the eventfd counter. + * @ctx: [in] Pointer to the eventfd context. + * @n: [in] Value of the counter to be added to the eventfd internal counter. + * The value cannot be negative. + * + * This function is supposed to be called by the kernel in paths that do not + * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX + * value, and we signal this as overflow condition by returining a POLLERR + * to poll(2). + * + * Returns @n in case of success, a non-negative number lower than @n in case + * of overflow, or the following error codes: + * + * -EINVAL : The value of @n is negative. */ -int eventfd_signal(struct file *file, int n) +int eventfd_signal(struct eventfd_ctx *ctx, int n) { - struct eventfd_ctx *ctx = file->private_data; unsigned long flags; if (n < 0) @@ -59,9 +68,45 @@ int eventfd_signal(struct file *file, int n) } EXPORT_SYMBOL_GPL(eventfd_signal); +static void eventfd_free(struct kref *kref) +{ + struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref); + + kfree(ctx); +} + +/** + * eventfd_ctx_get - Acquires a reference to the internal eventfd context. + * @ctx: [in] Pointer to the eventfd context. + * + * Returns: In case of success, returns a pointer to the eventfd context. + */ +struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx) +{ + kref_get(&ctx->kref); + return ctx; +} +EXPORT_SYMBOL_GPL(eventfd_ctx_get); + +/** + * eventfd_ctx_put - Releases a reference to the internal eventfd context. + * @ctx: [in] Pointer to eventfd context. + * + * The eventfd context reference must have been previously acquired either + * with eventfd_ctx_get() or eventfd_ctx_fdget()). + */ +void eventfd_ctx_put(struct eventfd_ctx *ctx) +{ + kref_put(&ctx->kref, eventfd_free); +} +EXPORT_SYMBOL_GPL(eventfd_ctx_put); + static int eventfd_release(struct inode *inode, struct file *file) { - kfree(file->private_data); + struct eventfd_ctx *ctx = file->private_data; + + wake_up_poll(&ctx->wqh, POLLHUP); + eventfd_ctx_put(ctx); return 0; } @@ -185,6 +230,16 @@ static const struct file_operations eventfd_fops = { .write = eventfd_write, }; +/** + * eventfd_fget - Acquire a reference of an eventfd file descriptor. + * @fd: [in] Eventfd file descriptor. + * + * Returns a pointer to the eventfd file structure in case of success, or the + * following error pointer: + * + * -EBADF : Invalid @fd file descriptor. + * -EINVAL : The @fd file descriptor is not an eventfd file. + */ struct file *eventfd_fget(int fd) { struct file *file; @@ -201,6 +256,48 @@ struct file *eventfd_fget(int fd) } EXPORT_SYMBOL_GPL(eventfd_fget); +/** + * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context. + * @fd: [in] Eventfd file descriptor. + * + * Returns a pointer to the internal eventfd context, otherwise the error + * pointers returned by the following functions: + * + * eventfd_fget + */ +struct eventfd_ctx *eventfd_ctx_fdget(int fd) +{ + struct file *file; + struct eventfd_ctx *ctx; + + file = eventfd_fget(fd); + if (IS_ERR(file)) + return (struct eventfd_ctx *) file; + ctx = eventfd_ctx_get(file->private_data); + fput(file); + + return ctx; +} +EXPORT_SYMBOL_GPL(eventfd_ctx_fdget); + +/** + * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context. + * @file: [in] Eventfd file pointer. + * + * Returns a pointer to the internal eventfd context, otherwise the error + * pointer: + * + * -EINVAL : The @fd file descriptor is not an eventfd file. + */ +struct eventfd_ctx *eventfd_ctx_fileget(struct file *file) +{ + if (file->f_op != &eventfd_fops) + return ERR_PTR(-EINVAL); + + return eventfd_ctx_get(file->private_data); +} +EXPORT_SYMBOL_GPL(eventfd_ctx_fileget); + SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) { int fd; @@ -217,6 +314,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) if (!ctx) return -ENOMEM; + kref_init(&ctx->kref); init_waitqueue_head(&ctx->wqh); ctx->count = count; ctx->flags = flags; diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index d46e38c..d636e12 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -125,37 +125,12 @@ fail: return ERR_PTR(-EINVAL); } -static inline struct posix_acl * -ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl) -{ - struct posix_acl *acl = EXT2_ACL_NOT_CACHED; - - spin_lock(&inode->i_lock); - if (*i_acl != EXT2_ACL_NOT_CACHED) - acl = posix_acl_dup(*i_acl); - spin_unlock(&inode->i_lock); - - return acl; -} - -static inline void -ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl, - struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*i_acl != EXT2_ACL_NOT_CACHED) - posix_acl_release(*i_acl); - *i_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - /* * inode->i_mutex: don't care */ static struct posix_acl * ext2_get_acl(struct inode *inode, int type) { - struct ext2_inode_info *ei = EXT2_I(inode); int name_index; char *value = NULL; struct posix_acl *acl; @@ -164,23 +139,19 @@ ext2_get_acl(struct inode *inode, int type) if (!test_opt(inode->i_sb, POSIX_ACL)) return NULL; - switch(type) { - case ACL_TYPE_ACCESS: - acl = ext2_iget_acl(inode, &ei->i_acl); - if (acl != EXT2_ACL_NOT_CACHED) - return acl; - name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; - break; - - case ACL_TYPE_DEFAULT: - acl = ext2_iget_acl(inode, &ei->i_default_acl); - if (acl != EXT2_ACL_NOT_CACHED) - return acl; - name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; - break; - - default: - return ERR_PTR(-EINVAL); + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + + switch (type) { + case ACL_TYPE_ACCESS: + name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + default: + BUG(); } retval = ext2_xattr_get(inode, name_index, "", NULL, 0); if (retval > 0) { @@ -197,17 +168,9 @@ ext2_get_acl(struct inode *inode, int type) acl = ERR_PTR(retval); kfree(value); - if (!IS_ERR(acl)) { - switch(type) { - case ACL_TYPE_ACCESS: - ext2_iset_acl(inode, &ei->i_acl, acl); - break; + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); - case ACL_TYPE_DEFAULT: - ext2_iset_acl(inode, &ei->i_default_acl, acl); - break; - } - } return acl; } @@ -217,7 +180,6 @@ ext2_get_acl(struct inode *inode, int type) static int ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) { - struct ext2_inode_info *ei = EXT2_I(inode); int name_index; void *value = NULL; size_t size = 0; @@ -263,17 +225,8 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) error = ext2_xattr_set(inode, name_index, "", value, size, 0); kfree(value); - if (!error) { - switch(type) { - case ACL_TYPE_ACCESS: - ext2_iset_acl(inode, &ei->i_acl, acl); - break; - - case ACL_TYPE_DEFAULT: - ext2_iset_acl(inode, &ei->i_default_acl, acl); - break; - } - } + if (!error) + set_cached_acl(inode, type, acl); return error; } diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index b42cf57..ecefe47 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -53,10 +53,6 @@ static inline int ext2_acl_count(size_t size) #ifdef CONFIG_EXT2_FS_POSIX_ACL -/* Value for inode->u.ext2_i.i_acl and inode->u.ext2_i.i_default_acl - if the ACL has not been cached */ -#define EXT2_ACL_NOT_CACHED ((void *)-1) - /* acl.c */ extern int ext2_permission (struct inode *, int); extern int ext2_acl_chmod (struct inode *); diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index d988a71..9a8a8e2 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -47,10 +47,6 @@ struct ext2_inode_info { */ struct rw_semaphore xattr_sem; #endif -#ifdef CONFIG_EXT2_FS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif rwlock_t i_meta_lock; /* diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 29ed682..e271303 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1224,10 +1224,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) return inode; ei = EXT2_I(inode); -#ifdef CONFIG_EXT2_FS_POSIX_ACL - ei->i_acl = EXT2_ACL_NOT_CACHED; - ei->i_default_acl = EXT2_ACL_NOT_CACHED; -#endif ei->i_block_alloc_info = NULL; raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 6524eca..e1dedb0 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -66,8 +66,16 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str inode = NULL; if (ino) { inode = ext2_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); + if (unlikely(IS_ERR(inode))) { + if (PTR_ERR(inode) == -ESTALE) { + ext2_error(dir->i_sb, __func__, + "deleted inode referenced: %lu", + ino); + return ERR_PTR(-EIO); + } else { + return ERR_CAST(inode); + } + } } return d_splice_alias(inode, dentry); } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 4589996..1a9ffee 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -152,10 +152,6 @@ static struct inode *ext2_alloc_inode(struct super_block *sb) ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL); if (!ei) return NULL; -#ifdef CONFIG_EXT2_FS_POSIX_ACL - ei->i_acl = EXT2_ACL_NOT_CACHED; - ei->i_default_acl = EXT2_ACL_NOT_CACHED; -#endif ei->i_block_alloc_info = NULL; ei->vfs_inode.i_version = 1; return &ei->vfs_inode; @@ -198,18 +194,6 @@ static void destroy_inodecache(void) static void ext2_clear_inode(struct inode *inode) { struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info; -#ifdef CONFIG_EXT2_FS_POSIX_ACL - struct ext2_inode_info *ei = EXT2_I(inode); - - if (ei->i_acl && ei->i_acl != EXT2_ACL_NOT_CACHED) { - posix_acl_release(ei->i_acl); - ei->i_acl = EXT2_ACL_NOT_CACHED; - } - if (ei->i_default_acl && ei->i_default_acl != EXT2_ACL_NOT_CACHED) { - posix_acl_release(ei->i_default_acl); - ei->i_default_acl = EXT2_ACL_NOT_CACHED; - } -#endif ext2_discard_reservation(inode); EXT2_I(inode)->i_block_alloc_info = NULL; if (unlikely(rsv)) diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index e0c7454..e167bae 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -126,33 +126,6 @@ fail: return ERR_PTR(-EINVAL); } -static inline struct posix_acl * -ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl) -{ - struct posix_acl *acl = ACCESS_ONCE(*i_acl); - - if (acl) { - spin_lock(&inode->i_lock); - acl = *i_acl; - if (acl != EXT3_ACL_NOT_CACHED) - acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); - } - - return acl; -} - -static inline void -ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl, - struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*i_acl != EXT3_ACL_NOT_CACHED) - posix_acl_release(*i_acl); - *i_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - /* * Inode operation get_posix_acl(). * @@ -161,7 +134,6 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl, static struct posix_acl * ext3_get_acl(struct inode *inode, int type) { - struct ext3_inode_info *ei = EXT3_I(inode); int name_index; char *value = NULL; struct posix_acl *acl; @@ -170,24 +142,21 @@ ext3_get_acl(struct inode *inode, int type) if (!test_opt(inode->i_sb, POSIX_ACL)) return NULL; - switch(type) { - case ACL_TYPE_ACCESS: - acl = ext3_iget_acl(inode, &ei->i_acl); - if (acl != EXT3_ACL_NOT_CACHED) - return acl; - name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; - break; - - case ACL_TYPE_DEFAULT: - acl = ext3_iget_acl(inode, &ei->i_default_acl); - if (acl != EXT3_ACL_NOT_CACHED) - return acl; - name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; - break; - - default: - return ERR_PTR(-EINVAL); + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + + switch (type) { + case ACL_TYPE_ACCESS: + name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + default: + BUG(); } + retval = ext3_xattr_get(inode, name_index, "", NULL, 0); if (retval > 0) { value = kmalloc(retval, GFP_NOFS); @@ -203,17 +172,9 @@ ext3_get_acl(struct inode *inode, int type) acl = ERR_PTR(retval); kfree(value); - if (!IS_ERR(acl)) { - switch(type) { - case ACL_TYPE_ACCESS: - ext3_iset_acl(inode, &ei->i_acl, acl); - break; + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); - case ACL_TYPE_DEFAULT: - ext3_iset_acl(inode, &ei->i_default_acl, acl); - break; - } - } return acl; } @@ -226,7 +187,6 @@ static int ext3_set_acl(handle_t *handle, struct inode *inode, int type, struct posix_acl *acl) { - struct ext3_inode_info *ei = EXT3_I(inode); int name_index; void *value = NULL; size_t size = 0; @@ -271,17 +231,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, value, size, 0); kfree(value); - if (!error) { - switch(type) { - case ACL_TYPE_ACCESS: - ext3_iset_acl(inode, &ei->i_acl, acl); - break; - case ACL_TYPE_DEFAULT: - ext3_iset_acl(inode, &ei->i_default_acl, acl); - break; - } - } + if (!error) + set_cached_acl(inode, type, acl); + return error; } diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index 42da16b..07d15a3 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h @@ -53,10 +53,6 @@ static inline int ext3_acl_count(size_t size) #ifdef CONFIG_EXT3_FS_POSIX_ACL -/* Value for inode->u.ext3_i.i_acl and inode->u.ext3_i.i_default_acl - if the ACL has not been cached */ -#define EXT3_ACL_NOT_CACHED ((void *)-1) - /* acl.c */ extern int ext3_permission (struct inode *, int); extern int ext3_acl_chmod (struct inode *); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 05dea81..5f51fed 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2752,10 +2752,6 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) return inode; ei = EXT3_I(inode); -#ifdef CONFIG_EXT3_FS_POSIX_ACL - ei->i_acl = EXT3_ACL_NOT_CACHED; - ei->i_default_acl = EXT3_ACL_NOT_CACHED; -#endif ei->i_block_alloc_info = NULL; ret = __ext3_get_inode_loc(inode, &iloc, 0); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 601e881..524b349 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -464,10 +464,6 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS); if (!ei) return NULL; -#ifdef CONFIG_EXT3_FS_POSIX_ACL - ei->i_acl = EXT3_ACL_NOT_CACHED; - ei->i_default_acl = EXT3_ACL_NOT_CACHED; -#endif ei->i_block_alloc_info = NULL; ei->vfs_inode.i_version = 1; return &ei->vfs_inode; @@ -518,18 +514,6 @@ static void destroy_inodecache(void) static void ext3_clear_inode(struct inode *inode) { struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info; -#ifdef CONFIG_EXT3_FS_POSIX_ACL - if (EXT3_I(inode)->i_acl && - EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) { - posix_acl_release(EXT3_I(inode)->i_acl); - EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED; - } - if (EXT3_I(inode)->i_default_acl && - EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) { - posix_acl_release(EXT3_I(inode)->i_default_acl); - EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED; - } -#endif ext3_discard_reservation(inode); EXT3_I(inode)->i_block_alloc_info = NULL; if (unlikely(rsv)) diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 605aeed..f6d8967 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -126,33 +126,6 @@ fail: return ERR_PTR(-EINVAL); } -static inline struct posix_acl * -ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl) -{ - struct posix_acl *acl = ACCESS_ONCE(*i_acl); - - if (acl) { - spin_lock(&inode->i_lock); - acl = *i_acl; - if (acl != EXT4_ACL_NOT_CACHED) - acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); - } - - return acl; -} - -static inline void -ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl, - struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*i_acl != EXT4_ACL_NOT_CACHED) - posix_acl_release(*i_acl); - *i_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - /* * Inode operation get_posix_acl(). * @@ -161,7 +134,6 @@ ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl, static struct posix_acl * ext4_get_acl(struct inode *inode, int type) { - struct ext4_inode_info *ei = EXT4_I(inode); int name_index; char *value = NULL; struct posix_acl *acl; @@ -170,23 +142,19 @@ ext4_get_acl(struct inode *inode, int type) if (!test_opt(inode->i_sb, POSIX_ACL)) return NULL; + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + switch (type) { case ACL_TYPE_ACCESS: - acl = ext4_iget_acl(inode, &ei->i_acl); - if (acl != EXT4_ACL_NOT_CACHED) - return acl; name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; break; - case ACL_TYPE_DEFAULT: - acl = ext4_iget_acl(inode, &ei->i_default_acl); - if (acl != EXT4_ACL_NOT_CACHED) - return acl; name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; break; - default: - return ERR_PTR(-EINVAL); + BUG(); } retval = ext4_xattr_get(inode, name_index, "", NULL, 0); if (retval > 0) { @@ -203,17 +171,9 @@ ext4_get_acl(struct inode *inode, int type) acl = ERR_PTR(retval); kfree(value); - if (!IS_ERR(acl)) { - switch (type) { - case ACL_TYPE_ACCESS: - ext4_iset_acl(inode, &ei->i_acl, acl); - break; + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); - case ACL_TYPE_DEFAULT: - ext4_iset_acl(inode, &ei->i_default_acl, acl); - break; - } - } return acl; } @@ -226,7 +186,6 @@ static int ext4_set_acl(handle_t *handle, struct inode *inode, int type, struct posix_acl *acl) { - struct ext4_inode_info *ei = EXT4_I(inode); int name_index; void *value = NULL; size_t size = 0; @@ -271,17 +230,9 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, value, size, 0); kfree(value); - if (!error) { - switch (type) { - case ACL_TYPE_ACCESS: - ext4_iset_acl(inode, &ei->i_acl, acl); - break; + if (!error) + set_cached_acl(inode, type, acl); - case ACL_TYPE_DEFAULT: - ext4_iset_acl(inode, &ei->i_default_acl, acl); - break; - } - } return error; } diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index cb45257..949789d 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -53,10 +53,6 @@ static inline int ext4_acl_count(size_t size) #ifdef CONFIG_EXT4_FS_POSIX_ACL -/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl - if the ACL has not been cached */ -#define EXT4_ACL_NOT_CACHED ((void *)-1) - /* acl.c */ extern int ext4_permission(struct inode *, int); extern int ext4_acl_chmod(struct inode *); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 17b9998..0ddf7e5 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -595,10 +595,6 @@ struct ext4_inode_info { */ struct rw_semaphore xattr_sem; #endif -#ifdef CONFIG_EXT4_FS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif struct list_head i_orphan; /* unlinked but open inodes */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7c17ae2..60a26f3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4453,10 +4453,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) return inode; ei = EXT4_I(inode); -#ifdef CONFIG_EXT4_FS_POSIX_ACL - ei->i_acl = EXT4_ACL_NOT_CACHED; - ei->i_default_acl = EXT4_ACL_NOT_CACHED; -#endif ret = __ext4_get_inode_loc(inode, &iloc, 0); if (ret < 0) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8bb9e2d..8f4f079 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -666,10 +666,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) if (!ei) return NULL; -#ifdef CONFIG_EXT4_FS_POSIX_ACL - ei->i_acl = EXT4_ACL_NOT_CACHED; - ei->i_default_acl = EXT4_ACL_NOT_CACHED; -#endif ei->vfs_inode.i_version = 1; ei->vfs_inode.i_data.writeback_index = 0; memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); @@ -735,18 +731,6 @@ static void destroy_inodecache(void) static void ext4_clear_inode(struct inode *inode) { -#ifdef CONFIG_EXT4_FS_POSIX_ACL - if (EXT4_I(inode)->i_acl && - EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { - posix_acl_release(EXT4_I(inode)->i_acl); - EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED; - } - if (EXT4_I(inode)->i_default_acl && - EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) { - posix_acl_release(EXT4_I(inode)->i_default_acl); - EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; - } -#endif ext4_discard_preallocations(inode); if (EXT4_JOURNAL(inode)) jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index caf0491..c54226b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -278,7 +278,26 @@ int sb_has_dirty_inodes(struct super_block *sb) EXPORT_SYMBOL(sb_has_dirty_inodes); /* - * Write a single inode's dirty pages and inode data out to disk. + * Wait for writeback on an inode to complete. + */ +static void inode_wait_for_writeback(struct inode *inode) +{ + DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); + wait_queue_head_t *wqh; + + wqh = bit_waitqueue(&inode->i_state, __I_SYNC); + do { + spin_unlock(&inode_lock); + __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); + spin_lock(&inode_lock); + } while (inode->i_state & I_SYNC); +} + +/* + * Write out an inode's dirty pages. Called under inode_lock. Either the + * caller has ref on the inode (either via __iget or via syscall against an fd) + * or the inode has I_WILL_FREE set (via generic_forget_inode) + * * If `wait' is set, wait on the writeout. * * The whole writeout design is quite complex and fragile. We want to avoid @@ -288,13 +307,38 @@ EXPORT_SYMBOL(sb_has_dirty_inodes); * Called under inode_lock. */ static int -__sync_single_inode(struct inode *inode, struct writeback_control *wbc) +writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { - unsigned dirty; struct address_space *mapping = inode->i_mapping; int wait = wbc->sync_mode == WB_SYNC_ALL; + unsigned dirty; int ret; + if (!atomic_read(&inode->i_count)) + WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); + else + WARN_ON(inode->i_state & I_WILL_FREE); + + if (inode->i_state & I_SYNC) { + /* + * If this inode is locked for writeback and we are not doing + * writeback-for-data-integrity, move it to s_more_io so that + * writeback can proceed with the other inodes on s_io. + * + * We'll have another go at writing back this inode when we + * completed a full scan of s_io. + */ + if (!wait) { + requeue_io(inode); + return 0; + } + + /* + * It's a data-integrity sync. We must wait. + */ + inode_wait_for_writeback(inode); + } + BUG_ON(inode->i_state & I_SYNC); /* Set I_SYNC, reset I_DIRTY */ @@ -390,50 +434,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) } /* - * Write out an inode's dirty pages. Called under inode_lock. Either the - * caller has ref on the inode (either via __iget or via syscall against an fd) - * or the inode has I_WILL_FREE set (via generic_forget_inode) - */ -static int -__writeback_single_inode(struct inode *inode, struct writeback_control *wbc) -{ - wait_queue_head_t *wqh; - - if (!atomic_read(&inode->i_count)) - WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); - else - WARN_ON(inode->i_state & I_WILL_FREE); - - if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) { - /* - * We're skipping this inode because it's locked, and we're not - * doing writeback-for-data-integrity. Move it to s_more_io so - * that writeback can proceed with the other inodes on s_io. - * We'll have another go at writing back this inode when we - * completed a full scan of s_io. - */ - requeue_io(inode); - return 0; - } - - /* - * It's a data-integrity sync. We must wait. - */ - if (inode->i_state & I_SYNC) { - DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); - - wqh = bit_waitqueue(&inode->i_state, __I_SYNC); - do { - spin_unlock(&inode_lock); - __wait_on_bit(wqh, &wq, inode_wait, - TASK_UNINTERRUPTIBLE); - spin_lock(&inode_lock); - } while (inode->i_state & I_SYNC); - } - return __sync_single_inode(inode, wbc); -} - -/* * Write out a superblock's list of dirty inodes. A wait will be performed * upon no inodes, all inodes or the final one, depending upon sync_mode. * @@ -526,7 +526,7 @@ void generic_sync_sb_inodes(struct super_block *sb, BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); __iget(inode); pages_skipped = wbc->pages_skipped; - __writeback_single_inode(inode, wbc); + writeback_single_inode(inode, wbc); if (current_is_pdflush()) writeback_release(bdi); if (wbc->pages_skipped != pages_skipped) { @@ -708,7 +708,7 @@ int write_inode_now(struct inode *inode, int sync) might_sleep(); spin_lock(&inode_lock); - ret = __writeback_single_inode(inode, &wbc); + ret = writeback_single_inode(inode, &wbc); spin_unlock(&inode_lock); if (sync) inode_sync_wait(inode); @@ -732,7 +732,7 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc) int ret; spin_lock(&inode_lock); - ret = __writeback_single_inode(inode, wbc); + ret = writeback_single_inode(inode, wbc); spin_unlock(&inode_lock); return ret; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8fed2ed..f58ecbc 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -849,6 +849,81 @@ err: return err; } +static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_inval_inode_out outarg; + int err = -EINVAL; + + if (size != sizeof(outarg)) + goto err; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + goto err; + fuse_copy_finish(cs); + + down_read(&fc->killsb); + err = -ENOENT; + if (!fc->sb) + goto err_unlock; + + err = fuse_reverse_inval_inode(fc->sb, outarg.ino, + outarg.off, outarg.len); + +err_unlock: + up_read(&fc->killsb); + return err; + +err: + fuse_copy_finish(cs); + return err; +} + +static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_inval_entry_out outarg; + int err = -EINVAL; + char buf[FUSE_NAME_MAX+1]; + struct qstr name; + + if (size < sizeof(outarg)) + goto err; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + goto err; + + err = -ENAMETOOLONG; + if (outarg.namelen > FUSE_NAME_MAX) + goto err; + + name.name = buf; + name.len = outarg.namelen; + err = fuse_copy_one(cs, buf, outarg.namelen + 1); + if (err) + goto err; + fuse_copy_finish(cs); + buf[outarg.namelen] = 0; + name.hash = full_name_hash(name.name, name.len); + + down_read(&fc->killsb); + err = -ENOENT; + if (!fc->sb) + goto err_unlock; + + err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name); + +err_unlock: + up_read(&fc->killsb); + return err; + +err: + fuse_copy_finish(cs); + return err; +} + static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, unsigned int size, struct fuse_copy_state *cs) { @@ -856,6 +931,12 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, case FUSE_NOTIFY_POLL: return fuse_notify_poll(fc, size, cs); + case FUSE_NOTIFY_INVAL_INODE: + return fuse_notify_inval_inode(fc, size, cs); + + case FUSE_NOTIFY_INVAL_ENTRY: + return fuse_notify_inval_entry(fc, size, cs); + default: fuse_copy_finish(cs); return -EINVAL; @@ -910,7 +991,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { int err; - unsigned nbytes = iov_length(iov, nr_segs); + size_t nbytes = iov_length(iov, nr_segs); struct fuse_req *req; struct fuse_out_header oh; struct fuse_copy_state cs; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b3089a0..e703654 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -375,7 +375,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_req *req; struct fuse_req *forget_req; - struct fuse_open_in inarg; + struct fuse_create_in inarg; struct fuse_open_out outopen; struct fuse_entry_out outentry; struct fuse_file *ff; @@ -399,15 +399,20 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (!ff) goto out_put_request; + if (!fc->dont_mask) + mode &= ~current_umask(); + flags &= ~O_NOCTTY; memset(&inarg, 0, sizeof(inarg)); memset(&outentry, 0, sizeof(outentry)); inarg.flags = flags; inarg.mode = mode; + inarg.umask = current_umask(); req->in.h.opcode = FUSE_CREATE; req->in.h.nodeid = get_node_id(dir); req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) : + sizeof(inarg); req->in.args[0].value = &inarg; req->in.args[1].size = entry->d_name.len + 1; req->in.args[1].value = entry->d_name.name; @@ -546,12 +551,17 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode, if (IS_ERR(req)) return PTR_ERR(req); + if (!fc->dont_mask) + mode &= ~current_umask(); + memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; inarg.rdev = new_encode_dev(rdev); + inarg.umask = current_umask(); req->in.h.opcode = FUSE_MKNOD; req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE : + sizeof(inarg); req->in.args[0].value = &inarg; req->in.args[1].size = entry->d_name.len + 1; req->in.args[1].value = entry->d_name.name; @@ -578,8 +588,12 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode) if (IS_ERR(req)) return PTR_ERR(req); + if (!fc->dont_mask) + mode &= ~current_umask(); + memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; + inarg.umask = current_umask(); req->in.h.opcode = FUSE_MKDIR; req->in.numargs = 2; req->in.args[0].size = sizeof(inarg); @@ -845,6 +859,43 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, return err; } +int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, + struct qstr *name) +{ + int err = -ENOTDIR; + struct inode *parent; + struct dentry *dir; + struct dentry *entry; + + parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid); + if (!parent) + return -ENOENT; + + mutex_lock(&parent->i_mutex); + if (!S_ISDIR(parent->i_mode)) + goto unlock; + + err = -ENOENT; + dir = d_find_alias(parent); + if (!dir) + goto unlock; + + entry = d_lookup(dir, name); + dput(dir); + if (!entry) + goto unlock; + + fuse_invalidate_attr(parent); + fuse_invalidate_entry(entry); + dput(entry); + err = 0; + + unlock: + mutex_unlock(&parent->i_mutex); + iput(parent); + return err; +} + /* * Calling into a user-controlled filesystem gives the filesystem * daemon ptrace-like capabilities over the requester process. This diff --git a/fs/fuse/file.c b/fs/fuse/file.c index fce6ce6..cbc4640 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1922,7 +1922,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait) req = fuse_get_req(fc); if (IS_ERR(req)) - return PTR_ERR(req); + return POLLERR; req->in.h.opcode = FUSE_POLL; req->in.h.nodeid = ff->nodeid; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index aaf2f9f..52b641f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -446,6 +446,9 @@ struct fuse_conn { /** Do multi-page cached writes */ unsigned big_writes:1; + /** Don't apply umask to creation modes */ + unsigned dont_mask:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; @@ -481,6 +484,12 @@ struct fuse_conn { /** Called on final put */ void (*release)(struct fuse_conn *); + + /** Super block for this connection. */ + struct super_block *sb; + + /** Read/write semaphore to hold when accessing sb. */ + struct rw_semaphore killsb; }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) @@ -509,6 +518,11 @@ extern const struct file_operations fuse_dev_operations; extern const struct dentry_operations fuse_dentry_operations; /** + * Inode to nodeid comparison. + */ +int fuse_inode_eq(struct inode *inode, void *_nodeidp); + +/** * Get a filled in inode */ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, @@ -708,6 +722,19 @@ void fuse_release_nowrite(struct inode *inode); u64 fuse_get_attr_version(struct fuse_conn *fc); +/** + * File-system tells the kernel to invalidate cache for the given node id. + */ +int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, + loff_t offset, loff_t len); + +/** + * File-system tells the kernel to invalidate parent attributes and + * the dentry matching parent/name. + */ +int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, + struct qstr *name); + int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool isdir); ssize_t fuse_direct_io(struct file *file, const char __user *buf, diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d8673cc..f91ccc4 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -206,7 +206,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) BUG(); } -static int fuse_inode_eq(struct inode *inode, void *_nodeidp) +int fuse_inode_eq(struct inode *inode, void *_nodeidp) { u64 nodeid = *(u64 *) _nodeidp; if (get_node_id(inode) == nodeid) @@ -257,6 +257,31 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, return inode; } +int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, + loff_t offset, loff_t len) +{ + struct inode *inode; + pgoff_t pg_start; + pgoff_t pg_end; + + inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid); + if (!inode) + return -ENOENT; + + fuse_invalidate_attr(inode); + if (offset >= 0) { + pg_start = offset >> PAGE_CACHE_SHIFT; + if (len <= 0) + pg_end = -1; + else + pg_end = (offset + len - 1) >> PAGE_CACHE_SHIFT; + invalidate_inode_pages2_range(inode->i_mapping, + pg_start, pg_end); + } + iput(inode); + return 0; +} + static void fuse_umount_begin(struct super_block *sb) { fuse_abort_conn(get_fuse_conn_super(sb)); @@ -480,6 +505,7 @@ void fuse_conn_init(struct fuse_conn *fc) memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); mutex_init(&fc->inst_mutex); + init_rwsem(&fc->killsb); atomic_set(&fc->count, 1); init_waitqueue_head(&fc->waitq); init_waitqueue_head(&fc->blocked_waitq); @@ -725,6 +751,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) } if (arg->flags & FUSE_BIG_WRITES) fc->big_writes = 1; + if (arg->flags & FUSE_DONT_MASK) + fc->dont_mask = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -748,7 +776,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | - FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES; + FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); @@ -860,10 +888,16 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fuse_conn_init(fc); fc->dev = sb->s_dev; + fc->sb = sb; err = fuse_bdi_init(fc, sb); if (err) goto err_put_conn; + /* Handle umasking inside the fuse code */ + if (sb->s_flags & MS_POSIXACL) + fc->dont_mask = 1; + sb->s_flags |= MS_POSIXACL; + fc->release = fuse_free_conn; fc->flags = d.flags; fc->user_id = d.user_id; @@ -941,12 +975,25 @@ static int fuse_get_sb(struct file_system_type *fs_type, return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt); } +static void fuse_kill_sb_anon(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + if (fc) { + down_write(&fc->killsb); + fc->sb = NULL; + up_write(&fc->killsb); + } + + kill_anon_super(sb); +} + static struct file_system_type fuse_fs_type = { .owner = THIS_MODULE, .name = "fuse", .fs_flags = FS_HAS_SUBTYPE, .get_sb = fuse_get_sb, - .kill_sb = kill_anon_super, + .kill_sb = fuse_kill_sb_anon, }; #ifdef CONFIG_BLOCK @@ -958,11 +1005,24 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type, mnt); } +static void fuse_kill_sb_blk(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + if (fc) { + down_write(&fc->killsb); + fc->sb = NULL; + up_write(&fc->killsb); + } + + kill_block_super(sb); +} + static struct file_system_type fuseblk_fs_type = { .owner = THIS_MODULE, .name = "fuseblk", .get_sb = fuse_get_sb_blk, - .kill_sb = kill_block_super, + .kill_sb = fuse_kill_sb_blk, .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index fe02ad4..032604e 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -972,6 +972,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) sb->s_blocksize_bits = 10; sb->s_magic = HOSTFS_SUPER_MAGIC; sb->s_op = &hostfs_sbops; + sb->s_maxbytes = MAX_LFS_FILESIZE; /* NULL is printed as <NULL> by sprintf: avoid that. */ if (req_root == NULL) @@ -25,6 +25,7 @@ #include <linux/fsnotify.h> #include <linux/mount.h> #include <linux/async.h> +#include <linux/posix_acl.h> /* * This is needed for the following functions: @@ -189,6 +190,9 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) } inode->i_private = NULL; inode->i_mapping = mapping; +#ifdef CONFIG_FS_POSIX_ACL + inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; +#endif #ifdef CONFIG_FSNOTIFY inode->i_fsnotify_mask = 0; @@ -227,6 +231,12 @@ void destroy_inode(struct inode *inode) ima_inode_free(inode); security_inode_free(inode); fsnotify_inode_delete(inode); +#ifdef CONFIG_FS_POSIX_ACL + if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED) + posix_acl_release(inode->i_acl); + if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) + posix_acl_release(inode->i_default_acl); +#endif if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); else @@ -665,12 +675,17 @@ void unlock_new_inode(struct inode *inode) if (inode->i_mode & S_IFDIR) { struct file_system_type *type = inode->i_sb->s_type; - /* - * ensure nobody is actually holding i_mutex - */ - mutex_destroy(&inode->i_mutex); - mutex_init(&inode->i_mutex); - lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key); + /* Set new key only if filesystem hasn't already changed it */ + if (!lockdep_match_class(&inode->i_mutex, + &type->i_mutex_key)) { + /* + * ensure nobody is actually holding i_mutex + */ + mutex_destroy(&inode->i_mutex); + mutex_init(&inode->i_mutex); + lockdep_set_class(&inode->i_mutex, + &type->i_mutex_dir_key); + } } #endif /* @@ -15,6 +15,7 @@ #include <linux/uaccess.h> #include <linux/writeback.h> #include <linux/buffer_head.h> +#include <linux/falloc.h> #include <asm/ioctls.h> @@ -403,6 +404,37 @@ EXPORT_SYMBOL(generic_block_fiemap); #endif /* CONFIG_BLOCK */ +/* + * This provides compatibility with legacy XFS pre-allocation ioctls + * which predate the fallocate syscall. + * + * Only the l_start, l_len and l_whence fields of the 'struct space_resv' + * are used here, rest are ignored. + */ +int ioctl_preallocate(struct file *filp, void __user *argp) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct space_resv sr; + + if (copy_from_user(&sr, argp, sizeof(sr))) + return -EFAULT; + + switch (sr.l_whence) { + case SEEK_SET: + break; + case SEEK_CUR: + sr.l_start += filp->f_pos; + break; + case SEEK_END: + sr.l_start += i_size_read(inode); + break; + default: + return -EINVAL; + } + + return do_fallocate(filp, FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len); +} + static int file_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -414,6 +446,9 @@ static int file_ioctl(struct file *filp, unsigned int cmd, return ioctl_fibmap(filp, p); case FIONREAD: return put_user(i_size_read(inode) - filp->f_pos, p); + case FS_IOC_RESVSP: + case FS_IOC_RESVSP64: + return ioctl_preallocate(filp, p); } return vfs_ioctl(filp, cmd, arg); diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 043740d..8fcb62392 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -156,48 +156,25 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size) return ERR_PTR(-EINVAL); } -static struct posix_acl *jffs2_iget_acl(struct inode *inode, struct posix_acl **i_acl) -{ - struct posix_acl *acl = JFFS2_ACL_NOT_CACHED; - - spin_lock(&inode->i_lock); - if (*i_acl != JFFS2_ACL_NOT_CACHED) - acl = posix_acl_dup(*i_acl); - spin_unlock(&inode->i_lock); - return acl; -} - -static void jffs2_iset_acl(struct inode *inode, struct posix_acl **i_acl, struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*i_acl != JFFS2_ACL_NOT_CACHED) - posix_acl_release(*i_acl); - *i_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - static struct posix_acl *jffs2_get_acl(struct inode *inode, int type) { - struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); struct posix_acl *acl; char *value = NULL; int rc, xprefix; + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + switch (type) { case ACL_TYPE_ACCESS: - acl = jffs2_iget_acl(inode, &f->i_acl_access); - if (acl != JFFS2_ACL_NOT_CACHED) - return acl; xprefix = JFFS2_XPREFIX_ACL_ACCESS; break; case ACL_TYPE_DEFAULT: - acl = jffs2_iget_acl(inode, &f->i_acl_default); - if (acl != JFFS2_ACL_NOT_CACHED) - return acl; xprefix = JFFS2_XPREFIX_ACL_DEFAULT; break; default: - return ERR_PTR(-EINVAL); + BUG(); } rc = do_jffs2_getxattr(inode, xprefix, "", NULL, 0); if (rc > 0) { @@ -215,16 +192,8 @@ static struct posix_acl *jffs2_get_acl(struct inode *inode, int type) } if (value) kfree(value); - if (!IS_ERR(acl)) { - switch (type) { - case ACL_TYPE_ACCESS: - jffs2_iset_acl(inode, &f->i_acl_access, acl); - break; - case ACL_TYPE_DEFAULT: - jffs2_iset_acl(inode, &f->i_acl_default, acl); - break; - } - } + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); return acl; } @@ -249,7 +218,6 @@ static int __jffs2_set_acl(struct inode *inode, int xprefix, struct posix_acl *a static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) { - struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); int rc, xprefix; if (S_ISLNK(inode->i_mode)) @@ -285,16 +253,8 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) return -EINVAL; } rc = __jffs2_set_acl(inode, xprefix, acl); - if (!rc) { - switch(type) { - case ACL_TYPE_ACCESS: - jffs2_iset_acl(inode, &f->i_acl_access, acl); - break; - case ACL_TYPE_DEFAULT: - jffs2_iset_acl(inode, &f->i_acl_default, acl); - break; - } - } + if (!rc) + set_cached_acl(inode, type, acl); return rc; } @@ -321,12 +281,10 @@ int jffs2_permission(struct inode *inode, int mask) int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) { - struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); struct posix_acl *acl, *clone; int rc; - f->i_acl_default = NULL; - f->i_acl_access = NULL; + cache_no_acl(inode); if (S_ISLNK(*i_mode)) return 0; /* Symlink always has no-ACL */ @@ -339,7 +297,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) *i_mode &= ~current_umask(); } else { if (S_ISDIR(*i_mode)) - jffs2_iset_acl(inode, &f->i_acl_default, acl); + set_cached_acl(inode, ACL_TYPE_DEFAULT, acl); clone = posix_acl_clone(acl, GFP_KERNEL); if (!clone) @@ -350,7 +308,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) return rc; } if (rc > 0) - jffs2_iset_acl(inode, &f->i_acl_access, clone); + set_cached_acl(inode, ACL_TYPE_ACCESS, clone); posix_acl_release(clone); } @@ -359,17 +317,16 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) int jffs2_init_acl_post(struct inode *inode) { - struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); int rc; - if (f->i_acl_default) { - rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_DEFAULT, f->i_acl_default); + if (inode->i_default_acl) { + rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_DEFAULT, inode->i_default_acl); if (rc) return rc; } - if (f->i_acl_access) { - rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_ACCESS, f->i_acl_access); + if (inode->i_acl) { + rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_ACCESS, inode->i_acl); if (rc) return rc; } @@ -377,18 +334,6 @@ int jffs2_init_acl_post(struct inode *inode) return 0; } -void jffs2_clear_acl(struct jffs2_inode_info *f) -{ - if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) { - posix_acl_release(f->i_acl_access); - f->i_acl_access = JFFS2_ACL_NOT_CACHED; - } - if (f->i_acl_default && f->i_acl_default != JFFS2_ACL_NOT_CACHED) { - posix_acl_release(f->i_acl_default); - f->i_acl_default = JFFS2_ACL_NOT_CACHED; - } -} - int jffs2_acl_chmod(struct inode *inode) { struct posix_acl *acl, *clone; diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index 8ca058a..fc929f2 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -26,13 +26,10 @@ struct jffs2_acl_header { #ifdef CONFIG_JFFS2_FS_POSIX_ACL -#define JFFS2_ACL_NOT_CACHED ((void *)-1) - extern int jffs2_permission(struct inode *, int); extern int jffs2_acl_chmod(struct inode *); extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); extern int jffs2_init_acl_post(struct inode *); -extern void jffs2_clear_acl(struct jffs2_inode_info *); extern struct xattr_handler jffs2_acl_access_xattr_handler; extern struct xattr_handler jffs2_acl_default_xattr_handler; @@ -43,6 +40,5 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler; #define jffs2_acl_chmod(inode) (0) #define jffs2_init_acl_pre(dir_i,inode,mode) (0) #define jffs2_init_acl_post(inode) (0) -#define jffs2_clear_acl(f) #endif /* CONFIG_JFFS2_FS_POSIX_ACL */ diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h index 4c41db9..c6923da 100644 --- a/fs/jffs2/jffs2_fs_i.h +++ b/fs/jffs2/jffs2_fs_i.h @@ -50,10 +50,6 @@ struct jffs2_inode_info { uint16_t flags; uint8_t usercompr; struct inode vfs_inode; -#ifdef CONFIG_JFFS2_FS_POSIX_ACL - struct posix_acl *i_acl_access; - struct posix_acl *i_acl_default; -#endif }; #endif /* _JFFS2_FS_I */ diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 2228380..a7f03b7 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -56,10 +56,6 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) f->target = NULL; f->flags = 0; f->usercompr = 0; -#ifdef CONFIG_JFFS2_FS_POSIX_ACL - f->i_acl_access = JFFS2_ACL_NOT_CACHED; - f->i_acl_default = JFFS2_ACL_NOT_CACHED; -#endif } diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 1fc1e92..1a80301 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -1424,7 +1424,6 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f) struct jffs2_full_dirent *fd, *fds; int deleted; - jffs2_clear_acl(f); jffs2_xattr_delete_inode(c, f->inocache); mutex_lock(&f->sem); deleted = f->inocache && !f->inocache->pino_nlink; diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 7515e73..696686c 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -130,9 +130,9 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) if (jffs2_sum_active()) { s = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL); if (!s) { - kfree(flashbuf); JFFS2_WARNING("Can't allocate memory for summary\n"); - return -ENOMEM; + ret = -ENOMEM; + goto out; } } diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 06ca1b8..91fa3ad 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -31,27 +31,24 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type) { struct posix_acl *acl; char *ea_name; - struct jfs_inode_info *ji = JFS_IP(inode); - struct posix_acl **p_acl; int size; char *value = NULL; + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + switch(type) { case ACL_TYPE_ACCESS: ea_name = POSIX_ACL_XATTR_ACCESS; - p_acl = &ji->i_acl; break; case ACL_TYPE_DEFAULT: ea_name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &ji->i_default_acl; break; default: return ERR_PTR(-EINVAL); } - if (*p_acl != JFS_ACL_NOT_CACHED) - return posix_acl_dup(*p_acl); - size = __jfs_getxattr(inode, ea_name, NULL, 0); if (size > 0) { @@ -62,17 +59,18 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type) } if (size < 0) { - if (size == -ENODATA) { - *p_acl = NULL; + if (size == -ENODATA) acl = NULL; - } else + else acl = ERR_PTR(size); } else { acl = posix_acl_from_xattr(value, size); - if (!IS_ERR(acl)) - *p_acl = posix_acl_dup(acl); } kfree(value); + if (!IS_ERR(acl)) { + set_cached_acl(inode, type, acl); + posix_acl_release(acl); + } return acl; } @@ -80,8 +78,6 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type, struct posix_acl *acl) { char *ea_name; - struct jfs_inode_info *ji = JFS_IP(inode); - struct posix_acl **p_acl; int rc; int size = 0; char *value = NULL; @@ -92,11 +88,9 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type, switch(type) { case ACL_TYPE_ACCESS: ea_name = POSIX_ACL_XATTR_ACCESS; - p_acl = &ji->i_acl; break; case ACL_TYPE_DEFAULT: ea_name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &ji->i_default_acl; if (!S_ISDIR(inode->i_mode)) return acl ? -EACCES : 0; break; @@ -116,27 +110,24 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type, out: kfree(value); - if (!rc) { - if (*p_acl && (*p_acl != JFS_ACL_NOT_CACHED)) - posix_acl_release(*p_acl); - *p_acl = posix_acl_dup(acl); - } + if (!rc) + set_cached_acl(inode, type, acl); + return rc; } static int jfs_check_acl(struct inode *inode, int mask) { - struct jfs_inode_info *ji = JFS_IP(inode); + struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); - if (ji->i_acl == JFS_ACL_NOT_CACHED) { - struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + int error = posix_acl_permission(inode, acl, mask); posix_acl_release(acl); + return error; } - if (ji->i_acl) - return posix_acl_permission(inode, ji->i_acl, mask); return -EAGAIN; } diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 439901d..1439f11 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -74,10 +74,6 @@ struct jfs_inode_info { /* xattr_sem allows us to access the xattrs without taking i_mutex */ struct rw_semaphore xattr_sem; lid_t xtlid; /* lid of xtree lock on directory */ -#ifdef CONFIG_JFS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif union { struct { xtpage_t _xtroot; /* 288: xtree root */ @@ -107,8 +103,6 @@ struct jfs_inode_info { #define i_inline u.link._inline #define i_inline_ea u.link._inline_ea -#define JFS_ACL_NOT_CACHED ((void *)-1) - #define IREAD_LOCK(ip, subclass) \ down_read_nested(&JFS_IP(ip)->rdwrlock, subclass) #define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock) diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 09b1b6e..37e6dcd 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -128,18 +128,6 @@ static void jfs_destroy_inode(struct inode *inode) ji->active_ag = -1; } spin_unlock_irq(&ji->ag_lock); - -#ifdef CONFIG_JFS_POSIX_ACL - if (ji->i_acl != JFS_ACL_NOT_CACHED) { - posix_acl_release(ji->i_acl); - ji->i_acl = JFS_ACL_NOT_CACHED; - } - if (ji->i_default_acl != JFS_ACL_NOT_CACHED) { - posix_acl_release(ji->i_default_acl); - ji->i_default_acl = JFS_ACL_NOT_CACHED; - } -#endif - kmem_cache_free(jfs_inode_cachep, ji); } @@ -798,10 +786,6 @@ static void init_once(void *foo) init_rwsem(&jfs_ip->xattr_sem); spin_lock_init(&jfs_ip->ag_lock); jfs_ip->active_ag = -1; -#ifdef CONFIG_JFS_POSIX_ACL - jfs_ip->i_acl = JFS_ACL_NOT_CACHED; - jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED; -#endif inode_init_once(&jfs_ip->vfs_inode); } diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 61dfa81..fad3645 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -727,10 +727,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, /* * We're changing the ACL. Get rid of the cached one */ - acl =JFS_IP(inode)->i_acl; - if (acl != JFS_ACL_NOT_CACHED) - posix_acl_release(acl); - JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED; + forget_cached_acl(inode, ACL_TYPE_ACCESS); return 0; } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) { @@ -746,10 +743,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, /* * We're changing the default ACL. Get rid of the cached one */ - acl =JFS_IP(inode)->i_default_acl; - if (acl && (acl != JFS_ACL_NOT_CACHED)) - posix_acl_release(acl); - JFS_IP(inode)->i_default_acl = JFS_ACL_NOT_CACHED; + forget_cached_acl(inode, ACL_TYPE_DEFAULT); return 0; } @@ -1698,8 +1698,11 @@ struct file *do_filp_open(int dfd, const char *pathname, if (error) return ERR_PTR(error); error = path_walk(pathname, &nd); - if (error) + if (error) { + if (nd.root.mnt) + path_put(&nd.root); return ERR_PTR(error); + } if (unlikely(!audit_dummy_context())) audit_inode(pathname, nd.path.dentry); @@ -1758,7 +1761,13 @@ do_last: goto exit; } filp = nameidata_to_filp(&nd, open_flag); + if (IS_ERR(filp)) + ima_counts_put(&nd.path, + acc_mode & (MAY_READ | MAY_WRITE | + MAY_EXEC)); mnt_drop_write(nd.path.mnt); + if (nd.root.mnt) + path_put(&nd.root); return filp; } @@ -1812,6 +1821,9 @@ ok: goto exit; } filp = nameidata_to_filp(&nd, open_flag); + if (IS_ERR(filp)) + ima_counts_put(&nd.path, + acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC)); /* * It is now safe to drop the mnt write * because the filp has had a write taken @@ -1819,6 +1831,8 @@ ok: */ if (will_write) mnt_drop_write(nd.path.mnt); + if (nd.root.mnt) + path_put(&nd.root); return filp; exit_mutex_unlock: @@ -1859,6 +1873,8 @@ do_link: * with "intent.open". */ release_open_intent(&nd); + if (nd.root.mnt) + path_put(&nd.root); return ERR_PTR(error); } nd.flags &= ~LOOKUP_PARENT; diff --git a/fs/namespace.c b/fs/namespace.c index a7bea8c..3dc283f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -42,6 +42,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); static int event; static DEFINE_IDA(mnt_id_ida); static DEFINE_IDA(mnt_group_ida); +static int mnt_id_start = 0; +static int mnt_group_start = 1; static struct list_head *mount_hashtable __read_mostly; static struct kmem_cache *mnt_cache __read_mostly; @@ -69,7 +71,9 @@ static int mnt_alloc_id(struct vfsmount *mnt) retry: ida_pre_get(&mnt_id_ida, GFP_KERNEL); spin_lock(&vfsmount_lock); - res = ida_get_new(&mnt_id_ida, &mnt->mnt_id); + res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); + if (!res) + mnt_id_start = mnt->mnt_id + 1; spin_unlock(&vfsmount_lock); if (res == -EAGAIN) goto retry; @@ -79,8 +83,11 @@ retry: static void mnt_free_id(struct vfsmount *mnt) { + int id = mnt->mnt_id; spin_lock(&vfsmount_lock); - ida_remove(&mnt_id_ida, mnt->mnt_id); + ida_remove(&mnt_id_ida, id); + if (mnt_id_start > id) + mnt_id_start = id; spin_unlock(&vfsmount_lock); } @@ -91,10 +98,18 @@ static void mnt_free_id(struct vfsmount *mnt) */ static int mnt_alloc_group_id(struct vfsmount *mnt) { + int res; + if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL)) return -ENOMEM; - return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id); + res = ida_get_new_above(&mnt_group_ida, + mnt_group_start, + &mnt->mnt_group_id); + if (!res) + mnt_group_start = mnt->mnt_group_id + 1; + + return res; } /* @@ -102,7 +117,10 @@ static int mnt_alloc_group_id(struct vfsmount *mnt) */ void mnt_release_group_id(struct vfsmount *mnt) { - ida_remove(&mnt_group_ida, mnt->mnt_group_id); + int id = mnt->mnt_group_id; + ida_remove(&mnt_group_ida, id); + if (mnt_group_start > id) + mnt_group_start = id; mnt->mnt_group_id = 0; } @@ -2222,16 +2240,9 @@ static void __init init_mount_tree(void) mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); if (IS_ERR(mnt)) panic("Can't create rootfs"); - ns = kmalloc(sizeof(*ns), GFP_KERNEL); - if (!ns) + ns = create_mnt_ns(mnt); + if (IS_ERR(ns)) panic("Can't allocate initial namespace"); - atomic_set(&ns->count, 1); - INIT_LIST_HEAD(&ns->list); - init_waitqueue_head(&ns->poll); - ns->event = 0; - list_add(&mnt->mnt_list, &ns->list); - ns->root = mnt; - mnt->mnt_ns = ns; init_task.nsproxy->mnt_ns = ns; get_mnt_ns(ns); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 4145083..23341c1 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -678,7 +678,6 @@ __be32 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access, struct file **filp) { - const struct cred *cred = current_cred(); struct dentry *dentry; struct inode *inode; int flags = O_RDONLY|O_LARGEFILE; @@ -733,7 +732,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, vfs_dq_init(inode); } *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), - flags, cred); + flags, current_cred()); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); else diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 2696d6b..fe9d8f2 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -309,10 +309,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) /* ii->i_file_acl = 0; */ /* ii->i_dir_acl = 0; */ ii->i_dir_start_lookup = 0; -#ifdef CONFIG_NILFS_FS_POSIX_ACL - ii->i_acl = NULL; - ii->i_default_acl = NULL; -#endif ii->i_cno = 0; nilfs_set_inode_flags(inode); spin_lock(&sbi->s_next_gen_lock); @@ -434,10 +430,6 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh); -#ifdef CONFIG_NILFS_FS_POSIX_ACL - ii->i_acl = NILFS_ACL_NOT_CACHED; - ii->i_default_acl = NILFS_ACL_NOT_CACHED; -#endif if (nilfs_read_inode_common(inode, raw_inode)) goto failed_unmap; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index edf6a59..724c637 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -58,10 +58,6 @@ struct nilfs_inode_info { */ struct rw_semaphore xattr_sem; #endif -#ifdef CONFIG_NILFS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif struct buffer_head *i_bh; /* i_bh contains a new or dirty disk inode */ struct inode vfs_inode; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index ab785f8..8e2ec43 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -189,16 +189,6 @@ static void nilfs_clear_inode(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); -#ifdef CONFIG_NILFS_POSIX_ACL - if (ii->i_acl && ii->i_acl != NILFS_ACL_NOT_CACHED) { - posix_acl_release(ii->i_acl); - ii->i_acl = NILFS_ACL_NOT_CACHED; - } - if (ii->i_default_acl && ii->i_default_acl != NILFS_ACL_NOT_CACHED) { - posix_acl_release(ii->i_default_acl); - ii->i_default_acl = NILFS_ACL_NOT_CACHED; - } -#endif /* * Free resources allocated in nilfs_read_inode(), here. */ diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index ff231ad..ff27a29 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -296,12 +296,15 @@ static int inotify_fasync(int fd, struct file *file, int on) static int inotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; + struct user_struct *user = group->inotify_data.user; fsnotify_clear_marks_by_group(group); /* free this group, matching get was inotify_init->fsnotify_obtain_group */ fsnotify_put_group(group); + atomic_dec(&user->inotify_devs); + return 0; } diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 6cdeaa7..110bb57 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -92,6 +92,9 @@ struct ocfs2_unblock_ctl { enum ocfs2_unblock_action unblock_action; }; +/* Lockdep class keys */ +struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; + static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, int new_level); static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); @@ -317,9 +320,16 @@ static int ocfs2_lock_create(struct ocfs2_super *osb, u32 dlm_flags); static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, int wanted); -static void ocfs2_cluster_unlock(struct ocfs2_super *osb, - struct ocfs2_lock_res *lockres, - int level); +static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + int level, unsigned long caller_ip); +static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + int level) +{ + __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_); +} + static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); @@ -489,6 +499,13 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); ocfs2_init_lock_stats(res); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (type != OCFS2_LOCK_TYPE_OPEN) + lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type], + &lockdep_keys[type], 0); + else + res->l_lockdep_map.key = NULL; +#endif } void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) @@ -644,14 +661,10 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, struct ocfs2_super *osb) { - struct ocfs2_orphan_scan_lvb *lvb; - ocfs2_lock_res_init_once(res); ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, &ocfs2_orphan_scan_lops, osb); - lvb = ocfs2_dlm_lvb(&res->l_lksb); - lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; } void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, @@ -1256,11 +1269,13 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, return ret; } -static int ocfs2_cluster_lock(struct ocfs2_super *osb, - struct ocfs2_lock_res *lockres, - int level, - u32 lkm_flags, - int arg_flags) +static int __ocfs2_cluster_lock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + int level, + u32 lkm_flags, + int arg_flags, + int l_subclass, + unsigned long caller_ip) { struct ocfs2_mask_waiter mw; int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); @@ -1403,13 +1418,37 @@ out: } ocfs2_update_lock_stats(lockres, level, &mw, ret); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (!ret && lockres->l_lockdep_map.key != NULL) { + if (level == DLM_LOCK_PR) + rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass, + !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), + caller_ip); + else + rwsem_acquire(&lockres->l_lockdep_map, l_subclass, + !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), + caller_ip); + } +#endif mlog_exit(ret); return ret; } -static void ocfs2_cluster_unlock(struct ocfs2_super *osb, - struct ocfs2_lock_res *lockres, - int level) +static inline int ocfs2_cluster_lock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + int level, + u32 lkm_flags, + int arg_flags) +{ + return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags, + 0, _RET_IP_); +} + + +static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + int level, + unsigned long caller_ip) { unsigned long flags; @@ -1418,6 +1457,10 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb, ocfs2_dec_holders(lockres, level); ocfs2_downconvert_on_unlock(osb, lockres); spin_unlock_irqrestore(&lockres->l_lock, flags); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (lockres->l_lockdep_map.key != NULL) + rwsem_release(&lockres->l_lockdep_map, 1, caller_ip); +#endif mlog_exit_void(); } @@ -1989,7 +2032,8 @@ static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, { struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); - if (lvb->lvb_version == OCFS2_LVB_VERSION + if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) + && lvb->lvb_version == OCFS2_LVB_VERSION && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) return 1; return 0; @@ -2162,10 +2206,11 @@ static int ocfs2_assign_bh(struct inode *inode, * returns < 0 error if the callback will never be called, otherwise * the result of the lock will be communicated via the callback. */ -int ocfs2_inode_lock_full(struct inode *inode, - struct buffer_head **ret_bh, - int ex, - int arg_flags) +int ocfs2_inode_lock_full_nested(struct inode *inode, + struct buffer_head **ret_bh, + int ex, + int arg_flags, + int subclass) { int status, level, acquired; u32 dlm_flags; @@ -2203,7 +2248,8 @@ int ocfs2_inode_lock_full(struct inode *inode, if (arg_flags & OCFS2_META_LOCK_NOQUEUE) dlm_flags |= DLM_LKF_NOQUEUE; - status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); + status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags, + arg_flags, subclass, _RET_IP_); if (status < 0) { if (status != -EAGAIN && status != -EIOCBRETRY) mlog_errno(status); @@ -2369,35 +2415,45 @@ void ocfs2_inode_unlock(struct inode *inode, mlog_exit_void(); } -int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex) +int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) { struct ocfs2_lock_res *lockres; struct ocfs2_orphan_scan_lvb *lvb; - int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; int status = 0; + if (ocfs2_is_hard_readonly(osb)) + return -EROFS; + + if (ocfs2_mount_local(osb)) + return 0; + lockres = &osb->osb_orphan_scan.os_lockres; - status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); + status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); if (status < 0) return status; lvb = ocfs2_dlm_lvb(&lockres->l_lksb); - if (lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) + if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && + lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) *seqno = be32_to_cpu(lvb->lvb_os_seqno); + else + *seqno = osb->osb_orphan_scan.os_seqno + 1; + return status; } -void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex) +void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno) { struct ocfs2_lock_res *lockres; struct ocfs2_orphan_scan_lvb *lvb; - int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; - lockres = &osb->osb_orphan_scan.os_lockres; - lvb = ocfs2_dlm_lvb(&lockres->l_lksb); - lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; - lvb->lvb_os_seqno = cpu_to_be32(seqno); - ocfs2_cluster_unlock(osb, lockres, level); + if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) { + lockres = &osb->osb_orphan_scan.os_lockres; + lvb = ocfs2_dlm_lvb(&lockres->l_lksb); + lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; + lvb->lvb_os_seqno = cpu_to_be32(seqno); + ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); + } } int ocfs2_super_lock(struct ocfs2_super *osb, @@ -3627,7 +3683,8 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) struct ocfs2_global_disk_dqinfo *gdinfo; int status = 0; - if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { + if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && + lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 31b90d7..7553836 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -78,6 +78,14 @@ struct ocfs2_orphan_scan_lvb { /* don't block waiting for the downconvert thread, instead return -EAGAIN */ #define OCFS2_LOCK_NONBLOCK (0x04) +/* Locking subclasses of inode cluster lock */ +enum { + OI_LS_NORMAL = 0, + OI_LS_PARENT, + OI_LS_RENAME1, + OI_LS_RENAME2, +}; + int ocfs2_dlm_init(struct ocfs2_super *osb); void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending); void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); @@ -104,25 +112,31 @@ void ocfs2_open_unlock(struct inode *inode); int ocfs2_inode_lock_atime(struct inode *inode, struct vfsmount *vfsmnt, int *level); -int ocfs2_inode_lock_full(struct inode *inode, +int ocfs2_inode_lock_full_nested(struct inode *inode, struct buffer_head **ret_bh, int ex, - int arg_flags); + int arg_flags, + int subclass); int ocfs2_inode_lock_with_page(struct inode *inode, struct buffer_head **ret_bh, int ex, struct page *page); +/* Variants without special locking class or flags */ +#define ocfs2_inode_lock_full(i, r, e, f)\ + ocfs2_inode_lock_full_nested(i, r, e, f, OI_LS_NORMAL) +#define ocfs2_inode_lock_nested(i, b, e, s)\ + ocfs2_inode_lock_full_nested(i, b, e, 0, s) /* 99% of the time we don't want to supply any additional flags -- * those are for very specific cases only. */ -#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full(i, b, e, 0) +#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full_nested(i, b, e, 0, OI_LS_NORMAL) void ocfs2_inode_unlock(struct inode *inode, int ex); int ocfs2_super_lock(struct ocfs2_super *osb, int ex); void ocfs2_super_unlock(struct ocfs2_super *osb, int ex); -int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex); -void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex); +int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno); +void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno); int ocfs2_rename_lock(struct ocfs2_super *osb); void ocfs2_rename_unlock(struct ocfs2_super *osb); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 07267e0..62442e4 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2026,7 +2026,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in, size_t len, unsigned int flags) { - int ret = 0; + int ret = 0, lock_level = 0; struct inode *inode = in->f_path.dentry->d_inode; mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe, @@ -2037,12 +2037,12 @@ static ssize_t ocfs2_file_splice_read(struct file *in, /* * See the comment in ocfs2_file_aio_read() */ - ret = ocfs2_inode_lock(inode, NULL, 0); + ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level); if (ret < 0) { mlog_errno(ret); goto bail; } - ocfs2_inode_unlock(inode, 0); + ocfs2_inode_unlock(inode, lock_level); ret = generic_file_splice_read(in, ppos, pipe, len, flags); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 10e1fa87..4dc8890 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -215,6 +215,8 @@ bail: static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) { struct ocfs2_find_inode_args *args = opaque; + static struct lock_class_key ocfs2_quota_ip_alloc_sem_key, + ocfs2_file_ip_alloc_sem_key; mlog_entry("inode = %p, opaque = %p\n", inode, opaque); @@ -223,6 +225,15 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) if (args->fi_sysfile_type != 0) lockdep_set_class(&inode->i_mutex, &ocfs2_sysfile_lock_key[args->fi_sysfile_type]); + if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE || + args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE || + args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE || + args->fi_sysfile_type == LOCAL_GROUP_QUOTA_SYSTEM_INODE) + lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem, + &ocfs2_quota_ip_alloc_sem_key); + else + lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem, + &ocfs2_file_ip_alloc_sem_key); mlog_exit(0); return 0; diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 4a3b9e6..f033760 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1880,13 +1880,20 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) os = &osb->osb_orphan_scan; - status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX); + if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) + goto out; + + status = ocfs2_orphan_scan_lock(osb, &seqno); if (status < 0) { if (status != -EAGAIN) mlog_errno(status); goto out; } + /* Do no queue the tasks if the volume is being umounted */ + if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) + goto unlock; + if (os->os_seqno != seqno) { os->os_seqno = seqno; goto unlock; @@ -1903,7 +1910,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) os->os_count++; os->os_scantime = CURRENT_TIME; unlock: - ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX); + ocfs2_orphan_scan_unlock(osb, seqno); out: return; } @@ -1920,8 +1927,9 @@ void ocfs2_orphan_scan_work(struct work_struct *work) mutex_lock(&os->os_lock); ocfs2_queue_orphan_scan(osb); - schedule_delayed_work(&os->os_orphan_scan_work, - ocfs2_orphan_scan_timeout()); + if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) + schedule_delayed_work(&os->os_orphan_scan_work, + ocfs2_orphan_scan_timeout()); mutex_unlock(&os->os_lock); } @@ -1930,26 +1938,33 @@ void ocfs2_orphan_scan_stop(struct ocfs2_super *osb) struct ocfs2_orphan_scan *os; os = &osb->osb_orphan_scan; - mutex_lock(&os->os_lock); - cancel_delayed_work(&os->os_orphan_scan_work); - mutex_unlock(&os->os_lock); + if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) { + atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); + mutex_lock(&os->os_lock); + cancel_delayed_work(&os->os_orphan_scan_work); + mutex_unlock(&os->os_lock); + } } -int ocfs2_orphan_scan_init(struct ocfs2_super *osb) +void ocfs2_orphan_scan_init(struct ocfs2_super *osb) { struct ocfs2_orphan_scan *os; os = &osb->osb_orphan_scan; os->os_osb = osb; os->os_count = 0; + os->os_seqno = 0; os->os_scantime = CURRENT_TIME; mutex_init(&os->os_lock); - - INIT_DELAYED_WORK(&os->os_orphan_scan_work, - ocfs2_orphan_scan_work); - schedule_delayed_work(&os->os_orphan_scan_work, - ocfs2_orphan_scan_timeout()); - return 0; + INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work); + + if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) + atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); + else { + atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE); + schedule_delayed_work(&os->os_orphan_scan_work, + ocfs2_orphan_scan_timeout()); + } } struct ocfs2_orphan_filldir_priv { diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 61045ee..5432c7f 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -144,7 +144,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb, } /* Exported only for the journal struct init code in super.c. Do not call. */ -int ocfs2_orphan_scan_init(struct ocfs2_super *osb); +void ocfs2_orphan_scan_init(struct ocfs2_super *osb); void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 33464c6..8601f93 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -118,7 +118,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len, dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); - status = ocfs2_inode_lock(dir, NULL, 0); + status = ocfs2_inode_lock_nested(dir, NULL, 0, OI_LS_PARENT); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -636,7 +636,7 @@ static int ocfs2_link(struct dentry *old_dentry, if (S_ISDIR(inode->i_mode)) return -EPERM; - err = ocfs2_inode_lock(dir, &parent_fe_bh, 1); + err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT); if (err < 0) { if (err != -ENOENT) mlog_errno(err); @@ -800,7 +800,8 @@ static int ocfs2_unlink(struct inode *dir, return -EPERM; } - status = ocfs2_inode_lock(dir, &parent_node_bh, 1); + status = ocfs2_inode_lock_nested(dir, &parent_node_bh, 1, + OI_LS_PARENT); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -978,7 +979,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, inode1 = tmpinode; } /* lock id2 */ - status = ocfs2_inode_lock(inode2, bh2, 1); + status = ocfs2_inode_lock_nested(inode2, bh2, 1, + OI_LS_RENAME1); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -987,7 +989,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, } /* lock id1 */ - status = ocfs2_inode_lock(inode1, bh1, 1); + status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_RENAME2); if (status < 0) { /* * An error return must mean that no cluster locks @@ -1103,7 +1105,8 @@ static int ocfs2_rename(struct inode *old_dir, * won't have to concurrently downconvert the inode and the * dentry locks. */ - status = ocfs2_inode_lock(old_inode, &old_inode_bh, 1); + status = ocfs2_inode_lock_nested(old_inode, &old_inode_bh, 1, + OI_LS_PARENT); if (status < 0) { if (status != -ENOENT) mlog_errno(status); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 18c1d9e..c9345eb 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -34,6 +34,7 @@ #include <linux/workqueue.h> #include <linux/kref.h> #include <linux/mutex.h> +#include <linux/lockdep.h> #ifndef CONFIG_OCFS2_COMPAT_JBD # include <linux/jbd2.h> #else @@ -152,6 +153,14 @@ struct ocfs2_lock_res { unsigned int l_lock_max_exmode; /* Max wait for EX */ unsigned int l_lock_refresh; /* Disk refreshes */ #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map l_lockdep_map; +#endif +}; + +enum ocfs2_orphan_scan_state { + ORPHAN_SCAN_ACTIVE, + ORPHAN_SCAN_INACTIVE }; struct ocfs2_orphan_scan { @@ -162,6 +171,7 @@ struct ocfs2_orphan_scan { struct timespec os_scantime; /* time this node ran the scan */ u32 os_count; /* tracks node specific scans */ u32 os_seqno; /* tracks cluster wide scans */ + atomic_t os_state; /* ACTIVE or INACTIVE */ }; struct ocfs2_dlm_debug { diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index fcd120f..3f66137 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -236,6 +236,16 @@ static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb) return dlm_status_to_errno(lksb->lksb_o2dlm.status); } +/* + * o2dlm aways has a "valid" LVB. If the dlm loses track of the LVB + * contents, it will zero out the LVB. Thus the caller can always trust + * the contents. + */ +static int o2cb_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) +{ + return 1; +} + static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb) { return (void *)(lksb->lksb_o2dlm.lvb); @@ -354,6 +364,7 @@ static struct ocfs2_stack_operations o2cb_stack_ops = { .dlm_lock = o2cb_dlm_lock, .dlm_unlock = o2cb_dlm_unlock, .lock_status = o2cb_dlm_lock_status, + .lvb_valid = o2cb_dlm_lvb_valid, .lock_lvb = o2cb_dlm_lvb, .dump_lksb = o2cb_dump_lksb, }; diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 9b76d41..ff4c798 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -738,6 +738,13 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb) return lksb->lksb_fsdlm.sb_status; } +static int user_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) +{ + int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID; + + return !invalid; +} + static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) { if (!lksb->lksb_fsdlm.sb_lvbptr) @@ -873,6 +880,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { .dlm_lock = user_dlm_lock, .dlm_unlock = user_dlm_unlock, .lock_status = user_dlm_lock_status, + .lvb_valid = user_dlm_lvb_valid, .lock_lvb = user_dlm_lvb, .plock = user_plock, .dump_lksb = user_dlm_dump_lksb, diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 68b668b..3f2f1c4 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -6,7 +6,7 @@ * Code which implements an OCFS2 specific interface to underlying * cluster stacks. * - * Copyright (C) 2007 Oracle. All rights reserved. + * Copyright (C) 2007, 2009 Oracle. All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public @@ -271,11 +271,12 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) } EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status); -/* - * Why don't we cast to ocfs2_meta_lvb? The "clean" answer is that we - * don't cast at the glue level. The real answer is that the header - * ordering is nigh impossible. - */ +int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) +{ + return active_stack->sp_ops->lvb_valid(lksb); +} +EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb_valid); + void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) { return active_stack->sp_ops->lock_lvb(lksb); diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index c571af3..03a44d6 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -186,6 +186,11 @@ struct ocfs2_stack_operations { int (*lock_status)(union ocfs2_dlm_lksb *lksb); /* + * Return non-zero if the LVB is valid. + */ + int (*lvb_valid)(union ocfs2_dlm_lksb *lksb); + + /* * Pull the lvb pointer off of the stack-specific lksb. */ void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); @@ -252,6 +257,7 @@ int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, struct ocfs2_lock_res *astarg); int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb); +int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb); void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8439f6b..73a16d4 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -923,14 +923,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, int nr) { struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; + int ret; if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) return 0; - if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data) + + if (!buffer_jbd(bg_bh)) return 1; + jbd_lock_bh_state(bg_bh); bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data; - return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); + if (bg) + ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); + else + ret = 1; + jbd_unlock_bh_state(bg_bh); + + return ret; } static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, @@ -1885,6 +1894,7 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, unsigned int tmp; int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; struct ocfs2_group_desc *undo_bg = NULL; + int cluster_bitmap = 0; mlog_entry_void(); @@ -1905,18 +1915,28 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, } if (ocfs2_is_cluster_bitmap(alloc_inode)) - undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data; + cluster_bitmap = 1; + + if (cluster_bitmap) { + jbd_lock_bh_state(group_bh); + undo_bg = (struct ocfs2_group_desc *) + bh2jh(group_bh)->b_committed_data; + BUG_ON(!undo_bg); + } tmp = num_bits; while(tmp--) { ocfs2_clear_bit((bit_off + tmp), (unsigned long *) bg->bg_bitmap); - if (ocfs2_is_cluster_bitmap(alloc_inode)) + if (cluster_bitmap) ocfs2_set_bit(bit_off + tmp, (unsigned long *) undo_bg->bg_bitmap); } le16_add_cpu(&bg->bg_free_bits_count, num_bits); + if (cluster_bitmap) + jbd_unlock_bh_state(group_bh); + status = ocfs2_journal_dirty(handle, group_bh); if (status < 0) mlog_errno(status); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 0d3ed74..7efb349 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -205,11 +205,10 @@ static const match_table_t tokens = { #ifdef CONFIG_DEBUG_FS static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) { - int out = 0; - int i; struct ocfs2_cluster_connection *cconn = osb->cconn; struct ocfs2_recovery_map *rm = osb->recovery_map; - struct ocfs2_orphan_scan *os; + struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan; + int i, out = 0; out += snprintf(buf + out, len - out, "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", @@ -234,20 +233,24 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) "%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount", osb->s_mount_opt, osb->s_atime_quantum); - out += snprintf(buf + out, len - out, - "%10s => Stack: %s Name: %*s Version: %d.%d\n", - "Cluster", - (*osb->osb_cluster_stack == '\0' ? - "o2cb" : osb->osb_cluster_stack), - cconn->cc_namelen, cconn->cc_name, - cconn->cc_version.pv_major, cconn->cc_version.pv_minor); + if (cconn) { + out += snprintf(buf + out, len - out, + "%10s => Stack: %s Name: %*s " + "Version: %d.%d\n", "Cluster", + (*osb->osb_cluster_stack == '\0' ? + "o2cb" : osb->osb_cluster_stack), + cconn->cc_namelen, cconn->cc_name, + cconn->cc_version.pv_major, + cconn->cc_version.pv_minor); + } spin_lock(&osb->dc_task_lock); out += snprintf(buf + out, len - out, "%10s => Pid: %d Count: %lu WakeSeq: %lu " "WorkSeq: %lu\n", "DownCnvt", - task_pid_nr(osb->dc_task), osb->blocked_lock_count, - osb->dc_wake_sequence, osb->dc_work_sequence); + (osb->dc_task ? task_pid_nr(osb->dc_task) : -1), + osb->blocked_lock_count, osb->dc_wake_sequence, + osb->dc_work_sequence); spin_unlock(&osb->dc_task_lock); spin_lock(&osb->osb_lock); @@ -267,14 +270,15 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) out += snprintf(buf + out, len - out, "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit", - task_pid_nr(osb->commit_task), osb->osb_commit_interval, + (osb->commit_task ? task_pid_nr(osb->commit_task) : -1), + osb->osb_commit_interval, atomic_read(&osb->needs_checkpoint)); out += snprintf(buf + out, len - out, - "%10s => State: %d NumTxns: %d TxnId: %lu\n", + "%10s => State: %d TxnId: %lu NumTxns: %d\n", "Journal", osb->journal->j_state, - atomic_read(&osb->journal->j_num_trans), - osb->journal->j_trans_id); + osb->journal->j_trans_id, + atomic_read(&osb->journal->j_num_trans)); out += snprintf(buf + out, len - out, "%10s => GlobalAllocs: %d LocalAllocs: %d " @@ -300,9 +304,18 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) atomic_read(&osb->s_num_inodes_stolen)); spin_unlock(&osb->osb_lock); + out += snprintf(buf + out, len - out, "OrphanScan => "); + out += snprintf(buf + out, len - out, "Local: %u Global: %u ", + os->os_count, os->os_seqno); + out += snprintf(buf + out, len - out, " Last Scan: "); + if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) + out += snprintf(buf + out, len - out, "Disabled\n"); + else + out += snprintf(buf + out, len - out, "%lu seconds ago\n", + (get_seconds() - os->os_scantime.tv_sec)); + out += snprintf(buf + out, len - out, "%10s => %3s %10s\n", "Slots", "Num", "RecoGen"); - for (i = 0; i < osb->max_slots; ++i) { out += snprintf(buf + out, len - out, "%10s %c %3d %10d\n", @@ -311,13 +324,6 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) i, osb->slot_recovery_generations[i]); } - os = &osb->osb_orphan_scan; - out += snprintf(buf + out, len - out, "Orphan Scan=> "); - out += snprintf(buf + out, len - out, "Local: %u Global: %u ", - os->os_count, os->os_seqno); - out += snprintf(buf + out, len - out, " Last Scan: %lu seconds ago\n", - (get_seconds() - os->os_scantime.tv_sec)); - return out; } @@ -1175,6 +1181,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS); wake_up(&osb->osb_mount_event); + /* Start this when the mount is almost sure of being successful */ + ocfs2_orphan_scan_init(osb); + mlog_exit(status); return status; @@ -1810,14 +1819,15 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) debugfs_remove(osb->osb_ctxt); + /* Orphan scan should be stopped as early as possible */ + ocfs2_orphan_scan_stop(osb); + ocfs2_disable_quotas(osb); ocfs2_shutdown_local_alloc(osb); ocfs2_truncate_log_shutdown(osb); - ocfs2_orphan_scan_stop(osb); - /* This will disable recovery and flush any recovery work. */ ocfs2_recovery_exit(osb); @@ -1978,13 +1988,6 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } - status = ocfs2_orphan_scan_init(osb); - if (status) { - mlog(ML_ERROR, "Unable to initialize delayed orphan scan\n"); - mlog_errno(status); - goto bail; - } - init_waitqueue_head(&osb->checkpoint_event); atomic_set(&osb->needs_checkpoint, 0); diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index ab713eb..40e5370 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c @@ -50,6 +50,10 @@ static inline int is_in_system_inode_array(struct ocfs2_super *osb, int type, u32 slot); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES]; +#endif + static inline int is_global_system_inode(int type) { return type >= OCFS2_FIRST_ONLINE_SYSTEM_INODE && @@ -118,6 +122,21 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb, inode = NULL; goto bail; } +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (type == LOCAL_USER_QUOTA_SYSTEM_INODE || + type == LOCAL_GROUP_QUOTA_SYSTEM_INODE || + type == JOURNAL_SYSTEM_INODE) { + /* Ignore inode lock on these inodes as the lock does not + * really belong to any process and lockdep cannot handle + * that */ + OCFS2_I(inode)->ip_inode_lockres.l_lockdep_map.key = NULL; + } else { + lockdep_init_map(&OCFS2_I(inode)->ip_inode_lockres. + l_lockdep_map, + ocfs2_system_inodes[type].si_name, + &ocfs2_sysfile_cluster_lock_key[type], 0); + } +#endif bail: return inode; @@ -378,63 +378,63 @@ SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64); #endif #endif /* BITS_PER_LONG == 32 */ -SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) + +int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { - struct file *file; - struct inode *inode; - long ret = -EINVAL; + struct inode *inode = file->f_path.dentry->d_inode; + long ret; if (offset < 0 || len <= 0) - goto out; + return -EINVAL; /* Return error if mode is not supported */ - ret = -EOPNOTSUPP; if (mode && !(mode & FALLOC_FL_KEEP_SIZE)) - goto out; + return -EOPNOTSUPP; - ret = -EBADF; - file = fget(fd); - if (!file) - goto out; if (!(file->f_mode & FMODE_WRITE)) - goto out_fput; + return -EBADF; /* * Revalidate the write permissions, in case security policy has * changed since the files were opened. */ ret = security_file_permission(file, MAY_WRITE); if (ret) - goto out_fput; + return ret; - inode = file->f_path.dentry->d_inode; - - ret = -ESPIPE; if (S_ISFIFO(inode->i_mode)) - goto out_fput; + return -ESPIPE; - ret = -ENODEV; /* * Let individual file system decide if it supports preallocation * for directories or not. */ if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) - goto out_fput; + return -ENODEV; - ret = -EFBIG; /* Check for wrap through zero too */ if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) - goto out_fput; + return -EFBIG; - if (inode->i_op->fallocate) - ret = inode->i_op->fallocate(inode, mode, offset, len); - else - ret = -EOPNOTSUPP; + if (!inode->i_op->fallocate) + return -EOPNOTSUPP; -out_fput: - fput(file); -out: - return ret; + return inode->i_op->fallocate(inode, mode, offset, len); } + +SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) +{ + struct file *file; + int error = -EBADF; + + file = fget(fd); + if (file) { + error = do_fallocate(file, mode, offset, len); + fput(file); + } + + return error; +} + #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len) { diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 6fd0f47..a14d6cd 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1131,8 +1131,6 @@ static void init_inode(struct inode *inode, struct treepath *path) REISERFS_I(inode)->i_trans_id = 0; REISERFS_I(inode)->i_jl = NULL; mutex_init(&(REISERFS_I(inode)->i_mmap)); - reiserfs_init_acl_access(inode); - reiserfs_init_acl_default(inode); reiserfs_init_xattr_rwsem(inode); if (stat_data_v1(ih)) { @@ -1834,8 +1832,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); mutex_init(&(REISERFS_I(inode)->i_mmap)); - reiserfs_init_acl_access(inode); - reiserfs_init_acl_default(inode); reiserfs_init_xattr_rwsem(inode); /* key to search for correct place for new stat data */ diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index 238e9d9..18b315d 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -82,7 +82,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) { printk ("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); - unlock_super(s); return -ENOMEM; } /* the new journal bitmaps are zero filled, now we copy in the bitmap diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 2969773..d3aeb06 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -529,10 +529,6 @@ static void init_once(void *foo) INIT_LIST_HEAD(&ei->i_prealloc_list); inode_init_once(&ei->vfs_inode); -#ifdef CONFIG_REISERFS_FS_POSIX_ACL - ei->i_acl_access = NULL; - ei->i_acl_default = NULL; -#endif } static int init_inodecache(void) @@ -580,25 +576,6 @@ static void reiserfs_dirty_inode(struct inode *inode) reiserfs_write_unlock(inode->i_sb); } -#ifdef CONFIG_REISERFS_FS_POSIX_ACL -static void reiserfs_clear_inode(struct inode *inode) -{ - struct posix_acl *acl; - - acl = REISERFS_I(inode)->i_acl_access; - if (acl && !IS_ERR(acl)) - posix_acl_release(acl); - REISERFS_I(inode)->i_acl_access = NULL; - - acl = REISERFS_I(inode)->i_acl_default; - if (acl && !IS_ERR(acl)) - posix_acl_release(acl); - REISERFS_I(inode)->i_acl_default = NULL; -} -#else -#define reiserfs_clear_inode NULL -#endif - #ifdef CONFIG_QUOTA static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, size_t, loff_t); @@ -612,7 +589,6 @@ static const struct super_operations reiserfs_sops = { .write_inode = reiserfs_write_inode, .dirty_inode = reiserfs_dirty_inode, .delete_inode = reiserfs_delete_inode, - .clear_inode = reiserfs_clear_inode, .put_super = reiserfs_put_super, .write_super = reiserfs_write_super, .sync_fs = reiserfs_sync_fs, diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index c303c42..35d6e67 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -188,29 +188,6 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size) return ERR_PTR(-EINVAL); } -static inline void iset_acl(struct inode *inode, struct posix_acl **i_acl, - struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*i_acl != ERR_PTR(-ENODATA)) - posix_acl_release(*i_acl); - *i_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - -static inline struct posix_acl *iget_acl(struct inode *inode, - struct posix_acl **i_acl) -{ - struct posix_acl *acl = ERR_PTR(-ENODATA); - - spin_lock(&inode->i_lock); - if (*i_acl != ERR_PTR(-ENODATA)) - acl = posix_acl_dup(*i_acl); - spin_unlock(&inode->i_lock); - - return acl; -} - /* * Inode operation get_posix_acl(). * @@ -220,34 +197,29 @@ static inline struct posix_acl *iget_acl(struct inode *inode, struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) { char *name, *value; - struct posix_acl *acl, **p_acl; + struct posix_acl *acl; int size; int retval; - struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); + + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; switch (type) { case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; - p_acl = &reiserfs_i->i_acl_access; break; case ACL_TYPE_DEFAULT: name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &reiserfs_i->i_acl_default; break; default: - return ERR_PTR(-EINVAL); + BUG(); } - acl = iget_acl(inode, p_acl); - if (acl && !IS_ERR(acl)) - return acl; - else if (PTR_ERR(acl) == -ENODATA) - return NULL; - size = reiserfs_xattr_get(inode, name, NULL, 0); if (size < 0) { if (size == -ENODATA || size == -ENOSYS) { - *p_acl = ERR_PTR(-ENODATA); + set_cached_acl(inode, type, NULL); return NULL; } return ERR_PTR(size); @@ -262,14 +234,13 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) /* This shouldn't actually happen as it should have been caught above.. but just in case */ acl = NULL; - *p_acl = ERR_PTR(-ENODATA); } else if (retval < 0) { acl = ERR_PTR(retval); } else { acl = posix_acl_from_disk(value, retval); - if (!IS_ERR(acl)) - iset_acl(inode, p_acl, acl); } + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); kfree(value); return acl; @@ -287,10 +258,8 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, { char *name; void *value = NULL; - struct posix_acl **p_acl; size_t size = 0; int error; - struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; @@ -298,7 +267,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, switch (type) { case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; - p_acl = &reiserfs_i->i_acl_access; if (acl) { mode_t mode = inode->i_mode; error = posix_acl_equiv_mode(acl, &mode); @@ -313,7 +281,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, break; case ACL_TYPE_DEFAULT: name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &reiserfs_i->i_acl_default; if (!S_ISDIR(inode->i_mode)) return acl ? -EACCES : 0; break; @@ -346,7 +313,7 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, kfree(value); if (!error) - iset_acl(inode, p_acl, acl); + set_cached_acl(inode, type, acl); return error; } @@ -379,11 +346,8 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, } acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(acl)) { - if (PTR_ERR(acl) == -ENODATA) - goto apply_umask; + if (IS_ERR(acl)) return PTR_ERR(acl); - } if (acl) { struct posix_acl *acl_copy; @@ -608,6 +608,7 @@ void emergency_remount(void) static DEFINE_IDA(unnamed_dev_ida); static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ +static int unnamed_dev_start = 0; /* don't bother trying below it */ int set_anon_super(struct super_block *s, void *data) { @@ -618,7 +619,9 @@ int set_anon_super(struct super_block *s, void *data) if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0) return -ENOMEM; spin_lock(&unnamed_dev_lock); - error = ida_get_new(&unnamed_dev_ida, &dev); + error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev); + if (!error) + unnamed_dev_start = dev + 1; spin_unlock(&unnamed_dev_lock); if (error == -EAGAIN) /* We raced and lost with another CPU. */ @@ -629,6 +632,8 @@ int set_anon_super(struct super_block *s, void *data) if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { spin_lock(&unnamed_dev_lock); ida_remove(&unnamed_dev_ida, dev); + if (unnamed_dev_start > dev) + unnamed_dev_start = dev; spin_unlock(&unnamed_dev_lock); return -EMFILE; } @@ -645,6 +650,8 @@ void kill_anon_super(struct super_block *sb) generic_shutdown_super(sb); spin_lock(&unnamed_dev_lock); ida_remove(&unnamed_dev_ida, slot); + if (slot < unnamed_dev_start) + unnamed_dev_start = slot; spin_unlock(&unnamed_dev_lock); } diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index cfd31e2..adafcf5 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -55,9 +55,9 @@ * ACL support is not implemented. */ +#include "ubifs.h" #include <linux/xattr.h> #include <linux/posix_acl_xattr.h> -#include "ubifs.h" /* * Limit the number of extended attributes per inode so that the total size diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index e48e9a3..1e06853 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -238,7 +238,7 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb, mutex_lock(&sbi->s_alloc_mutex); part_len = sbi->s_partmaps[partition].s_partition_len; - if (first_block < 0 || first_block >= part_len) + if (first_block >= part_len) goto out; if (first_block + block_count > part_len) @@ -297,7 +297,7 @@ static int udf_bitmap_new_block(struct super_block *sb, mutex_lock(&sbi->s_alloc_mutex); repeat: - if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len) + if (goal >= sbi->s_partmaps[partition].s_partition_len) goal = 0; nr_groups = bitmap->s_nr_groups; @@ -666,8 +666,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb, int8_t etype = -1; struct udf_inode_info *iinfo; - if (first_block < 0 || - first_block >= sbi->s_partmaps[partition].s_partition_len) + if (first_block >= sbi->s_partmaps[partition].s_partition_len) return 0; iinfo = UDF_I(table); @@ -743,7 +742,7 @@ static int udf_table_new_block(struct super_block *sb, return newblock; mutex_lock(&sbi->s_alloc_mutex); - if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len) + if (goal >= sbi->s_partmaps[partition].s_partition_len) goal = 0; /* We search for the closest matching block to goal. If we find diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c index 703843f..1b88fd5 100644 --- a/fs/udf/lowlevel.c +++ b/fs/udf/lowlevel.c @@ -56,7 +56,12 @@ unsigned long udf_get_last_block(struct super_block *sb) struct block_device *bdev = sb->s_bdev; unsigned long lblock = 0; - if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock)) + /* + * ioctl failed or returned obviously bogus value? + * Try using the device size... + */ + if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock) || + lblock == 0) lblock = bdev->bd_inode->i_size >> sb->s_blocksize_bits; if (lblock) diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index 1e9d124..b23a545 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -25,14 +25,10 @@ #include <linux/posix_acl_xattr.h> -#define XFS_ACL_NOT_CACHED ((void *)-1) - /* * Locking scheme: * - all ACL updates are protected by inode->i_mutex, which is taken before * calling into this file. - * - access and updates to the ip->i_acl and ip->i_default_acl pointers are - * protected by inode->i_lock. */ STATIC struct posix_acl * @@ -102,59 +98,35 @@ xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl) } } -/* - * Update the cached ACL pointer in the inode. - * - * Because we don't hold any locks while reading/writing the attribute - * from/to disk another thread could have raced and updated the cached - * ACL value before us. In that case we release the previous cached value - * and update it with our new value. - */ -STATIC void -xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl, - struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED) - posix_acl_release(*p_acl); - *p_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - struct posix_acl * xfs_get_acl(struct inode *inode, int type) { struct xfs_inode *ip = XFS_I(inode); - struct posix_acl *acl = NULL, **p_acl; + struct posix_acl *acl; struct xfs_acl *xfs_acl; int len = sizeof(struct xfs_acl); char *ea_name; int error; + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + switch (type) { case ACL_TYPE_ACCESS: ea_name = SGI_ACL_FILE; - p_acl = &ip->i_acl; break; case ACL_TYPE_DEFAULT: ea_name = SGI_ACL_DEFAULT; - p_acl = &ip->i_default_acl; break; default: - return ERR_PTR(-EINVAL); + BUG(); } - spin_lock(&inode->i_lock); - if (*p_acl != XFS_ACL_NOT_CACHED) - acl = posix_acl_dup(*p_acl); - spin_unlock(&inode->i_lock); - /* * If we have a cached ACLs value just return it, not need to * go out to the disk. */ - if (acl) - return acl; xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); if (!xfs_acl) @@ -165,7 +137,7 @@ xfs_get_acl(struct inode *inode, int type) /* * If the attribute doesn't exist make sure we have a negative * cache entry, for any other error assume it is transient and - * leave the cache entry as XFS_ACL_NOT_CACHED. + * leave the cache entry as ACL_NOT_CACHED. */ if (error == -ENOATTR) { acl = NULL; @@ -179,7 +151,7 @@ xfs_get_acl(struct inode *inode, int type) goto out; out_update_cache: - xfs_update_cached_acl(inode, p_acl, acl); + set_cached_acl(inode, type, acl); out: kfree(xfs_acl); return acl; @@ -189,7 +161,6 @@ STATIC int xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) { struct xfs_inode *ip = XFS_I(inode); - struct posix_acl **p_acl; char *ea_name; int error; @@ -199,13 +170,11 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) switch (type) { case ACL_TYPE_ACCESS: ea_name = SGI_ACL_FILE; - p_acl = &ip->i_acl; break; case ACL_TYPE_DEFAULT: if (!S_ISDIR(inode->i_mode)) return acl ? -EACCES : 0; ea_name = SGI_ACL_DEFAULT; - p_acl = &ip->i_default_acl; break; default: return -EINVAL; @@ -242,7 +211,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) } if (!error) - xfs_update_cached_acl(inode, p_acl, acl); + set_cached_acl(inode, type, acl); return error; } @@ -384,30 +353,6 @@ xfs_acl_chmod(struct inode *inode) return error; } -void -xfs_inode_init_acls(struct xfs_inode *ip) -{ - /* - * No need for locking, inode is not live yet. - */ - ip->i_acl = XFS_ACL_NOT_CACHED; - ip->i_default_acl = XFS_ACL_NOT_CACHED; -} - -void -xfs_inode_clear_acls(struct xfs_inode *ip) -{ - /* - * No need for locking here, the inode is not live anymore - * and just about to be freed. - */ - if (ip->i_acl != XFS_ACL_NOT_CACHED) - posix_acl_release(ip->i_acl); - if (ip->i_default_acl != XFS_ACL_NOT_CACHED) - posix_acl_release(ip->i_default_acl); -} - - /* * System xattr handlers. * diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 63dc1f2..947b150 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -46,8 +46,6 @@ extern int xfs_check_acl(struct inode *inode, int mask); extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); extern int xfs_acl_chmod(struct inode *inode); -extern void xfs_inode_init_acls(struct xfs_inode *ip); -extern void xfs_inode_clear_acls(struct xfs_inode *ip); extern int posix_acl_access_exists(struct inode *inode); extern int posix_acl_default_exists(struct inode *inode); @@ -57,8 +55,6 @@ extern struct xattr_handler xfs_xattr_system_handler; # define xfs_get_acl(inode, type) NULL # define xfs_inherit_acl(inode, default_acl) 0 # define xfs_acl_chmod(inode) 0 -# define xfs_inode_init_acls(ip) -# define xfs_inode_clear_acls(ip) # define posix_acl_access_exists(inode) 0 # define posix_acl_default_exists(inode) 0 #endif /* CONFIG_XFS_POSIX_ACL */ diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 76c540f..5fcec6f 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -83,7 +83,6 @@ xfs_inode_alloc( memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); ip->i_size = 0; ip->i_new_size = 0; - xfs_inode_init_acls(ip); /* * Initialize inode's trace buffers. @@ -560,7 +559,6 @@ xfs_ireclaim( ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); - xfs_inode_clear_acls(ip); kmem_zone_free(xfs_inode_zone, ip); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 7701670..1804f86 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -273,11 +273,6 @@ typedef struct xfs_inode { /* VFS inode */ struct inode i_vnode; /* embedded VFS inode */ -#ifdef CONFIG_XFS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif - /* Trace buffers per inode. */ #ifdef XFS_INODE_TRACE struct ktrace *i_trace; /* general inode trace */ |