diff options
author | Scott Wood <scottwood@freescale.com> | 2014-04-08 01:00:49 (GMT) |
---|---|---|
committer | Scott Wood <scottwood@freescale.com> | 2014-04-08 19:58:35 (GMT) |
commit | 47d2261a3fa71cde24263559a4219a25e50d8c89 (patch) | |
tree | 28774d5b330ccf1b777a3af222d8356918328013 /fs | |
parent | fb7f27080adc65cd5f341bdf56a1d0c14f316c1b (diff) | |
parent | 5fb9d37f27351e42f002e372074249f92cbdf815 (diff) | |
download | linux-fsl-qoriq-47d2261a3fa71cde24263559a4219a25e50d8c89.tar.xz |
Merge branch 'merge' into sdk-v1.6.x
This reverts v3.13-rc3+ (78fd82238d0e5716) to v3.12, except for
commits which I noticed which appear relevant to the SDK.
Signed-off-by: Scott Wood <scottwood@freescale.com>
Conflicts:
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_interrupts.S
arch/powerpc/kvm/e500.c
arch/powerpc/kvm/e500mc.c
arch/powerpc/sysdev/fsl_soc.h
drivers/Kconfig
drivers/cpufreq/ppc-corenet-cpufreq.c
drivers/dma/fsldma.c
drivers/dma/s3c24xx-dma.c
drivers/misc/Makefile
drivers/mmc/host/sdhci-of-esdhc.c
drivers/mtd/devices/m25p80.c
drivers/net/ethernet/freescale/gianfar.h
drivers/platform/Kconfig
drivers/platform/Makefile
drivers/spi/spi-fsl-espi.c
include/crypto/algapi.h
include/linux/netdev_features.h
include/linux/skbuff.h
include/net/ip.h
net/core/ethtool.c
Diffstat (limited to 'fs')
471 files changed, 11312 insertions, 17115 deletions
diff --git a/fs/9p/cache.c b/fs/9p/cache.c index 2b7a032..a9ea73d 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -90,7 +90,7 @@ void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses) v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index, &v9fs_cache_session_index_def, - v9ses, true); + v9ses); p9_debug(P9_DEBUG_FSC, "session %p get cookie %p\n", v9ses, v9ses->fscache); } @@ -204,7 +204,7 @@ void v9fs_cache_inode_get_cookie(struct inode *inode) v9ses = v9fs_inode2v9ses(inode); v9inode->fscache = fscache_acquire_cookie(v9ses->fscache, &v9fs_cache_inode_index_def, - v9inode, true); + v9inode); p9_debug(P9_DEBUG_FSC, "inode %p get cookie %p\n", inode, v9inode->fscache); @@ -271,7 +271,7 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode) v9ses = v9fs_inode2v9ses(inode); v9inode->fscache = fscache_acquire_cookie(v9ses->fscache, &v9fs_cache_inode_index_def, - v9inode, true); + v9inode); p9_debug(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p\n", inode, old, v9inode->fscache); diff --git a/fs/9p/cache.h b/fs/9p/cache.h index 2f96754..40cc54c 100644 --- a/fs/9p/cache.h +++ b/fs/9p/cache.h @@ -101,18 +101,6 @@ static inline void v9fs_fscache_wait_on_page_write(struct inode *inode, #else /* CONFIG_9P_FSCACHE */ -static inline void v9fs_cache_inode_get_cookie(struct inode *inode) -{ -} - -static inline void v9fs_cache_inode_put_cookie(struct inode *inode) -{ -} - -static inline void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *file) -{ -} - static inline int v9fs_fscache_release_page(struct page *page, gfp_t gfp) { return 1; diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index b03dd23..f039b10 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -43,6 +43,23 @@ #include "fid.h" /** + * v9fs_dentry_delete - called when dentry refcount equals 0 + * @dentry: dentry in question + * + * By returning 1 here we should remove cacheing of unused + * dentry components. + * + */ + +static int v9fs_dentry_delete(const struct dentry *dentry) +{ + p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", + dentry->d_name.name, dentry); + + return 1; +} + +/** * v9fs_cached_dentry_delete - called when dentry refcount equals 0 * @dentry: dentry in question * @@ -117,6 +134,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = { }; const struct dentry_operations v9fs_dentry_operations = { - .d_delete = always_delete_dentry, + .d_delete = v9fs_dentry_delete, .d_release = v9fs_dentry_release, }; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index a0df3e7..aa5ecf4 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -105,8 +105,10 @@ int v9fs_file_open(struct inode *inode, struct file *file) v9inode->writeback_fid = (void *) fid; } mutex_unlock(&v9inode->v_mutex); +#ifdef CONFIG_9P_FSCACHE if (v9ses->cache) v9fs_cache_inode_set_cookie(inode, file); +#endif return 0; out_error: p9_client_clunk(file->private_data); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 4e65aa9..94de6d1 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -448,7 +448,9 @@ void v9fs_evict_inode(struct inode *inode) clear_inode(inode); filemap_fdatawrite(inode->i_mapping); +#ifdef CONFIG_9P_FSCACHE v9fs_cache_inode_put_cookie(inode); +#endif /* clunk the fid stashed in writeback_fid */ if (v9inode->writeback_fid) { p9_client_clunk(v9inode->writeback_fid); @@ -529,7 +531,9 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, goto error; v9fs_stat2inode(st, inode, sb); +#ifdef CONFIG_9P_FSCACHE v9fs_cache_inode_get_cookie(inode); +#endif unlock_new_inode(inode); return inode; error: @@ -901,8 +905,10 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, goto error; file->private_data = fid; +#ifdef CONFIG_9P_FSCACHE if (v9ses->cache) v9fs_cache_inode_set_cookie(dentry->d_inode, file); +#endif *opened |= FILE_CREATED; out: diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 4c10edec2..a7c4814 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -141,7 +141,9 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, goto error; v9fs_stat2inode_dotl(st, inode); +#ifdef CONFIG_9P_FSCACHE v9fs_cache_inode_get_cookie(inode); +#endif retval = v9fs_get_acl(inode, fid); if (retval) goto error; @@ -353,8 +355,10 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, if (err) goto err_clunk_old_fid; file->private_data = ofid; +#ifdef CONFIG_9P_FSCACHE if (v9ses->cache) v9fs_cache_inode_set_cookie(inode, file); +#endif *opened |= FILE_CREATED; out: v9fs_put_acl(dacl, pacl); diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index c770337..585adaf 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -43,12 +43,9 @@ struct adfs_dir_ops; * ADFS file system superblock data in memory */ struct adfs_sb_info { - union { struct { - struct adfs_discmap *s_map; /* bh list containing map */ - struct adfs_dir_ops *s_dir; /* directory operations */ - }; - struct rcu_head rcu; /* used only at shutdown time */ - }; + struct adfs_discmap *s_map; /* bh list containing map */ + struct adfs_dir_ops *s_dir; /* directory operations */ + kuid_t s_uid; /* owner uid */ kgid_t s_gid; /* owner gid */ umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 7b3003c..0ff4bae 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -123,7 +123,8 @@ static void adfs_put_super(struct super_block *sb) for (i = 0; i < asb->s_map_size; i++) brelse(asb->s_map[i].dm_bh); kfree(asb->s_map); - kfree_rcu(asb, rcu); + kfree(asb); + sb->s_fs_info = NULL; } static int adfs_show_options(struct seq_file *seq, struct dentry *root) diff --git a/fs/affs/Changes b/fs/affs/Changes index b41c2c9..a29409c 100644 --- a/fs/affs/Changes +++ b/fs/affs/Changes @@ -91,7 +91,7 @@ more 2.4 fixes: [Roman Zippel] Version 3.11 ------------ -- Converted to use 2.3.x page cache [Dave Jones] +- Converted to use 2.3.x page cache [Dave Jones <dave@powertweak.com>] - Corruption in truncate() bugfix [Ken Tyler <kent@werple.net.au>] Version 3.10 diff --git a/fs/afs/cell.c b/fs/afs/cell.c index ca0a3cf..3c090b7 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -179,7 +179,7 @@ struct afs_cell *afs_cell_create(const char *name, unsigned namesz, /* put it up for caching (this never returns an error) */ cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, &afs_cell_cache_index_def, - cell, true); + cell); #endif /* add to the cell lists */ diff --git a/fs/afs/inode.c b/fs/afs/inode.c index ce25d75..789bc25 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -259,7 +259,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, #ifdef CONFIG_AFS_FSCACHE vnode->cache = fscache_acquire_cookie(vnode->volume->cache, &afs_vnode_cache_index_def, - vnode, true); + vnode); #endif ret = afs_inode_map_status(vnode, key); diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index b6df2e8..57bcb15 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -308,8 +308,7 @@ static int afs_vlocation_fill_in_record(struct afs_vlocation *vl, /* see if we have an in-cache copy (will set vl->valid if there is) */ #ifdef CONFIG_AFS_FSCACHE vl->cache = fscache_acquire_cookie(vl->cell->cache, - &afs_vlocation_cache_index_def, vl, - true); + &afs_vlocation_cache_index_def, vl); #endif if (vl->valid) { diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 2b60725..401eeb2 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -131,7 +131,7 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) #ifdef CONFIG_AFS_FSCACHE volume->cache = fscache_acquire_cookie(vlocation->cache, &afs_volume_cache_index_def, - volume, true); + volume); #endif afs_get_vlocation(vlocation); volume->vlocation = vlocation; @@ -36,10 +36,10 @@ #include <linux/eventfd.h> #include <linux/blkdev.h> #include <linux/compat.h> +#include <linux/anon_inodes.h> #include <linux/migrate.h> #include <linux/ramfs.h> #include <linux/percpu-refcount.h> -#include <linux/mount.h> #include <asm/kmap_types.h> #include <asm/uaccess.h> @@ -80,8 +80,6 @@ struct kioctx { struct percpu_ref users; atomic_t dead; - struct percpu_ref reqs; - unsigned long user_id; struct __percpu kioctx_cpu *cpu; @@ -109,6 +107,7 @@ struct kioctx { struct page **ring_pages; long nr_pages; + struct rcu_head rcu_head; struct work_struct free_work; struct { @@ -153,67 +152,12 @@ unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio request static struct kmem_cache *kiocb_cachep; static struct kmem_cache *kioctx_cachep; -static struct vfsmount *aio_mnt; - -static const struct file_operations aio_ring_fops; -static const struct address_space_operations aio_ctx_aops; - -static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) -{ - struct qstr this = QSTR_INIT("[aio]", 5); - struct file *file; - struct path path; - struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb); - if (IS_ERR(inode)) - return ERR_CAST(inode); - - inode->i_mapping->a_ops = &aio_ctx_aops; - inode->i_mapping->private_data = ctx; - inode->i_size = PAGE_SIZE * nr_pages; - - path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); - if (!path.dentry) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - path.mnt = mntget(aio_mnt); - - d_instantiate(path.dentry, inode); - file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops); - if (IS_ERR(file)) { - path_put(&path); - return file; - } - - file->f_flags = O_RDWR; - file->private_data = ctx; - return file; -} - -static struct dentry *aio_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - static const struct dentry_operations ops = { - .d_dname = simple_dname, - }; - return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1); -} - /* aio_setup * Creates the slab caches used by the aio routines, panic on * failure as this is done early during the boot sequence. */ static int __init aio_setup(void) { - static struct file_system_type aio_fs = { - .name = "aio", - .mount = aio_mount, - .kill_sb = kill_anon_super, - }; - aio_mnt = kern_mount(&aio_fs); - if (IS_ERR(aio_mnt)) - panic("Failed to create aio fs mount."); - kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); @@ -251,10 +195,8 @@ static void aio_free_ring(struct kioctx *ctx) put_aio_ring_file(ctx); - if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) { + if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) kfree(ctx->ring_pages); - ctx->ring_pages = NULL; - } } static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) @@ -341,12 +283,16 @@ static int aio_setup_ring(struct kioctx *ctx) if (nr_pages < 0) return -EINVAL; - file = aio_private_file(ctx, nr_pages); + file = anon_inode_getfile_private("[aio]", &aio_ring_fops, ctx, O_RDWR); if (IS_ERR(file)) { ctx->aio_ring_file = NULL; return -EAGAIN; } + file->f_inode->i_mapping->a_ops = &aio_ctx_aops; + file->f_inode->i_mapping->private_data = ctx; + file->f_inode->i_size = PAGE_SIZE * (loff_t)nr_pages; + for (i = 0; i < nr_pages; i++) { struct page *page; page = find_or_create_page(file->f_inode->i_mapping, @@ -367,10 +313,8 @@ static int aio_setup_ring(struct kioctx *ctx) if (nr_pages > AIO_RING_PAGES) { ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); - if (!ctx->ring_pages) { - put_aio_ring_file(ctx); + if (!ctx->ring_pages) return -ENOMEM; - } } ctx->mmap_size = nr_pages * PAGE_SIZE; @@ -468,34 +412,26 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb) return cancel(kiocb); } -static void free_ioctx(struct work_struct *work) +static void free_ioctx_rcu(struct rcu_head *head) { - struct kioctx *ctx = container_of(work, struct kioctx, free_work); + struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); - pr_debug("freeing %p\n", ctx); - - aio_free_ring(ctx); free_percpu(ctx->cpu); kmem_cache_free(kioctx_cachep, ctx); } -static void free_ioctx_reqs(struct percpu_ref *ref) -{ - struct kioctx *ctx = container_of(ref, struct kioctx, reqs); - - INIT_WORK(&ctx->free_work, free_ioctx); - schedule_work(&ctx->free_work); -} - /* * When this function runs, the kioctx has been removed from the "hash table" * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - * now it's safe to cancel any that need to be. */ -static void free_ioctx_users(struct percpu_ref *ref) +static void free_ioctx(struct work_struct *work) { - struct kioctx *ctx = container_of(ref, struct kioctx, users); + struct kioctx *ctx = container_of(work, struct kioctx, free_work); + struct aio_ring *ring; struct kiocb *req; + unsigned cpu, avail; + DEFINE_WAIT(wait); spin_lock_irq(&ctx->ctx_lock); @@ -509,8 +445,54 @@ static void free_ioctx_users(struct percpu_ref *ref) spin_unlock_irq(&ctx->ctx_lock); - percpu_ref_kill(&ctx->reqs); - percpu_ref_put(&ctx->reqs); + for_each_possible_cpu(cpu) { + struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu); + + atomic_add(kcpu->reqs_available, &ctx->reqs_available); + kcpu->reqs_available = 0; + } + + while (1) { + prepare_to_wait(&ctx->wait, &wait, TASK_UNINTERRUPTIBLE); + + ring = kmap_atomic(ctx->ring_pages[0]); + avail = (ring->head <= ring->tail) + ? ring->tail - ring->head + : ctx->nr_events - ring->head + ring->tail; + + atomic_add(avail, &ctx->reqs_available); + ring->head = ring->tail; + kunmap_atomic(ring); + + if (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1) + break; + + schedule(); + } + finish_wait(&ctx->wait, &wait); + + WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr_events - 1); + + aio_free_ring(ctx); + + pr_debug("freeing %p\n", ctx); + + /* + * Here the call_rcu() is between the wait_event() for reqs_active to + * hit 0, and freeing the ioctx. + * + * aio_complete() decrements reqs_active, but it has to touch the ioctx + * after to issue a wakeup so we use rcu. + */ + call_rcu(&ctx->rcu_head, free_ioctx_rcu); +} + +static void free_ioctx_ref(struct percpu_ref *ref) +{ + struct kioctx *ctx = container_of(ref, struct kioctx, users); + + INIT_WORK(&ctx->free_work, free_ioctx); + schedule_work(&ctx->free_work); } static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) @@ -569,16 +551,6 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) } } -static void aio_nr_sub(unsigned nr) -{ - spin_lock(&aio_nr_lock); - if (WARN_ON(aio_nr - nr > aio_nr)) - aio_nr = 0; - else - aio_nr -= nr; - spin_unlock(&aio_nr_lock); -} - /* ioctx_alloc * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. */ @@ -616,11 +588,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ctx->max_reqs = nr_events; - if (percpu_ref_init(&ctx->users, free_ioctx_users)) - goto err; - - if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs)) - goto err; + if (percpu_ref_init(&ctx->users, free_ioctx_ref)) + goto out_freectx; spin_lock_init(&ctx->ctx_lock); spin_lock_init(&ctx->completion_lock); @@ -631,10 +600,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) ctx->cpu = alloc_percpu(struct kioctx_cpu); if (!ctx->cpu) - goto err; + goto out_freeref; if (aio_setup_ring(ctx) < 0) - goto err; + goto out_freepcpu; atomic_set(&ctx->reqs_available, ctx->nr_events - 1); ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4); @@ -646,8 +615,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) if (aio_nr + nr_events > (aio_max_nr * 2UL) || aio_nr + nr_events < aio_nr) { spin_unlock(&aio_nr_lock); - err = -EAGAIN; - goto err_ctx; + goto out_cleanup; } aio_nr += ctx->max_reqs; spin_unlock(&aio_nr_lock); @@ -656,20 +624,23 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) err = ioctx_add_table(ctx, mm); if (err) - goto err_cleanup; + goto out_cleanup_put; pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", ctx, ctx->user_id, mm, ctx->nr_events); return ctx; -err_cleanup: - aio_nr_sub(ctx->max_reqs); -err_ctx: +out_cleanup_put: + percpu_ref_put(&ctx->users); +out_cleanup: + err = -EAGAIN; aio_free_ring(ctx); -err: +out_freepcpu: free_percpu(ctx->cpu); - free_percpu(ctx->reqs.pcpu_count); +out_freeref: free_percpu(ctx->users.pcpu_count); +out_freectx: + put_aio_ring_file(ctx); kmem_cache_free(kioctx_cachep, ctx); pr_debug("error allocating ioctx %d\n", err); return ERR_PTR(err); @@ -704,7 +675,10 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) * -EAGAIN with no ioctxs actually in use (as far as userspace * could tell). */ - aio_nr_sub(ctx->max_reqs); + spin_lock(&aio_nr_lock); + BUG_ON(aio_nr - ctx->max_reqs > aio_nr); + aio_nr -= ctx->max_reqs; + spin_unlock(&aio_nr_lock); if (ctx->mmap_size) vm_munmap(ctx->mmap_base, ctx->mmap_size); @@ -836,8 +810,6 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx) if (unlikely(!req)) goto out_put; - percpu_ref_get(&ctx->reqs); - req->ki_ctx = ctx; return req; out_put: @@ -907,6 +879,12 @@ void aio_complete(struct kiocb *iocb, long res, long res2) return; } + /* + * Take rcu_read_lock() in case the kioctx is being destroyed, as we + * need to issue a wakeup after incrementing reqs_available. + */ + rcu_read_lock(); + if (iocb->ki_list.next) { unsigned long flags; @@ -981,7 +959,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); - percpu_ref_put(&ctx->reqs); + rcu_read_unlock(); } EXPORT_SYMBOL(aio_complete); @@ -1392,7 +1370,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, return 0; out_put_req: put_reqs_available(ctx, 1); - percpu_ref_put(&ctx->reqs); kiocb_free(req); return ret; } diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 2408473..85c9618 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -24,6 +24,7 @@ static struct vfsmount *anon_inode_mnt __read_mostly; static struct inode *anon_inode_inode; +static const struct file_operations anon_inode_fops; /* * anon_inodefs_dname() is called from d_path(). @@ -38,6 +39,51 @@ static const struct dentry_operations anon_inodefs_dentry_operations = { .d_dname = anon_inodefs_dname, }; +/* + * nop .set_page_dirty method so that people can use .page_mkwrite on + * anon inodes. + */ +static int anon_set_page_dirty(struct page *page) +{ + return 0; +}; + +static const struct address_space_operations anon_aops = { + .set_page_dirty = anon_set_page_dirty, +}; + +/* + * A single inode exists for all anon_inode files. Contrary to pipes, + * anon_inode inodes have no associated per-instance data, so we need + * only allocate one of them. + */ +static struct inode *anon_inode_mkinode(struct super_block *s) +{ + struct inode *inode = new_inode_pseudo(s); + + if (!inode) + return ERR_PTR(-ENOMEM); + + inode->i_ino = get_next_ino(); + inode->i_fop = &anon_inode_fops; + + inode->i_mapping->a_ops = &anon_aops; + + /* + * Mark the inode dirty from the very beginning, + * that way it will never be moved to the dirty + * list because mark_inode_dirty() will think + * that it already _is_ on the dirty list. + */ + inode->i_state = I_DIRTY; + inode->i_mode = S_IRUSR | S_IWUSR; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); + inode->i_flags |= S_PRIVATE; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + return inode; +} + static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { @@ -46,7 +92,7 @@ static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC); if (!IS_ERR(root)) { struct super_block *s = root->d_sb; - anon_inode_inode = alloc_anon_inode(s); + anon_inode_inode = anon_inode_mkinode(s); if (IS_ERR(anon_inode_inode)) { dput(root); deactivate_locked_super(s); @@ -63,6 +109,72 @@ static struct file_system_type anon_inode_fs_type = { }; /** + * anon_inode_getfile_private - creates a new file instance by hooking it up to an + * anonymous inode, and a dentry that describe the "class" + * of the file + * + * @name: [in] name of the "class" of the new file + * @fops: [in] file operations for the new file + * @priv: [in] private data for the new file (will be file's private_data) + * @flags: [in] flags + * + * + * Similar to anon_inode_getfile, but each file holds a single inode. + * + */ +struct file *anon_inode_getfile_private(const char *name, + const struct file_operations *fops, + void *priv, int flags) +{ + struct qstr this; + struct path path; + struct file *file; + struct inode *inode; + + if (fops->owner && !try_module_get(fops->owner)) + return ERR_PTR(-ENOENT); + + inode = anon_inode_mkinode(anon_inode_mnt->mnt_sb); + if (IS_ERR(inode)) { + file = ERR_PTR(-ENOMEM); + goto err_module; + } + + /* + * Link the inode to a directory entry by creating a unique name + * using the inode sequence number. + */ + file = ERR_PTR(-ENOMEM); + this.name = name; + this.len = strlen(name); + this.hash = 0; + path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this); + if (!path.dentry) + goto err_module; + + path.mnt = mntget(anon_inode_mnt); + + d_instantiate(path.dentry, inode); + + file = alloc_file(&path, OPEN_FMODE(flags), fops); + if (IS_ERR(file)) + goto err_dput; + + file->f_mapping = inode->i_mapping; + file->f_flags = flags & (O_ACCMODE | O_NONBLOCK); + file->private_data = priv; + + return file; + +err_dput: + path_put(&path); +err_module: + module_put(fops->owner); + return file; +} +EXPORT_SYMBOL_GPL(anon_inode_getfile_private); + +/** * anon_inode_getfile - creates a new file instance by hooking it up to an * anonymous inode, and a dentry that describe the "class" * of the file @@ -167,27 +167,7 @@ void setattr_copy(struct inode *inode, const struct iattr *attr) } EXPORT_SYMBOL(setattr_copy); -/** - * notify_change - modify attributes of a filesytem object - * @dentry: object affected - * @iattr: new attributes - * @delegated_inode: returns inode, if the inode is delegated - * - * The caller must hold the i_mutex on the affected object. - * - * If notify_change discovers a delegation in need of breaking, - * it will return -EWOULDBLOCK and return a reference to the inode in - * delegated_inode. The caller should then break the delegation and - * retry. Because breaking a delegation may take a long time, the - * caller should drop the i_mutex before doing so. - * - * Alternatively, a caller may pass NULL for delegated_inode. This may - * be appropriate for callers that expect the underlying filesystem not - * to be NFS exported. Also, passing NULL is fine for callers holding - * the file open for write, as there can be no conflicting delegation in - * that case. - */ -int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode) +int notify_change(struct dentry * dentry, struct iattr * attr) { struct inode *inode = dentry->d_inode; umode_t mode = inode->i_mode; @@ -263,9 +243,6 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de error = security_inode_setattr(dentry, attr); if (error) return error; - error = try_break_deleg(inode, delegated_inode); - if (error) - return error; if (inode->i_op->setattr) error = inode->i_op->setattr(dentry, attr); diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 4218e26..3f1128b 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -122,7 +122,6 @@ struct autofs_sb_info { spinlock_t lookup_lock; struct list_head active_list; struct list_head expiring_list; - struct rcu_head rcu; }; static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb) @@ -272,7 +271,7 @@ void autofs4_clean_ino(struct autofs_info *); static inline int autofs_prepare_pipe(struct file *pipe) { - if (!pipe->f_op->write) + if (!pipe->f_op || !pipe->f_op->write) return -EINVAL; if (!S_ISFIFO(file_inode(pipe)->i_mode)) return -EINVAL; diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 1818ce7..0f00da3 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -658,6 +658,12 @@ static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __use goto out; } + if (!fp->f_op) { + err = -ENOTTY; + fput(fp); + goto out; + } + sbi = autofs_dev_ioctl_sbi(fp); if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) { err = -EINVAL; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 3b9cc9b..b104726 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -56,13 +56,18 @@ void autofs4_kill_sb(struct super_block *sb) * just call kill_anon_super when we are called from * deactivate_super. */ - if (sbi) /* Free wait queues, close pipe */ - autofs4_catatonic_mode(sbi); + if (!sbi) + goto out_kill_sb; + + /* Free wait queues, close pipe */ + autofs4_catatonic_mode(sbi); + + sb->s_fs_info = NULL; + kfree(sbi); +out_kill_sb: DPRINTK("shutting down"); kill_litter_super(sb); - if (sbi) - kfree_rcu(sbi, rcu); } static int autofs4_show_options(struct seq_file *m, struct dentry *root) diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index daa15d6..e9c75e2 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -42,7 +42,7 @@ static void befs_destroy_inode(struct inode *inode); static int befs_init_inodecache(void); static void befs_destroy_inodecache(void); static void *befs_follow_link(struct dentry *, struct nameidata *); -static void *befs_fast_follow_link(struct dentry *, struct nameidata *); +static void befs_put_link(struct dentry *, struct nameidata *, void *); static int befs_utf2nls(struct super_block *sb, const char *in, int in_len, char **out, int *out_len); static int befs_nls2utf(struct super_block *sb, const char *in, int in_len, @@ -79,15 +79,10 @@ static const struct address_space_operations befs_aops = { .bmap = befs_bmap, }; -static const struct inode_operations befs_fast_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = befs_fast_follow_link, -}; - static const struct inode_operations befs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = befs_follow_link, - .put_link = kfree_put_link, + .put_link = befs_put_link, }; /* @@ -416,10 +411,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) inode->i_op = &befs_dir_inode_operations; inode->i_fop = &befs_dir_operations; } else if (S_ISLNK(inode->i_mode)) { - if (befs_ino->i_flags & BEFS_LONG_SYMLINK) - inode->i_op = &befs_symlink_inode_operations; - else - inode->i_op = &befs_fast_symlink_inode_operations; + inode->i_op = &befs_symlink_inode_operations; } else { befs_error(sb, "Inode %lu is not a regular file, " "directory or symlink. THAT IS WRONG! BeFS has no " @@ -485,40 +477,47 @@ befs_destroy_inodecache(void) static void * befs_follow_link(struct dentry *dentry, struct nameidata *nd) { - struct super_block *sb = dentry->d_sb; befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); - befs_data_stream *data = &befs_ino->i_data.ds; - befs_off_t len = data->size; char *link; - if (len == 0) { - befs_error(sb, "Long symlink with illegal length"); - link = ERR_PTR(-EIO); - } else { - befs_debug(sb, "Follow long symlink"); + if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { + struct super_block *sb = dentry->d_sb; + befs_data_stream *data = &befs_ino->i_data.ds; + befs_off_t len = data->size; - link = kmalloc(len, GFP_NOFS); - if (!link) { - link = ERR_PTR(-ENOMEM); - } else if (befs_read_lsymlink(sb, data, link, len) != len) { - kfree(link); - befs_error(sb, "Failed to read entire long symlink"); + if (len == 0) { + befs_error(sb, "Long symlink with illegal length"); link = ERR_PTR(-EIO); } else { - link[len - 1] = '\0'; + befs_debug(sb, "Follow long symlink"); + + link = kmalloc(len, GFP_NOFS); + if (!link) { + link = ERR_PTR(-ENOMEM); + } else if (befs_read_lsymlink(sb, data, link, len) != len) { + kfree(link); + befs_error(sb, "Failed to read entire long symlink"); + link = ERR_PTR(-EIO); + } else { + link[len - 1] = '\0'; + } } + } else { + link = befs_ino->i_data.symlink; } + nd_set_link(nd, link); return NULL; } - -static void * -befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd) +static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) { befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); - nd_set_link(nd, befs_ino->i_data.symlink); - return NULL; + if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { + char *link = nd_get_link(nd); + if (!IS_ERR(link)) + kfree(link); + } } /* diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index ca0ba15..89dec7f 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -45,6 +45,7 @@ static int load_aout_library(struct file*); */ static int aout_core_dump(struct coredump_params *cprm) { + struct file *file = cprm->file; mm_segment_t fs; int has_dumped = 0; void __user *dump_start; @@ -84,10 +85,10 @@ static int aout_core_dump(struct coredump_params *cprm) set_fs(KERNEL_DS); /* struct user */ - if (!dump_emit(cprm, &dump, sizeof(dump))) + if (!dump_write(file, &dump, sizeof(dump))) goto end_coredump; /* Now dump all of the user data. Include malloced stuff as well */ - if (!dump_skip(cprm, PAGE_SIZE - sizeof(dump))) + if (!dump_seek(cprm->file, PAGE_SIZE - sizeof(dump))) goto end_coredump; /* now we start writing out the user space info */ set_fs(USER_DS); @@ -95,14 +96,14 @@ static int aout_core_dump(struct coredump_params *cprm) if (dump.u_dsize != 0) { dump_start = START_DATA(dump); dump_size = dump.u_dsize << PAGE_SHIFT; - if (!dump_emit(cprm, dump_start, dump_size)) + if (!dump_write(file, dump_start, dump_size)) goto end_coredump; } /* Now prepare to dump the stack area */ if (dump.u_ssize != 0) { dump_start = START_STACK(dump); dump_size = dump.u_ssize << PAGE_SHIFT; - if (!dump_emit(cprm, dump_start, dump_size)) + if (!dump_write(file, dump_start, dump_size)) goto end_coredump; } end_coredump: @@ -220,7 +221,7 @@ static int load_aout_binary(struct linux_binprm * bprm) * Requires a mmap handler. This prevents people from using a.out * as part of an exploit attack against /proc-related vulnerabilities. */ - if (!bprm->file->f_op->mmap) + if (!bprm->file->f_op || !bprm->file->f_op->mmap) return -ENOEXEC; fd_offset = N_TXTOFF(ex); @@ -373,7 +374,7 @@ static int load_aout_library(struct file *file) * Requires a mmap handler. This prevents people from using a.out * as part of an exploit attack against /proc-related vulnerabilities. */ - if (!file->f_op->mmap) + if (!file->f_op || !file->f_op->mmap) goto out; if (N_FLAGS(ex)) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 571a423..4c94a79 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -406,7 +406,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, goto out; if (!elf_check_arch(interp_elf_ex)) goto out; - if (!interpreter->f_op->mmap) + if (!interpreter->f_op || !interpreter->f_op->mmap) goto out; /* @@ -607,7 +607,7 @@ static int load_elf_binary(struct linux_binprm *bprm) goto out; if (!elf_check_arch(&loc->elf_ex)) goto out; - if (!bprm->file->f_op->mmap) + if (!bprm->file->f_op || !bprm->file->f_op->mmap) goto out; /* Now read in all of the header information */ @@ -1028,7 +1028,7 @@ static int load_elf_library(struct file *file) /* First of all, some simple consistency checks */ if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 || - !elf_check_arch(&elf_ex) || !file->f_op->mmap) + !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap) goto out; /* Now read in all of the header information */ @@ -1225,17 +1225,35 @@ static int notesize(struct memelfnote *en) return sz; } -static int writenote(struct memelfnote *men, struct coredump_params *cprm) +#define DUMP_WRITE(addr, nr, foffset) \ + do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0) + +static int alignfile(struct file *file, loff_t *foffset) +{ + static const char buf[4] = { 0, }; + DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset); + return 1; +} + +static int writenote(struct memelfnote *men, struct file *file, + loff_t *foffset) { struct elf_note en; en.n_namesz = strlen(men->name) + 1; en.n_descsz = men->datasz; en.n_type = men->type; - return dump_emit(cprm, &en, sizeof(en)) && - dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) && - dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4); + DUMP_WRITE(&en, sizeof(en), foffset); + DUMP_WRITE(men->name, en.n_namesz, foffset); + if (!alignfile(file, foffset)) + return 0; + DUMP_WRITE(men->data, men->datasz, foffset); + if (!alignfile(file, foffset)) + return 0; + + return 1; } +#undef DUMP_WRITE static void fill_elf_header(struct elfhdr *elf, int segs, u16 machine, u32 flags) @@ -1374,7 +1392,7 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm) } static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, - const siginfo_t *siginfo) + siginfo_t *siginfo) { mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); @@ -1581,7 +1599,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, - const siginfo_t *siginfo, struct pt_regs *regs) + siginfo_t *siginfo, struct pt_regs *regs) { struct task_struct *dump_task = current; const struct user_regset_view *view = task_user_regset_view(dump_task); @@ -1684,7 +1702,7 @@ static size_t get_note_info_size(struct elf_note_info *info) * process-wide notes are interleaved after the first thread-specific note. */ static int write_note_info(struct elf_note_info *info, - struct coredump_params *cprm) + struct file *file, loff_t *foffset) { bool first = 1; struct elf_thread_core_info *t = info->thread; @@ -1692,22 +1710,22 @@ static int write_note_info(struct elf_note_info *info, do { int i; - if (!writenote(&t->notes[0], cprm)) + if (!writenote(&t->notes[0], file, foffset)) return 0; - if (first && !writenote(&info->psinfo, cprm)) + if (first && !writenote(&info->psinfo, file, foffset)) return 0; - if (first && !writenote(&info->signote, cprm)) + if (first && !writenote(&info->signote, file, foffset)) return 0; - if (first && !writenote(&info->auxv, cprm)) + if (first && !writenote(&info->auxv, file, foffset)) return 0; if (first && info->files.data && - !writenote(&info->files, cprm)) + !writenote(&info->files, file, foffset)) return 0; for (i = 1; i < info->thread_notes; ++i) if (t->notes[i].data && - !writenote(&t->notes[i], cprm)) + !writenote(&t->notes[i], file, foffset)) return 0; first = 0; @@ -1830,31 +1848,34 @@ static int elf_note_info_init(struct elf_note_info *info) static int fill_note_info(struct elfhdr *elf, int phdrs, struct elf_note_info *info, - const siginfo_t *siginfo, struct pt_regs *regs) + siginfo_t *siginfo, struct pt_regs *regs) { struct list_head *t; - struct core_thread *ct; - struct elf_thread_status *ets; if (!elf_note_info_init(info)) return 0; - for (ct = current->mm->core_state->dumper.next; - ct; ct = ct->next) { - ets = kzalloc(sizeof(*ets), GFP_KERNEL); - if (!ets) - return 0; + if (siginfo->si_signo) { + struct core_thread *ct; + struct elf_thread_status *ets; - ets->thread = ct->task; - list_add(&ets->list, &info->thread_list); - } + for (ct = current->mm->core_state->dumper.next; + ct; ct = ct->next) { + ets = kzalloc(sizeof(*ets), GFP_KERNEL); + if (!ets) + return 0; - list_for_each(t, &info->thread_list) { - int sz; + ets->thread = ct->task; + list_add(&ets->list, &info->thread_list); + } + + list_for_each(t, &info->thread_list) { + int sz; - ets = list_entry(t, struct elf_thread_status, list); - sz = elf_dump_thread_status(siginfo->si_signo, ets); - info->thread_status_size += sz; + ets = list_entry(t, struct elf_thread_status, list); + sz = elf_dump_thread_status(siginfo->si_signo, ets); + info->thread_status_size += sz; + } } /* now collect the dump for the current */ memset(info->prstatus, 0, sizeof(*info->prstatus)); @@ -1914,13 +1935,13 @@ static size_t get_note_info_size(struct elf_note_info *info) } static int write_note_info(struct elf_note_info *info, - struct coredump_params *cprm) + struct file *file, loff_t *foffset) { int i; struct list_head *t; for (i = 0; i < info->numnote; i++) - if (!writenote(info->notes + i, cprm)) + if (!writenote(info->notes + i, file, foffset)) return 0; /* write out the thread status notes section */ @@ -1929,7 +1950,7 @@ static int write_note_info(struct elf_note_info *info, list_entry(t, struct elf_thread_status, list); for (i = 0; i < tmp->num_notes; i++) - if (!writenote(&tmp->notes[i], cprm)) + if (!writenote(&tmp->notes[i], file, foffset)) return 0; } @@ -2025,9 +2046,10 @@ static int elf_core_dump(struct coredump_params *cprm) int has_dumped = 0; mm_segment_t fs; int segs; + size_t size = 0; struct vm_area_struct *vma, *gate_vma; struct elfhdr *elf = NULL; - loff_t offset = 0, dataoff; + loff_t offset = 0, dataoff, foffset; struct elf_note_info info = { }; struct elf_phdr *phdr4note = NULL; struct elf_shdr *shdr4extnum = NULL; @@ -2083,6 +2105,7 @@ static int elf_core_dump(struct coredump_params *cprm) offset += sizeof(*elf); /* Elf header */ offset += segs * sizeof(struct elf_phdr); /* Program headers */ + foffset = offset; /* Write notes phdr entry */ { @@ -2113,10 +2136,13 @@ static int elf_core_dump(struct coredump_params *cprm) offset = dataoff; - if (!dump_emit(cprm, elf, sizeof(*elf))) + size += sizeof(*elf); + if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) goto end_coredump; - if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note))) + size += sizeof(*phdr4note); + if (size > cprm->limit + || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note))) goto end_coredump; /* Write program headers for segments dump */ @@ -2138,22 +2164,24 @@ static int elf_core_dump(struct coredump_params *cprm) phdr.p_flags |= PF_X; phdr.p_align = ELF_EXEC_PAGESIZE; - if (!dump_emit(cprm, &phdr, sizeof(phdr))) + size += sizeof(phdr); + if (size > cprm->limit + || !dump_write(cprm->file, &phdr, sizeof(phdr))) goto end_coredump; } - if (!elf_core_write_extra_phdrs(cprm, offset)) + if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit)) goto end_coredump; /* write out the notes section */ - if (!write_note_info(&info, cprm)) + if (!write_note_info(&info, cprm->file, &foffset)) goto end_coredump; - if (elf_coredump_extra_notes_write(cprm)) + if (elf_coredump_extra_notes_write(cprm->file, &foffset)) goto end_coredump; /* Align to page */ - if (!dump_skip(cprm, dataoff - cprm->written)) + if (!dump_seek(cprm->file, dataoff - foffset)) goto end_coredump; for (vma = first_vma(current, gate_vma); vma != NULL; @@ -2170,21 +2198,26 @@ static int elf_core_dump(struct coredump_params *cprm) page = get_dump_page(addr); if (page) { void *kaddr = kmap(page); - stop = !dump_emit(cprm, kaddr, PAGE_SIZE); + stop = ((size += PAGE_SIZE) > cprm->limit) || + !dump_write(cprm->file, kaddr, + PAGE_SIZE); kunmap(page); page_cache_release(page); } else - stop = !dump_skip(cprm, PAGE_SIZE); + stop = !dump_seek(cprm->file, PAGE_SIZE); if (stop) goto end_coredump; } } - if (!elf_core_write_extra_data(cprm)) + if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) goto end_coredump; if (e_phnum == PN_XNUM) { - if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum))) + size += sizeof(*shdr4extnum); + if (size > cprm->limit + || !dump_write(cprm->file, shdr4extnum, + sizeof(*shdr4extnum))) goto end_coredump; } diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index fe2a643..c166f32 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -111,7 +111,7 @@ static int is_elf_fdpic(struct elfhdr *hdr, struct file *file) return 0; if (!elf_check_arch(hdr) || !elf_check_fdpic(hdr)) return 0; - if (!file->f_op->mmap) + if (!file->f_op || !file->f_op->mmap) return 0; return 1; } @@ -1267,17 +1267,35 @@ static int notesize(struct memelfnote *en) /* #define DEBUG */ -static int writenote(struct memelfnote *men, struct coredump_params *cprm) +#define DUMP_WRITE(addr, nr, foffset) \ + do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0) + +static int alignfile(struct file *file, loff_t *foffset) +{ + static const char buf[4] = { 0, }; + DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset); + return 1; +} + +static int writenote(struct memelfnote *men, struct file *file, + loff_t *foffset) { struct elf_note en; en.n_namesz = strlen(men->name) + 1; en.n_descsz = men->datasz; en.n_type = men->type; - return dump_emit(cprm, &en, sizeof(en)) && - dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) && - dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4); + DUMP_WRITE(&en, sizeof(en), foffset); + DUMP_WRITE(men->name, en.n_namesz, foffset); + if (!alignfile(file, foffset)) + return 0; + DUMP_WRITE(men->data, men->datasz, foffset); + if (!alignfile(file, foffset)) + return 0; + + return 1; } +#undef DUMP_WRITE static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs) { @@ -1482,40 +1500,66 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum, /* * dump the segments for an MMU process */ -static bool elf_fdpic_dump_segments(struct coredump_params *cprm) +#ifdef CONFIG_MMU +static int elf_fdpic_dump_segments(struct file *file, size_t *size, + unsigned long *limit, unsigned long mm_flags) { struct vm_area_struct *vma; + int err = 0; for (vma = current->mm->mmap; vma; vma = vma->vm_next) { unsigned long addr; - if (!maydump(vma, cprm->mm_flags)) + if (!maydump(vma, mm_flags)) continue; -#ifdef CONFIG_MMU for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { - bool res; struct page *page = get_dump_page(addr); if (page) { void *kaddr = kmap(page); - res = dump_emit(cprm, kaddr, PAGE_SIZE); + *size += PAGE_SIZE; + if (*size > *limit) + err = -EFBIG; + else if (!dump_write(file, kaddr, PAGE_SIZE)) + err = -EIO; kunmap(page); page_cache_release(page); - } else { - res = dump_skip(cprm, PAGE_SIZE); - } - if (!res) - return false; + } else if (!dump_seek(file, PAGE_SIZE)) + err = -EFBIG; + if (err) + goto out; } -#else - if (!dump_emit(cprm, (void *) vma->vm_start, - vma->vm_end - vma->vm_start)) - return false; + } +out: + return err; +} #endif + +/* + * dump the segments for a NOMMU process + */ +#ifndef CONFIG_MMU +static int elf_fdpic_dump_segments(struct file *file, size_t *size, + unsigned long *limit, unsigned long mm_flags) +{ + struct vm_area_struct *vma; + + for (vma = current->mm->mmap; vma; vma = vma->vm_next) { + if (!maydump(vma, mm_flags)) + continue; + + if ((*size += PAGE_SIZE) > *limit) + return -EFBIG; + + if (!dump_write(file, (void *) vma->vm_start, + vma->vm_end - vma->vm_start)) + return -EIO; } - return true; + + return 0; } +#endif static size_t elf_core_vma_data_size(unsigned long mm_flags) { @@ -1541,10 +1585,11 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) int has_dumped = 0; mm_segment_t fs; int segs; + size_t size = 0; int i; struct vm_area_struct *vma; struct elfhdr *elf = NULL; - loff_t offset = 0, dataoff; + loff_t offset = 0, dataoff, foffset; int numnote; struct memelfnote *notes = NULL; struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */ @@ -1561,8 +1606,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) struct elf_shdr *shdr4extnum = NULL; Elf_Half e_phnum; elf_addr_t e_shoff; - struct core_thread *ct; - struct elf_thread_status *tmp; /* * We no longer stop all VM operations. @@ -1598,23 +1641,28 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto cleanup; #endif - for (ct = current->mm->core_state->dumper.next; - ct; ct = ct->next) { - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); - if (!tmp) - goto cleanup; + if (cprm->siginfo->si_signo) { + struct core_thread *ct; + struct elf_thread_status *tmp; - tmp->thread = ct->task; - list_add(&tmp->list, &thread_list); - } + for (ct = current->mm->core_state->dumper.next; + ct; ct = ct->next) { + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + goto cleanup; - list_for_each(t, &thread_list) { - struct elf_thread_status *tmp; - int sz; + tmp->thread = ct->task; + list_add(&tmp->list, &thread_list); + } - tmp = list_entry(t, struct elf_thread_status, list); - sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp); - thread_status_size += sz; + list_for_each(t, &thread_list) { + struct elf_thread_status *tmp; + int sz; + + tmp = list_entry(t, struct elf_thread_status, list); + sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp); + thread_status_size += sz; + } } /* now collect the dump for the current */ @@ -1672,6 +1720,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) offset += sizeof(*elf); /* Elf header */ offset += segs * sizeof(struct elf_phdr); /* Program headers */ + foffset = offset; /* Write notes phdr entry */ { @@ -1706,10 +1755,13 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) offset = dataoff; - if (!dump_emit(cprm, elf, sizeof(*elf))) + size += sizeof(*elf); + if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf))) goto end_coredump; - if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note))) + size += sizeof(*phdr4note); + if (size > cprm->limit + || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note))) goto end_coredump; /* write program headers for segments dump */ @@ -1733,16 +1785,18 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) phdr.p_flags |= PF_X; phdr.p_align = ELF_EXEC_PAGESIZE; - if (!dump_emit(cprm, &phdr, sizeof(phdr))) + size += sizeof(phdr); + if (size > cprm->limit + || !dump_write(cprm->file, &phdr, sizeof(phdr))) goto end_coredump; } - if (!elf_core_write_extra_phdrs(cprm, offset)) + if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit)) goto end_coredump; /* write out the notes section */ for (i = 0; i < numnote; i++) - if (!writenote(notes + i, cprm)) + if (!writenote(notes + i, cprm->file, &foffset)) goto end_coredump; /* write out the thread status notes section */ @@ -1751,21 +1805,25 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) list_entry(t, struct elf_thread_status, list); for (i = 0; i < tmp->num_notes; i++) - if (!writenote(&tmp->notes[i], cprm)) + if (!writenote(&tmp->notes[i], cprm->file, &foffset)) goto end_coredump; } - if (!dump_skip(cprm, dataoff - cprm->written)) + if (!dump_seek(cprm->file, dataoff - foffset)) goto end_coredump; - if (!elf_fdpic_dump_segments(cprm)) + if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit, + cprm->mm_flags) < 0) goto end_coredump; - if (!elf_core_write_extra_data(cprm)) + if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit)) goto end_coredump; if (e_phnum == PN_XNUM) { - if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum))) + size += sizeof(*shdr4extnum); + if (size > cprm->limit + || !dump_write(cprm->file, shdr4extnum, + sizeof(*shdr4extnum))) goto end_coredump; } diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index f37b08c..037a3e2 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -38,7 +38,7 @@ static int load_em86(struct linux_binprm *bprm) /* First of all, some simple consistency checks */ if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) || (!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) || - !bprm->file->f_op->mmap) { + (!bprm->file->f_op || !bprm->file->f_op->mmap)) { return -ENOEXEC; } @@ -601,7 +601,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs); static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, - unsigned int max_sectors) + unsigned short max_sectors) { int retried_segments = 0; struct bio_vec *bvec; @@ -1805,52 +1805,6 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) EXPORT_SYMBOL(bio_split); /** - * bio_trim - trim a bio - * @bio: bio to trim - * @offset: number of sectors to trim from the front of @bio - * @size: size we want to trim @bio to, in sectors - */ -void bio_trim(struct bio *bio, int offset, int size) -{ - /* 'bio' is a cloned bio which we need to trim to match - * the given offset and size. - * This requires adjusting bi_sector, bi_size, and bi_io_vec - */ - int i; - struct bio_vec *bvec; - int sofar = 0; - - size <<= 9; - if (offset == 0 && size == bio->bi_size) - return; - - clear_bit(BIO_SEG_VALID, &bio->bi_flags); - - bio_advance(bio, offset << 9); - - bio->bi_size = size; - - /* avoid any complications with bi_idx being non-zero*/ - if (bio->bi_idx) { - memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx, - (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec)); - bio->bi_vcnt -= bio->bi_idx; - bio->bi_idx = 0; - } - /* Make sure vcnt and last bv are not too big */ - bio_for_each_segment(bvec, bio, i) { - if (sofar + bvec->bv_len > size) - bvec->bv_len = size - sofar; - if (bvec->bv_len == 0) { - bio->bi_vcnt = i; - break; - } - sofar += bvec->bv_len; - } -} -EXPORT_SYMBOL_GPL(bio_trim); - -/** * bio_sector_offset - Find hardware sector offset in bio * @bio: bio to inspect * @index: bio_vec index diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index aa976ec..398cbd5 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -9,17 +9,12 @@ config BTRFS_FS select XOR_BLOCKS help - Btrfs is a general purpose copy-on-write filesystem with extents, - writable snapshotting, support for multiple devices and many more - features focused on fault tolerance, repair and easy administration. + Btrfs is a new filesystem with extents, writable snapshotting, + support for multiple devices and many more features. - The filesystem disk format is no longer unstable, and it's not - expected to change unless there are strong reasons to do so. If there - is a format change, file systems with a unchanged format will - continue to be mountable and usable by newer kernels. - - For more information, please see the web pages at - http://btrfs.wiki.kernel.org. + Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET + FINALIZED. You should say N here unless you are interested in + testing Btrfs with non-critical data. To compile this file system support as a module, choose M here. The module will be called btrfs. @@ -64,8 +59,7 @@ config BTRFS_FS_RUN_SANITY_TESTS help This will run some basic sanity tests on the free space cache code to make sure it is acting as it should. These are mostly - regression tests and are only really interesting to btrfs - developers. + regression tests and are only really interesting to btrfs devlopers. If unsure, say N. diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 1a44e42..a91a6a3 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -14,6 +14,4 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o -btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \ - tests/extent-buffer-tests.o tests/btrfs-tests.o \ - tests/extent-io-tests.o tests/inode-tests.o +btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 0890c83..e15d2b0 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -229,7 +229,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans, if (ret > 0) { /* we need an acl */ ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); - } else if (ret < 0) { + } else { cache_no_acl(inode); } } else { diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index c1e0b0c..08cc08f 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -262,7 +262,7 @@ static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker, struct btrfs_work *work = NULL; struct list_head *cur = NULL; - if (!list_empty(prio_head)) + if(!list_empty(prio_head)) cur = prio_head->next; smp_mb(); @@ -495,7 +495,6 @@ static int __btrfs_start_workers(struct btrfs_workers *workers) spin_lock_irq(&workers->lock); if (workers->stopping) { spin_unlock_irq(&workers->lock); - ret = -EINVAL; goto fail_kthread; } list_add_tail(&worker->worker_list, &workers->idle_list); diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 3775947..0552a59 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -185,9 +185,6 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, { struct __prelim_ref *ref; - if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID) - return 0; - ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask); if (!ref) return -ENOMEM; @@ -326,7 +323,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, eb = path->nodes[level]; while (!eb) { - if (WARN_ON(!level)) { + if (!level) { + WARN_ON(1); ret = 1; goto out; } @@ -1621,7 +1619,7 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root, btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); btrfs_release_path(path); - item = btrfs_item_nr(slot); + item = btrfs_item_nr(eb, slot); iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) { diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index ac0b39d..71f074e 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -19,7 +19,6 @@ #ifndef __BTRFS_I__ #define __BTRFS_I__ -#include <linux/hash.h> #include "extent_map.h" #include "extent_io.h" #include "ordered-data.h" @@ -180,25 +179,6 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode) return container_of(inode, struct btrfs_inode, vfs_inode); } -static inline unsigned long btrfs_inode_hash(u64 objectid, - const struct btrfs_root *root) -{ - u64 h = objectid ^ (root->objectid * GOLDEN_RATIO_PRIME); - -#if BITS_PER_LONG == 32 - h = (h >> 32) ^ (h & 0xffffffff); -#endif - - return (unsigned long)h; -} - -static inline void btrfs_insert_inode_hash(struct inode *inode) -{ - unsigned long h = btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root); - - __insert_inode_hash(inode, h); -} - static inline u64 btrfs_ino(struct inode *inode) { u64 ino = BTRFS_I(inode)->location.objectid; diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 131d828..1c47be1 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -77,15 +77,6 @@ * the integrity of (super)-block write requests, do not * enable the config option BTRFS_FS_CHECK_INTEGRITY to * include and compile the integrity check tool. - * - * Expect millions of lines of information in the kernel log with an - * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the - * kernel config to at least 26 (which is 64MB). Usually the value is - * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be - * changed like this before LOG_BUF_SHIFT can be set to a high value: - * config LOG_BUF_SHIFT - * int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" - * range 12 30 */ #include <linux/sched.h> @@ -133,7 +124,6 @@ #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400 #define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800 #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000 -#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE 0x00002000 struct btrfsic_dev_state; struct btrfsic_state; @@ -333,6 +323,7 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); static int btrfsic_read_block(struct btrfsic_state *state, struct btrfsic_block_data_ctx *block_ctx); static void btrfsic_dump_database(struct btrfsic_state *state); +static void btrfsic_complete_bio_end_io(struct bio *bio, int err); static int btrfsic_test_for_metadata(struct btrfsic_state *state, char **datav, unsigned int num_pages); static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, @@ -1047,7 +1038,7 @@ leaf_item_out_of_bounce_error: disk_item_offset, sizeof(struct btrfs_item)); item_offset = btrfs_stack_item_offset(&disk_item); - item_size = btrfs_stack_item_size(&disk_item); + item_size = btrfs_stack_item_offset(&disk_item); disk_key = &disk_item.key; type = btrfs_disk_key_type(disk_key); @@ -1686,6 +1677,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, for (i = 0; i < num_pages;) { struct bio *bio; unsigned int j; + DECLARE_COMPLETION_ONSTACK(complete); bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i); if (!bio) { @@ -1696,6 +1688,8 @@ static int btrfsic_read_block(struct btrfsic_state *state, } bio->bi_bdev = block_ctx->dev->bdev; bio->bi_sector = dev_bytenr >> 9; + bio->bi_end_io = btrfsic_complete_bio_end_io; + bio->bi_private = &complete; for (j = i; j < num_pages; j++) { ret = bio_add_page(bio, block_ctx->pagev[j], @@ -1708,7 +1702,12 @@ static int btrfsic_read_block(struct btrfsic_state *state, "btrfsic: error, failed to add a single page!\n"); return -1; } - if (submit_bio_wait(READ, bio)) { + submit_bio(READ, bio); + + /* this will also unplug the queue */ + wait_for_completion(&complete); + + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { printk(KERN_INFO "btrfsic: read error at logical %llu dev %s!\n", block_ctx->start, block_ctx->dev->name); @@ -1731,6 +1730,11 @@ static int btrfsic_read_block(struct btrfsic_state *state, return block_ctx->len; } +static void btrfsic_complete_bio_end_io(struct bio *bio, int err) +{ + complete((struct completion *)bio->bi_private); +} + static void btrfsic_dump_database(struct btrfsic_state *state) { struct list_head *elem_all; @@ -1896,9 +1900,7 @@ again: dev_state, dev_bytenr); } - if (block->logical_bytenr != bytenr && - !(!block->is_metadata && - block->logical_bytenr == 0)) + if (block->logical_bytenr != bytenr) { printk(KERN_INFO "Written block @%llu (%s/%llu/%d)" " found in hash table, %c," @@ -1908,14 +1910,15 @@ again: block->mirror_num, btrfsic_get_block_type(state, block), block->logical_bytenr); - else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) + block->logical_bytenr = bytenr; + } else if (state->print_mask & + BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "Written block @%llu (%s/%llu/%d)" " found in hash table, %c.\n", bytenr, dev_state->name, dev_bytenr, block->mirror_num, btrfsic_get_block_type(state, block)); - block->logical_bytenr = bytenr; } else { if (num_pages * PAGE_CACHE_SIZE < state->datablock_size) { @@ -2460,8 +2463,10 @@ static int btrfsic_process_written_superblock( } } - if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state, superblock, 0))) + if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) { + WARN_ON(1); btrfsic_dump_tree(state); + } return 0; } @@ -2901,7 +2906,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, btrfsic_release_block_ctx(&block_ctx); } - if (WARN_ON(!match)) { + if (!match) { printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio," " buffer->log_bytenr=%llu, submit_bio(bdev=%s," " phys_bytenr=%llu)!\n", @@ -2918,6 +2923,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, bytenr, block_ctx.dev->name, block_ctx.dev_bytenr, mirror_num); } + WARN_ON(1); } } @@ -2994,12 +3000,14 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) return submit_bh(rw, bh); } -static void __btrfsic_submit_bio(int rw, struct bio *bio) +void btrfsic_submit_bio(int rw, struct bio *bio) { struct btrfsic_dev_state *dev_state; - if (!btrfsic_is_initialized) + if (!btrfsic_is_initialized) { + submit_bio(rw, bio); return; + } mutex_lock(&btrfsic_mutex); /* since btrfsic_submit_bio() is also called before @@ -3009,7 +3017,6 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio) (rw & WRITE) && NULL != bio->bi_io_vec) { unsigned int i; u64 dev_bytenr; - u64 cur_bytenr; int bio_is_patched; char **mapped_datav; @@ -3028,7 +3035,6 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio) GFP_NOFS); if (!mapped_datav) goto leave; - cur_bytenr = dev_bytenr; for (i = 0; i < bio->bi_vcnt; i++) { BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); @@ -3040,13 +3046,16 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio) kfree(mapped_datav); goto leave; } - if (dev_state->state->print_mask & - BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE) + if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | + BTRFSIC_PRINT_MASK_VERBOSE) == + (dev_state->state->print_mask & + (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | + BTRFSIC_PRINT_MASK_VERBOSE))) printk(KERN_INFO - "#%u: bytenr=%llu, len=%u, offset=%u\n", - i, cur_bytenr, bio->bi_io_vec[i].bv_len, + "#%u: page=%p, len=%u, offset=%u\n", + i, bio->bi_io_vec[i].bv_page, + bio->bi_io_vec[i].bv_len, bio->bi_io_vec[i].bv_offset); - cur_bytenr += bio->bi_io_vec[i].bv_len; } btrfsic_process_written_block(dev_state, dev_bytenr, mapped_datav, bio->bi_vcnt, @@ -3090,20 +3099,10 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio) } leave: mutex_unlock(&btrfsic_mutex); -} -void btrfsic_submit_bio(int rw, struct bio *bio) -{ - __btrfsic_submit_bio(rw, bio); submit_bio(rw, bio); } -int btrfsic_submit_bio_wait(int rw, struct bio *bio) -{ - __btrfsic_submit_bio(rw, bio); - return submit_bio_wait(rw, bio); -} - int btrfsic_mount(struct btrfs_root *root, struct btrfs_fs_devices *fs_devices, int including_extent_data, u32 print_mask) diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h index 13b8566..8b59175 100644 --- a/fs/btrfs/check-integrity.h +++ b/fs/btrfs/check-integrity.h @@ -22,11 +22,9 @@ #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY int btrfsic_submit_bh(int rw, struct buffer_head *bh); void btrfsic_submit_bio(int rw, struct bio *bio); -int btrfsic_submit_bio_wait(int rw, struct bio *bio); #else #define btrfsic_submit_bh submit_bh #define btrfsic_submit_bio submit_bio -#define btrfsic_submit_bio_wait submit_bio_wait #endif int btrfsic_mount(struct btrfs_root *root, diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h new file mode 100644 index 0000000..7c4503e --- /dev/null +++ b/fs/btrfs/compat.h @@ -0,0 +1,7 @@ +#ifndef _COMPAT_H_ +#define _COMPAT_H_ + +#define btrfs_drop_nlink(inode) drop_nlink(inode) +#define btrfs_inc_nlink(inode) inc_nlink(inode) + +#endif /* _COMPAT_H_ */ diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 1499b27..6aad98c 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -32,6 +32,7 @@ #include <linux/writeback.h> #include <linux/bit_spinlock.h> #include <linux/slab.h> +#include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -359,7 +360,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); - if (!bio) { + if(!bio) { kfree(cb); return -ENOMEM; } diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 316136b..61b5bcd 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -274,7 +274,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, new_root_objectid); - write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(), + write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow), BTRFS_FSID_SIZE); WARN_ON(btrfs_header_generation(buf) > trans->transid); @@ -996,7 +996,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, else btrfs_set_header_owner(cow, root->root_key.objectid); - write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(), + write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow), BTRFS_FSID_SIZE); ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); @@ -1285,10 +1285,11 @@ get_old_root(struct btrfs_root *root, u64 time_seq) free_extent_buffer(eb_root); blocksize = btrfs_level_size(root, old_root->level); old = read_tree_block(root, logical, blocksize, 0); - if (WARN_ON(!old || !extent_buffer_uptodate(old))) { + if (!old || !extent_buffer_uptodate(old)) { free_extent_buffer(old); pr_warn("btrfs: failed to read tree block %llu from get_old_root\n", logical); + WARN_ON(1); } else { eb = btrfs_clone_extent_buffer(old); free_extent_buffer(old); @@ -2757,7 +2758,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, int level; int lowest_unlock = 1; u8 lowest_level = 0; - int prev_cmp = -1; + int prev_cmp; lowest_level = p->lowest_level; WARN_ON(p->nodes[0] != NULL); @@ -2768,6 +2769,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, } again: + prev_cmp = -1; b = get_old_root(root, time_seq); level = btrfs_header_level(b); p->locks[level] = BTRFS_READ_LOCK; @@ -2785,11 +2787,6 @@ again: */ btrfs_unlock_up_safe(p, level + 1); - /* - * Since we can unwind eb's we want to do a real search every - * time. - */ - prev_cmp = -1; ret = key_search(b, key, level, &prev_cmp, &slot); if (level != 0) { @@ -3151,7 +3148,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(c, root->root_key.objectid); - write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(), + write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(c), BTRFS_FSID_SIZE); write_extent_buffer(c, root->fs_info->chunk_tree_uuid, @@ -3290,7 +3287,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(split, root->root_key.objectid); write_extent_buffer(split, root->fs_info->fsid, - btrfs_header_fsid(), BTRFS_FSID_SIZE); + btrfs_header_fsid(split), BTRFS_FSID_SIZE); write_extent_buffer(split, root->fs_info->chunk_tree_uuid, btrfs_header_chunk_tree_uuid(split), BTRFS_UUID_SIZE); @@ -3340,8 +3337,8 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr) if (!nr) return 0; btrfs_init_map_token(&token); - start_item = btrfs_item_nr(start); - end_item = btrfs_item_nr(end); + start_item = btrfs_item_nr(l, start); + end_item = btrfs_item_nr(l, end); data_len = btrfs_token_item_offset(l, start_item, &token) + btrfs_token_item_size(l, start_item, &token); data_len = data_len - btrfs_token_item_offset(l, end_item, &token); @@ -3409,7 +3406,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, slot = path->slots[1]; i = left_nritems - 1; while (i >= nr) { - item = btrfs_item_nr(i); + item = btrfs_item_nr(left, i); if (!empty && push_items > 0) { if (path->slots[0] > i) @@ -3473,7 +3470,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < right_nritems; i++) { - item = btrfs_item_nr(i); + item = btrfs_item_nr(right, i); push_space -= btrfs_token_item_size(right, item, &token); btrfs_set_token_item_offset(right, item, push_space, &token); } @@ -3615,7 +3612,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, nr = min(right_nritems - 1, max_slot); for (i = 0; i < nr; i++) { - item = btrfs_item_nr(i); + item = btrfs_item_nr(right, i); if (!empty && push_items > 0) { if (path->slots[0] < i) @@ -3642,7 +3639,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, ret = 1; goto out; } - WARN_ON(!empty && push_items == btrfs_header_nritems(right)); + if (!empty && push_items == btrfs_header_nritems(right)) + WARN_ON(1); /* push data from right to left */ copy_extent_buffer(left, right, @@ -3665,7 +3663,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { u32 ioff; - item = btrfs_item_nr(i); + item = btrfs_item_nr(left, i); ioff = btrfs_token_item_offset(left, item, &token); btrfs_set_token_item_offset(left, item, @@ -3696,7 +3694,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < right_nritems; i++) { - item = btrfs_item_nr(i); + item = btrfs_item_nr(right, i); push_space = push_space - btrfs_token_item_size(right, item, &token); @@ -3837,7 +3835,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, btrfs_item_end_nr(l, mid); for (i = 0; i < nritems; i++) { - struct btrfs_item *item = btrfs_item_nr(i); + struct btrfs_item *item = btrfs_item_nr(right, i); u32 ioff; ioff = btrfs_token_item_offset(right, item, &token); @@ -4018,7 +4016,7 @@ again: data_size > BTRFS_LEAF_DATA_SIZE(root)) { if (data_size && !tried_avoid_double) goto push_for_double; - split = 2; + split = 2 ; } } } @@ -4044,7 +4042,7 @@ again: btrfs_set_header_owner(right, root->root_key.objectid); btrfs_set_header_level(right, 0); write_extent_buffer(right, root->fs_info->fsid, - btrfs_header_fsid(), BTRFS_FSID_SIZE); + btrfs_header_fsid(right), BTRFS_FSID_SIZE); write_extent_buffer(right, root->fs_info->chunk_tree_uuid, btrfs_header_chunk_tree_uuid(right), @@ -4179,7 +4177,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans, btrfs_set_path_blocking(path); - item = btrfs_item_nr(path->slots[0]); + item = btrfs_item_nr(leaf, path->slots[0]); orig_offset = btrfs_item_offset(leaf, item); item_size = btrfs_item_size(leaf, item); @@ -4202,7 +4200,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans, btrfs_cpu_key_to_disk(&disk_key, new_key); btrfs_set_item_key(leaf, &disk_key, slot); - new_item = btrfs_item_nr(slot); + new_item = btrfs_item_nr(leaf, slot); btrfs_set_item_offset(leaf, new_item, orig_offset); btrfs_set_item_size(leaf, new_item, item_size - split_offset); @@ -4341,7 +4339,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path, /* first correct the data pointers */ for (i = slot; i < nritems; i++) { u32 ioff; - item = btrfs_item_nr(i); + item = btrfs_item_nr(leaf, i); ioff = btrfs_token_item_offset(leaf, item, &token); btrfs_set_token_item_offset(leaf, item, @@ -4389,7 +4387,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path, fixup_low_keys(root, path, &disk_key, 1); } - item = btrfs_item_nr(slot); + item = btrfs_item_nr(leaf, slot); btrfs_set_item_size(leaf, item, new_size); btrfs_mark_buffer_dirty(leaf); @@ -4443,7 +4441,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path, /* first correct the data pointers */ for (i = slot; i < nritems; i++) { u32 ioff; - item = btrfs_item_nr(i); + item = btrfs_item_nr(leaf, i); ioff = btrfs_token_item_offset(leaf, item, &token); btrfs_set_token_item_offset(leaf, item, @@ -4457,7 +4455,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path, data_end = old_data; old_size = btrfs_item_size_nr(leaf, slot); - item = btrfs_item_nr(slot); + item = btrfs_item_nr(leaf, slot); btrfs_set_item_size(leaf, item, old_size + data_size); btrfs_mark_buffer_dirty(leaf); @@ -4516,7 +4514,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, for (i = slot; i < nritems; i++) { u32 ioff; - item = btrfs_item_nr( i); + item = btrfs_item_nr(leaf, i); ioff = btrfs_token_item_offset(leaf, item, &token); btrfs_set_token_item_offset(leaf, item, ioff - total_data, &token); @@ -4537,7 +4535,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, for (i = 0; i < nr; i++) { btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); btrfs_set_item_key(leaf, &disk_key, slot + i); - item = btrfs_item_nr(slot + i); + item = btrfs_item_nr(leaf, slot + i); btrfs_set_token_item_offset(leaf, item, data_end - data_size[i], &token); data_end -= data_size[i]; @@ -4732,7 +4730,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, for (i = slot + nr; i < nritems; i++) { u32 ioff; - item = btrfs_item_nr(i); + item = btrfs_item_nr(leaf, i); ioff = btrfs_token_item_offset(leaf, item, &token); btrfs_set_token_item_offset(leaf, item, ioff + dsize, &token); @@ -4825,18 +4823,14 @@ static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) btrfs_item_key_to_cpu(path->nodes[0], &key, 0); - if (key.offset > 0) { + if (key.offset > 0) key.offset--; - } else if (key.type > 0) { + else if (key.type > 0) key.type--; - key.offset = (u64)-1; - } else if (key.objectid > 0) { + else if (key.objectid > 0) key.objectid--; - key.type = (u8)-1; - key.offset = (u64)-1; - } else { + else return 1; - } btrfs_release_path(path); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -4872,6 +4866,7 @@ static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) * was nothing in the tree that matched the search criteria. */ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, + struct btrfs_key *max_key, struct btrfs_path *path, u64 min_trans) { @@ -4916,8 +4911,10 @@ again: * If it is too old, old, skip to the next one. */ while (slot < nritems) { + u64 blockptr; u64 gen; + blockptr = btrfs_node_blockptr(cur, slot); gen = btrfs_node_ptr_generation(cur, slot); if (gen < min_trans) { slot++; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 54ab861..0506f40 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -47,12 +47,6 @@ extern struct kmem_cache *btrfs_path_cachep; extern struct kmem_cache *btrfs_free_space_cachep; struct btrfs_ordered_sum; -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS -#define STATIC noinline -#else -#define STATIC static noinline -#endif - #define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */ #define BTRFS_MAX_MIRRORS 3 @@ -1586,6 +1580,7 @@ struct btrfs_fs_info { atomic_t scrubs_paused; atomic_t scrub_cancel_req; wait_queue_head_t scrub_pause_wait; + struct rw_semaphore scrub_super_lock; int scrub_workers_refcnt; struct btrfs_workers scrub_workers; struct btrfs_workers scrub_wr_completion_workers; @@ -1729,9 +1724,7 @@ struct btrfs_root { int ref_cows; int track_dirty; int in_radix; -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS - int dummy_root; -#endif + u64 defrag_trans_start; struct btrfs_key defrag_progress; struct btrfs_key defrag_max; @@ -2468,7 +2461,8 @@ static inline unsigned long btrfs_item_nr_offset(int nr) sizeof(struct btrfs_item) * nr; } -static inline struct btrfs_item *btrfs_item_nr(int nr) +static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb, + int nr) { return (struct btrfs_item *)btrfs_item_nr_offset(nr); } @@ -2481,30 +2475,30 @@ static inline u32 btrfs_item_end(struct extent_buffer *eb, static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr) { - return btrfs_item_end(eb, btrfs_item_nr(nr)); + return btrfs_item_end(eb, btrfs_item_nr(eb, nr)); } static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr) { - return btrfs_item_offset(eb, btrfs_item_nr(nr)); + return btrfs_item_offset(eb, btrfs_item_nr(eb, nr)); } static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr) { - return btrfs_item_size(eb, btrfs_item_nr(nr)); + return btrfs_item_size(eb, btrfs_item_nr(eb, nr)); } static inline void btrfs_item_key(struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr) { - struct btrfs_item *item = btrfs_item_nr(nr); + struct btrfs_item *item = btrfs_item_nr(eb, nr); read_eb_member(eb, item, struct btrfs_item, key, disk_key); } static inline void btrfs_set_item_key(struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr) { - struct btrfs_item *item = btrfs_item_nr(nr); + struct btrfs_item *item = btrfs_item_nr(eb, nr); write_eb_member(eb, item, struct btrfs_item, key, disk_key); } @@ -2672,7 +2666,7 @@ static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb, btrfs_set_header_flags(eb, flags); } -static inline unsigned long btrfs_header_fsid(void) +static inline unsigned long btrfs_header_fsid(struct extent_buffer *eb) { return offsetof(struct btrfs_header, fsid); } @@ -3111,6 +3105,11 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) ((unsigned long)(btrfs_leaf_data(leaf) + \ btrfs_item_offset_nr(leaf, slot))) +static inline struct dentry *fdentry(struct file *file) +{ + return file->f_path.dentry; +} + static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) { return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) && @@ -3309,6 +3308,7 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, int lowest_level, u64 min_trans); int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, + struct btrfs_key *max_key, struct btrfs_path *path, u64 min_trans); enum btrfs_compare_tree_result { @@ -3613,6 +3613,9 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum *sums); int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, struct bio *bio, u64 file_start, int contig); +int btrfs_csum_truncate(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + u64 isize); int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, struct list_head *list, int search_commit); /* inode.c */ @@ -3672,7 +3675,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, u32 min_type); int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); -int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput); +int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info, + int delay_iput); int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, struct extent_state **cached_state); int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, @@ -3741,6 +3745,9 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int skip_pinned); +int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace, + u64 start, u64 end, int skip_pinned, + int modified); extern const struct file_operations btrfs_file_operations; int __btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, @@ -3937,7 +3944,9 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, u64 end, struct btrfs_scrub_progress *progress, int readonly, int is_dev_replace); void btrfs_scrub_pause(struct btrfs_root *root); +void btrfs_scrub_pause_super(struct btrfs_root *root); void btrfs_scrub_continue(struct btrfs_root *root); +void btrfs_scrub_continue_super(struct btrfs_root *root); int btrfs_scrub_cancel(struct btrfs_fs_info *info); int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, struct btrfs_device *dev); @@ -4019,9 +4028,5 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info) return signal_pending(current); } -/* Sanity test specific functions */ -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS -void btrfs_test_destroy_inode(struct inode *inode); -#endif #endif diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 8d292fb..cbd9523 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -108,8 +108,8 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode) return node; } btrfs_inode->delayed_node = node; - /* can be accessed and cached in the inode */ - atomic_add(2, &node->refs); + atomic_inc(&node->refs); /* can be accessed */ + atomic_inc(&node->refs); /* cached in the inode */ spin_unlock(&root->inode_lock); return node; } @@ -138,8 +138,8 @@ again: return ERR_PTR(-ENOMEM); btrfs_init_delayed_node(node, root, ino); - /* cached in the btrfs inode and can be accessed */ - atomic_add(2, &node->refs); + atomic_inc(&node->refs); /* cached in the btrfs inode */ + atomic_inc(&node->refs); /* can be accessed */ ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); if (ret) { @@ -649,13 +649,14 @@ static int btrfs_delayed_inode_reserve_metadata( goto out; ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes); - if (!WARN_ON(ret)) + if (!ret) goto out; /* * Ok this is a problem, let's just steal from the global rsv * since this really shouldn't happen that often. */ + WARN_ON(1); ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv, dst_rsv, num_bytes); goto out; @@ -770,13 +771,13 @@ static int btrfs_batch_insert_items(struct btrfs_root *root, */ btrfs_set_path_blocking(path); - keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS); + keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS); if (!keys) { ret = -ENOMEM; goto out; } - data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS); + data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS); if (!data_size) { ret = -ENOMEM; goto error; @@ -1173,10 +1174,8 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, mutex_unlock(&delayed_node->mutex); path = btrfs_alloc_path(); - if (!path) { - btrfs_release_delayed_node(delayed_node); + if (!path) return -ENOMEM; - } path->leave_spinning = 1; block_rsv = trans->block_rsv; diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 2cfc3df..9efb94e 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -26,6 +26,7 @@ #include <linux/kthread.h> #include <linux/math64.h> #include <asm/div64.h> +#include "compat.h" #include "ctree.h" #include "extent_map.h" #include "disk-io.h" @@ -37,6 +38,7 @@ #include "rcu-string.h" #include "dev-replace.h" +static u64 btrfs_get_seconds_since_1970(void); static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, int scrub_ret); static void btrfs_dev_replace_update_device_in_mapping_tree( @@ -294,6 +296,13 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info) dev_replace->cursor_left_last_write_of_item; } +static u64 btrfs_get_seconds_since_1970(void) +{ + struct timespec t = CURRENT_TIME_SEC; + + return t.tv_sec; +} + int btrfs_dev_replace_start(struct btrfs_root *root, struct btrfs_ioctl_dev_replace_args *args) { @@ -366,7 +375,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, dev_replace->tgtdev = tgt_device; printk_in_rcu(KERN_INFO - "btrfs: dev_replace from %s (devid %llu) to %s started\n", + "btrfs: dev_replace from %s (devid %llu) to %s) started\n", src_device->missing ? "<missing disk>" : rcu_str_deref(src_device->name), src_device->devid, @@ -381,7 +390,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, * go to the tgtdev as well (refer to btrfs_map_block()). */ dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED; - dev_replace->time_started = get_seconds(); + dev_replace->time_started = btrfs_get_seconds_since_1970(); dev_replace->cursor_left = 0; dev_replace->committed_cursor_left = 0; dev_replace->cursor_left_last_write_of_item = 0; @@ -391,7 +400,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; btrfs_dev_replace_unlock(dev_replace); - btrfs_wait_ordered_roots(root->fs_info, -1); + btrfs_wait_all_ordered_extents(root->fs_info); /* force writing the updated state information to disk */ trans = btrfs_start_transaction(root, 0); @@ -461,12 +470,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, * flush all outstanding I/O and inode extent mappings before the * copy operation is declared as being finished */ - ret = btrfs_start_delalloc_roots(root->fs_info, 0); + ret = btrfs_start_all_delalloc_inodes(root->fs_info, 0); if (ret) { mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); return ret; } - btrfs_wait_ordered_roots(root->fs_info, -1); + btrfs_wait_all_ordered_extents(root->fs_info); trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { @@ -484,7 +493,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED; dev_replace->tgtdev = NULL; dev_replace->srcdev = NULL; - dev_replace->time_stopped = get_seconds(); + dev_replace->time_stopped = btrfs_get_seconds_since_1970(); dev_replace->item_needs_writeback = 1; if (scrub_ret) { @@ -641,9 +650,6 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) u64 result; int ret; - if (fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; - mutex_lock(&dev_replace->lock_finishing_cancel_unmount); btrfs_dev_replace_lock(dev_replace); switch (dev_replace->replace_state) { @@ -662,7 +668,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) break; } dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; - dev_replace->time_stopped = get_seconds(); + dev_replace->time_stopped = btrfs_get_seconds_since_1970(); dev_replace->item_needs_writeback = 1; btrfs_dev_replace_unlock(dev_replace); btrfs_scrub_cancel(fs_info); @@ -697,7 +703,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info) case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED; - dev_replace->time_stopped = get_seconds(); + dev_replace->time_stopped = btrfs_get_seconds_since_1970(); dev_replace->item_needs_writeback = 1; pr_info("btrfs: suspending dev_replace for unmount\n"); break; diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index c031ea3..79e594e 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -58,7 +58,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle return ERR_PTR(ret); WARN_ON(ret > 0); leaf = path->nodes[0]; - item = btrfs_item_nr(path->slots[0]); + item = btrfs_item_nr(leaf, path->slots[0]); ptr = btrfs_item_ptr(leaf, path->slots[0], char); BUG_ON(data_size > btrfs_item_size(leaf, item)); ptr += btrfs_item_size(leaf, item) - data_size; @@ -474,10 +474,8 @@ int verify_dir_item(struct btrfs_root *root, } /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */ - if ((btrfs_dir_data_len(leaf, dir_item) + - btrfs_dir_name_len(leaf, dir_item)) > BTRFS_MAX_XATTR_SIZE(root)) { - printk(KERN_CRIT "btrfs: invalid dir item name + data len: %u + %u\n", - (unsigned)btrfs_dir_name_len(leaf, dir_item), + if (btrfs_dir_data_len(leaf, dir_item) > BTRFS_MAX_XATTR_SIZE(root)) { + printk(KERN_CRIT "btrfs: invalid dir item data len: %u\n", (unsigned)btrfs_dir_data_len(leaf, dir_item)); return 1; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8072cfa..62176ad 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -33,6 +33,7 @@ #include <linux/uuid.h> #include <linux/semaphore.h> #include <asm/unaligned.h> +#include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -63,6 +64,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, static void btrfs_destroy_ordered_extents(struct btrfs_root *root); static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, struct btrfs_root *root); +static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t); static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root); static int btrfs_destroy_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, @@ -475,8 +477,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) if (page != eb->pages[0]) return 0; found_start = btrfs_header_bytenr(eb); - if (WARN_ON(found_start != start || !PageUptodate(page))) + if (found_start != start) { + WARN_ON(1); return 0; + } + if (!PageUptodate(page)) { + WARN_ON(1); + return 0; + } csum_tree_block(root, eb, 0); return 0; } @@ -488,7 +496,7 @@ static int check_tree_block_fsid(struct btrfs_root *root, u8 fsid[BTRFS_UUID_SIZE]; int ret = 1; - read_extent_buffer(eb, fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE); + read_extent_buffer(eb, fsid, btrfs_header_fsid(eb), BTRFS_FSID_SIZE); while (fs_devices) { if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) { ret = 0; @@ -1097,7 +1105,8 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; - eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr); + eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, + bytenr, blocksize); return eb; } @@ -1220,18 +1229,14 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, atomic_set(&root->refs, 1); root->log_transid = 0; root->last_log_commit = 0; - if (fs_info) - extent_io_tree_init(&root->dirty_log_pages, - fs_info->btree_inode->i_mapping); + extent_io_tree_init(&root->dirty_log_pages, + fs_info->btree_inode->i_mapping); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); memset(&root->root_kobj, 0, sizeof(root->root_kobj)); - if (fs_info) - root->defrag_trans_start = fs_info->generation; - else - root->defrag_trans_start = 0; + root->defrag_trans_start = fs_info->generation; init_completion(&root->kobj_unregister); root->defrag_running = 0; root->root_key.objectid = objectid; @@ -1248,22 +1253,6 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) return root; } -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS -/* Should only be used by the testing infrastructure */ -struct btrfs_root *btrfs_alloc_dummy_root(void) -{ - struct btrfs_root *root; - - root = btrfs_alloc_root(NULL); - if (!root) - return ERR_PTR(-ENOMEM); - __setup_root(4096, 4096, 4096, 4096, root, NULL, 1); - root->dummy_root = 1; - - return root; -} -#endif - struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 objectid) @@ -1303,7 +1292,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, btrfs_set_header_owner(leaf, objectid); root->node = leaf; - write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(), + write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(leaf), BTRFS_FSID_SIZE); write_extent_buffer(leaf, fs_info->chunk_tree_uuid, btrfs_header_chunk_tree_uuid(leaf), @@ -1390,7 +1379,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, root->node = leaf; write_extent_buffer(root->node, root->fs_info->fsid, - btrfs_header_fsid(), BTRFS_FSID_SIZE); + btrfs_header_fsid(root->node), BTRFS_FSID_SIZE); btrfs_mark_buffer_dirty(root->node); btrfs_tree_unlock(root->node); return root; @@ -1791,9 +1780,6 @@ sleep: wake_up_process(root->fs_info->cleaner_kthread); mutex_unlock(&root->fs_info->transaction_kthread_mutex); - if (unlikely(test_bit(BTRFS_FS_STATE_ERROR, - &root->fs_info->fs_state))) - btrfs_cleanup_transaction(root); if (!try_to_freeze()) { set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop() && @@ -2027,28 +2013,50 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_stop_workers(&fs_info->qgroup_rescan_workers); } -static void free_root_extent_buffers(struct btrfs_root *root) -{ - if (root) { - free_extent_buffer(root->node); - free_extent_buffer(root->commit_root); - root->node = NULL; - root->commit_root = NULL; - } -} - /* helper to cleanup tree roots */ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) { - free_root_extent_buffers(info->tree_root); - - free_root_extent_buffers(info->dev_root); - free_root_extent_buffers(info->extent_root); - free_root_extent_buffers(info->csum_root); - free_root_extent_buffers(info->quota_root); - free_root_extent_buffers(info->uuid_root); - if (chunk_root) - free_root_extent_buffers(info->chunk_root); + free_extent_buffer(info->tree_root->node); + free_extent_buffer(info->tree_root->commit_root); + info->tree_root->node = NULL; + info->tree_root->commit_root = NULL; + + if (info->dev_root) { + free_extent_buffer(info->dev_root->node); + free_extent_buffer(info->dev_root->commit_root); + info->dev_root->node = NULL; + info->dev_root->commit_root = NULL; + } + if (info->extent_root) { + free_extent_buffer(info->extent_root->node); + free_extent_buffer(info->extent_root->commit_root); + info->extent_root->node = NULL; + info->extent_root->commit_root = NULL; + } + if (info->csum_root) { + free_extent_buffer(info->csum_root->node); + free_extent_buffer(info->csum_root->commit_root); + info->csum_root->node = NULL; + info->csum_root->commit_root = NULL; + } + if (info->quota_root) { + free_extent_buffer(info->quota_root->node); + free_extent_buffer(info->quota_root->commit_root); + info->quota_root->node = NULL; + info->quota_root->commit_root = NULL; + } + if (info->uuid_root) { + free_extent_buffer(info->uuid_root->node); + free_extent_buffer(info->uuid_root->commit_root); + info->uuid_root->node = NULL; + info->uuid_root->commit_root = NULL; + } + if (chunk_root) { + free_extent_buffer(info->chunk_root->node); + free_extent_buffer(info->chunk_root->commit_root); + info->chunk_root->node = NULL; + info->chunk_root->commit_root = NULL; + } } static void del_fs_roots(struct btrfs_fs_info *fs_info) @@ -2222,6 +2230,7 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->scrubs_paused, 0); atomic_set(&fs_info->scrub_cancel_req, 0); init_waitqueue_head(&fs_info->scrub_pause_wait); + init_rwsem(&fs_info->scrub_super_lock); fs_info->scrub_workers_refcnt = 0; #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY fs_info->check_integrity_print_mask = 0; @@ -2263,7 +2272,7 @@ int open_ctree(struct super_block *sb, sizeof(struct btrfs_key)); set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(fs_info->btree_inode)->runtime_flags); - btrfs_insert_inode_hash(fs_info->btree_inode); + insert_inode_hash(fs_info->btree_inode); spin_lock_init(&fs_info->block_group_cache_lock); fs_info->block_group_cache_tree = RB_ROOT; @@ -2661,7 +2670,6 @@ retry_root_backup: btrfs_set_root_node(&tree_root->root_item, tree_root->node); tree_root->commit_root = btrfs_root_node(tree_root); - btrfs_set_root_refs(&tree_root->root_item, 1); location.objectid = BTRFS_EXTENT_TREE_OBJECTID; location.type = BTRFS_ROOT_ITEM_KEY; @@ -3440,7 +3448,10 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, int max_mirrors) { - return write_all_supers(root, max_mirrors); + int ret; + + ret = write_all_supers(root, max_mirrors); + return ret; } /* Drop a fs root from the radix tree and free it. */ @@ -3517,6 +3528,7 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) int btrfs_commit_super(struct btrfs_root *root) { struct btrfs_trans_handle *trans; + int ret; mutex_lock(&root->fs_info->cleaner_mutex); btrfs_run_delayed_iputs(root); @@ -3530,7 +3542,25 @@ int btrfs_commit_super(struct btrfs_root *root) trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); - return btrfs_commit_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root); + if (ret) + return ret; + /* run commit again to drop the original snapshot */ + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) + return PTR_ERR(trans); + ret = btrfs_commit_transaction(trans, root); + if (ret) + return ret; + ret = btrfs_write_and_wait_transaction(NULL, root); + if (ret) { + btrfs_error(root->fs_info, ret, + "Failed to sync btree inode to disk."); + return ret; + } + + ret = write_ctree_super(NULL, root, 0); + return ret; } int close_ctree(struct btrfs_root *root) @@ -3584,12 +3614,12 @@ int close_ctree(struct btrfs_root *root) percpu_counter_sum(&fs_info->delalloc_bytes)); } - del_fs_roots(fs_info); - btrfs_free_block_groups(fs_info); btrfs_stop_all_workers(fs_info); + del_fs_roots(fs_info); + free_root_pointers(fs_info, 1); iput(fs_info->btree_inode); @@ -3639,20 +3669,10 @@ int btrfs_set_buffer_uptodate(struct extent_buffer *buf) void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root; + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; u64 transid = btrfs_header_generation(buf); int was_dirty; -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS - /* - * This is a fast path so only do this check if we have sanity tests - * enabled. Normal people shouldn't be marking dummy buffers as dirty - * outside of the sanity tests. - */ - if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &buf->bflags))) - return; -#endif - root = BTRFS_I(buf->pages[0]->mapping->host)->root; btrfs_assert_tree_locked(buf); if (transid != root->fs_info->generation) WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, " @@ -3782,8 +3802,7 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) while (!list_empty(&splice)) { root = list_first_entry(&splice, struct btrfs_root, ordered_root); - list_move_tail(&root->ordered_root, - &fs_info->ordered_roots); + list_del_init(&root->ordered_root); btrfs_destroy_ordered_extents(root); @@ -3861,6 +3880,24 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, return ret; } +static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t) +{ + struct btrfs_pending_snapshot *snapshot; + struct list_head splice; + + INIT_LIST_HEAD(&splice); + + list_splice_init(&t->pending_snapshots, &splice); + + while (!list_empty(&splice)) { + snapshot = list_entry(splice.next, + struct btrfs_pending_snapshot, + list); + snapshot->error = -ECANCELED; + list_del_init(&snapshot->list); + } +} + static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) { struct btrfs_inode *btrfs_inode; @@ -3990,13 +4027,15 @@ again: void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, struct btrfs_root *root) { - btrfs_destroy_ordered_operations(cur_trans, root); - btrfs_destroy_delayed_refs(cur_trans, root); + btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv, + cur_trans->dirty_pages.dirty_bytes); cur_trans->state = TRANS_STATE_COMMIT_START; wake_up(&root->fs_info->transaction_blocked_wait); + btrfs_evict_pending_snapshots(cur_trans); + cur_trans->state = TRANS_STATE_UNBLOCKED; wake_up(&root->fs_info->transaction_wait); @@ -4020,51 +4059,63 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, static int btrfs_cleanup_transaction(struct btrfs_root *root) { struct btrfs_transaction *t; + LIST_HEAD(list); mutex_lock(&root->fs_info->transaction_kthread_mutex); spin_lock(&root->fs_info->trans_lock); - while (!list_empty(&root->fs_info->trans_list)) { - t = list_first_entry(&root->fs_info->trans_list, - struct btrfs_transaction, list); - if (t->state >= TRANS_STATE_COMMIT_START) { - atomic_inc(&t->use_count); - spin_unlock(&root->fs_info->trans_lock); - btrfs_wait_for_commit(root, t->transid); - btrfs_put_transaction(t); - spin_lock(&root->fs_info->trans_lock); - continue; - } - if (t == root->fs_info->running_transaction) { - t->state = TRANS_STATE_COMMIT_DOING; - spin_unlock(&root->fs_info->trans_lock); - /* - * We wait for 0 num_writers since we don't hold a trans - * handle open currently for this transaction. - */ - wait_event(t->writer_wait, - atomic_read(&t->num_writers) == 0); - } else { - spin_unlock(&root->fs_info->trans_lock); - } - btrfs_cleanup_one_transaction(t, root); + list_splice_init(&root->fs_info->trans_list, &list); + root->fs_info->running_transaction = NULL; + spin_unlock(&root->fs_info->trans_lock); - spin_lock(&root->fs_info->trans_lock); - if (t == root->fs_info->running_transaction) - root->fs_info->running_transaction = NULL; - list_del_init(&t->list); - spin_unlock(&root->fs_info->trans_lock); + while (!list_empty(&list)) { + t = list_entry(list.next, struct btrfs_transaction, list); - btrfs_put_transaction(t); - trace_btrfs_transaction_commit(root); - spin_lock(&root->fs_info->trans_lock); + btrfs_destroy_ordered_operations(t, root); + + btrfs_destroy_all_ordered_extents(root->fs_info); + + btrfs_destroy_delayed_refs(t, root); + + /* + * FIXME: cleanup wait for commit + * We needn't acquire the lock here, because we are during + * the umount, there is no other task which will change it. + */ + t->state = TRANS_STATE_COMMIT_START; + smp_mb(); + if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) + wake_up(&root->fs_info->transaction_blocked_wait); + + btrfs_evict_pending_snapshots(t); + + t->state = TRANS_STATE_UNBLOCKED; + smp_mb(); + if (waitqueue_active(&root->fs_info->transaction_wait)) + wake_up(&root->fs_info->transaction_wait); + + btrfs_destroy_delayed_inodes(root); + btrfs_assert_delayed_root_empty(root); + + btrfs_destroy_all_delalloc_inodes(root->fs_info); + + btrfs_destroy_marked_extents(root, &t->dirty_pages, + EXTENT_DIRTY); + + btrfs_destroy_pinned_extent(root, + root->fs_info->pinned_extents); + + t->state = TRANS_STATE_COMPLETED; + smp_mb(); + if (waitqueue_active(&t->commit_wait)) + wake_up(&t->commit_wait); + + atomic_set(&t->use_count, 0); + list_del_init(&t->list); + memset(t, 0, sizeof(*t)); + kmem_cache_free(btrfs_transaction_cachep, t); } - spin_unlock(&root->fs_info->trans_lock); - btrfs_destroy_all_ordered_extents(root->fs_info); - btrfs_destroy_delayed_inodes(root); - btrfs_assert_delayed_root_empty(root); - btrfs_destroy_pinned_extent(root, root->fs_info->pinned_extents); - btrfs_destroy_all_delalloc_inodes(root->fs_info); + mutex_unlock(&root->fs_info->transaction_kthread_mutex); return 0; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 53059df..5ce2a7d 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -86,10 +86,6 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_free_fs_root(struct btrfs_root *root); -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS -struct btrfs_root *btrfs_alloc_dummy_root(void); -#endif - /* * This function is used to grab the root, and avoid it is freed when we * access it. But it doesn't ensure that the tree is not dropped. diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 41422a3..4b86916 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -5,6 +5,7 @@ #include "btrfs_inode.h" #include "print-tree.h" #include "export.h" +#include "compat.h" #define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, \ parent_objectid) / 4) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 45d98d0..d58bef1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/ratelimit.h> #include <linux/percpu_counter.h> +#include "compat.h" #include "hash.h" #include "ctree.h" #include "disk-io.h" @@ -1550,8 +1551,9 @@ again: if (ret && !insert) { err = -ENOENT; goto out; - } else if (WARN_ON(ret)) { + } else if (ret) { err = -EIO; + WARN_ON(1); goto out; } @@ -1977,6 +1979,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_extent_item *item; u64 refs; int ret; + int err = 0; path = btrfs_alloc_path(); if (!path) @@ -1989,9 +1992,14 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, path, bytenr, num_bytes, parent, root_objectid, owner, offset, refs_to_add, extent_op); - if (ret != -EAGAIN) + if (ret == 0) goto out; + if (ret != -EAGAIN) { + err = ret; + goto out; + } + leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(leaf, item); @@ -2013,7 +2021,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_abort_transaction(trans, root, ret); out: btrfs_free_path(path); - return ret; + return err; } static int run_delayed_data_ref(struct btrfs_trans_handle *trans, @@ -2129,28 +2137,15 @@ again: } if (ret > 0) { if (metadata) { - if (path->slots[0] > 0) { - path->slots[0]--; - btrfs_item_key_to_cpu(path->nodes[0], &key, - path->slots[0]); - if (key.objectid == node->bytenr && - key.type == BTRFS_EXTENT_ITEM_KEY && - key.offset == node->num_bytes) - ret = 0; - } - if (ret > 0) { - btrfs_release_path(path); - metadata = 0; + btrfs_release_path(path); + metadata = 0; - key.objectid = node->bytenr; - key.offset = node->num_bytes; - key.type = BTRFS_EXTENT_ITEM_KEY; - goto again; - } - } else { - err = -EIO; - goto out; + key.offset = node->num_bytes; + key.type = BTRFS_EXTENT_ITEM_KEY; + goto again; } + err = -EIO; + goto out; } leaf = path->nodes[0]; @@ -2239,12 +2234,8 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, { int ret = 0; - if (trans->aborted) { - if (insert_reserved) - btrfs_pin_extent(root, node->bytenr, - node->num_bytes, 1); + if (trans->aborted) return 0; - } if (btrfs_delayed_ref_is_head(node)) { struct btrfs_delayed_ref_head *head; @@ -2420,14 +2411,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, btrfs_free_delayed_extent_op(extent_op); if (ret) { - /* - * Need to reset must_insert_reserved if - * there was an error so the abort stuff - * can cleanup the reserved space - * properly. - */ - if (must_insert_reserved) - locked_ref->must_insert_reserved = 1; btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); spin_lock(&delayed_refs->lock); btrfs_delayed_ref_unlock(locked_ref); @@ -3214,7 +3197,8 @@ again: if (ret) goto out_put; - ret = btrfs_truncate_free_space_cache(root, trans, inode); + ret = btrfs_truncate_free_space_cache(root, trans, path, + inode); if (ret) goto out_put; } @@ -3334,9 +3318,10 @@ again: last = cache->key.objectid + cache->key.offset; err = write_one_cache_group(trans, root, path, cache); - btrfs_put_block_group(cache); if (err) /* File system offline */ goto out; + + btrfs_put_block_group(cache); } while (1) { @@ -3620,9 +3605,10 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) /* make sure bytes are sectorsize aligned */ bytes = ALIGN(bytes, root->sectorsize); - if (btrfs_is_free_space_inode(inode)) { + if (root == root->fs_info->tree_root || + BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) { + alloc_chunk = 0; committed = 1; - ASSERT(current->journal_info); } data_sinfo = fs_info->data_sinfo; @@ -3650,16 +3636,6 @@ again: spin_unlock(&data_sinfo->lock); alloc: alloc_target = btrfs_get_alloc_profile(root, 1); - /* - * It is ugly that we don't call nolock join - * transaction for the free space inode case here. - * But it is safe because we only do the data space - * reservation for the free space cache in the - * transaction context, the common join transaction - * just increase the counter of the current transaction - * handler, doesn't try to acquire the trans_lock of - * the fs. - */ trans = btrfs_join_transaction(root); if (IS_ERR(trans)) return PTR_ERR(trans); @@ -3705,9 +3681,6 @@ commit_trans: goto again; } - trace_btrfs_space_reservation(root->fs_info, - "space_info:enospc", - data_sinfo->flags, bytes, 1); return -ENOSPC; } data_sinfo->bytes_may_use += bytes; @@ -4016,26 +3989,12 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, * the filesystem is readonly(all dirty pages are written to * the disk). */ - btrfs_start_delalloc_roots(root->fs_info, 0); + btrfs_start_all_delalloc_inodes(root->fs_info, 0); if (!current->journal_info) - btrfs_wait_ordered_roots(root->fs_info, -1); + btrfs_wait_all_ordered_extents(root->fs_info); } } -static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim) -{ - u64 bytes; - int nr; - - bytes = btrfs_calc_trans_metadata_size(root, 1); - nr = (int)div64_u64(to_reclaim, bytes); - if (!nr) - nr = 1; - return nr; -} - -#define EXTENT_SIZE_PER_ITEM (256 * 1024) - /* * shrink metadata reservation for delalloc */ @@ -4048,30 +4007,24 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, u64 delalloc_bytes; u64 max_reclaim; long time_left; - unsigned long nr_pages; - int loops; - int items; + unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; + int loops = 0; enum btrfs_reserve_flush_enum flush; - /* Calc the number of the pages we need flush for space reservation */ - items = calc_reclaim_items_nr(root, to_reclaim); - to_reclaim = items * EXTENT_SIZE_PER_ITEM; - trans = (struct btrfs_trans_handle *)current->journal_info; block_rsv = &root->fs_info->delalloc_block_rsv; space_info = block_rsv->space_info; + smp_mb(); delalloc_bytes = percpu_counter_sum_positive( &root->fs_info->delalloc_bytes); if (delalloc_bytes == 0) { if (trans) return; - if (wait_ordered) - btrfs_wait_ordered_roots(root->fs_info, items); + btrfs_wait_all_ordered_extents(root->fs_info); return; } - loops = 0; while (delalloc_bytes && loops < 3) { max_reclaim = min(delalloc_bytes, to_reclaim); nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; @@ -4080,19 +4033,9 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, * We need to wait for the async pages to actually start before * we do anything. */ - max_reclaim = atomic_read(&root->fs_info->async_delalloc_pages); - if (!max_reclaim) - goto skip_async; - - if (max_reclaim <= nr_pages) - max_reclaim = 0; - else - max_reclaim -= nr_pages; - wait_event(root->fs_info->async_submit_wait, - atomic_read(&root->fs_info->async_delalloc_pages) <= - (int)max_reclaim); -skip_async: + !atomic_read(&root->fs_info->async_delalloc_pages)); + if (!trans) flush = BTRFS_RESERVE_FLUSH_ALL; else @@ -4106,12 +4049,13 @@ skip_async: loops++; if (wait_ordered && !trans) { - btrfs_wait_ordered_roots(root->fs_info, items); + btrfs_wait_all_ordered_extents(root->fs_info); } else { time_left = schedule_timeout_killable(1); if (time_left) break; } + smp_mb(); delalloc_bytes = percpu_counter_sum_positive( &root->fs_info->delalloc_bytes); } @@ -4196,11 +4140,16 @@ static int flush_space(struct btrfs_root *root, switch (state) { case FLUSH_DELAYED_ITEMS_NR: case FLUSH_DELAYED_ITEMS: - if (state == FLUSH_DELAYED_ITEMS_NR) - nr = calc_reclaim_items_nr(root, num_bytes) * 2; - else - nr = -1; + if (state == FLUSH_DELAYED_ITEMS_NR) { + u64 bytes = btrfs_calc_trans_metadata_size(root, 1); + nr = (int)div64_u64(num_bytes, bytes); + if (!nr) + nr = 1; + nr *= 2; + } else { + nr = -1; + } trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); @@ -4383,10 +4332,6 @@ out: !block_rsv_use_bytes(global_rsv, orig_bytes)) ret = 0; } - if (ret == -ENOSPC) - trace_btrfs_space_reservation(root->fs_info, - "space_info:enospc", - space_info->flags, orig_bytes, 1); if (flushing) { spin_lock(&space_info->lock); space_info->flush = 0; @@ -5041,7 +4986,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); if (to_reserve) - trace_btrfs_space_reservation(root->fs_info, "delalloc", + trace_btrfs_space_reservation(root->fs_info,"delalloc", btrfs_ino(inode), to_reserve, 1); block_rsv_add_bytes(block_rsv, to_reserve, 1); @@ -5319,8 +5264,6 @@ static int pin_down_extent(struct btrfs_root *root, set_extent_dirty(root->fs_info->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); - if (reserved) - trace_btrfs_reserved_extent_free(root, bytenr, num_bytes); return 0; } @@ -5775,8 +5718,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, } extent_slot = path->slots[0]; } - } else if (WARN_ON(ret == -ENOENT)) { + } else if (ret == -ENOENT) { btrfs_print_leaf(extent_root, path->nodes[0]); + WARN_ON(1); btrfs_err(info, "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu", bytenr, parent, root_objectid, owner_objectid, @@ -6023,7 +5967,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, btrfs_add_free_space(cache, buf->start, buf->len); btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE); - trace_btrfs_reserved_extent_free(root, buf->start, buf->len); pin = 0; } out: @@ -6651,6 +6594,8 @@ again: } } + trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); + return ret; } @@ -6762,7 +6707,6 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, ins->objectid, ins->offset); BUG(); } - trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); return ret; } @@ -6787,18 +6731,13 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, size += sizeof(*block_info); path = btrfs_alloc_path(); - if (!path) { - btrfs_free_and_pin_reserved_extent(root, ins->objectid, - root->leafsize); + if (!path) return -ENOMEM; - } path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, ins, size); if (ret) { - btrfs_free_and_pin_reserved_extent(root, ins->objectid, - root->leafsize); btrfs_free_path(path); return ret; } @@ -6840,8 +6779,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, ins->objectid, ins->offset); BUG(); } - - trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->leafsize); return ret; } @@ -8046,7 +7983,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) spin_lock(&sinfo->lock); - for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) + for(i = 0; i < BTRFS_NR_RAID_TYPES; i++) if (!list_empty(&sinfo->block_groups[i])) free_bytes += __btrfs_get_ro_block_group_free_space( &sinfo->block_groups[i]); @@ -8334,14 +8271,15 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) release_global_block_rsv(info); - while (!list_empty(&info->space_info)) { + while(!list_empty(&info->space_info)) { space_info = list_entry(info->space_info.next, struct btrfs_space_info, list); if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) { - if (WARN_ON(space_info->bytes_pinned > 0 || + if (space_info->bytes_pinned > 0 || space_info->bytes_reserved > 0 || - space_info->bytes_may_use > 0)) { + space_info->bytes_may_use > 0) { + WARN_ON(1); dump_space_info(space_info, 0, 0); } } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ff43802..51731b7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -13,13 +13,13 @@ #include <linux/cleancache.h> #include "extent_io.h" #include "extent_map.h" +#include "compat.h" #include "ctree.h" #include "btrfs_inode.h" #include "volumes.h" #include "check-integrity.h" #include "locking.h" #include "rcu-string.h" -#include "backref.h" static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; @@ -1597,10 +1597,11 @@ done: * * 1 is returned if we find something, 0 if nothing was in the tree */ -STATIC u64 find_lock_delalloc_range(struct inode *inode, - struct extent_io_tree *tree, - struct page *locked_page, u64 *start, - u64 *end, u64 max_bytes) +static noinline u64 find_lock_delalloc_range(struct inode *inode, + struct extent_io_tree *tree, + struct page *locked_page, + u64 *start, u64 *end, + u64 max_bytes) { u64 delalloc_start; u64 delalloc_end; @@ -1739,8 +1740,10 @@ u64 count_range_bits(struct extent_io_tree *tree, u64 last = 0; int found = 0; - if (WARN_ON(search_end <= cur_start)) + if (search_end <= cur_start) { + WARN_ON(1); return 0; + } spin_lock(&tree->lock); if (cur_start == 0 && bits == EXTENT_DIRTY) { @@ -1952,6 +1955,11 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec, return err; } +static void repair_io_failure_callback(struct bio *bio, int err) +{ + complete(bio->bi_private); +} + /* * this bypasses the standard btrfs submit functions deliberately, as * the standard behavior is to write all copies in a raid setup. here we only @@ -1968,13 +1976,13 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, { struct bio *bio; struct btrfs_device *dev; + DECLARE_COMPLETION_ONSTACK(compl); u64 map_length = 0; u64 sector; struct btrfs_bio *bbio = NULL; struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; int ret; - ASSERT(!(fs_info->sb->s_flags & MS_RDONLY)); BUG_ON(!mirror_num); /* we can't repair anything in raid56 yet */ @@ -1984,6 +1992,8 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, bio = btrfs_io_bio_alloc(GFP_NOFS, 1); if (!bio) return -EIO; + bio->bi_private = &compl; + bio->bi_end_io = repair_io_failure_callback; bio->bi_size = 0; map_length = length; @@ -2004,8 +2014,10 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, } bio->bi_bdev = dev->bdev; bio_add_page(bio, page, length, start - page_offset(page)); + btrfsic_submit_bio(WRITE_SYNC, bio); + wait_for_completion(&compl); - if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) { + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { /* try to remap that extent elsewhere? */ bio_put(bio); btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); @@ -2027,9 +2039,6 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); int ret = 0; - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; - for (i = 0; i < num_pages; i++) { struct page *p = extent_buffer_page(eb, i); ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE, @@ -2051,12 +2060,12 @@ static int clean_io_failure(u64 start, struct page *page) u64 private; u64 private_failure; struct io_failure_record *failrec; - struct inode *inode = page->mapping->host; - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct btrfs_fs_info *fs_info; struct extent_state *state; int num_copies; int did_repair = 0; int ret; + struct inode *inode = page->mapping->host; private = 0; ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, @@ -2079,8 +2088,6 @@ static int clean_io_failure(u64 start, struct page *page) did_repair = 1; goto out; } - if (fs_info->sb->s_flags & MS_RDONLY) - goto out; spin_lock(&BTRFS_I(inode)->io_tree.lock); state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, @@ -2090,6 +2097,7 @@ static int clean_io_failure(u64 start, struct page *page) if (state && state->start <= failrec->start && state->end >= failrec->start + failrec->len - 1) { + fs_info = BTRFS_I(inode)->root->fs_info; num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len); if (num_copies > 1) { @@ -3561,8 +3569,9 @@ retry: * but no sense in crashing the users box for something * we can survive anyway. */ - if (WARN_ON(!eb)) { + if (!eb) { spin_unlock(&mapping->private_lock); + WARN_ON(1); continue; } @@ -4029,7 +4038,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, if (offset >= last) return NULL; - while (1) { + while(1) { len = last - offset; if (len == 0) break; @@ -4053,19 +4062,6 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, return NULL; } -static noinline int count_ext_ref(u64 inum, u64 offset, u64 root_id, void *ctx) -{ - unsigned long cnt = *((unsigned long *)ctx); - - cnt++; - *((unsigned long *)ctx) = cnt; - - /* Now we're sure that the extent is shared. */ - if (cnt > 1) - return 1; - return 0; -} - int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len, get_extent_t *get_extent) { @@ -4132,7 +4128,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, last = found_key.offset; last_for_get_extent = last + 1; } - btrfs_release_path(path); + btrfs_free_path(path); /* * we might have some extents allocated but more delalloc past those @@ -4202,24 +4198,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, flags |= (FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN); } else { - unsigned long ref_cnt = 0; - disko = em->block_start + offset_in_extent; - - /* - * As btrfs supports shared space, this information - * can be exported to userspace tools via - * flag FIEMAP_EXTENT_SHARED. - */ - ret = iterate_inodes_from_logical( - em->block_start, - BTRFS_I(inode)->root->fs_info, - path, count_ext_ref, &ref_cnt); - if (ret < 0 && ret != -ENOENT) - goto out_free; - - if (ref_cnt > 1) - flags |= FIEMAP_EXTENT_SHARED; } if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) flags |= FIEMAP_EXTENT_ENCODED; @@ -4251,7 +4230,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, out_free: free_extent_map(em); out: - btrfs_free_path(path); unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1, &cached_state, GFP_NOFS); return ret; @@ -4477,23 +4455,6 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb) } } -struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, - u64 start) -{ - struct extent_buffer *eb; - - rcu_read_lock(); - eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); - if (eb && atomic_inc_not_zero(&eb->refs)) { - rcu_read_unlock(); - mark_extent_buffer_accessed(eb); - return eb; - } - rcu_read_unlock(); - - return NULL; -} - struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, u64 start, unsigned long len) { @@ -4507,10 +4468,14 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, int uptodate = 1; int ret; - - eb = find_extent_buffer(tree, start); - if (eb) + rcu_read_lock(); + eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); + if (eb && atomic_inc_not_zero(&eb->refs)) { + rcu_read_unlock(); + mark_extent_buffer_accessed(eb); return eb; + } + rcu_read_unlock(); eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS); if (!eb) @@ -4569,17 +4534,24 @@ again: spin_lock(&tree->buffer_lock); ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb); - spin_unlock(&tree->buffer_lock); - radix_tree_preload_end(); if (ret == -EEXIST) { - exists = find_extent_buffer(tree, start); - if (exists) - goto free_eb; - else + exists = radix_tree_lookup(&tree->buffer, + start >> PAGE_CACHE_SHIFT); + if (!atomic_inc_not_zero(&exists->refs)) { + spin_unlock(&tree->buffer_lock); + radix_tree_preload_end(); + exists = NULL; goto again; + } + spin_unlock(&tree->buffer_lock); + radix_tree_preload_end(); + mark_extent_buffer_accessed(exists); + goto free_eb; } /* add one reference for the tree */ check_buffer_tree_ref(eb); + spin_unlock(&tree->buffer_lock); + radix_tree_preload_end(); /* * there is a race where release page may have @@ -4610,6 +4582,23 @@ free_eb: return exists; } +struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len) +{ + struct extent_buffer *eb; + + rcu_read_lock(); + eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); + if (eb && atomic_inc_not_zero(&eb->refs)) { + rcu_read_unlock(); + mark_extent_buffer_accessed(eb); + return eb; + } + rcu_read_unlock(); + + return NULL; +} + static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head) { struct extent_buffer *eb = @@ -5073,6 +5062,23 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, } } +static void move_pages(struct page *dst_page, struct page *src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + char *dst_kaddr = page_address(dst_page); + if (dst_page == src_page) { + memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); + } else { + char *src_kaddr = page_address(src_page); + char *p = dst_kaddr + dst_off + len; + char *s = src_kaddr + src_off + len; + + while (len--) + *--p = *--s; + } +} + static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) { unsigned long distance = (src > dst) ? src - dst : dst - src; @@ -5183,7 +5189,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, cur = min_t(unsigned long, len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); - copy_pages(extent_buffer_page(dst, dst_i), + move_pages(extent_buffer_page(dst, dst_i), extent_buffer_page(dst, src_i), dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 19620c5..6dbc645 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -271,7 +271,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len); struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, - u64 start); + u64 start, unsigned long len); void free_extent_buffer(struct extent_buffer *eb); void free_extent_buffer_stale(struct extent_buffer *eb); #define WAIT_NONE 0 @@ -345,10 +345,4 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, int end_extent_writepage(struct page *page, int err, u64 start, u64 end); int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, int mirror_num); -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS -noinline u64 find_lock_delalloc_range(struct inode *inode, - struct extent_io_tree *tree, - struct page *locked_page, u64 *start, - u64 *end, u64 max_bytes); -#endif #endif diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 93fba71..61adc44 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -3,10 +3,10 @@ #include <linux/rbtree.h> -#define EXTENT_MAP_LAST_BYTE ((u64)-4) -#define EXTENT_MAP_HOLE ((u64)-3) -#define EXTENT_MAP_INLINE ((u64)-2) -#define EXTENT_MAP_DELALLOC ((u64)-1) +#define EXTENT_MAP_LAST_BYTE (u64)-4 +#define EXTENT_MAP_HOLE (u64)-3 +#define EXTENT_MAP_INLINE (u64)-2 +#define EXTENT_MAP_DELALLOC (u64)-1 /* bits for the flags field */ #define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */ diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 6f38488..4f53159 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -329,9 +329,6 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, u64 csum_end; u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); - ASSERT(start == ALIGN(start, root->sectorsize) && - (end + 1) == ALIGN(end + 1, root->sectorsize)); - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -849,8 +846,10 @@ insert: path->leave_spinning = 0; if (ret < 0) goto fail_unlock; - if (WARN_ON(ret != 0)) + if (ret != 0) { + WARN_ON(1); goto fail_unlock; + } leaf = path->nodes[0]; csum: item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 82d0342..72da4df 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -39,6 +39,7 @@ #include "print-tree.h" #include "tree-log.h" #include "locking.h" +#include "compat.h" #include "volumes.h" static struct kmem_cache *btrfs_inode_defrag_cachep; @@ -369,7 +370,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) u64 root_objectid = 0; atomic_inc(&fs_info->defrag_running); - while (1) { + while(1) { /* Pause the auto defragger. */ if (test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)) @@ -1280,7 +1281,6 @@ again: } wait_on_page_writeback(pages[i]); } - faili = num_pages - 1; err = 0; if (start_pos < inode->i_size) { struct btrfs_ordered_extent *ordered; @@ -1299,10 +1299,8 @@ again: unlock_page(pages[i]); page_cache_release(pages[i]); } - err = btrfs_wait_ordered_range(inode, start_pos, - last_pos - start_pos); - if (err) - goto fail; + btrfs_wait_ordered_range(inode, start_pos, + last_pos - start_pos); goto again; } if (ordered) @@ -1811,13 +1809,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) atomic_inc(&root->log_batch); full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); - if (full_sync) { - ret = btrfs_wait_ordered_range(inode, start, end - start + 1); - if (ret) { - mutex_unlock(&inode->i_mutex); - goto out; - } - } + if (full_sync) + btrfs_wait_ordered_range(inode, start, end - start + 1); atomic_inc(&root->log_batch); /* @@ -1883,20 +1876,27 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) mutex_unlock(&inode->i_mutex); if (ret != BTRFS_NO_LOG_SYNC) { - if (!ret) { + if (ret > 0) { + /* + * If we didn't already wait for ordered extents we need + * to do that now. + */ + if (!full_sync) + btrfs_wait_ordered_range(inode, start, + end - start + 1); + ret = btrfs_commit_transaction(trans, root); + } else { ret = btrfs_sync_log(trans, root); - if (!ret) { + if (ret == 0) { ret = btrfs_end_transaction(trans, root); - goto out; + } else { + if (!full_sync) + btrfs_wait_ordered_range(inode, start, + end - + start + 1); + ret = btrfs_commit_transaction(trans, root); } } - if (!full_sync) { - ret = btrfs_wait_ordered_range(inode, start, - end - start + 1); - if (ret) - goto out; - } - ret = btrfs_commit_transaction(trans, root); } else { ret = btrfs_end_transaction(trans, root); } @@ -2067,9 +2067,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) bool same_page = ((offset >> PAGE_CACHE_SHIFT) == ((offset + len - 1) >> PAGE_CACHE_SHIFT)); - ret = btrfs_wait_ordered_range(inode, offset, len); - if (ret) - return ret; + btrfs_wait_ordered_range(inode, offset, len); mutex_lock(&inode->i_mutex); /* @@ -2138,12 +2136,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) btrfs_put_ordered_extent(ordered); unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state, GFP_NOFS); - ret = btrfs_wait_ordered_range(inode, lockstart, - lockend - lockstart + 1); - if (ret) { - mutex_unlock(&inode->i_mutex); - return ret; - } + btrfs_wait_ordered_range(inode, lockstart, + lockend - lockstart + 1); } path = btrfs_alloc_path(); @@ -2314,10 +2308,7 @@ static long btrfs_fallocate(struct file *file, int mode, * wait for ordered IO before we have any locks. We'll loop again * below with the locks held. */ - ret = btrfs_wait_ordered_range(inode, alloc_start, - alloc_end - alloc_start); - if (ret) - goto out; + btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); locked_end = alloc_end - 1; while (1) { @@ -2341,10 +2332,8 @@ static long btrfs_fallocate(struct file *file, int mode, * we can't wait on the range with the transaction * running or with the extent lock held */ - ret = btrfs_wait_ordered_range(inode, alloc_start, - alloc_end - alloc_start); - if (ret) - goto out; + btrfs_wait_ordered_range(inode, alloc_start, + alloc_end - alloc_start); } else { if (ordered) btrfs_put_ordered_extent(ordered); @@ -2416,12 +2405,14 @@ out_reserve_fail: static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) { struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map *em = NULL; + struct extent_map *em; struct extent_state *cached_state = NULL; u64 lockstart = *offset; u64 lockend = i_size_read(inode); u64 start = *offset; + u64 orig_start = *offset; u64 len = i_size_read(inode); + u64 last_end = 0; int ret = 0; lockend = max_t(u64, root->sectorsize, lockend); @@ -2438,35 +2429,89 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, &cached_state); - while (start < inode->i_size) { + /* + * Delalloc is such a pain. If we have a hole and we have pending + * delalloc for a portion of the hole we will get back a hole that + * exists for the entire range since it hasn't been actually written + * yet. So to take care of this case we need to look for an extent just + * before the position we want in case there is outstanding delalloc + * going on here. + */ + if (whence == SEEK_HOLE && start != 0) { + if (start <= root->sectorsize) + em = btrfs_get_extent_fiemap(inode, NULL, 0, 0, + root->sectorsize, 0); + else + em = btrfs_get_extent_fiemap(inode, NULL, 0, + start - root->sectorsize, + root->sectorsize, 0); + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto out; + } + last_end = em->start + em->len; + if (em->block_start == EXTENT_MAP_DELALLOC) + last_end = min_t(u64, last_end, inode->i_size); + free_extent_map(em); + } + + while (1) { em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0); if (IS_ERR(em)) { ret = PTR_ERR(em); - em = NULL; break; } - if (whence == SEEK_HOLE && - (em->block_start == EXTENT_MAP_HOLE || - test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) - break; - else if (whence == SEEK_DATA && - (em->block_start != EXTENT_MAP_HOLE && - !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) - break; + if (em->block_start == EXTENT_MAP_HOLE) { + if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { + if (last_end <= orig_start) { + free_extent_map(em); + ret = -ENXIO; + break; + } + } + + if (whence == SEEK_HOLE) { + *offset = start; + free_extent_map(em); + break; + } + } else { + if (whence == SEEK_DATA) { + if (em->block_start == EXTENT_MAP_DELALLOC) { + if (start >= inode->i_size) { + free_extent_map(em); + ret = -ENXIO; + break; + } + } + + if (!test_bit(EXTENT_FLAG_PREALLOC, + &em->flags)) { + *offset = start; + free_extent_map(em); + break; + } + } + } start = em->start + em->len; + last_end = em->start + em->len; + + if (em->block_start == EXTENT_MAP_DELALLOC) + last_end = min_t(u64, last_end, inode->i_size); + + if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { + free_extent_map(em); + ret = -ENXIO; + break; + } free_extent_map(em); - em = NULL; cond_resched(); } - free_extent_map(em); - if (!ret) { - if (whence == SEEK_DATA && start >= inode->i_size) - ret = -ENXIO; - else - *offset = min_t(loff_t, start, inode->i_size); - } + if (!ret) + *offset = min(*offset, inode->i_size); +out: unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state, GFP_NOFS); return ret; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 057be95..b4f9904 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -218,6 +218,7 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, int btrfs_truncate_free_space_cache(struct btrfs_root *root, struct btrfs_trans_handle *trans, + struct btrfs_path *path, struct inode *inode) { int ret = 0; @@ -1008,13 +1009,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, if (ret) goto out; - ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); - if (ret) { - clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, - GFP_NOFS); - goto out; - } + + btrfs_wait_ordered_range(inode, 0, (u64)-1); key.objectid = BTRFS_FREE_SPACE_OBJECTID; key.offset = offset; @@ -2280,7 +2276,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, goto out; entry = rb_entry(node, struct btrfs_free_space, offset_index); - while (1) { + while(1) { if (entry->bytes < bytes && entry->bytes > *max_extent_size) *max_extent_size = entry->bytes; @@ -2971,15 +2967,19 @@ out: int btrfs_write_out_ino_cache(struct btrfs_root *root, struct btrfs_trans_handle *trans, - struct btrfs_path *path, - struct inode *inode) + struct btrfs_path *path) { struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; + struct inode *inode; int ret; if (!btrfs_test_opt(root, INODE_MAP_CACHE)) return 0; + inode = lookup_free_ino_inode(root, path); + if (IS_ERR(inode)) + return 0; + ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); if (ret) { btrfs_delalloc_release_metadata(inode, inode->i_size); @@ -2990,6 +2990,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, #endif } + iput(inode); return ret; } diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index 0cf4977..e737f92 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -58,6 +58,7 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, struct btrfs_block_rsv *rsv); int btrfs_truncate_free_space_cache(struct btrfs_root *root, struct btrfs_trans_handle *trans, + struct btrfs_path *path, struct inode *inode); int load_free_space_cache(struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *block_group); @@ -75,8 +76,7 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root); int btrfs_write_out_ino_cache(struct btrfs_root *root, struct btrfs_trans_handle *trans, - struct btrfs_path *path, - struct inode *inode); + struct btrfs_path *path); void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group); int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index ec82fae..e0b7034 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -369,7 +369,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans, goto out; leaf = path->nodes[0]; - item = btrfs_item_nr(path->slots[0]); + item = btrfs_item_nr(leaf, path->slots[0]); ptr = (unsigned long)btrfs_item_ptr(leaf, path->slots[0], char); ptr += btrfs_item_size(leaf, item) - ins_len; extref = (struct btrfs_inode_extref *)ptr; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index ab485e5..2c66ddb 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -78,8 +78,10 @@ again: btrfs_transaction_in_commit(fs_info)) { leaf = path->nodes[0]; - if (WARN_ON(btrfs_header_nritems(leaf) == 0)) + if (btrfs_header_nritems(leaf) == 0) { + WARN_ON(1); break; + } /* * Save the key so we can advances forward @@ -235,7 +237,7 @@ again: start_caching(root); if (objectid <= root->cache_progress || - objectid >= root->highest_objectid) + objectid > root->highest_objectid) __btrfs_add_free_space(ctl, objectid, 1); else __btrfs_add_free_space(pinned, objectid, 1); @@ -410,7 +412,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root, return 0; /* Don't save inode cache if we are deleting this root */ - if (btrfs_root_refs(&root->root_item) == 0) + if (btrfs_root_refs(&root->root_item) == 0 && + root != root->fs_info->tree_root) return 0; if (!btrfs_test_opt(root, INODE_MAP_CACHE)) @@ -464,7 +467,7 @@ again: } if (i_size_read(inode) > 0) { - ret = btrfs_truncate_free_space_cache(root, trans, inode); + ret = btrfs_truncate_free_space_cache(root, trans, path, inode); if (ret) { if (ret != -ENOSPC) btrfs_abort_transaction(trans, root, ret); @@ -501,7 +504,7 @@ again: } btrfs_free_reserved_data_space(inode, prealloc); - ret = btrfs_write_out_ino_cache(root, trans, path, inode); + ret = btrfs_write_out_ino_cache(root, trans, path); out_put: iput(inode); out_release: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f1a7744..51e3afa 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -43,6 +43,7 @@ #include <linux/btrfs.h> #include <linux/blkdev.h> #include <linux/posix_acl_xattr.h> +#include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -843,10 +844,7 @@ static noinline int cow_file_range(struct inode *inode, struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 0; - if (btrfs_is_free_space_inode(inode)) { - WARN_ON_ONCE(1); - return -EINVAL; - } + BUG_ON(btrfs_is_free_space_inode(inode)); num_bytes = ALIGN(end - start + 1, blocksize); num_bytes = max(blocksize, num_bytes); @@ -1180,8 +1178,10 @@ static noinline int run_delalloc_nocow(struct inode *inode, while (1) { ret = btrfs_lookup_file_extent(trans, root, path, ino, cur_offset, 0); - if (ret < 0) + if (ret < 0) { + btrfs_abort_transaction(trans, root, ret); goto error; + } if (ret > 0 && path->slots[0] > 0 && check_prev) { leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, @@ -1195,8 +1195,10 @@ next_slot: leaf = path->nodes[0]; if (path->slots[0] >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); - if (ret < 0) + if (ret < 0) { + btrfs_abort_transaction(trans, root, ret); goto error; + } if (ret > 0) break; leaf = path->nodes[0]; @@ -1287,8 +1289,10 @@ out_check: ret = cow_file_range(inode, locked_page, cow_start, found_key.offset - 1, page_started, nr_written, 1); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, root, ret); goto error; + } cow_start = (u64)-1; } @@ -1335,8 +1339,10 @@ out_check: BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_reloc_clone_csums(inode, cur_offset, num_bytes); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, root, ret); goto error; + } } extent_clear_unlock_delalloc(inode, cur_offset, @@ -1358,8 +1364,10 @@ out_check: if (cow_start != (u64)-1) { ret = cow_file_range(inode, locked_page, cow_start, end, page_started, nr_written, 1); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, root, ret); goto error; + } } error: @@ -1543,13 +1551,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, spin_unlock(&BTRFS_I(inode)->lock); } - /* - * We don't reserve metadata space for space cache inodes so we - * don't need to call dellalloc_release_metadata if there is an - * error. - */ - if (*bits & EXTENT_DO_ACCOUNTING && - root != root->fs_info->tree_root) + if (*bits & EXTENT_DO_ACCOUNTING) btrfs_delalloc_release_metadata(inode, len); if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID @@ -2039,8 +2041,10 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, key.offset = offset; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (WARN_ON(ret < 0)) + if (ret < 0) { + WARN_ON(1); return ret; + } ret = 0; while (1) { @@ -2129,8 +2133,7 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path, old->extent_offset, fs_info, path, record_one_backref, old); - if (ret < 0 && ret != -ENOENT) - return false; + BUG_ON(ret < 0 && ret != -ENOENT); /* no backref to be processed for this extent */ if (!old->count) { @@ -2364,23 +2367,10 @@ out_unlock: return ret; } -static void free_sa_defrag_extent(struct new_sa_defrag_extent *new) -{ - struct old_sa_defrag_extent *old, *tmp; - - if (!new) - return; - - list_for_each_entry_safe(old, tmp, &new->head, list) { - list_del(&old->list); - kfree(old); - } - kfree(new); -} - static void relink_file_extents(struct new_sa_defrag_extent *new) { struct btrfs_path *path; + struct old_sa_defrag_extent *old, *tmp; struct sa_defrag_extent_backref *backref; struct sa_defrag_extent_backref *prev = NULL; struct inode *inode; @@ -2423,11 +2413,16 @@ static void relink_file_extents(struct new_sa_defrag_extent *new) kfree(prev); btrfs_free_path(path); -out: - free_sa_defrag_extent(new); + list_for_each_entry_safe(old, tmp, &new->head, list) { + list_del(&old->list); + kfree(old); + } +out: atomic_dec(&root->fs_info->defrag_running); wake_up(&root->fs_info->transaction_wait); + + kfree(new); } static struct new_sa_defrag_extent * @@ -2437,7 +2432,7 @@ record_old_file_extents(struct inode *inode, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_path *path; struct btrfs_key key; - struct old_sa_defrag_extent *old; + struct old_sa_defrag_extent *old, *tmp; struct new_sa_defrag_extent *new; int ret; @@ -2485,7 +2480,7 @@ record_old_file_extents(struct inode *inode, if (slot >= btrfs_header_nritems(l)) { ret = btrfs_next_leaf(root, path); if (ret < 0) - goto out_free_path; + goto out_free_list; else if (ret > 0) break; continue; @@ -2514,7 +2509,7 @@ record_old_file_extents(struct inode *inode, old = kmalloc(sizeof(*old), GFP_NOFS); if (!old) - goto out_free_path; + goto out_free_list; offset = max(new->file_pos, key.offset); end = min(new->file_pos + new->len, key.offset + num_bytes); @@ -2536,10 +2531,15 @@ next: return new; +out_free_list: + list_for_each_entry_safe(old, tmp, &new->head, list) { + list_del(&old->list); + kfree(old); + } out_free_path: btrfs_free_path(path); out_kfree: - free_sa_defrag_extent(new); + kfree(new); return NULL; } @@ -2710,14 +2710,8 @@ out: btrfs_remove_ordered_extent(inode, ordered_extent); /* for snapshot-aware defrag */ - if (new) { - if (ret) { - free_sa_defrag_extent(new); - atomic_dec(&root->fs_info->defrag_running); - } else { - relink_file_extents(new); - } - } + if (new) + relink_file_extents(new); /* once for us */ btrfs_put_ordered_extent(ordered_extent); @@ -2975,7 +2969,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) if (insert >= 1) { ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); if (ret) { - atomic_dec(&root->orphan_inodes); if (reserve) { clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED, &BTRFS_I(inode)->runtime_flags); @@ -3025,15 +3018,13 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans, release_rsv = 1; spin_unlock(&root->orphan_lock); - if (delete_item) { - atomic_dec(&root->orphan_inodes); - if (trans) - ret = btrfs_del_orphan_item(trans, root, - btrfs_ino(inode)); - } + if (trans && delete_item) + ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); - if (release_rsv) + if (release_rsv) { btrfs_orphan_release_metadata(inode); + atomic_dec(&root->orphan_inodes); + } return ret; } @@ -3181,7 +3172,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) /* if we have links, this was a truncate, lets do that */ if (inode->i_nlink) { - if (WARN_ON(!S_ISREG(inode->i_mode))) { + if (!S_ISREG(inode->i_mode)) { + WARN_ON(1); iput(inode); continue; } @@ -3644,7 +3636,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, int ret; ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len); if (!ret) { - drop_nlink(inode); + btrfs_drop_nlink(inode); ret = btrfs_update_inode(trans, root, inode); } return ret; @@ -4238,16 +4230,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) while (1) { struct btrfs_ordered_extent *ordered; - + btrfs_wait_ordered_range(inode, hole_start, + block_end - hole_start); lock_extent_bits(io_tree, hole_start, block_end - 1, 0, &cached_state); - ordered = btrfs_lookup_ordered_range(inode, hole_start, - block_end - hole_start); + ordered = btrfs_lookup_ordered_extent(inode, hole_start); if (!ordered) break; unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, GFP_NOFS); - btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); } @@ -4481,10 +4472,8 @@ void btrfs_evict_inode(struct inode *inode) trace_btrfs_inode_evict(inode); truncate_inode_pages(&inode->i_data, 0); - if (inode->i_nlink && - ((btrfs_root_refs(&root->root_item) != 0 && - root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) || - btrfs_is_free_space_inode(inode))) + if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || + btrfs_is_free_space_inode(inode))) goto no_delete; if (is_bad_inode(inode)) { @@ -4501,8 +4490,7 @@ void btrfs_evict_inode(struct inode *inode) } if (inode->i_nlink > 0) { - BUG_ON(btrfs_root_refs(&root->root_item) != 0 && - root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID); + BUG_ON(btrfs_root_refs(&root->root_item) != 0); goto no_delete; } @@ -4743,7 +4731,14 @@ static void inode_tree_del(struct inode *inode) } spin_unlock(&root->inode_lock); - if (empty && btrfs_root_refs(&root->root_item) == 0) { + /* + * Free space cache has inodes in the tree root, but the tree root has a + * root_refs of 0, so this could end up dropping the tree root as a + * snapshot, so we need the extra !root->fs_info->tree_root check to + * make sure we don't drop it. + */ + if (empty && btrfs_root_refs(&root->root_item) == 0 && + root != root->fs_info->tree_root) { synchronize_srcu(&root->fs_info->subvol_srcu); spin_lock(&root->inode_lock); empty = RB_EMPTY_ROOT(&root->inode_tree); @@ -4836,12 +4831,10 @@ static struct inode *btrfs_iget_locked(struct super_block *s, { struct inode *inode; struct btrfs_iget_args args; - unsigned long hashval = btrfs_inode_hash(objectid, root); - args.ino = objectid; args.root = root; - inode = iget5_locked(s, hashval, btrfs_find_actor, + inode = iget5_locked(s, objectid, btrfs_find_actor, btrfs_init_locked_inode, (void *)&args); return inode; @@ -5055,7 +5048,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) continue; } - item = btrfs_item_nr(slot); + item = btrfs_item_nr(leaf, slot); btrfs_item_key_to_cpu(leaf, &found_key, slot); if (found_key.objectid != key.objectid) @@ -5461,7 +5454,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BTRFS_INODE_NODATASUM; } - btrfs_insert_inode_hash(inode); + insert_inode_hash(inode); inode_tree_add(inode); trace_btrfs_inode_new(inode); @@ -5737,7 +5730,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, goto fail; } - inc_nlink(inode); + btrfs_inc_nlink(inode); inode_inc_iversion(inode); inode->i_ctime = CURRENT_TIME; ihold(inode); @@ -5867,7 +5860,7 @@ static noinline int uncompress_inline(struct btrfs_path *path, compress_type = btrfs_file_extent_compression(leaf, item); max_size = btrfs_file_extent_ram_bytes(leaf, item); inline_size = btrfs_file_extent_inline_item_len(leaf, - btrfs_item_nr(path->slots[0])); + btrfs_item_nr(leaf, path->slots[0])); tmp = kmalloc(inline_size, GFP_NOFS); if (!tmp) return -ENOMEM; @@ -5981,14 +5974,7 @@ again: found_type = btrfs_key_type(&found_key); if (found_key.objectid != objectid || found_type != BTRFS_EXTENT_DATA_KEY) { - /* - * If we backup past the first extent we want to move forward - * and see if there is an extent in front of us, otherwise we'll - * say there is a hole for our whole search range which can - * cause problems. - */ - extent_end = start; - goto next; + goto not_found; } found_type = btrfs_file_extent_type(leaf, item); @@ -6003,7 +5989,7 @@ again: size = btrfs_file_extent_inline_len(leaf, item); extent_end = ALIGN(extent_start + size, root->sectorsize); } -next: + if (start >= extent_end) { path->slots[0]++; if (path->slots[0] >= btrfs_header_nritems(leaf)) { @@ -6187,7 +6173,8 @@ insert: write_unlock(&em_tree->lock); out: - trace_btrfs_get_extent(root, em); + if (em) + trace_btrfs_get_extent(root, em); if (path) btrfs_free_path(path); @@ -6262,7 +6249,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag /* adjust the range_start to make sure it doesn't * go backwards from the start they passed in */ - range_start = max(start, range_start); + range_start = max(start,range_start); found = found_end - range_start; if (found > 0) { @@ -7066,7 +7053,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, } } else { submit_len += bvec->bv_len; - nr_pages++; + nr_pages ++; bvec++; } } @@ -7235,9 +7222,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, * outstanding dirty pages are on disk. */ count = iov_length(iov, nr_segs); - ret = btrfs_wait_ordered_range(inode, offset, count); - if (ret) - return ret; + btrfs_wait_ordered_range(inode, offset, count); if (rw & WRITE) { /* @@ -7578,10 +7563,7 @@ static int btrfs_truncate(struct inode *inode) u64 mask = root->sectorsize - 1; u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); - ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask), - (u64)-1); - if (ret) - return ret; + btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); /* * Yes ladies and gentelment, this is indeed ugly. The fact is we have @@ -7805,14 +7787,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) return inode; } -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS -void btrfs_test_destroy_inode(struct inode *inode) -{ - btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); - kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); -} -#endif - static void btrfs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); @@ -7883,7 +7857,8 @@ int btrfs_drop_inode(struct inode *inode) return 1; /* the snap/subvol tree is on deleting */ - if (btrfs_root_refs(&root->root_item) == 0) + if (btrfs_root_refs(&root->root_item) == 0 && + root != root->fs_info->tree_root) return 1; else return generic_drop_inode(inode); @@ -8020,7 +7995,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (ret == -EEXIST) { /* we shouldn't get * eexist without a new_inode */ - if (WARN_ON(!new_inode)) { + if (!new_inode) { + WARN_ON(1); return ret; } } else { @@ -8168,24 +8144,18 @@ out_notrans: static void btrfs_run_delalloc_work(struct btrfs_work *work) { struct btrfs_delalloc_work *delalloc_work; - struct inode *inode; delalloc_work = container_of(work, struct btrfs_delalloc_work, work); - inode = delalloc_work->inode; - if (delalloc_work->wait) { - btrfs_wait_ordered_range(inode, 0, (u64)-1); - } else { - filemap_flush(inode->i_mapping); - if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, - &BTRFS_I(inode)->runtime_flags)) - filemap_flush(inode->i_mapping); - } + if (delalloc_work->wait) + btrfs_wait_ordered_range(delalloc_work->inode, 0, (u64)-1); + else + filemap_flush(delalloc_work->inode->i_mapping); if (delalloc_work->delay_iput) - btrfs_add_delayed_iput(inode); + btrfs_add_delayed_iput(delalloc_work->inode); else - iput(inode); + iput(delalloc_work->inode); complete(&delalloc_work->completion); } @@ -8306,7 +8276,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) return ret; } -int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) +int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info, + int delay_iput) { struct btrfs_root *root; struct list_head splice; @@ -8366,14 +8337,14 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, int err; int drop_inode = 0; u64 objectid; - u64 index = 0; + u64 index = 0 ; int name_len; int datasize; unsigned long ptr; struct btrfs_file_extent_item *ei; struct extent_buffer *leaf; - name_len = strlen(symname); + name_len = strlen(symname) + 1; if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) return -ENAMETOOLONG; @@ -8461,7 +8432,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &btrfs_symlink_aops; inode->i_mapping->backing_dev_info = &root->fs_info->bdi; inode_set_bytes(inode, name_len); - btrfs_i_size_write(inode, name_len); + btrfs_i_size_write(inode, name_len - 1); err = btrfs_update_inode(trans, root, inode); if (err) drop_inode = 1; @@ -8520,8 +8491,6 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, ins.offset, 0, 0, 0, BTRFS_FILE_EXTENT_PREALLOC); if (ret) { - btrfs_free_reserved_extent(root, ins.objectid, - ins.offset); btrfs_abort_transaction(trans, root, ret); if (own_trans) btrfs_end_transaction(trans, root); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a111622..9d46f60 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -44,6 +44,7 @@ #include <linux/uuid.h> #include <linux/btrfs.h> #include <linux/uaccess.h> +#include "compat.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -320,7 +321,7 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg) static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) { - struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb); + struct btrfs_fs_info *fs_info = btrfs_sb(fdentry(file)->d_sb); struct btrfs_device *device; struct request_queue *q; struct fstrim_range range; @@ -368,13 +369,9 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) int btrfs_is_empty_uuid(u8 *uuid) { - int i; + static char empty_uuid[BTRFS_UUID_SIZE] = {0}; - for (i = 0; i < BTRFS_UUID_SIZE; i++) { - if (uuid[i]) - return 0; - } - return 1; + return !memcmp(uuid, empty_uuid, BTRFS_UUID_SIZE); } static noinline int create_subvol(struct inode *dir, @@ -439,7 +436,7 @@ static noinline int create_subvol(struct inode *dir, btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(leaf, objectid); - write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(), + write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(leaf), BTRFS_FSID_SIZE); write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, btrfs_header_chunk_tree_uuid(leaf), @@ -577,7 +574,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, if (ret) return ret; - btrfs_wait_ordered_extents(root, -1); + btrfs_wait_ordered_extents(root); pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); if (!pending_snapshot) @@ -691,7 +688,7 @@ static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) * nfs_async_unlink(). */ -static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir) +static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir) { int error; @@ -845,6 +842,7 @@ static int find_new_extents(struct btrfs_root *root, { struct btrfs_path *path; struct btrfs_key min_key; + struct btrfs_key max_key; struct extent_buffer *leaf; struct btrfs_file_extent_item *extent; int type; @@ -859,10 +857,15 @@ static int find_new_extents(struct btrfs_root *root, min_key.type = BTRFS_EXTENT_DATA_KEY; min_key.offset = *off; + max_key.objectid = ino; + max_key.type = (u8)-1; + max_key.offset = (u64)-1; + path->keep_locks = 1; - while (1) { - ret = btrfs_search_forward(root, &min_key, path, newer_than); + while(1) { + ret = btrfs_search_forward(root, &min_key, &max_key, + path, newer_than); if (ret != 0) goto none; if (min_key.objectid != ino) @@ -1203,7 +1206,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, ra = &file->f_ra; } - pages = kmalloc_array(max_cluster, sizeof(struct page *), + pages = kmalloc(sizeof(struct page *) * max_cluster, GFP_NOFS); if (!pages) { ret = -ENOMEM; @@ -1890,6 +1893,7 @@ static noinline int search_ioctl(struct inode *inode, { struct btrfs_root *root; struct btrfs_key key; + struct btrfs_key max_key; struct btrfs_path *path; struct btrfs_ioctl_search_key *sk = &args->key; struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; @@ -1921,10 +1925,15 @@ static noinline int search_ioctl(struct inode *inode, key.type = sk->min_type; key.offset = sk->min_offset; + max_key.objectid = sk->max_objectid; + max_key.type = sk->max_type; + max_key.offset = sk->max_offset; + path->keep_locks = 1; - while (1) { - ret = btrfs_search_forward(root, &key, path, sk->min_transid); + while(1) { + ret = btrfs_search_forward(root, &key, &max_key, path, + sk->min_transid); if (ret != 0) { if (ret > 0) ret = 0; @@ -2009,7 +2018,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, key.type = BTRFS_INODE_REF_KEY; key.offset = (u64)-1; - while (1) { + while(1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; @@ -2038,7 +2047,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, } *(ptr + len) = '/'; - read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len); + read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len); if (key.offset == BTRFS_FIRST_FREE_OBJECTID) break; @@ -2049,7 +2058,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, dirid = key.objectid; } memmove(name, ptr, total_len); - name[total_len] = '\0'; + name[total_len]='\0'; ret = 0; out: btrfs_free_path(path); @@ -2089,7 +2098,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file, static noinline int btrfs_ioctl_snap_destroy(struct file *file, void __user *arg) { - struct dentry *parent = file->f_path.dentry; + struct dentry *parent = fdentry(file); struct dentry *dentry; struct inode *dir = parent->d_inode; struct inode *inode; @@ -2135,7 +2144,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, inode = dentry->d_inode; dest = BTRFS_I(inode)->root; - if (!capable(CAP_SYS_ADMIN)) { + if (!capable(CAP_SYS_ADMIN)){ /* * Regular user. Only allow this with a special mount * option, when the user has write+exec access to the @@ -2718,10 +2727,15 @@ static long btrfs_ioctl_file_extent_same(struct file *file, size = sizeof(tmp) + tmp.dest_count * sizeof(struct btrfs_ioctl_same_extent_info); - same = memdup_user((struct btrfs_ioctl_same_args __user *)argp, size); + same = kmalloc(size, GFP_NOFS); + if (!same) { + ret = -EFAULT; + goto out; + } - if (IS_ERR(same)) { - ret = PTR_ERR(same); + if (copy_from_user(same, + (struct btrfs_ioctl_same_args __user *)argp, size)) { + ret = -EFAULT; goto out; } @@ -3105,7 +3119,7 @@ out: static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, u64 off, u64 olen, u64 destoff) { - struct inode *inode = file_inode(file); + struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct fd src_file; struct inode *src; @@ -3665,10 +3679,9 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) switch (p->cmd) { case BTRFS_IOCTL_DEV_REPLACE_CMD_START: - if (root->fs_info->sb->s_flags & MS_RDONLY) { - ret = -EROFS; - goto out; - } + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + if (atomic_xchg( &root->fs_info->mutually_exclusive_operation_running, 1)) { @@ -3694,7 +3707,7 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) if (copy_to_user(arg, p, sizeof(*p))) ret = -EFAULT; -out: + kfree(p); return ret; } @@ -4304,7 +4317,7 @@ static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg) static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) { - struct btrfs_root *root = BTRFS_I(file_inode(file))->root; + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -4544,15 +4557,9 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_logical_to_ino(root, argp); case BTRFS_IOC_SPACE_INFO: return btrfs_ioctl_space_info(root, argp); - case BTRFS_IOC_SYNC: { - int ret; - - ret = btrfs_start_delalloc_roots(root->fs_info, 0); - if (ret) - return ret; - ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); - return ret; - } + case BTRFS_IOC_SYNC: + btrfs_sync_fs(file->f_dentry->d_sb, 1); + return 0; case BTRFS_IOC_START_SYNC: return btrfs_ioctl_start_sync(root, argp); case BTRFS_IOC_WAIT_SYNC: diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 69582d5..c702cb6 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -537,9 +537,7 @@ void btrfs_remove_ordered_extent(struct inode *inode, */ if (RB_EMPTY_ROOT(&tree->tree) && !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { - spin_lock(&root->fs_info->ordered_root_lock); list_del_init(&BTRFS_I(inode)->ordered_operations); - spin_unlock(&root->fs_info->ordered_root_lock); } if (!root->nr_ordered_extents) { @@ -565,11 +563,10 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work) * wait for all the ordered extents in a root. This is done when balancing * space between drives. */ -int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) +void btrfs_wait_ordered_extents(struct btrfs_root *root) { struct list_head splice, works; struct btrfs_ordered_extent *ordered, *next; - int count = 0; INIT_LIST_HEAD(&splice); INIT_LIST_HEAD(&works); @@ -577,7 +574,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) mutex_lock(&root->fs_info->ordered_operations_mutex); spin_lock(&root->ordered_extent_lock); list_splice_init(&root->ordered_extents, &splice); - while (!list_empty(&splice) && nr) { + while (!list_empty(&splice)) { ordered = list_first_entry(&splice, struct btrfs_ordered_extent, root_extent_list); list_move_tail(&ordered->root_extent_list, @@ -592,11 +589,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) cond_resched(); spin_lock(&root->ordered_extent_lock); - if (nr != -1) - nr--; - count++; } - list_splice_tail(&splice, &root->ordered_extents); spin_unlock(&root->ordered_extent_lock); list_for_each_entry_safe(ordered, next, &works, work_list) { @@ -606,21 +599,18 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) cond_resched(); } mutex_unlock(&root->fs_info->ordered_operations_mutex); - - return count; } -void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) +void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info) { struct btrfs_root *root; struct list_head splice; - int done; INIT_LIST_HEAD(&splice); spin_lock(&fs_info->ordered_root_lock); list_splice_init(&fs_info->ordered_roots, &splice); - while (!list_empty(&splice) && nr) { + while (!list_empty(&splice)) { root = list_first_entry(&splice, struct btrfs_root, ordered_root); root = btrfs_grab_fs_root(root); @@ -629,16 +619,11 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) &fs_info->ordered_roots); spin_unlock(&fs_info->ordered_root_lock); - done = btrfs_wait_ordered_extents(root, nr); + btrfs_wait_ordered_extents(root); btrfs_put_fs_root(root); spin_lock(&fs_info->ordered_root_lock); - if (nr != -1) { - nr -= done; - WARN_ON(nr < 0); - } } - list_splice_tail(&splice, &fs_info->ordered_roots); spin_unlock(&fs_info->ordered_root_lock); } @@ -749,9 +734,8 @@ void btrfs_start_ordered_extent(struct inode *inode, /* * Used to wait on ordered extents across a large range of bytes. */ -int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) +void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) { - int ret = 0; u64 end; u64 orig_end; struct btrfs_ordered_extent *ordered; @@ -767,9 +751,8 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) /* start IO across the range first to instantiate any delalloc * extents */ - ret = filemap_fdatawrite_range(inode->i_mapping, start, orig_end); - if (ret) - return ret; + filemap_fdatawrite_range(inode->i_mapping, start, orig_end); + /* * So with compression we will find and lock a dirty page and clear the * first one as dirty, setup an async extent, and immediately return @@ -785,15 +768,10 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) * right and you are wrong. */ if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, - &BTRFS_I(inode)->runtime_flags)) { - ret = filemap_fdatawrite_range(inode->i_mapping, start, - orig_end); - if (ret) - return ret; - } - ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end); - if (ret) - return ret; + &BTRFS_I(inode)->runtime_flags)) + filemap_fdatawrite_range(inode->i_mapping, start, orig_end); + + filemap_fdatawait_range(inode->i_mapping, start, orig_end); end = orig_end; while (1) { @@ -804,20 +782,17 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) btrfs_put_ordered_extent(ordered); break; } - if (ordered->file_offset + ordered->len <= start) { + if (ordered->file_offset + ordered->len < start) { btrfs_put_ordered_extent(ordered); break; } btrfs_start_ordered_extent(inode, ordered, 1); end = ordered->file_offset; - if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) - ret = -EIO; btrfs_put_ordered_extent(ordered); - if (ret || end == 0 || end == start) + if (end == 0 || end == start) break; end--; } - return ret; } /* @@ -1101,7 +1076,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, * if this file hasn't been changed since the last transaction * commit, we can safely return without doing anything */ - if (last_mod <= root->fs_info->last_trans_committed) + if (last_mod < root->fs_info->last_trans_committed) return; spin_lock(&root->fs_info->ordered_root_lock); diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 9b0450f..0c0b356 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -180,7 +180,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, u64 file_offset); void btrfs_start_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry, int wait); -int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); +void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); struct btrfs_ordered_extent * btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, @@ -195,8 +195,8 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); -int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); -void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); +void btrfs_wait_ordered_extents(struct btrfs_root *root); +void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info); void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode); void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 417053b..0088bed 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -193,7 +193,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d", btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { - item = btrfs_item_nr(i); + item = btrfs_item_nr(l, i); btrfs_item_key_to_cpu(l, &key, i); type = btrfs_key_type(&key); printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d " diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 24ac218..d0ecfbd 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -33,6 +33,7 @@ #include <linux/raid/xor.h> #include <linux/vmalloc.h> #include <asm/div64.h> +#include "compat.h" #include "ctree.h" #include "extent_map.h" #include "disk-io.h" diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ce459a7..4a35572 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1383,7 +1383,6 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, { struct btrfs_root *reloc_root; struct reloc_control *rc = root->fs_info->reloc_ctl; - struct btrfs_block_rsv *rsv; int clear_rsv = 0; int ret; @@ -1397,14 +1396,13 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) return 0; - if (!trans->reloc_reserved) { - rsv = trans->block_rsv; + if (!trans->block_rsv) { trans->block_rsv = rc->block_rsv; clear_rsv = 1; } reloc_root = create_reloc_root(trans, root, root->root_key.objectid); if (clear_rsv) - trans->block_rsv = rsv; + trans->block_rsv = NULL; ret = __add_reloc_root(reloc_root); BUG_ON(ret < 0); @@ -1777,7 +1775,8 @@ again: new_ptr_gen = 0; } - if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) { + if (new_bytenr > 0 && new_bytenr == old_bytenr) { + WARN_ON(1); ret = level; break; } @@ -2059,7 +2058,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, LIST_HEAD(inode_list); struct btrfs_key key; struct btrfs_key next_key; - struct btrfs_trans_handle *trans = NULL; + struct btrfs_trans_handle *trans; struct btrfs_root *reloc_root; struct btrfs_root_item *root_item; struct btrfs_path *path; @@ -2108,19 +2107,18 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, memset(&next_key, 0, sizeof(next_key)); while (1) { + trans = btrfs_start_transaction(root, 0); + BUG_ON(IS_ERR(trans)); + trans->block_rsv = rc->block_rsv; + ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved, BTRFS_RESERVE_FLUSH_ALL); if (ret) { - err = ret; - goto out; - } - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) { - err = PTR_ERR(trans); - trans = NULL; - goto out; + BUG_ON(ret != -EAGAIN); + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + continue; } - trans->block_rsv = rc->block_rsv; replaced = 0; max_level = level; @@ -2166,7 +2164,6 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, root_item->drop_level = level; btrfs_end_transaction_throttle(trans, root); - trans = NULL; btrfs_btree_balance_dirty(root); @@ -2195,8 +2192,7 @@ out: btrfs_update_reloc_root(trans, root); } - if (trans) - btrfs_end_transaction_throttle(trans, root); + btrfs_end_transaction_throttle(trans, root); btrfs_btree_balance_dirty(root); @@ -3262,7 +3258,7 @@ static int add_tree_block(struct reloc_control *rc, struct rb_node *rb_node; u32 item_size; int level = -1; - u64 generation; + int generation; eb = path->nodes[0]; item_size = btrfs_item_size_nr(eb, path->slots[0]); @@ -3411,6 +3407,7 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info, struct inode *inode, u64 ino) { struct btrfs_key key; + struct btrfs_path *path; struct btrfs_root *root = fs_info->tree_root; struct btrfs_trans_handle *trans; int ret = 0; @@ -3435,14 +3432,22 @@ truncate: if (ret) goto out; + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { + btrfs_free_path(path); ret = PTR_ERR(trans); goto out; } - ret = btrfs_truncate_free_space_cache(root, trans, inode); + ret = btrfs_truncate_free_space_cache(root, trans, path, inode); + btrfs_free_path(path); btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root); out: @@ -3544,8 +3549,10 @@ static int find_data_references(struct reloc_control *rc, err = ret; goto out; } - if (WARN_ON(ret > 0)) + if (ret > 0) { + WARN_ON(1); goto out; + } leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); @@ -3565,9 +3572,11 @@ static int find_data_references(struct reloc_control *rc, } btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (WARN_ON(key.objectid != ref_objectid || - key.type != BTRFS_EXTENT_DATA_KEY)) + if (key.objectid != ref_objectid || + key.type != BTRFS_EXTENT_DATA_KEY) { + WARN_ON(1); break; + } fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); @@ -3992,6 +4001,16 @@ restart: } } + ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5); + if (ret < 0) { + if (ret != -ENOSPC) { + err = ret; + WARN_ON(1); + break; + } + rc->commit_transaction = 1; + } + if (rc->commit_transaction) { rc->commit_transaction = 0; ret = btrfs_commit_transaction(trans, rc->extent_root); @@ -4222,12 +4241,12 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", rc->block_group->key.objectid, rc->block_group->flags); - ret = btrfs_start_delalloc_roots(fs_info, 0); + ret = btrfs_start_all_delalloc_inodes(fs_info, 0); if (ret < 0) { err = ret; goto out; } - btrfs_wait_ordered_roots(fs_info, -1); + btrfs_wait_all_ordered_extents(fs_info); while (1) { mutex_lock(&fs_info->cleaner_mutex); @@ -4245,12 +4264,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) rc->extents_found); if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { - ret = btrfs_wait_ordered_range(rc->data_inode, 0, - (u64)-1); - if (ret) { - err = ret; - goto out; - } + btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); invalidate_mapping_pages(rc->data_inode->i_mapping, 0, -1); rc->stage = UPDATE_DATA_PTRS; @@ -4467,7 +4481,6 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) struct btrfs_root *root = BTRFS_I(inode)->root; int ret; u64 disk_bytenr; - u64 new_bytenr; LIST_HEAD(list); ordered = btrfs_lookup_ordered_extent(inode, file_pos); @@ -4479,24 +4492,13 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) if (ret) goto out; + disk_bytenr = ordered->start; while (!list_empty(&list)) { sums = list_entry(list.next, struct btrfs_ordered_sum, list); list_del_init(&sums->list); - /* - * We need to offset the new_bytenr based on where the csum is. - * We need to do this because we will read in entire prealloc - * extents but we may have written to say the middle of the - * prealloc extent, so we need to make sure the csum goes with - * the right disk offset. - * - * We can do this because the data reloc inode refers strictly - * to the on disk bytes, so we don't have to worry about - * disk_len vs real len like with real inodes since it's all - * disk length. - */ - new_bytenr = ordered->start + (sums->bytenr - disk_bytenr); - sums->bytenr = new_bytenr; + sums->bytenr = disk_bytenr; + disk_bytenr += sums->len; btrfs_add_ordered_sum(inode, ordered, sums); } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 1fd3f33..a18e0e2 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -208,6 +208,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, int is_metadata, int have_csum, const u8 *csum, u64 generation, u16 csum_size); +static void scrub_complete_bio_end_io(struct bio *bio, int err); static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, struct scrub_block *sblock_good, int force_write); @@ -937,10 +938,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) BTRFS_DEV_STAT_CORRUPTION_ERRS); } - if (sctx->readonly) { - ASSERT(!sctx->is_dev_replace); - goto out; - } + if (sctx->readonly && !sctx->is_dev_replace) + goto did_not_correct_error; if (!is_metadata && !have_csum) { struct scrub_fixup_nodatasum *fixup_nodatasum; @@ -1293,6 +1292,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, for (page_num = 0; page_num < sblock->page_count; page_num++) { struct bio *bio; struct scrub_page *page = sblock->pagev[page_num]; + DECLARE_COMPLETION_ONSTACK(complete); if (page->dev->bdev == NULL) { page->io_error = 1; @@ -1309,11 +1309,18 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, } bio->bi_bdev = page->dev->bdev; bio->bi_sector = page->physical >> 9; + bio->bi_end_io = scrub_complete_bio_end_io; + bio->bi_private = &complete; bio_add_page(bio, page->page, PAGE_SIZE, 0); - if (btrfsic_submit_bio_wait(READ, bio)) - sblock->no_io_error_seen = 0; + btrfsic_submit_bio(READ, bio); + + /* this will also unplug the queue */ + wait_for_completion(&complete); + page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags); + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + sblock->no_io_error_seen = 0; bio_put(bio); } @@ -1382,6 +1389,11 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, sblock->checksum_error = 1; } +static void scrub_complete_bio_end_io(struct bio *bio, int err) +{ + complete((struct completion *)bio->bi_private); +} + static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, struct scrub_block *sblock_good, int force_write) @@ -1416,6 +1428,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, sblock_bad->checksum_error || page_bad->io_error) { struct bio *bio; int ret; + DECLARE_COMPLETION_ONSTACK(complete); if (!page_bad->dev->bdev) { printk_ratelimited(KERN_WARNING @@ -1428,14 +1441,19 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, return -EIO; bio->bi_bdev = page_bad->dev->bdev; bio->bi_sector = page_bad->physical >> 9; + bio->bi_end_io = scrub_complete_bio_end_io; + bio->bi_private = &complete; ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0); if (PAGE_SIZE != ret) { bio_put(bio); return -EIO; } + btrfsic_submit_bio(WRITE, bio); - if (btrfsic_submit_bio_wait(WRITE, bio)) { + /* this will also unplug the queue */ + wait_for_completion(&complete); + if (!bio_flagged(bio, BIO_UPTODATE)) { btrfs_dev_stat_inc_and_print(page_bad->dev, BTRFS_DEV_STAT_WRITE_ERRS); btrfs_dev_replace_stats_inc( @@ -2699,6 +2717,8 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, mutex_unlock(&fs_info->scrub_lock); wake_up(&fs_info->scrub_pause_wait); + dev_replace->cursor_left = dev_replace->cursor_right; + dev_replace->item_needs_writeback = 1; btrfs_put_block_group(cache); if (ret) break; @@ -2712,9 +2732,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, break; } - dev_replace->cursor_left = dev_replace->cursor_right; - dev_replace->item_needs_writeback = 1; - key.offset = found_key.offset + length; btrfs_release_path(path); } @@ -2766,6 +2783,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, { int ret = 0; + mutex_lock(&fs_info->scrub_lock); if (fs_info->scrub_workers_refcnt == 0) { if (is_dev_replace) btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1, @@ -2795,17 +2813,21 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, } ++fs_info->scrub_workers_refcnt; out: + mutex_unlock(&fs_info->scrub_lock); + return ret; } static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) { + mutex_lock(&fs_info->scrub_lock); if (--fs_info->scrub_workers_refcnt == 0) { btrfs_stop_workers(&fs_info->scrub_workers); btrfs_stop_workers(&fs_info->scrub_wr_completion_workers); btrfs_stop_workers(&fs_info->scrub_nocow_workers); } WARN_ON(fs_info->scrub_workers_refcnt < 0); + mutex_unlock(&fs_info->scrub_lock); } int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, @@ -2866,18 +2888,23 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, return -EINVAL; } + ret = scrub_workers_get(fs_info, is_dev_replace); + if (ret) + return ret; mutex_lock(&fs_info->fs_devices->device_list_mutex); dev = btrfs_find_device(fs_info, devid, NULL, NULL); if (!dev || (dev->missing && !is_dev_replace)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); + scrub_workers_put(fs_info); return -ENODEV; } - mutex_lock(&fs_info->scrub_lock); + if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) { mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->fs_devices->device_list_mutex); + scrub_workers_put(fs_info); return -EIO; } @@ -2888,17 +2915,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, btrfs_dev_replace_unlock(&fs_info->dev_replace); mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->fs_devices->device_list_mutex); + scrub_workers_put(fs_info); return -EINPROGRESS; } btrfs_dev_replace_unlock(&fs_info->dev_replace); - - ret = scrub_workers_get(fs_info, is_dev_replace); - if (ret) { - mutex_unlock(&fs_info->scrub_lock); - mutex_unlock(&fs_info->fs_devices->device_list_mutex); - return ret; - } - sctx = scrub_setup_ctx(dev, is_dev_replace); if (IS_ERR(sctx)) { mutex_unlock(&fs_info->scrub_lock); @@ -2911,15 +2931,13 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, atomic_inc(&fs_info->scrubs_running); mutex_unlock(&fs_info->scrub_lock); + mutex_unlock(&fs_info->fs_devices->device_list_mutex); if (!is_dev_replace) { - /* - * by holding device list mutex, we can - * kick off writing super in log tree sync. - */ + down_read(&fs_info->scrub_super_lock); ret = scrub_supers(sctx, dev); + up_read(&fs_info->scrub_super_lock); } - mutex_unlock(&fs_info->fs_devices->device_list_mutex); if (!ret) ret = scrub_enumerate_chunks(sctx, dev, start, end, @@ -2936,10 +2954,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, mutex_lock(&fs_info->scrub_lock); dev->scrub_device = NULL; - scrub_workers_put(fs_info); mutex_unlock(&fs_info->scrub_lock); scrub_free_ctx(sctx); + scrub_workers_put(fs_info); return ret; } @@ -2969,6 +2987,16 @@ void btrfs_scrub_continue(struct btrfs_root *root) wake_up(&fs_info->scrub_pause_wait); } +void btrfs_scrub_pause_super(struct btrfs_root *root) +{ + down_write(&root->fs_info->scrub_super_lock); +} + +void btrfs_scrub_continue_super(struct btrfs_root *root) +{ + up_write(&root->fs_info->scrub_super_lock); +} + int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) { mutex_lock(&fs_info->scrub_lock); @@ -3355,6 +3383,7 @@ static int write_page_nocow(struct scrub_ctx *sctx, struct bio *bio; struct btrfs_device *dev; int ret; + DECLARE_COMPLETION_ONSTACK(compl); dev = sctx->wr_ctx.tgtdev; if (!dev) @@ -3371,6 +3400,8 @@ static int write_page_nocow(struct scrub_ctx *sctx, spin_unlock(&sctx->stat_lock); return -ENOMEM; } + bio->bi_private = &compl; + bio->bi_end_io = scrub_complete_bio_end_io; bio->bi_size = 0; bio->bi_sector = physical_for_dev_replace >> 9; bio->bi_bdev = dev->bdev; @@ -3381,8 +3412,10 @@ leave_with_eio: btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); return -EIO; } + btrfsic_submit_bio(WRITE_SYNC, bio); + wait_for_completion(&compl); - if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) goto leave_with_eio; bio_put(bio); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 6837fe8..e46e0ed 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -121,6 +121,7 @@ struct send_ctx { struct list_head name_cache_list; int name_cache_size; + struct file *cur_inode_filp; char *read_buf; }; @@ -564,8 +565,10 @@ static int begin_cmd(struct send_ctx *sctx, int cmd) { struct btrfs_cmd_header *hdr; - if (WARN_ON(!sctx->send_buf)) + if (!sctx->send_buf) { + WARN_ON(1); return -EINVAL; + } BUG_ON(sctx->send_size); @@ -788,7 +791,7 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path, if (found_key->type == BTRFS_INODE_REF_KEY) { ptr = (unsigned long)btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); - item = btrfs_item_nr(slot); + item = btrfs_item_nr(eb, slot); total = btrfs_item_size(eb, item); elem_size = sizeof(*iref); } else { @@ -902,7 +905,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, eb = path->nodes[0]; slot = path->slots[0]; - item = btrfs_item_nr(slot); + item = btrfs_item_nr(eb, slot); di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); cur = 0; len = 0; @@ -2117,6 +2120,77 @@ out: } /* + * Called for regular files when sending extents data. Opens a struct file + * to read from the file. + */ +static int open_cur_inode_file(struct send_ctx *sctx) +{ + int ret = 0; + struct btrfs_key key; + struct path path; + struct inode *inode; + struct dentry *dentry; + struct file *filp; + int new = 0; + + if (sctx->cur_inode_filp) + goto out; + + key.objectid = sctx->cur_ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + + inode = btrfs_iget(sctx->send_root->fs_info->sb, &key, sctx->send_root, + &new); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + goto out; + } + + dentry = d_obtain_alias(inode); + inode = NULL; + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out; + } + + path.mnt = sctx->mnt; + path.dentry = dentry; + filp = dentry_open(&path, O_RDONLY | O_LARGEFILE, current_cred()); + dput(dentry); + dentry = NULL; + if (IS_ERR(filp)) { + ret = PTR_ERR(filp); + goto out; + } + sctx->cur_inode_filp = filp; + +out: + /* + * no xxxput required here as every vfs op + * does it by itself on failure + */ + return ret; +} + +/* + * Closes the struct file that was created in open_cur_inode_file + */ +static int close_cur_inode_file(struct send_ctx *sctx) +{ + int ret = 0; + + if (!sctx->cur_inode_filp) + goto out; + + ret = filp_close(sctx->cur_inode_filp, NULL); + sctx->cur_inode_filp = NULL; + +out: + return ret; +} + +/* * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace */ static int send_subvol_begin(struct send_ctx *sctx) @@ -3548,72 +3622,6 @@ out: return ret; } -static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) -{ - struct btrfs_root *root = sctx->send_root; - struct btrfs_fs_info *fs_info = root->fs_info; - struct inode *inode; - struct page *page; - char *addr; - struct btrfs_key key; - pgoff_t index = offset >> PAGE_CACHE_SHIFT; - pgoff_t last_index; - unsigned pg_offset = offset & ~PAGE_CACHE_MASK; - ssize_t ret = 0; - - key.objectid = sctx->cur_ino; - key.type = BTRFS_INODE_ITEM_KEY; - key.offset = 0; - - inode = btrfs_iget(fs_info->sb, &key, root, NULL); - if (IS_ERR(inode)) - return PTR_ERR(inode); - - if (offset + len > i_size_read(inode)) { - if (offset > i_size_read(inode)) - len = 0; - else - len = offset - i_size_read(inode); - } - if (len == 0) - goto out; - - last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; - while (index <= last_index) { - unsigned cur_len = min_t(unsigned, len, - PAGE_CACHE_SIZE - pg_offset); - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); - if (!page) { - ret = -ENOMEM; - break; - } - - if (!PageUptodate(page)) { - btrfs_readpage(NULL, page); - lock_page(page); - if (!PageUptodate(page)) { - unlock_page(page); - page_cache_release(page); - ret = -EIO; - break; - } - } - - addr = kmap(page); - memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len); - kunmap(page); - unlock_page(page); - page_cache_release(page); - index++; - pg_offset = 0; - len -= cur_len; - ret += cur_len; - } -out: - iput(inode); - return ret; -} - /* * Read some bytes from the current inode/file and send a write command to * user space. @@ -3622,20 +3630,35 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len) { int ret = 0; struct fs_path *p; - ssize_t num_read = 0; + loff_t pos = offset; + int num_read = 0; + mm_segment_t old_fs; p = fs_path_alloc(); if (!p) return -ENOMEM; + /* + * vfs normally only accepts user space buffers for security reasons. + * we only read from the file and also only provide the read_buf buffer + * to vfs. As this buffer does not come from a user space call, it's + * ok to temporary allow kernel space buffers. + */ + old_fs = get_fs(); + set_fs(KERNEL_DS); + verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); - num_read = fill_read_buf(sctx, offset, len); - if (num_read <= 0) { - if (num_read < 0) - ret = num_read; + ret = open_cur_inode_file(sctx); + if (ret < 0) + goto out; + + ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos); + if (ret < 0) + goto out; + num_read = ret; + if (!num_read) goto out; - } ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); if (ret < 0) @@ -3654,6 +3677,7 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); tlv_put_failure: out: fs_path_free(p); + set_fs(old_fs); if (ret < 0) return ret; return num_read; @@ -3902,16 +3926,16 @@ static int is_extent_unchanged(struct send_ctx *sctx, while (key.offset < ekey->offset + left_len) { ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); right_type = btrfs_file_extent_type(eb, ei); - if (right_type != BTRFS_FILE_EXTENT_REG) { - ret = 0; - goto out; - } - right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); right_len = btrfs_file_extent_num_bytes(eb, ei); right_offset = btrfs_file_extent_offset(eb, ei); right_gen = btrfs_file_extent_generation(eb, ei); + if (right_type != BTRFS_FILE_EXTENT_REG) { + ret = 0; + goto out; + } + /* * Are we at extent 8? If yes, we know the extent is changed. * This may only happen on the first iteration. @@ -4198,6 +4222,10 @@ static int changed_inode(struct send_ctx *sctx, u64 left_gen = 0; u64 right_gen = 0; + ret = close_cur_inode_file(sctx); + if (ret < 0) + goto out; + sctx->cur_ino = key->objectid; sctx->cur_inode_new_gen = 0; @@ -4658,6 +4686,11 @@ static int send_subvol(struct send_ctx *sctx) } out: + if (!ret) + ret = close_cur_inode_file(sctx); + else + close_cur_inode_file(sctx); + free_recorded_refs(sctx); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2d8ac1b..e913328 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -42,6 +42,7 @@ #include <linux/cleancache.h> #include <linux/ratelimit.h> #include <linux/btrfs.h> +#include "compat.h" #include "delayed-inode.h" #include "ctree.h" #include "disk-io.h" @@ -920,7 +921,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) return 0; } - btrfs_wait_ordered_roots(fs_info, -1); + btrfs_wait_all_ordered_extents(fs_info); trans = btrfs_attach_transaction_barrier(root); if (IS_ERR(trans)) { @@ -1329,12 +1330,6 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) * this also happens on 'umount -rf' or on shutdown, when * the filesystem is busy. */ - - /* wait for the uuid_scan task to finish */ - down(&fs_info->uuid_tree_rescan_sem); - /* avoid complains from lockdep et al. */ - up(&fs_info->uuid_tree_rescan_sem); - sb->s_flags |= MS_RDONLY; btrfs_dev_replace_suspend_for_unmount(fs_info); @@ -1470,7 +1465,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) nr_devices = fs_info->fs_devices->open_devices; BUG_ON(!nr_devices); - devices_info = kmalloc_array(nr_devices, sizeof(*devices_info), + devices_info = kmalloc(sizeof(*devices_info) * nr_devices, GFP_NOFS); if (!devices_info) return -ENOMEM; @@ -1794,25 +1789,7 @@ static void btrfs_print_info(void) static int btrfs_run_sanity_tests(void) { - int ret; - - ret = btrfs_init_test_fs(); - if (ret) - return ret; - - ret = btrfs_test_free_space_cache(); - if (ret) - goto out; - ret = btrfs_test_extent_buffer_operations(); - if (ret) - goto out; - ret = btrfs_test_extent_io(); - if (ret) - goto out; - ret = btrfs_test_inodes(); -out: - btrfs_destroy_test_fs(); - return ret; + return btrfs_test_free_space_cache(); } static int __init init_btrfs_fs(void) diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c deleted file mode 100644 index 757ef00..0000000 --- a/fs/btrfs/tests/btrfs-tests.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (C) 2013 Fusion IO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#include <linux/fs.h> -#include <linux/mount.h> -#include <linux/magic.h> -#include "btrfs-tests.h" -#include "../ctree.h" - -static struct vfsmount *test_mnt = NULL; - -static const struct super_operations btrfs_test_super_ops = { - .alloc_inode = btrfs_alloc_inode, - .destroy_inode = btrfs_test_destroy_inode, -}; - -static struct dentry *btrfs_test_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) -{ - return mount_pseudo(fs_type, "btrfs_test:", &btrfs_test_super_ops, - NULL, BTRFS_TEST_MAGIC); -} - -static struct file_system_type test_type = { - .name = "btrfs_test_fs", - .mount = btrfs_test_mount, - .kill_sb = kill_anon_super, -}; - -struct inode *btrfs_new_test_inode(void) -{ - return new_inode(test_mnt->mnt_sb); -} - -int btrfs_init_test_fs(void) -{ - int ret; - - ret = register_filesystem(&test_type); - if (ret) { - printk(KERN_ERR "btrfs: cannot register test file system\n"); - return ret; - } - - test_mnt = kern_mount(&test_type); - if (IS_ERR(test_mnt)) { - printk(KERN_ERR "btrfs: cannot mount test file system\n"); - unregister_filesystem(&test_type); - return ret; - } - return 0; -} - -void btrfs_destroy_test_fs(void) -{ - kern_unmount(test_mnt); - unregister_filesystem(&test_type); -} diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h index b353bc8..5808776 100644 --- a/fs/btrfs/tests/btrfs-tests.h +++ b/fs/btrfs/tests/btrfs-tests.h @@ -24,36 +24,11 @@ #define test_msg(fmt, ...) pr_info("btrfs: selftest: " fmt, ##__VA_ARGS__) int btrfs_test_free_space_cache(void); -int btrfs_test_extent_buffer_operations(void); -int btrfs_test_extent_io(void); -int btrfs_test_inodes(void); -int btrfs_init_test_fs(void); -void btrfs_destroy_test_fs(void); -struct inode *btrfs_new_test_inode(void); #else static inline int btrfs_test_free_space_cache(void) { return 0; } -static inline int btrfs_test_extent_buffer_operations(void) -{ - return 0; -} -static inline int btrfs_init_test_fs(void) -{ - return 0; -} -static inline void btrfs_destroy_test_fs(void) -{ -} -static inline int btrfs_test_extent_io(void) -{ - return 0; -} -static inline int btrfs_test_inodes(void) -{ - return 0; -} #endif #endif diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c deleted file mode 100644 index cc286ce..0000000 --- a/fs/btrfs/tests/extent-buffer-tests.c +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright (C) 2013 Fusion IO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#include <linux/slab.h> -#include "btrfs-tests.h" -#include "../ctree.h" -#include "../extent_io.h" -#include "../disk-io.h" - -static int test_btrfs_split_item(void) -{ - struct btrfs_path *path; - struct btrfs_root *root; - struct extent_buffer *eb; - struct btrfs_item *item; - char *value = "mary had a little lamb"; - char *split1 = "mary had a little"; - char *split2 = " lamb"; - char *split3 = "mary"; - char *split4 = " had a little"; - char buf[32]; - struct btrfs_key key; - u32 value_len = strlen(value); - int ret = 0; - - test_msg("Running btrfs_split_item tests\n"); - - root = btrfs_alloc_dummy_root(); - if (IS_ERR(root)) { - test_msg("Could not allocate root\n"); - return PTR_ERR(root); - } - - path = btrfs_alloc_path(); - if (!path) { - test_msg("Could not allocate path\n"); - kfree(root); - return -ENOMEM; - } - - path->nodes[0] = eb = alloc_dummy_extent_buffer(0, 4096); - if (!eb) { - test_msg("Could not allocate dummy buffer\n"); - ret = -ENOMEM; - goto out; - } - path->slots[0] = 0; - - key.objectid = 0; - key.type = BTRFS_EXTENT_CSUM_KEY; - key.offset = 0; - - setup_items_for_insert(root, path, &key, &value_len, value_len, - value_len + sizeof(struct btrfs_item), 1); - item = btrfs_item_nr(0); - write_extent_buffer(eb, value, btrfs_item_ptr_offset(eb, 0), - value_len); - - key.offset = 3; - - /* - * Passing NULL trans here should be safe because we have plenty of - * space in this leaf to split the item without having to split the - * leaf. - */ - ret = btrfs_split_item(NULL, root, path, &key, 17); - if (ret) { - test_msg("Split item failed %d\n", ret); - goto out; - } - - /* - * Read the first slot, it should have the original key and contain only - * 'mary had a little' - */ - btrfs_item_key_to_cpu(eb, &key, 0); - if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY || - key.offset != 0) { - test_msg("Invalid key at slot 0\n"); - ret = -EINVAL; - goto out; - } - - item = btrfs_item_nr(0); - if (btrfs_item_size(eb, item) != strlen(split1)) { - test_msg("Invalid len in the first split\n"); - ret = -EINVAL; - goto out; - } - - read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0), - strlen(split1)); - if (memcmp(buf, split1, strlen(split1))) { - test_msg("Data in the buffer doesn't match what it should " - "in the first split have='%.*s' want '%s'\n", - (int)strlen(split1), buf, split1); - ret = -EINVAL; - goto out; - } - - btrfs_item_key_to_cpu(eb, &key, 1); - if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY || - key.offset != 3) { - test_msg("Invalid key at slot 1\n"); - ret = -EINVAL; - goto out; - } - - item = btrfs_item_nr(1); - if (btrfs_item_size(eb, item) != strlen(split2)) { - test_msg("Invalid len in the second split\n"); - ret = -EINVAL; - goto out; - } - - read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1), - strlen(split2)); - if (memcmp(buf, split2, strlen(split2))) { - test_msg("Data in the buffer doesn't match what it should " - "in the second split\n"); - ret = -EINVAL; - goto out; - } - - key.offset = 1; - /* Do it again so we test memmoving the other items in the leaf */ - ret = btrfs_split_item(NULL, root, path, &key, 4); - if (ret) { - test_msg("Second split item failed %d\n", ret); - goto out; - } - - btrfs_item_key_to_cpu(eb, &key, 0); - if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY || - key.offset != 0) { - test_msg("Invalid key at slot 0\n"); - ret = -EINVAL; - goto out; - } - - item = btrfs_item_nr(0); - if (btrfs_item_size(eb, item) != strlen(split3)) { - test_msg("Invalid len in the first split\n"); - ret = -EINVAL; - goto out; - } - - read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0), - strlen(split3)); - if (memcmp(buf, split3, strlen(split3))) { - test_msg("Data in the buffer doesn't match what it should " - "in the third split"); - ret = -EINVAL; - goto out; - } - - btrfs_item_key_to_cpu(eb, &key, 1); - if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY || - key.offset != 1) { - test_msg("Invalid key at slot 1\n"); - ret = -EINVAL; - goto out; - } - - item = btrfs_item_nr(1); - if (btrfs_item_size(eb, item) != strlen(split4)) { - test_msg("Invalid len in the second split\n"); - ret = -EINVAL; - goto out; - } - - read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1), - strlen(split4)); - if (memcmp(buf, split4, strlen(split4))) { - test_msg("Data in the buffer doesn't match what it should " - "in the fourth split\n"); - ret = -EINVAL; - goto out; - } - - btrfs_item_key_to_cpu(eb, &key, 2); - if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY || - key.offset != 3) { - test_msg("Invalid key at slot 2\n"); - ret = -EINVAL; - goto out; - } - - item = btrfs_item_nr(2); - if (btrfs_item_size(eb, item) != strlen(split2)) { - test_msg("Invalid len in the second split\n"); - ret = -EINVAL; - goto out; - } - - read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 2), - strlen(split2)); - if (memcmp(buf, split2, strlen(split2))) { - test_msg("Data in the buffer doesn't match what it should " - "in the last chunk\n"); - ret = -EINVAL; - goto out; - } -out: - btrfs_free_path(path); - kfree(root); - return ret; -} - -int btrfs_test_extent_buffer_operations(void) -{ - test_msg("Running extent buffer operation tests"); - return test_btrfs_split_item(); -} diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c deleted file mode 100644 index 7e99c2f..0000000 --- a/fs/btrfs/tests/extent-io-tests.c +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright (C) 2013 Fusion IO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#include <linux/pagemap.h> -#include <linux/sched.h> -#include "btrfs-tests.h" -#include "../extent_io.h" - -#define PROCESS_UNLOCK (1 << 0) -#define PROCESS_RELEASE (1 << 1) -#define PROCESS_TEST_LOCKED (1 << 2) - -static noinline int process_page_range(struct inode *inode, u64 start, u64 end, - unsigned long flags) -{ - int ret; - struct page *pages[16]; - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - unsigned long nr_pages = end_index - index + 1; - int i; - int count = 0; - int loops = 0; - - while (nr_pages > 0) { - ret = find_get_pages_contig(inode->i_mapping, index, - min_t(unsigned long, nr_pages, - ARRAY_SIZE(pages)), pages); - for (i = 0; i < ret; i++) { - if (flags & PROCESS_TEST_LOCKED && - !PageLocked(pages[i])) - count++; - if (flags & PROCESS_UNLOCK && PageLocked(pages[i])) - unlock_page(pages[i]); - page_cache_release(pages[i]); - if (flags & PROCESS_RELEASE) - page_cache_release(pages[i]); - } - nr_pages -= ret; - index += ret; - cond_resched(); - loops++; - if (loops > 100000) { - printk(KERN_ERR "stuck in a loop, start %Lu, end %Lu, nr_pages %lu, ret %d\n", start, end, nr_pages, ret); - break; - } - } - return count; -} - -static int test_find_delalloc(void) -{ - struct inode *inode; - struct extent_io_tree tmp; - struct page *page; - struct page *locked_page = NULL; - unsigned long index = 0; - u64 total_dirty = 256 * 1024 * 1024; - u64 max_bytes = 128 * 1024 * 1024; - u64 start, end, test_start; - u64 found; - int ret = -EINVAL; - - inode = btrfs_new_test_inode(); - if (!inode) { - test_msg("Failed to allocate test inode\n"); - return -ENOMEM; - } - - extent_io_tree_init(&tmp, &inode->i_data); - - /* - * First go through and create and mark all of our pages dirty, we pin - * everything to make sure our pages don't get evicted and screw up our - * test. - */ - for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) { - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); - if (!page) { - test_msg("Failed to allocate test page\n"); - ret = -ENOMEM; - goto out; - } - SetPageDirty(page); - if (index) { - unlock_page(page); - } else { - page_cache_get(page); - locked_page = page; - } - } - - /* Test this scenario - * |--- delalloc ---| - * |--- search ---| - */ - set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS); - start = 0; - end = 0; - found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, - &end, max_bytes); - if (!found) { - test_msg("Should have found at least one delalloc\n"); - goto out_bits; - } - if (start != 0 || end != 4095) { - test_msg("Expected start 0 end 4095, got start %Lu end %Lu\n", - start, end); - goto out_bits; - } - unlock_extent(&tmp, start, end); - unlock_page(locked_page); - page_cache_release(locked_page); - - /* - * Test this scenario - * - * |--- delalloc ---| - * |--- search ---| - */ - test_start = 64 * 1024 * 1024; - locked_page = find_lock_page(inode->i_mapping, - test_start >> PAGE_CACHE_SHIFT); - if (!locked_page) { - test_msg("Couldn't find the locked page\n"); - goto out_bits; - } - set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS); - start = test_start; - end = 0; - found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, - &end, max_bytes); - if (!found) { - test_msg("Couldn't find delalloc in our range\n"); - goto out_bits; - } - if (start != test_start || end != max_bytes - 1) { - test_msg("Expected start %Lu end %Lu, got start %Lu, end " - "%Lu\n", test_start, max_bytes - 1, start, end); - goto out_bits; - } - if (process_page_range(inode, start, end, - PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) { - test_msg("There were unlocked pages in the range\n"); - goto out_bits; - } - unlock_extent(&tmp, start, end); - /* locked_page was unlocked above */ - page_cache_release(locked_page); - - /* - * Test this scenario - * |--- delalloc ---| - * |--- search ---| - */ - test_start = max_bytes + 4096; - locked_page = find_lock_page(inode->i_mapping, test_start >> - PAGE_CACHE_SHIFT); - if (!locked_page) { - test_msg("Could'nt find the locked page\n"); - goto out_bits; - } - start = test_start; - end = 0; - found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, - &end, max_bytes); - if (found) { - test_msg("Found range when we shouldn't have\n"); - goto out_bits; - } - if (end != (u64)-1) { - test_msg("Did not return the proper end offset\n"); - goto out_bits; - } - - /* - * Test this scenario - * [------- delalloc -------| - * [max_bytes]|-- search--| - * - * We are re-using our test_start from above since it works out well. - */ - set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS); - start = test_start; - end = 0; - found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, - &end, max_bytes); - if (!found) { - test_msg("Didn't find our range\n"); - goto out_bits; - } - if (start != test_start || end != total_dirty - 1) { - test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n", - test_start, total_dirty - 1, start, end); - goto out_bits; - } - if (process_page_range(inode, start, end, - PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) { - test_msg("Pages in range were not all locked\n"); - goto out_bits; - } - unlock_extent(&tmp, start, end); - - /* - * Now to test where we run into a page that is no longer dirty in the - * range we want to find. - */ - page = find_get_page(inode->i_mapping, (max_bytes + (1 * 1024 * 1024)) - >> PAGE_CACHE_SHIFT); - if (!page) { - test_msg("Couldn't find our page\n"); - goto out_bits; - } - ClearPageDirty(page); - page_cache_release(page); - - /* We unlocked it in the previous test */ - lock_page(locked_page); - start = test_start; - end = 0; - /* - * Currently if we fail to find dirty pages in the delalloc range we - * will adjust max_bytes down to PAGE_CACHE_SIZE and then re-search. If - * this changes at any point in the future we will need to fix this - * tests expected behavior. - */ - found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, - &end, max_bytes); - if (!found) { - test_msg("Didn't find our range\n"); - goto out_bits; - } - if (start != test_start && end != test_start + PAGE_CACHE_SIZE - 1) { - test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n", - test_start, test_start + PAGE_CACHE_SIZE - 1, start, - end); - goto out_bits; - } - if (process_page_range(inode, start, end, PROCESS_TEST_LOCKED | - PROCESS_UNLOCK)) { - test_msg("Pages in range were not all locked\n"); - goto out_bits; - } - ret = 0; -out_bits: - clear_extent_bits(&tmp, 0, total_dirty - 1, - (unsigned long)-1, GFP_NOFS); -out: - if (locked_page) - page_cache_release(locked_page); - process_page_range(inode, 0, total_dirty - 1, - PROCESS_UNLOCK | PROCESS_RELEASE); - iput(inode); - return ret; -} - -int btrfs_test_extent_io(void) -{ - test_msg("Running find delalloc tests\n"); - return test_find_delalloc(); -} diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c deleted file mode 100644 index 397d1f9..0000000 --- a/fs/btrfs/tests/inode-tests.c +++ /dev/null @@ -1,955 +0,0 @@ -/* - * Copyright (C) 2013 Fusion IO. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#include "btrfs-tests.h" -#include "../ctree.h" -#include "../btrfs_inode.h" -#include "../disk-io.h" -#include "../extent_io.h" -#include "../volumes.h" - -static struct btrfs_fs_info *alloc_dummy_fs_info(void) -{ - struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info), - GFP_NOFS); - if (!fs_info) - return fs_info; - fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices), - GFP_NOFS); - if (!fs_info->fs_devices) { - kfree(fs_info); - return NULL; - } - return fs_info; -} -static void free_dummy_root(struct btrfs_root *root) -{ - if (!root) - return; - if (root->fs_info) { - kfree(root->fs_info->fs_devices); - kfree(root->fs_info); - } - if (root->node) - free_extent_buffer(root->node); - kfree(root); -} - -static void insert_extent(struct btrfs_root *root, u64 start, u64 len, - u64 ram_bytes, u64 offset, u64 disk_bytenr, - u64 disk_len, u32 type, u8 compression, int slot) -{ - struct btrfs_path path; - struct btrfs_file_extent_item *fi; - struct extent_buffer *leaf = root->node; - struct btrfs_key key; - u32 value_len = sizeof(struct btrfs_file_extent_item); - - if (type == BTRFS_FILE_EXTENT_INLINE) - value_len += len; - memset(&path, 0, sizeof(path)); - - path.nodes[0] = leaf; - path.slots[0] = slot; - - key.objectid = BTRFS_FIRST_FREE_OBJECTID; - key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = start; - - setup_items_for_insert(root, &path, &key, &value_len, value_len, - value_len + sizeof(struct btrfs_item), 1); - fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(leaf, fi, 1); - btrfs_set_file_extent_type(leaf, fi, type); - btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr); - btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_len); - btrfs_set_file_extent_offset(leaf, fi, offset); - btrfs_set_file_extent_num_bytes(leaf, fi, len); - btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes); - btrfs_set_file_extent_compression(leaf, fi, compression); - btrfs_set_file_extent_encryption(leaf, fi, 0); - btrfs_set_file_extent_other_encoding(leaf, fi, 0); -} - -static void insert_inode_item_key(struct btrfs_root *root) -{ - struct btrfs_path path; - struct extent_buffer *leaf = root->node; - struct btrfs_key key; - u32 value_len = 0; - - memset(&path, 0, sizeof(path)); - - path.nodes[0] = leaf; - path.slots[0] = 0; - - key.objectid = BTRFS_INODE_ITEM_KEY; - key.type = BTRFS_INODE_ITEM_KEY; - key.offset = 0; - - setup_items_for_insert(root, &path, &key, &value_len, value_len, - value_len + sizeof(struct btrfs_item), 1); -} - -/* - * Build the most complicated map of extents the earth has ever seen. We want - * this so we can test all of the corner cases of btrfs_get_extent. Here is a - * diagram of how the extents will look though this may not be possible we still - * want to make sure everything acts normally (the last number is not inclusive) - * - * [0 - 5][5 - 6][6 - 10][10 - 4096][ 4096 - 8192 ][8192 - 12288] - * [hole ][inline][ hole ][ regular ][regular1 split][ hole ] - * - * [ 12288 - 20480][20480 - 24576][ 24576 - 28672 ][28672 - 36864][36864 - 45056] - * [regular1 split][ prealloc1 ][prealloc1 written][ prealloc1 ][ compressed ] - * - * [45056 - 49152][49152-53248][53248-61440][61440-65536][ 65536+81920 ] - * [ compressed1 ][ regular ][compressed1][ regular ][ hole but no extent] - * - * [81920-86016] - * [ regular ] - */ -static void setup_file_extents(struct btrfs_root *root) -{ - int slot = 0; - u64 disk_bytenr = 1 * 1024 * 1024; - u64 offset = 0; - - /* First we want a hole */ - insert_extent(root, offset, 5, 5, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0, - slot); - slot++; - offset += 5; - - /* - * Now we want an inline extent, I don't think this is possible but hey - * why not? Also keep in mind if we have an inline extent it counts as - * the whole first page. If we were to expand it we would have to cow - * and we wouldn't have an inline extent anymore. - */ - insert_extent(root, offset, 1, 1, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0, - slot); - slot++; - offset = 4096; - - /* Now another hole */ - insert_extent(root, offset, 4, 4, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0, - slot); - slot++; - offset += 4; - - /* Now for a regular extent */ - insert_extent(root, offset, 4095, 4095, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, 0, slot); - slot++; - disk_bytenr += 4096; - offset += 4095; - - /* - * Now for 3 extents that were split from a hole punch so we test - * offsets properly. - */ - insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384, - BTRFS_FILE_EXTENT_REG, 0, slot); - slot++; - offset += 4096; - insert_extent(root, offset, 4096, 4096, 0, 0, 0, BTRFS_FILE_EXTENT_REG, - 0, slot); - slot++; - offset += 4096; - insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384, - BTRFS_FILE_EXTENT_REG, 0, slot); - slot++; - offset += 8192; - disk_bytenr += 16384; - - /* Now for a unwritten prealloc extent */ - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_PREALLOC, 0, slot); - slot++; - offset += 4096; - - /* - * We want to jack up disk_bytenr a little more so the em stuff doesn't - * merge our records. - */ - disk_bytenr += 8192; - - /* - * Now for a partially written prealloc extent, basically the same as - * the hole punch example above. Ram_bytes never changes when you mark - * extents written btw. - */ - insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384, - BTRFS_FILE_EXTENT_PREALLOC, 0, slot); - slot++; - offset += 4096; - insert_extent(root, offset, 4096, 16384, 4096, disk_bytenr, 16384, - BTRFS_FILE_EXTENT_REG, 0, slot); - slot++; - offset += 4096; - insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384, - BTRFS_FILE_EXTENT_PREALLOC, 0, slot); - slot++; - offset += 8192; - disk_bytenr += 16384; - - /* Now a normal compressed extent */ - insert_extent(root, offset, 8192, 8192, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); - slot++; - offset += 8192; - /* No merges */ - disk_bytenr += 8192; - - /* Now a split compressed extent */ - insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); - slot++; - offset += 4096; - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr + 4096, 4096, - BTRFS_FILE_EXTENT_REG, 0, slot); - slot++; - offset += 4096; - insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot); - slot++; - offset += 8192; - disk_bytenr += 8192; - - /* Now extents that have a hole but no hole extent */ - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, 0, slot); - slot++; - offset += 16384; - disk_bytenr += 4096; - insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096, - BTRFS_FILE_EXTENT_REG, 0, slot); -} - -static unsigned long prealloc_only = 0; -static unsigned long compressed_only = 0; -static unsigned long vacancy_only = 0; - -static noinline int test_btrfs_get_extent(void) -{ - struct inode *inode = NULL; - struct btrfs_root *root = NULL; - struct extent_map *em = NULL; - u64 orig_start; - u64 disk_bytenr; - u64 offset; - int ret = -ENOMEM; - - inode = btrfs_new_test_inode(); - if (!inode) { - test_msg("Couldn't allocate inode\n"); - return ret; - } - - BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; - BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; - BTRFS_I(inode)->location.offset = 0; - - root = btrfs_alloc_dummy_root(); - if (IS_ERR(root)) { - test_msg("Couldn't allocate root\n"); - goto out; - } - - /* - * We do this since btrfs_get_extent wants to assign em->bdev to - * root->fs_info->fs_devices->latest_bdev. - */ - root->fs_info = alloc_dummy_fs_info(); - if (!root->fs_info) { - test_msg("Couldn't allocate dummy fs info\n"); - goto out; - } - - root->node = alloc_dummy_extent_buffer(0, 4096); - if (!root->node) { - test_msg("Couldn't allocate dummy buffer\n"); - goto out; - } - - /* - * We will just free a dummy node if it's ref count is 2 so we need an - * extra ref so our searches don't accidently release our page. - */ - extent_buffer_get(root->node); - btrfs_set_header_nritems(root->node, 0); - btrfs_set_header_level(root->node, 0); - ret = -EINVAL; - - /* First with no extents */ - BTRFS_I(inode)->root = root; - em = btrfs_get_extent(inode, NULL, 0, 0, 4096, 0); - if (IS_ERR(em)) { - em = NULL; - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start != EXTENT_MAP_HOLE) { - test_msg("Expected a hole, got %llu\n", em->block_start); - goto out; - } - if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { - test_msg("Vacancy flag wasn't set properly\n"); - goto out; - } - free_extent_map(em); - btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); - - /* - * All of the magic numbers are based on the mapping setup in - * setup_file_extents, so if you change anything there you need to - * update the comment and update the expected values below. - */ - setup_file_extents(root); - - em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start != EXTENT_MAP_HOLE) { - test_msg("Expected a hole, got %llu\n", em->block_start); - goto out; - } - if (em->start != 0 || em->len != 5) { - test_msg("Unexpected extent wanted start 0 len 5, got start " - "%llu len %llu\n", em->start, em->len); - goto out; - } - if (em->flags != 0) { - test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); - goto out; - } - offset = em->start + em->len; - free_extent_map(em); - - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start != EXTENT_MAP_INLINE) { - test_msg("Expected an inline, got %llu\n", em->block_start); - goto out; - } - if (em->start != offset || em->len != 4091) { - test_msg("Unexpected extent wanted start %llu len 1, got start " - "%llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != 0) { - test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); - goto out; - } - /* - * We don't test anything else for inline since it doesn't get set - * unless we have a page for it to write into. Maybe we should change - * this? - */ - offset = em->start + em->len; - free_extent_map(em); - - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start != EXTENT_MAP_HOLE) { - test_msg("Expected a hole, got %llu\n", em->block_start); - goto out; - } - if (em->start != offset || em->len != 4) { - test_msg("Unexpected extent wanted start %llu len 4, got start " - "%llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != 0) { - test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); - goto out; - } - offset = em->start + em->len; - free_extent_map(em); - - /* Regular extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start >= EXTENT_MAP_LAST_BYTE) { - test_msg("Expected a real extent, got %llu\n", em->block_start); - goto out; - } - if (em->start != offset || em->len != 4095) { - test_msg("Unexpected extent wanted start %llu len 4095, got " - "start %llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != 0) { - test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); - goto out; - } - if (em->orig_start != em->start) { - test_msg("Wrong orig offset, want %llu, have %llu\n", em->start, - em->orig_start); - goto out; - } - offset = em->start + em->len; - free_extent_map(em); - - /* The next 3 are split extents */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start >= EXTENT_MAP_LAST_BYTE) { - test_msg("Expected a real extent, got %llu\n", em->block_start); - goto out; - } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != 0) { - test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); - goto out; - } - if (em->orig_start != em->start) { - test_msg("Wrong orig offset, want %llu, have %llu\n", em->start, - em->orig_start); - goto out; - } - disk_bytenr = em->block_start; - orig_start = em->start; - offset = em->start + em->len; - free_extent_map(em); - - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start != EXTENT_MAP_HOLE) { - test_msg("Expected a hole, got %llu\n", em->block_start); - goto out; - } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != 0) { - test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); - goto out; - } - offset = em->start + em->len; - free_extent_map(em); - - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start >= EXTENT_MAP_LAST_BYTE) { - test_msg("Expected a real extent, got %llu\n", em->block_start); - goto out; - } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != 0) { - test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); - goto out; - } - if (em->orig_start != orig_start) { - test_msg("Wrong orig offset, want %llu, have %llu\n", - orig_start, em->orig_start); - goto out; - } - disk_bytenr += (em->start - orig_start); - if (em->block_start != disk_bytenr) { - test_msg("Wrong block start, want %llu, have %llu\n", - disk_bytenr, em->block_start); - goto out; - } - offset = em->start + em->len; - free_extent_map(em); - - /* Prealloc extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start >= EXTENT_MAP_LAST_BYTE) { - test_msg("Expected a real extent, got %llu\n", em->block_start); - goto out; - } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != prealloc_only) { - test_msg("Unexpected flags set, want %lu have %lu\n", - prealloc_only, em->flags); - goto out; - } - if (em->orig_start != em->start) { - test_msg("Wrong orig offset, want %llu, have %llu\n", em->start, - em->orig_start); - goto out; - } - offset = em->start + em->len; - free_extent_map(em); - - /* The next 3 are a half written prealloc extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start >= EXTENT_MAP_LAST_BYTE) { - test_msg("Expected a real extent, got %llu\n", em->block_start); - goto out; - } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != prealloc_only) { - test_msg("Unexpected flags set, want %lu have %lu\n", - prealloc_only, em->flags); - goto out; - } - if (em->orig_start != em->start) { - test_msg("Wrong orig offset, want %llu, have %llu\n", em->start, - em->orig_start); - goto out; - } - disk_bytenr = em->block_start; - orig_start = em->start; - offset = em->start + em->len; - free_extent_map(em); - - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start >= EXTENT_MAP_HOLE) { - test_msg("Expected a real extent, got %llu\n", em->block_start); - goto out; - } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != 0) { - test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); - goto out; - } - if (em->orig_start != orig_start) { - test_msg("Unexpected orig offset, wanted %llu, have %llu\n", - orig_start, em->orig_start); - goto out; - } - if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) { - test_msg("Unexpected block start, wanted %llu, have %llu\n", - disk_bytenr + (em->start - em->orig_start), - em->block_start); - goto out; - } - offset = em->start + em->len; - free_extent_map(em); - - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start >= EXTENT_MAP_LAST_BYTE) { - test_msg("Expected a real extent, got %llu\n", em->block_start); - goto out; - } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != prealloc_only) { - test_msg("Unexpected flags set, want %lu have %lu\n", - prealloc_only, em->flags); - goto out; - } - if (em->orig_start != orig_start) { - test_msg("Wrong orig offset, want %llu, have %llu\n", orig_start, - em->orig_start); - goto out; - } - if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) { - test_msg("Unexpected block start, wanted %llu, have %llu\n", - disk_bytenr + (em->start - em->orig_start), - em->block_start); - goto out; - } - offset = em->start + em->len; - free_extent_map(em); - - /* Now for the compressed extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start >= EXTENT_MAP_LAST_BYTE) { - test_msg("Expected a real extent, got %llu\n", em->block_start); - goto out; - } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != compressed_only) { - test_msg("Unexpected flags set, want %lu have %lu\n", - compressed_only, em->flags); - goto out; - } - if (em->orig_start != em->start) { - test_msg("Wrong orig offset, want %llu, have %llu\n", - em->start, em->orig_start); - goto out; - } - if (em->compress_type != BTRFS_COMPRESS_ZLIB) { - test_msg("Unexpected compress type, wanted %d, got %d\n", - BTRFS_COMPRESS_ZLIB, em->compress_type); - goto out; - } - offset = em->start + em->len; - free_extent_map(em); - - /* Split compressed extent */ - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start >= EXTENT_MAP_LAST_BYTE) { - test_msg("Expected a real extent, got %llu\n", em->block_start); - goto out; - } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != compressed_only) { - test_msg("Unexpected flags set, want %lu have %lu\n", - compressed_only, em->flags); - goto out; - } - if (em->orig_start != em->start) { - test_msg("Wrong orig offset, want %llu, have %llu\n", - em->start, em->orig_start); - goto out; - } - if (em->compress_type != BTRFS_COMPRESS_ZLIB) { - test_msg("Unexpected compress type, wanted %d, got %d\n", - BTRFS_COMPRESS_ZLIB, em->compress_type); - goto out; - } - disk_bytenr = em->block_start; - orig_start = em->start; - offset = em->start + em->len; - free_extent_map(em); - - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start >= EXTENT_MAP_LAST_BYTE) { - test_msg("Expected a real extent, got %llu\n", em->block_start); - goto out; - } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != 0) { - test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); - goto out; - } - if (em->orig_start != em->start) { - test_msg("Wrong orig offset, want %llu, have %llu\n", em->start, - em->orig_start); - goto out; - } - offset = em->start + em->len; - free_extent_map(em); - - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start != disk_bytenr) { - test_msg("Block start does not match, want %llu got %llu\n", - disk_bytenr, em->block_start); - goto out; - } - if (em->start != offset || em->len != 8192) { - test_msg("Unexpected extent wanted start %llu len 8192, got " - "start %llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != compressed_only) { - test_msg("Unexpected flags set, want %lu have %lu\n", - compressed_only, em->flags); - goto out; - } - if (em->orig_start != orig_start) { - test_msg("Wrong orig offset, want %llu, have %llu\n", - em->start, orig_start); - goto out; - } - if (em->compress_type != BTRFS_COMPRESS_ZLIB) { - test_msg("Unexpected compress type, wanted %d, got %d\n", - BTRFS_COMPRESS_ZLIB, em->compress_type); - goto out; - } - offset = em->start + em->len; - free_extent_map(em); - - /* A hole between regular extents but no hole extent */ - em = btrfs_get_extent(inode, NULL, 0, offset + 6, 4096, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start >= EXTENT_MAP_LAST_BYTE) { - test_msg("Expected a real extent, got %llu\n", em->block_start); - goto out; - } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != 0) { - test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); - goto out; - } - if (em->orig_start != em->start) { - test_msg("Wrong orig offset, want %llu, have %llu\n", em->start, - em->orig_start); - goto out; - } - offset = em->start + em->len; - free_extent_map(em); - - em = btrfs_get_extent(inode, NULL, 0, offset, 4096 * 1024, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start != EXTENT_MAP_HOLE) { - test_msg("Expected a hole extent, got %llu\n", em->block_start); - goto out; - } - /* - * Currently we just return a length that we requested rather than the - * length of the actual hole, if this changes we'll have to change this - * test. - */ - if (em->start != offset || em->len != 12288) { - test_msg("Unexpected extent wanted start %llu len 12288, got " - "start %llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != vacancy_only) { - test_msg("Unexpected flags set, want %lu have %lu\n", - vacancy_only, em->flags); - goto out; - } - if (em->orig_start != em->start) { - test_msg("Wrong orig offset, want %llu, have %llu\n", em->start, - em->orig_start); - goto out; - } - offset = em->start + em->len; - free_extent_map(em); - - em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start >= EXTENT_MAP_LAST_BYTE) { - test_msg("Expected a real extent, got %llu\n", em->block_start); - goto out; - } - if (em->start != offset || em->len != 4096) { - test_msg("Unexpected extent wanted start %llu len 4096, got " - "start %llu len %llu\n", offset, em->start, em->len); - goto out; - } - if (em->flags != 0) { - test_msg("Unexpected flags set, want 0 have %lu\n", em->flags); - goto out; - } - if (em->orig_start != em->start) { - test_msg("Wrong orig offset, want %llu, have %llu\n", em->start, - em->orig_start); - goto out; - } - ret = 0; -out: - if (!IS_ERR(em)) - free_extent_map(em); - iput(inode); - free_dummy_root(root); - return ret; -} - -static int test_hole_first(void) -{ - struct inode *inode = NULL; - struct btrfs_root *root = NULL; - struct extent_map *em = NULL; - int ret = -ENOMEM; - - inode = btrfs_new_test_inode(); - if (!inode) { - test_msg("Couldn't allocate inode\n"); - return ret; - } - - BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; - BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; - BTRFS_I(inode)->location.offset = 0; - - root = btrfs_alloc_dummy_root(); - if (IS_ERR(root)) { - test_msg("Couldn't allocate root\n"); - goto out; - } - - root->fs_info = alloc_dummy_fs_info(); - if (!root->fs_info) { - test_msg("Couldn't allocate dummy fs info\n"); - goto out; - } - - root->node = alloc_dummy_extent_buffer(0, 4096); - if (!root->node) { - test_msg("Couldn't allocate dummy buffer\n"); - goto out; - } - - extent_buffer_get(root->node); - btrfs_set_header_nritems(root->node, 0); - btrfs_set_header_level(root->node, 0); - BTRFS_I(inode)->root = root; - ret = -EINVAL; - - /* - * Need a blank inode item here just so we don't confuse - * btrfs_get_extent. - */ - insert_inode_item_key(root); - insert_extent(root, 4096, 4096, 4096, 0, 4096, 4096, - BTRFS_FILE_EXTENT_REG, 0, 1); - em = btrfs_get_extent(inode, NULL, 0, 0, 8192, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start != EXTENT_MAP_HOLE) { - test_msg("Expected a hole, got %llu\n", em->block_start); - goto out; - } - if (em->start != 0 || em->len != 4096) { - test_msg("Unexpected extent wanted start 0 len 4096, got start " - "%llu len %llu\n", em->start, em->len); - goto out; - } - if (em->flags != vacancy_only) { - test_msg("Wrong flags, wanted %lu, have %lu\n", vacancy_only, - em->flags); - goto out; - } - free_extent_map(em); - - em = btrfs_get_extent(inode, NULL, 0, 4096, 8192, 0); - if (IS_ERR(em)) { - test_msg("Got an error when we shouldn't have\n"); - goto out; - } - if (em->block_start != 4096) { - test_msg("Expected a real extent, got %llu\n", em->block_start); - goto out; - } - if (em->start != 4096 || em->len != 4096) { - test_msg("Unexpected extent wanted start 4096 len 4096, got " - "start %llu len %llu\n", em->start, em->len); - goto out; - } - if (em->flags != 0) { - test_msg("Unexpected flags set, wanted 0 got %lu\n", - em->flags); - goto out; - } - ret = 0; -out: - if (!IS_ERR(em)) - free_extent_map(em); - iput(inode); - free_dummy_root(root); - return ret; -} - -int btrfs_test_inodes(void) -{ - int ret; - - set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only); - set_bit(EXTENT_FLAG_VACANCY, &vacancy_only); - set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only); - - test_msg("Running btrfs_get_extent tests\n"); - ret = test_btrfs_get_extent(); - if (ret) - return ret; - test_msg("Running hole first btrfs_get_extent test\n"); - return test_hole_first(); -} diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c6a872a..8c81bdc 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -57,7 +57,7 @@ static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { __TRANS_JOIN_NOLOCK), }; -void btrfs_put_transaction(struct btrfs_transaction *transaction) +static void put_transaction(struct btrfs_transaction *transaction) { WARN_ON(atomic_read(&transaction->use_count) == 0); if (atomic_dec_and_test(&transaction->use_count)) { @@ -332,7 +332,7 @@ static void wait_current_trans(struct btrfs_root *root) wait_event(root->fs_info->transaction_wait, cur_trans->state >= TRANS_STATE_UNBLOCKED || cur_trans->aborted); - btrfs_put_transaction(cur_trans); + put_transaction(cur_trans); } else { spin_unlock(&root->fs_info->trans_lock); } @@ -353,17 +353,6 @@ static int may_wait_transaction(struct btrfs_root *root, int type) return 0; } -static inline bool need_reserve_reloc_root(struct btrfs_root *root) -{ - if (!root->fs_info->reloc_ctl || - !root->ref_cows || - root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || - root->reloc_root) - return false; - - return true; -} - static struct btrfs_trans_handle * start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, enum btrfs_reserve_flush_enum flush) @@ -371,9 +360,8 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, struct btrfs_trans_handle *h; struct btrfs_transaction *cur_trans; u64 num_bytes = 0; - u64 qgroup_reserved = 0; - bool reloc_reserved = false; int ret; + u64 qgroup_reserved = 0; if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) return ERR_PTR(-EROFS); @@ -402,14 +390,6 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, } num_bytes = btrfs_calc_trans_metadata_size(root, num_items); - /* - * Do the reservation for the relocation root creation - */ - if (unlikely(need_reserve_reloc_root(root))) { - num_bytes += root->nodesize; - reloc_reserved = true; - } - ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv, num_bytes, flush); @@ -471,7 +451,6 @@ again: h->delayed_ref_elem.seq = 0; h->type = type; h->allocating_chunk = false; - h->reloc_reserved = false; INIT_LIST_HEAD(&h->qgroup_ref_list); INIT_LIST_HEAD(&h->new_bgs); @@ -487,7 +466,6 @@ again: h->transid, num_bytes, 1); h->block_rsv = &root->fs_info->trans_block_rsv; h->bytes_reserved = num_bytes; - h->reloc_reserved = reloc_reserved; } h->qgroup_reserved = qgroup_reserved; @@ -632,7 +610,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) } wait_for_commit(root, cur_trans); - btrfs_put_transaction(cur_trans); + put_transaction(cur_trans); out: return ret; } @@ -757,7 +735,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, smp_mb(); if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); - btrfs_put_transaction(cur_trans); + put_transaction(cur_trans); if (current->journal_info == trans) current->journal_info = NULL; @@ -766,10 +744,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, btrfs_run_delayed_iputs(root); if (trans->aborted || - test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) { - wake_up_process(info->transaction_kthread); + test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) err = -EIO; - } assert_qgroups_uptodate(trans); kmem_cache_free(btrfs_trans_handle_cachep, trans); @@ -972,19 +948,16 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, return ret; ret = btrfs_run_dev_stats(trans, root->fs_info); - if (ret) - return ret; + WARN_ON(ret); ret = btrfs_run_dev_replace(trans, root->fs_info); - if (ret) - return ret; + WARN_ON(ret); + ret = btrfs_run_qgroups(trans, root->fs_info); - if (ret) - return ret; + BUG_ON(ret); /* run_qgroups might have added some more refs */ ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - if (ret) - return ret; + BUG_ON(ret); while (!list_empty(&fs_info->dirty_cowonly_roots)) { next = fs_info->dirty_cowonly_roots.next; @@ -1480,7 +1453,7 @@ static void do_async_commit(struct work_struct *work) * We've got freeze protection passed with the transaction. * Tell lockdep about it. */ - if (ac->newtrans->type & __TRANS_FREEZABLE) + if (ac->newtrans->type < TRANS_JOIN_NOLOCK) rwsem_acquire_read( &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], 0, 1, _THIS_IP_); @@ -1521,7 +1494,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, * Tell lockdep we've released the freeze rwsem, since the * async commit thread will be the one to unlock it. */ - if (ac->newtrans->type & __TRANS_FREEZABLE) + if (trans->type < TRANS_JOIN_NOLOCK) rwsem_release( &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], 1, _THIS_IP_); @@ -1537,7 +1510,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, if (current->journal_info == trans) current->journal_info = NULL; - btrfs_put_transaction(cur_trans); + put_transaction(cur_trans); return 0; } @@ -1579,10 +1552,8 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, root->fs_info->running_transaction = NULL; spin_unlock(&root->fs_info->trans_lock); - if (trans->type & __TRANS_FREEZABLE) - sb_end_intwrite(root->fs_info->sb); - btrfs_put_transaction(cur_trans); - btrfs_put_transaction(cur_trans); + put_transaction(cur_trans); + put_transaction(cur_trans); trace_btrfs_transaction_commit(root); @@ -1600,19 +1571,15 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, int ret; ret = btrfs_run_delayed_items(trans, root); + if (ret) + return ret; + /* * running the delayed items may have added new refs. account * them now so that they hinder processing of more delayed refs * as little as possible. */ - if (ret) { - btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); - return ret; - } - - ret = btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); - if (ret) - return ret; + btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); /* * rename don't use btrfs_join_transaction, so, once we @@ -1629,14 +1596,14 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) { if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) - return btrfs_start_delalloc_roots(fs_info, 1); + return btrfs_start_all_delalloc_inodes(fs_info, 1); return 0; } static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) { if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) - btrfs_wait_ordered_roots(fs_info, -1); + btrfs_wait_all_ordered_extents(fs_info); } int btrfs_commit_transaction(struct btrfs_trans_handle *trans, @@ -1702,7 +1669,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wait_for_commit(root, cur_trans); - btrfs_put_transaction(cur_trans); + put_transaction(cur_trans); return ret; } @@ -1719,7 +1686,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wait_for_commit(root, prev_trans); - btrfs_put_transaction(prev_trans); + put_transaction(prev_trans); } else { spin_unlock(&root->fs_info->trans_lock); } @@ -1918,8 +1885,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, list_del_init(&cur_trans->list); spin_unlock(&root->fs_info->trans_lock); - btrfs_put_transaction(cur_trans); - btrfs_put_transaction(cur_trans); + put_transaction(cur_trans); + put_transaction(cur_trans); if (trans->type & __TRANS_FREEZABLE) sb_end_intwrite(root->fs_info->sb); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 7657d11..5c2af84 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -92,7 +92,6 @@ struct btrfs_trans_handle { short aborted; short adding_csums; bool allocating_chunk; - bool reloc_reserved; unsigned int type; /* * this root is only needed to validate that the root passed to @@ -167,5 +166,4 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages, int mark); int btrfs_transaction_blocked(struct btrfs_fs_info *info); int btrfs_transaction_in_commit(struct btrfs_fs_info *info); -void btrfs_put_transaction(struct btrfs_transaction *transaction); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 76928ca..94e05c1 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -37,6 +37,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, int ret = 0; int wret; int level; + int is_extent = 0; int next_key_ret = 0; u64 last_ret = 0; u64 min_trans = 0; @@ -49,7 +50,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, goto out; } - if (root->ref_cows == 0) + if (root->ref_cows == 0 && !is_extent) goto out; if (btrfs_test_opt(root, SSD)) @@ -84,7 +85,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, path->keep_locks = 1; - ret = btrfs_search_forward(root, &key, path, min_trans); + ret = btrfs_search_forward(root, &key, NULL, path, min_trans); if (ret < 0) goto out; if (ret > 0) { diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9f7fc51..79f057c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -26,6 +26,7 @@ #include "locking.h" #include "print-tree.h" #include "backref.h" +#include "compat.h" #include "tree-log.h" #include "hash.h" @@ -935,7 +936,7 @@ again: parent_objectid, victim_name, victim_name_len)) { - inc_nlink(inode); + btrfs_inc_nlink(inode); btrfs_release_path(path); ret = btrfs_unlink_inode(trans, root, dir, @@ -1005,7 +1006,7 @@ again: victim_parent = read_one_inode(root, parent_objectid); if (victim_parent) { - inc_nlink(inode); + btrfs_inc_nlink(inode); btrfs_release_path(path); ret = btrfs_unlink_inode(trans, root, @@ -1112,11 +1113,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, struct extent_buffer *eb, int slot, struct btrfs_key *key) { - struct inode *dir = NULL; - struct inode *inode = NULL; + struct inode *dir; + struct inode *inode; unsigned long ref_ptr; unsigned long ref_end; - char *name = NULL; + char *name; int namelen; int ret; int search_done = 0; @@ -1149,15 +1150,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, * care of the rest */ dir = read_one_inode(root, parent_objectid); - if (!dir) { - ret = -ENOENT; - goto out; - } + if (!dir) + return -ENOENT; inode = read_one_inode(root, inode_objectid); if (!inode) { - ret = -EIO; - goto out; + iput(dir); + return -EIO; } while (ref_ptr < ref_end) { @@ -1170,16 +1169,14 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, */ if (!dir) dir = read_one_inode(root, parent_objectid); - if (!dir) { - ret = -ENOENT; - goto out; - } + if (!dir) + return -ENOENT; } else { ret = ref_get_fields(eb, ref_ptr, &namelen, &name, &ref_index); } if (ret) - goto out; + return ret; /* if we already have a perfect match, we're done */ if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), @@ -1199,11 +1196,12 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, parent_objectid, ref_index, name, namelen, &search_done); - if (ret) { - if (ret == 1) - ret = 0; + if (ret == 1) { + ret = 0; goto out; } + if (ret) + goto out; } /* insert our name */ @@ -1217,7 +1215,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; kfree(name); - name = NULL; if (log_ref_ver) { iput(dir); dir = NULL; @@ -1228,7 +1225,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, ret = overwrite_item(trans, root, path, eb, slot, key); out: btrfs_release_path(path); - kfree(name); iput(dir); iput(inode); return ret; @@ -1311,7 +1307,6 @@ static int count_inode_refs(struct btrfs_root *root, break; path->slots[0]--; } -process_slot: btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.objectid != ino || @@ -1332,10 +1327,6 @@ process_slot: if (key.offset == 0) break; - if (path->slots[0] > 0) { - path->slots[0]--; - goto process_slot; - } key.offset--; btrfs_release_path(path); } @@ -1489,7 +1480,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, if (!inode->i_nlink) set_nlink(inode, 1); else - inc_nlink(inode); + btrfs_inc_nlink(inode); ret = btrfs_update_inode(trans, root, inode); } else if (ret == -EEXIST) { ret = 0; @@ -1832,7 +1823,7 @@ again: dir_key->offset, name, name_len, 0); } - if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) { + if (IS_ERR_OR_NULL(log_di)) { btrfs_dir_item_key_to_cpu(eb, di, &location); btrfs_release_path(path); btrfs_release_path(log_path); @@ -1850,7 +1841,7 @@ again: goto out; } - inc_nlink(inode); + btrfs_inc_nlink(inode); ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); if (!ret) @@ -1869,9 +1860,6 @@ again: goto again; ret = 0; goto out; - } else if (IS_ERR(log_di)) { - kfree(name); - return PTR_ERR(log_di); } btrfs_release_path(log_path); kfree(name); @@ -2130,7 +2118,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, WARN_ON(*level >= BTRFS_MAX_LEVEL); cur = path->nodes[*level]; - WARN_ON(btrfs_header_level(cur) != *level); + if (btrfs_header_level(cur) != *level) + WARN_ON(1); if (path->slots[*level] >= btrfs_header_nritems(cur)) @@ -2162,13 +2151,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, return ret; } - if (trans) { - btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); - clean_tree_block(trans, root, next); - btrfs_wait_tree_block_writeback(next); - btrfs_tree_unlock(next); - } + btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); + clean_tree_block(trans, root, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); @@ -2240,13 +2227,11 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, next = path->nodes[*level]; - if (trans) { - btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); - clean_tree_block(trans, root, next); - btrfs_wait_tree_block_writeback(next); - btrfs_tree_unlock(next); - } + btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); + clean_tree_block(trans, root, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); ret = btrfs_free_and_pin_reserved_extent(root, @@ -2316,13 +2301,11 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, next = path->nodes[orig_level]; - if (trans) { - btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); - clean_tree_block(trans, log, next); - btrfs_wait_tree_block_writeback(next); - btrfs_tree_unlock(next); - } + btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); + clean_tree_block(trans, log, next); + btrfs_wait_tree_block_writeback(next); + btrfs_tree_unlock(next); WARN_ON(log->root_key.objectid != BTRFS_TREE_LOG_OBJECTID); @@ -2588,7 +2571,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, * the running transaction open, so a full commit can't hop * in and cause problems either. */ + btrfs_scrub_pause_super(root); ret = write_ctree_super(trans, root->fs_info->tree_root, 1); + btrfs_scrub_continue_super(root); if (ret) { btrfs_abort_transaction(trans, root, ret); goto out_wake_log_root; @@ -2623,10 +2608,13 @@ static void free_log_tree(struct btrfs_trans_handle *trans, .process_func = process_one_buffer }; - ret = walk_log_tree(trans, log, &wc); - /* I don't think this can happen but just in case */ - if (ret) - btrfs_abort_transaction(trans, log, ret); + if (trans) { + ret = walk_log_tree(trans, log, &wc); + + /* I don't think this can happen but just in case */ + if (ret) + btrfs_abort_transaction(trans, log, ret); + } while (1) { ret = find_first_extent_bit(&log->dirty_log_pages, @@ -2879,6 +2867,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, u64 min_offset, u64 *last_offset_ret) { struct btrfs_key min_key; + struct btrfs_key max_key; struct btrfs_root *log = root->log_root; struct extent_buffer *src; int err = 0; @@ -2890,6 +2879,9 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, u64 ino = btrfs_ino(inode); log = root->log_root; + max_key.objectid = ino; + max_key.offset = (u64)-1; + max_key.type = key_type; min_key.objectid = ino; min_key.type = key_type; @@ -2897,7 +2889,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, path->keep_locks = 1; - ret = btrfs_search_forward(root, &min_key, path, trans->transid); + ret = btrfs_search_forward(root, &min_key, &max_key, + path, trans->transid); /* * we didn't find anything from this transaction, see if there @@ -2950,8 +2943,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, /* find the first key from this transaction again */ ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); - if (WARN_ON(ret != 0)) + if (ret != 0) { + WARN_ON(1); goto done; + } /* * we have a block from this transaction, log every item in it @@ -3177,10 +3172,11 @@ static int log_inode_item(struct btrfs_trans_handle *trans, struct inode *inode) { struct btrfs_inode_item *inode_item; + struct btrfs_key key; int ret; - ret = btrfs_insert_empty_item(trans, log, path, - &BTRFS_I(inode)->location, + memcpy(&key, &BTRFS_I(inode)->location, sizeof(key)); + ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*inode_item)); if (ret && ret != -EEXIST) return ret; @@ -3379,7 +3375,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, btrfs_set_token_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG, &token); - if (em->block_start == EXTENT_MAP_HOLE) + if (em->block_start == 0) skip_csum = true; } @@ -3421,6 +3417,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans, if (skip_csum) return 0; + if (em->compress_type) { + csum_offset = 0; + csum_len = block_len; + } + /* * First check and see if our csums are on our outstanding ordered * extents. @@ -3504,13 +3505,8 @@ unlocked: if (!mod_len || ret) return ret; - if (em->compress_type) { - csum_offset = 0; - csum_len = block_len; - } else { - csum_offset = mod_start - em->start; - csum_len = mod_len; - } + csum_offset = mod_start - em->start; + csum_len = mod_len; /* block start is already adjusted for the file extent offset. */ ret = btrfs_lookup_csums_range(log->fs_info->csum_root, @@ -3697,8 +3693,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, - &BTRFS_I(inode)->runtime_flags) || - inode_only == LOG_INODE_EXISTS) { + &BTRFS_I(inode)->runtime_flags)) { if (inode_only == LOG_INODE_ALL) fast_search = true; max_key.type = BTRFS_XATTR_ITEM_KEY; @@ -3724,7 +3719,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, while (1) { ins_nr = 0; - ret = btrfs_search_forward(root, &min_key, + ret = btrfs_search_forward(root, &min_key, &max_key, path, trans->transid); if (ret != 0) break; @@ -3774,14 +3769,14 @@ next_slot: } btrfs_release_path(path); - if (min_key.offset < (u64)-1) { + if (min_key.offset < (u64)-1) min_key.offset++; - } else if (min_key.type < max_key.type) { + else if (min_key.type < (u8)-1) min_key.type++; - min_key.offset = 0; - } else { + else if (min_key.objectid < (u64)-1) + min_key.objectid++; + else break; - } } if (ins_nr) { ret = copy_items(trans, inode, dst_path, src, ins_start_slot, @@ -3802,7 +3797,7 @@ log_extents: err = ret; goto out_unlock; } - } else if (inode_only == LOG_INODE_ALL) { + } else { struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; struct extent_map *em, *n; diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index fbda900..dd0dea3 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -260,6 +260,7 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info, { struct btrfs_root *root = fs_info->uuid_root; struct btrfs_key key; + struct btrfs_key max_key; struct btrfs_path *path; int ret = 0; struct extent_buffer *leaf; @@ -276,10 +277,13 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info, key.objectid = 0; key.type = 0; key.offset = 0; + max_key.objectid = (u64)-1; + max_key.type = (u8)-1; + max_key.offset = (u64)-1; again_search_slot: path->keep_locks = 1; - ret = btrfs_search_forward(root, &key, path, 0); + ret = btrfs_search_forward(root, &key, &max_key, path, 0); if (ret) { if (ret > 0) ret = 0; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 92303f4..043b215 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -28,6 +28,7 @@ #include <linux/raid/pq.h> #include <linux/semaphore.h> #include <asm/div64.h> +#include "compat.h" #include "ctree.h" #include "extent_map.h" #include "disk-io.h" @@ -665,8 +666,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) if (device->bdev) fs_devices->open_devices--; - if (device->writeable && - device->devid != BTRFS_DEV_REPLACE_DEVID) { + if (device->writeable && !device->is_tgtdev_for_dev_replace) { list_del_init(&device->dev_alloc_list); fs_devices->rw_devices--; } @@ -2041,7 +2041,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->in_fs_metadata = 1; device->is_tgtdev_for_dev_replace = 0; device->mode = FMODE_EXCL; - device->dev_stats_valid = 1; set_blocksize(device->bdev, 4096); if (seeding_dev) { @@ -2209,7 +2208,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, device->in_fs_metadata = 1; device->is_tgtdev_for_dev_replace = 1; device->mode = FMODE_EXCL; - device->dev_stats_valid = 1; set_blocksize(device->bdev, 4096); device->fs_devices = fs_info->fs_devices; list_add(&device->dev_list, &fs_info->fs_devices->devices); @@ -2552,7 +2550,8 @@ again: failed = 0; retried = true; goto again; - } else if (WARN_ON(failed && retried)) { + } else if (failed && retried) { + WARN_ON(1); ret = -ENOSPC; } error: @@ -3424,9 +3423,6 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info) int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) { - if (fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; - mutex_lock(&fs_info->balance_mutex); if (!fs_info->balance_ctl) { mutex_unlock(&fs_info->balance_mutex); @@ -3492,7 +3488,7 @@ static int btrfs_uuid_scan_kthread(void *data) path->keep_locks = 1; while (1) { - ret = btrfs_search_forward(root, &key, path, 0); + ret = btrfs_search_forward(root, &key, &max_key, path, 0); if (ret) { if (ret > 0) ret = 0; @@ -4492,7 +4488,6 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got " "%Lu-%Lu\n", logical, logical+len, em->start, em->start + em->len); - free_extent_map(em); return 1; } @@ -4673,7 +4668,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, " "found %Lu-%Lu\n", logical, em->start, em->start + em->len); - free_extent_map(em); return -EINVAL; } @@ -4901,7 +4895,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, num_stripes = map->num_stripes; max_errors = nr_parity_stripes(map); - raid_map = kmalloc_array(num_stripes, sizeof(u64), + raid_map = kmalloc(sizeof(u64) * num_stripes, GFP_NOFS); if (!raid_map) { ret = -ENOMEM; @@ -5394,15 +5388,17 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio, { struct bio_vec *prev; struct request_queue *q = bdev_get_queue(bdev); - unsigned int max_sectors = queue_max_sectors(q); + unsigned short max_sectors = queue_max_sectors(q); struct bvec_merge_data bvm = { .bi_bdev = bdev, .bi_sector = sector, .bi_rw = bio->bi_rw, }; - if (WARN_ON(bio->bi_vcnt == 0)) + if (bio->bi_vcnt == 0) { + WARN_ON(1); return 1; + } prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; if (bio_sectors(bio) > max_sectors) @@ -5635,8 +5631,10 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, struct btrfs_device *dev; u64 tmp; - if (WARN_ON(!devid && !fs_info)) + if (!devid && !fs_info) { + WARN_ON(1); return ERR_PTR(-EINVAL); + } dev = __alloc_device(); if (IS_ERR(dev)) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 8b3cd14..b72f540 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -43,8 +43,9 @@ struct btrfs_device { /* WRITE_SYNC bios */ struct btrfs_pending_bios pending_sync_bios; - u64 generation; int running_pending; + u64 generation; + int writeable; int in_fs_metadata; int missing; @@ -52,11 +53,11 @@ struct btrfs_device { int is_tgtdev_for_dev_replace; spinlock_t io_lock; - /* the mode sent to blkdev_get */ - fmode_t mode; struct block_device *bdev; + /* the mode sent to blkdev_get */ + fmode_t mode; struct rcu_string *name; @@ -77,21 +78,16 @@ struct btrfs_device { /* optimal io width for this device */ u32 io_width; - /* type and info about this device */ - u64 type; /* minimal io size for this device */ u32 sector_size; + /* type and info about this device */ + u64 type; /* physical drive uuid (or lvm uuid) */ u8 uuid[BTRFS_UUID_SIZE]; - /* for sending down flush barriers */ - int nobarriers; - struct bio *flush_bio; - struct completion flush_wait; - /* per-device scrub information */ struct scrub_ctx *scrub_device; @@ -107,6 +103,10 @@ struct btrfs_device { struct radix_tree_root reada_zones; struct radix_tree_root reada_extents; + /* for sending down flush barriers */ + struct bio *flush_bio; + struct completion flush_wait; + int nobarriers; /* disk I/O failure stats. For detailed description refer to * enum btrfs_dev_stat_values in ioctl.h */ @@ -132,9 +132,7 @@ struct btrfs_fs_devices { /* all of the devices in the FS, protected by a mutex * so we can safely walk it to write out the supers without - * worrying about add/remove by the multi-device code. - * Scrubbing super can kick off supers writing by holding - * this mutex lock. + * worrying about add/remove by the multi-device code */ struct mutex device_list_mutex; struct list_head devices; diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 57e17fe..43eb559 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -270,7 +270,7 @@ static void cachefiles_drop_object(struct fscache_object *_object) #endif /* delete retired objects */ - if (test_bit(FSCACHE_OBJECT_RETIRED, &object->fscache.flags) && + if (test_bit(FSCACHE_COOKIE_RETIRED, &object->fscache.cookie->flags) && _object != cache->cache.fsdef ) { _debug("- retire object OBJ%x", object->fscache.debug_id); @@ -449,14 +449,14 @@ static int cachefiles_attr_changed(struct fscache_object *_object) _debug("discard tail %llx", oi_size); newattrs.ia_valid = ATTR_SIZE; newattrs.ia_size = oi_size & PAGE_MASK; - ret = notify_change(object->backer, &newattrs, NULL); + ret = notify_change(object->backer, &newattrs); if (ret < 0) goto truncate_failed; } newattrs.ia_valid = ATTR_SIZE; newattrs.ia_size = ni_size; - ret = notify_change(object->backer, &newattrs, NULL); + ret = notify_change(object->backer, &newattrs); truncate_failed: mutex_unlock(&object->backer->d_inode->i_mutex); diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index ca65f39..f4a08d7 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -294,7 +294,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache, if (ret < 0) { cachefiles_io_error(cache, "Unlink security error"); } else { - ret = vfs_unlink(dir->d_inode, rep, NULL); + ret = vfs_unlink(dir->d_inode, rep); if (preemptive) cachefiles_mark_object_buried(cache, rep); @@ -396,7 +396,7 @@ try_again: cachefiles_io_error(cache, "Rename security error %d", ret); } else { ret = vfs_rename(dir->d_inode, rep, - cache->graveyard->d_inode, grave, NULL); + cache->graveyard->d_inode, grave); if (ret != 0 && ret != -ENOMEM) cachefiles_io_error(cache, "Rename failed with error %d", ret); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 1e561c0..6df8bd4 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -216,7 +216,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) } SetPageUptodate(page); - if (err >= 0) + if (err == 0) ceph_readpage_to_fscache(inode, page); out: diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 8c44fdd..6bfe65e 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -68,7 +68,7 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc) { fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index, &ceph_fscache_fsid_object_def, - fsc, true); + fsc); if (fsc->fscache == NULL) { pr_err("Unable to resgister fsid: %p fscache cookie", fsc); @@ -204,7 +204,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc, ci->fscache = fscache_acquire_cookie(fsc->fscache, &ceph_fscache_inode_object_def, - ci, true); + ci); done: mutex_unlock(&inode->i_mutex); @@ -324,9 +324,6 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page) { struct ceph_inode_info *ci = ceph_inode(inode); - if (!PageFsCache(page)) - return; - fscache_wait_on_page_write(ci->fscache, page); fscache_uncache_page(ci->fscache, page); } diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 3c0a4bd..13976c3 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -897,7 +897,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci) * caller should hold i_ceph_lock. * caller will not hold session s_mutex if called from destroy_inode. */ -void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) +void __ceph_remove_cap(struct ceph_cap *cap) { struct ceph_mds_session *session = cap->session; struct ceph_inode_info *ci = cap->ci; @@ -909,16 +909,6 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) /* remove from session list */ spin_lock(&session->s_cap_lock); - /* - * s_cap_reconnect is protected by s_cap_lock. no one changes - * s_cap_gen while session is in the reconnect state. - */ - if (queue_release && - (!session->s_cap_reconnect || - cap->cap_gen == session->s_cap_gen)) - __queue_cap_release(session, ci->i_vino.ino, cap->cap_id, - cap->mseq, cap->issue_seq); - if (session->s_cap_iterator == cap) { /* not yet, we are iterating over this very cap */ dout("__ceph_remove_cap delaying %p removal from session %p\n", @@ -1033,6 +1023,7 @@ void __queue_cap_release(struct ceph_mds_session *session, struct ceph_mds_cap_release *head; struct ceph_mds_cap_item *item; + spin_lock(&session->s_cap_lock); BUG_ON(!session->s_num_cap_releases); msg = list_first_entry(&session->s_cap_releases, struct ceph_msg, list_head); @@ -1061,6 +1052,7 @@ void __queue_cap_release(struct ceph_mds_session *session, (int)CEPH_CAPS_PER_RELEASE, (int)msg->front.iov_len); } + spin_unlock(&session->s_cap_lock); } /* @@ -1075,8 +1067,12 @@ void ceph_queue_caps_release(struct inode *inode) p = rb_first(&ci->i_caps); while (p) { struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); + struct ceph_mds_session *session = cap->session; + + __queue_cap_release(session, ceph_ino(inode), cap->cap_id, + cap->mseq, cap->issue_seq); p = rb_next(p); - __ceph_remove_cap(cap, true); + __ceph_remove_cap(cap); } } @@ -2795,7 +2791,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, } spin_unlock(&mdsc->cap_dirty_lock); } - __ceph_remove_cap(cap, false); + __ceph_remove_cap(cap); } /* else, we already released it */ @@ -2935,12 +2931,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, if (!inode) { dout(" i don't have ino %llx\n", vino.ino); - if (op == CEPH_CAP_OP_IMPORT) { - spin_lock(&session->s_cap_lock); + if (op == CEPH_CAP_OP_IMPORT) __queue_cap_release(session, vino.ino, cap_id, mseq, seq); - spin_unlock(&session->s_cap_lock); - } goto flush_cap_releases; } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 2a0bcae..868b61d 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -352,18 +352,8 @@ more: } /* note next offset and last dentry name */ - rinfo = &req->r_reply_info; - if (le32_to_cpu(rinfo->dir_dir->frag) != frag) { - frag = le32_to_cpu(rinfo->dir_dir->frag); - if (ceph_frag_is_leftmost(frag)) - fi->next_offset = 2; - else - fi->next_offset = 0; - off = fi->next_offset; - } fi->offset = fi->next_offset; fi->last_readdir = req; - fi->frag = frag; if (req->r_reply_info.dir_end) { kfree(fi->last_name); @@ -373,6 +363,7 @@ more: else fi->next_offset = 0; } else { + rinfo = &req->r_reply_info; err = note_last_dentry(fi, rinfo->dir_dname[rinfo->dir_nr-1], rinfo->dir_dname_len[rinfo->dir_nr-1]); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 9a8e396..8549a48 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -577,8 +577,6 @@ static int fill_inode(struct inode *inode, int issued = 0, implemented; struct timespec mtime, atime, ctime; u32 nsplits; - struct ceph_inode_frag *frag; - struct rb_node *rb_node; struct ceph_buffer *xattr_blob = NULL; int err = 0; int queue_trunc = 0; @@ -753,38 +751,15 @@ no_change: /* FIXME: move me up, if/when version reflects fragtree changes */ nsplits = le32_to_cpu(info->fragtree.nsplits); mutex_lock(&ci->i_fragtree_mutex); - rb_node = rb_first(&ci->i_fragtree); for (i = 0; i < nsplits; i++) { u32 id = le32_to_cpu(info->fragtree.splits[i].frag); - frag = NULL; - while (rb_node) { - frag = rb_entry(rb_node, struct ceph_inode_frag, node); - if (ceph_frag_compare(frag->frag, id) >= 0) { - if (frag->frag != id) - frag = NULL; - else - rb_node = rb_next(rb_node); - break; - } - rb_node = rb_next(rb_node); - rb_erase(&frag->node, &ci->i_fragtree); - kfree(frag); - frag = NULL; - } - if (!frag) { - frag = __get_or_create_frag(ci, id); - if (IS_ERR(frag)) - continue; - } + struct ceph_inode_frag *frag = __get_or_create_frag(ci, id); + + if (IS_ERR(frag)) + continue; frag->split_by = le32_to_cpu(info->fragtree.splits[i].by); dout(" frag %x split by %d\n", frag->frag, frag->split_by); } - while (rb_node) { - frag = rb_entry(rb_node, struct ceph_inode_frag, node); - rb_node = rb_next(rb_node); - rb_erase(&frag->node, &ci->i_fragtree); - kfree(frag); - } mutex_unlock(&ci->i_fragtree_mutex); /* were we issued a capability? */ @@ -1275,20 +1250,8 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, int err = 0, i; struct inode *snapdir = NULL; struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; + u64 frag = le32_to_cpu(rhead->args.readdir.frag); struct ceph_dentry_info *di; - u64 r_readdir_offset = req->r_readdir_offset; - u32 frag = le32_to_cpu(rhead->args.readdir.frag); - - if (rinfo->dir_dir && - le32_to_cpu(rinfo->dir_dir->frag) != frag) { - dout("readdir_prepopulate got new frag %x -> %x\n", - frag, le32_to_cpu(rinfo->dir_dir->frag)); - frag = le32_to_cpu(rinfo->dir_dir->frag); - if (ceph_frag_is_leftmost(frag)) - r_readdir_offset = 2; - else - r_readdir_offset = 0; - } if (req->r_aborted) return readdir_prepopulate_inodes_only(req, session); @@ -1352,7 +1315,7 @@ retry_lookup: } di = dn->d_fsdata; - di->offset = ceph_make_fpos(frag, i + r_readdir_offset); + di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset); /* inode */ if (dn->d_inode) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index d90861f..b7bda5d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -43,7 +43,6 @@ */ struct ceph_reconnect_state { - int nr_caps; struct ceph_pagelist *pagelist; bool flock; }; @@ -444,7 +443,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, INIT_LIST_HEAD(&s->s_waiting); INIT_LIST_HEAD(&s->s_unsafe); s->s_num_cap_releases = 0; - s->s_cap_reconnect = 0; s->s_cap_iterator = NULL; INIT_LIST_HEAD(&s->s_cap_releases); INIT_LIST_HEAD(&s->s_cap_releases_done); @@ -644,8 +642,6 @@ static void __unregister_request(struct ceph_mds_client *mdsc, req->r_unsafe_dir = NULL; } - complete_all(&req->r_safe_completion); - ceph_mdsc_put_request(req); } @@ -990,7 +986,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, dout("removing cap %p, ci is %p, inode is %p\n", cap, ci, &ci->vfs_inode); spin_lock(&ci->i_ceph_lock); - __ceph_remove_cap(cap, false); + __ceph_remove_cap(cap); if (!__ceph_is_any_real_caps(ci)) { struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; @@ -1235,7 +1231,9 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) session->s_trim_caps--; if (oissued) { /* we aren't the only cap.. just remove us */ - __ceph_remove_cap(cap, true); + __queue_cap_release(session, ceph_ino(inode), cap->cap_id, + cap->mseq, cap->issue_seq); + __ceph_remove_cap(cap); } else { /* try to drop referring dentries */ spin_unlock(&ci->i_ceph_lock); @@ -1418,6 +1416,7 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc, unsigned num; dout("discard_cap_releases mds%d\n", session->s_mds); + spin_lock(&session->s_cap_lock); /* zero out the in-progress message */ msg = list_first_entry(&session->s_cap_releases, @@ -1444,6 +1443,8 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc, msg->front.iov_len = sizeof(*head); list_add(&msg->list_head, &session->s_cap_releases); } + + spin_unlock(&session->s_cap_lock); } /* @@ -1874,11 +1875,8 @@ static int __do_request(struct ceph_mds_client *mdsc, int mds = -1; int err = -EAGAIN; - if (req->r_err || req->r_got_result) { - if (req->r_aborted) - __unregister_request(mdsc, req); + if (req->r_err || req->r_got_result) goto out; - } if (req->r_timeout && time_after_eq(jiffies, req->r_started + req->r_timeout)) { @@ -2188,6 +2186,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) if (head->safe) { req->r_got_safe = true; __unregister_request(mdsc, req); + complete_all(&req->r_safe_completion); if (req->r_got_unsafe) { /* @@ -2239,7 +2238,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); if (err == 0) { if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || - req->r_op == CEPH_MDS_OP_LSSNAP)) + req->r_op == CEPH_MDS_OP_LSSNAP) && + rinfo->dir_nr) ceph_readdir_prepopulate(req, req->r_session); ceph_unreserve_caps(mdsc, &req->r_caps_reservation); } @@ -2490,7 +2490,6 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, cap->seq = 0; /* reset cap seq */ cap->issue_seq = 0; /* and issue_seq */ cap->mseq = 0; /* and migrate_seq */ - cap->cap_gen = cap->session->s_cap_gen; if (recon_state->flock) { rec.v2.cap_id = cpu_to_le64(cap->cap_id); @@ -2553,8 +2552,6 @@ encode_again: } else { err = ceph_pagelist_append(pagelist, &rec, reclen); } - - recon_state->nr_caps++; out_free: kfree(path); out_dput: @@ -2582,7 +2579,6 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, struct rb_node *p; int mds = session->s_mds; int err = -ENOMEM; - int s_nr_caps; struct ceph_pagelist *pagelist; struct ceph_reconnect_state recon_state; @@ -2614,38 +2610,20 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, dout("session %p state %s\n", session, session_state_name(session->s_state)); - spin_lock(&session->s_gen_ttl_lock); - session->s_cap_gen++; - spin_unlock(&session->s_gen_ttl_lock); - - spin_lock(&session->s_cap_lock); - /* - * notify __ceph_remove_cap() that we are composing cap reconnect. - * If a cap get released before being added to the cap reconnect, - * __ceph_remove_cap() should skip queuing cap release. - */ - session->s_cap_reconnect = 1; /* drop old cap expires; we're about to reestablish that state */ discard_cap_releases(mdsc, session); - spin_unlock(&session->s_cap_lock); /* traverse this session's caps */ - s_nr_caps = session->s_nr_caps; - err = ceph_pagelist_encode_32(pagelist, s_nr_caps); + err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); if (err) goto fail; - recon_state.nr_caps = 0; recon_state.pagelist = pagelist; recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK; err = iterate_session_caps(session, encode_caps_cb, &recon_state); if (err < 0) goto fail; - spin_lock(&session->s_cap_lock); - session->s_cap_reconnect = 0; - spin_unlock(&session->s_cap_lock); - /* * snaprealms. we provide mds with the ino, seq (version), and * parent for all of our realms. If the mds has any newer info, @@ -2668,18 +2646,11 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, if (recon_state.flock) reply->hdr.version = cpu_to_le16(2); - - /* raced with cap release? */ - if (s_nr_caps != recon_state.nr_caps) { - struct page *page = list_first_entry(&pagelist->head, - struct page, lru); - __le32 *addr = kmap_atomic(page); - *addr = cpu_to_le32(recon_state.nr_caps); - kunmap_atomic(addr); + if (pagelist->length) { + /* set up outbound data if we have any */ + reply->hdr.data_len = cpu_to_le32(pagelist->length); + ceph_msg_data_add_pagelist(reply, pagelist); } - - reply->hdr.data_len = cpu_to_le32(pagelist->length); - ceph_msg_data_add_pagelist(reply, pagelist); ceph_con_send(&session->s_con, reply); mutex_unlock(&session->s_mutex); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 4c053d0..c2a19fb 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -132,7 +132,6 @@ struct ceph_mds_session { struct list_head s_caps; /* all caps issued by this session */ int s_nr_caps, s_trim_caps; int s_num_cap_releases; - int s_cap_reconnect; struct list_head s_cap_releases; /* waiting cap_release messages */ struct list_head s_cap_releases_done; /* ready to send */ struct ceph_cap *s_cap_iterator; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ef4ac38..6014b0a 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -741,7 +741,13 @@ extern int ceph_add_cap(struct inode *inode, int fmode, unsigned issued, unsigned wanted, unsigned cap, unsigned seq, u64 realmino, int flags, struct ceph_cap_reservation *caps_reservation); -extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); +extern void __ceph_remove_cap(struct ceph_cap *cap); +static inline void ceph_remove_cap(struct ceph_cap *cap) +{ + spin_lock(&cap->ci->i_ceph_lock); + __ceph_remove_cap(cap); + spin_unlock(&cap->ci->i_ceph_lock); +} extern void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap); diff --git a/fs/char_dev.c b/fs/char_dev.c index f77f770..afc2bb6 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -368,7 +368,6 @@ void cdev_put(struct cdev *p) */ static int chrdev_open(struct inode *inode, struct file *filp) { - const struct file_operations *fops; struct cdev *p; struct cdev *new = NULL; int ret = 0; @@ -401,11 +400,10 @@ static int chrdev_open(struct inode *inode, struct file *filp) return ret; ret = -ENXIO; - fops = fops_get(p->ops); - if (!fops) + filp->f_op = fops_get(p->ops); + if (!filp->f_op) goto out_cdev_put; - replace_fops(filp, fops); if (filp->f_op->open) { ret = filp->f_op->open(inode, filp); if (ret) @@ -576,8 +574,7 @@ static struct kobject *base_probe(dev_t dev, int *part, void *data) void __init chrdev_init(void) { cdev_map = kobj_map_init(base_probe, &chrdevs_lock); - if (bdi_init(&directly_mappable_cdev_bdi)) - panic("Failed to init directly mappable cdev bdi"); + bdi_init(&directly_mappable_cdev_bdi); } diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 9409fa1..37e4a72 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -65,6 +65,5 @@ struct cifs_sb_info { char *mountdata; /* options received at mount time or via DFS refs */ struct backing_dev_info bdi; struct delayed_work prune_tlinks; - struct rcu_head rcu; }; #endif /* _CIFS_FS_SB_H */ diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 4934347..fc6f4f3 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -548,13 +548,7 @@ static int CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) { int rc; - struct ntlmv2_resp *ntlmv2 = (struct ntlmv2_resp *) - (ses->auth_key.response + CIFS_SESS_KEY_SIZE); - unsigned int hash_len; - - /* The MD5 hash starts at challenge_key.key */ - hash_len = ses->auth_key.len - (CIFS_SESS_KEY_SIZE + - offsetof(struct ntlmv2_resp, challenge.key[0])); + unsigned int offset = CIFS_SESS_KEY_SIZE + 8; if (!ses->server->secmech.sdeschmacmd5) { cifs_dbg(VFS, "%s: can't generate ntlmv2 hash\n", __func__); @@ -562,7 +556,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) } rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, - ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); + ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); if (rc) { cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n", __func__); @@ -576,21 +570,20 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) } if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) - memcpy(ntlmv2->challenge.key, - ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); + memcpy(ses->auth_key.response + offset, + ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); else - memcpy(ntlmv2->challenge.key, - ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); + memcpy(ses->auth_key.response + offset, + ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, - ntlmv2->challenge.key, hash_len); + ses->auth_key.response + offset, ses->auth_key.len - offset); if (rc) { cifs_dbg(VFS, "%s: Could not update with response\n", __func__); return rc; } - /* Note that the MD5 digest over writes anon.challenge_key.key */ rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, - ntlmv2->ntlmv2_hash); + ses->auth_key.response + CIFS_SESS_KEY_SIZE); if (rc) cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__); @@ -634,7 +627,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) int rc; int baselen; unsigned int tilen; - struct ntlmv2_resp *ntlmv2; + struct ntlmv2_resp *buf; char ntlmv2_hash[16]; unsigned char *tiblob = NULL; /* target info blob */ @@ -667,14 +660,13 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) } ses->auth_key.len += baselen; - ntlmv2 = (struct ntlmv2_resp *) + buf = (struct ntlmv2_resp *) (ses->auth_key.response + CIFS_SESS_KEY_SIZE); - ntlmv2->blob_signature = cpu_to_le32(0x00000101); - ntlmv2->reserved = 0; - /* Must be within 5 minutes of the server */ - ntlmv2->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); - get_random_bytes(&ntlmv2->client_chal, sizeof(ntlmv2->client_chal)); - ntlmv2->reserved2 = 0; + buf->blob_signature = cpu_to_le32(0x00000101); + buf->reserved = 0; + buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); + get_random_bytes(&buf->client_chal, sizeof(buf->client_chal)); + buf->reserved2 = 0; memcpy(ses->auth_key.response + baselen, tiblob, tilen); @@ -714,7 +706,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) } rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, - ntlmv2->ntlmv2_hash, + ses->auth_key.response + CIFS_SESS_KEY_SIZE, CIFS_HMAC_MD5_HASH_SIZE); if (rc) { cifs_dbg(VFS, "%s: Could not update with response\n", __func__); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 849f613..77fc5e1 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -862,7 +862,7 @@ const struct inode_operations cifs_file_inode_ops = { const struct inode_operations cifs_symlink_inode_ops = { .readlink = generic_readlink, .follow_link = cifs_follow_link, - .put_link = kfree_put_link, + .put_link = cifs_put_link, .permission = cifs_permission, /* BB add the following two eventually */ /* revalidate: cifs_revalidate, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 26a754f..6d0b072 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -115,6 +115,8 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path); /* Functions related to symlinks */ extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd); +extern void cifs_put_link(struct dentry *direntry, + struct nameidata *nd, void *); extern int cifs_readlink(struct dentry *direntry, char __user *buffer, int buflen); extern int cifs_symlink(struct inode *inode, struct dentry *direntry, diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index f918a99..52b6f6c 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -261,7 +261,7 @@ struct smb_version_operations { /* query path data from the server */ int (*query_path_info)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *, - FILE_ALL_INFO *, bool *, bool *); + FILE_ALL_INFO *, bool *); /* query file data from the server */ int (*query_file_info)(const unsigned int, struct cifs_tcon *, struct cifs_fid *, FILE_ALL_INFO *); @@ -278,8 +278,6 @@ struct smb_version_operations { /* set attributes */ int (*set_file_info)(struct inode *, const char *, FILE_BASIC_INFO *, const unsigned int); - int (*set_compression)(const unsigned int, struct cifs_tcon *, - struct cifsFileInfo *); /* check if we can send an echo or nor */ bool (*can_echo)(struct TCP_Server_Info *); /* send echo request */ @@ -381,10 +379,6 @@ struct smb_version_operations { char * (*create_lease_buf)(u8 *, u8); /* parse lease context buffer and return oplock/epoch info */ __u8 (*parse_lease_buf)(void *, unsigned int *); - int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file, - struct cifsFileInfo *target_file, u64 src_off, u64 len, - u64 dest_off); - int (*validate_negotiate)(const unsigned int, struct cifs_tcon *); }; struct smb_version_values { @@ -626,32 +620,9 @@ set_credits(struct TCP_Server_Info *server, const int val) } static inline __u64 -get_next_mid64(struct TCP_Server_Info *server) -{ - return server->ops->get_next_mid(server); -} - -static inline __le16 get_next_mid(struct TCP_Server_Info *server) { - __u16 mid = get_next_mid64(server); - /* - * The value in the SMB header should be little endian for easy - * on-the-wire decoding. - */ - return cpu_to_le16(mid); -} - -static inline __u16 -get_mid(const struct smb_hdr *smb) -{ - return le16_to_cpu(smb->Mid); -} - -static inline bool -compare_mid(__u16 mid, const struct smb_hdr *smb) -{ - return mid == le16_to_cpu(smb->Mid); + return server->ops->get_next_mid(server); } /* @@ -857,11 +828,6 @@ struct cifs_tcon { __u32 maximal_access; __u32 vol_serial_number; __le64 vol_create_time; - __u32 ss_flags; /* sector size flags */ - __u32 perf_sector_size; /* best sector size for perf */ - __u32 max_chunks; - __u32 max_bytes_chunk; - __u32 max_bytes_copy; #endif /* CONFIG_CIFS_SMB2 */ #ifdef CONFIG_CIFS_FSCACHE u64 resource_id; /* server resource id */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 33df36e..08f9dfb 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -428,7 +428,7 @@ struct smb_hdr { __u16 Tid; __le16 Pid; __u16 Uid; - __le16 Mid; + __u16 Mid; __u8 WordCount; } __attribute__((packed)); @@ -697,13 +697,7 @@ struct ntlmssp2_name { } __attribute__((packed)); struct ntlmv2_resp { - union { - char ntlmv2_hash[CIFS_ENCPWD_SIZE]; - struct { - __u8 reserved[8]; - __u8 key[CIFS_SERVER_CHALLENGE_SIZE]; - } __attribute__((packed)) challenge; - } __attribute__((packed)); + char ntlmv2_hash[CIFS_ENCPWD_SIZE]; __le32 blob_signature; __u32 reserved; __le64 time; @@ -1358,35 +1352,6 @@ typedef struct smb_com_transaction_ioctl_req { __u8 Data[1]; } __attribute__((packed)) TRANSACT_IOCTL_REQ; -typedef struct smb_com_transaction_compr_ioctl_req { - struct smb_hdr hdr; /* wct = 23 */ - __u8 MaxSetupCount; - __u16 Reserved; - __le32 TotalParameterCount; - __le32 TotalDataCount; - __le32 MaxParameterCount; - __le32 MaxDataCount; - __le32 ParameterCount; - __le32 ParameterOffset; - __le32 DataCount; - __le32 DataOffset; - __u8 SetupCount; /* four setup words follow subcommand */ - /* SNIA spec incorrectly included spurious pad here */ - __le16 SubCommand; /* 2 = IOCTL/FSCTL */ - __le32 FunctionCode; - __u16 Fid; - __u8 IsFsctl; /* 1 = File System Control 0 = device control (IOCTL) */ - __u8 IsRootFlag; /* 1 = apply command to root of share (must be DFS) */ - __le16 ByteCount; - __u8 Pad[3]; - __le16 compression_state; /* See below for valid flags */ -} __attribute__((packed)) TRANSACT_COMPR_IOCTL_REQ; - -/* compression state flags */ -#define COMPRESSION_FORMAT_NONE 0x0000 -#define COMPRESSION_FORMAT_DEFAULT 0x0001 -#define COMPRESSION_FORMAT_LZNT1 0x0002 - typedef struct smb_com_transaction_ioctl_rsp { struct smb_hdr hdr; /* wct = 19 */ __u8 Reserved[3]; @@ -2250,9 +2215,6 @@ typedef struct { __le32 DeviceCharacteristics; } __attribute__((packed)) FILE_SYSTEM_DEVICE_INFO; /* device info level 0x104 */ -/* minimum includes first three fields, and empty FS Name */ -#define MIN_FS_ATTR_INFO_SIZE 12 - typedef struct { __le32 Attributes; __le32 MaxPathNameComponentLength; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index aa33976..b5ec2a2 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -360,8 +360,6 @@ extern int CIFSSMBUnixQuerySymLink(const unsigned int xid, extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, char **symlinkinfo, const struct nls_table *nls_codepage); -extern int CIFSSMB_set_compression(const unsigned int xid, - struct cifs_tcon *tcon, __u16 fid); extern int CIFSSMBOpen(const unsigned int xid, struct cifs_tcon *tcon, const char *fileName, const int disposition, const int access_flags, const int omode, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 124aa02..ccd31ab 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3199,60 +3199,6 @@ qreparse_out: return rc; } -int -CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon, - __u16 fid) -{ - int rc = 0; - int bytes_returned; - struct smb_com_transaction_compr_ioctl_req *pSMB; - struct smb_com_transaction_ioctl_rsp *pSMBr; - - cifs_dbg(FYI, "Set compression for %u\n", fid); - rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB, - (void **) &pSMBr); - if (rc) - return rc; - - pSMB->compression_state = cpu_to_le16(COMPRESSION_FORMAT_DEFAULT); - - pSMB->TotalParameterCount = 0; - pSMB->TotalDataCount = __constant_cpu_to_le32(2); - pSMB->MaxParameterCount = 0; - pSMB->MaxDataCount = 0; - pSMB->MaxSetupCount = 4; - pSMB->Reserved = 0; - pSMB->ParameterOffset = 0; - pSMB->DataCount = __constant_cpu_to_le32(2); - pSMB->DataOffset = - cpu_to_le32(offsetof(struct smb_com_transaction_compr_ioctl_req, - compression_state) - 4); /* 84 */ - pSMB->SetupCount = 4; - pSMB->SubCommand = __constant_cpu_to_le16(NT_TRANSACT_IOCTL); - pSMB->ParameterCount = 0; - pSMB->FunctionCode = __constant_cpu_to_le32(FSCTL_SET_COMPRESSION); - pSMB->IsFsctl = 1; /* FSCTL */ - pSMB->IsRootFlag = 0; - pSMB->Fid = fid; /* file handle always le */ - /* 3 byte pad, followed by 2 byte compress state */ - pSMB->ByteCount = __constant_cpu_to_le16(5); - inc_rfc1001_len(pSMB, 5); - - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, - (struct smb_hdr *) pSMBr, &bytes_returned, 0); - if (rc) - cifs_dbg(FYI, "Send error in SetCompression = %d\n", rc); - - cifs_buf_release(pSMB); - - /* - * Note: On -EAGAIN error only caller can retry on handle based calls - * since file handle passed in no longer valid. - */ - return rc; -} - - #ifdef CONFIG_CIFS_POSIX /*Convert an Access Control Entry from wire format to local POSIX xattr format*/ @@ -3369,13 +3315,11 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL, return 0; } cifs_acl->version = cpu_to_le16(1); - if (acl_type == ACL_TYPE_ACCESS) { + if (acl_type == ACL_TYPE_ACCESS) cifs_acl->access_entry_count = cpu_to_le16(count); - cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF); - } else if (acl_type == ACL_TYPE_DEFAULT) { + else if (acl_type == ACL_TYPE_DEFAULT) cifs_acl->default_entry_count = cpu_to_le16(count); - cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF); - } else { + else { cifs_dbg(FYI, "unknown ACL type %d\n", acl_type); return 0; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8813ff7..a279ffc 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2242,8 +2242,6 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { - if (ses->status == CifsExiting) - continue; if (!match_session(ses, vol)) continue; ++ses->ses_count; @@ -2257,37 +2255,24 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol) static void cifs_put_smb_ses(struct cifs_ses *ses) { - unsigned int rc, xid; + unsigned int xid; struct TCP_Server_Info *server = ses->server; cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count); - spin_lock(&cifs_tcp_ses_lock); - if (ses->status == CifsExiting) { - spin_unlock(&cifs_tcp_ses_lock); - return; - } if (--ses->ses_count > 0) { spin_unlock(&cifs_tcp_ses_lock); return; } - if (ses->status == CifsGood) - ses->status = CifsExiting; + + list_del_init(&ses->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); - if (ses->status == CifsExiting && server->ops->logoff) { + if (ses->status == CifsGood && server->ops->logoff) { xid = get_xid(); - rc = server->ops->logoff(xid, ses); - if (rc) - cifs_dbg(VFS, "%s: Session Logoff failure rc=%d\n", - __func__, rc); + server->ops->logoff(xid, ses); _free_xid(xid); } - - spin_lock(&cifs_tcp_ses_lock); - list_del_init(&ses->smb_ses_list); - spin_unlock(&cifs_tcp_ses_lock); - sesInfoFree(ses); cifs_put_tcp_session(server); } @@ -3770,13 +3755,6 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, return rc; } -static void delayed_free(struct rcu_head *p) -{ - struct cifs_sb_info *sbi = container_of(p, struct cifs_sb_info, rcu); - unload_nls(sbi->local_nls); - kfree(sbi); -} - void cifs_umount(struct cifs_sb_info *cifs_sb) { @@ -3801,7 +3779,8 @@ cifs_umount(struct cifs_sb_info *cifs_sb) bdi_destroy(&cifs_sb->bdi); kfree(cifs_sb->mountdata); - call_rcu(&cifs_sb->rcu, delayed_free); + unload_nls(cifs_sb->local_nls); + kfree(cifs_sb); } int diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 11ff5f1..5384c2a 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -756,7 +756,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, /* if it was once a directory (but how can we tell?) we could do shrink_dcache_parent(direntry); */ } else if (rc != -EACCES) { - cifs_dbg(FYI, "Unexpected lookup error %d\n", rc); + cifs_dbg(VFS, "Unexpected lookup error %d\n", rc); /* We special case check for Access Denied - since that is a common return code */ } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5a5a872..7ddddf2 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3663,27 +3663,6 @@ void cifs_oplock_break(struct work_struct *work) } } -/* - * The presence of cifs_direct_io() in the address space ops vector - * allowes open() O_DIRECT flags which would have failed otherwise. - * - * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests - * so this method should never be called. - * - * Direct IO is not yet supported in the cached mode. - */ -static ssize_t -cifs_direct_io(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t pos, unsigned long nr_segs) -{ - /* - * FIXME - * Eventually need to support direct IO for non forcedirectio mounts - */ - return -EINVAL; -} - - const struct address_space_operations cifs_addr_ops = { .readpage = cifs_readpage, .readpages = cifs_readpages, @@ -3693,7 +3672,6 @@ const struct address_space_operations cifs_addr_ops = { .write_end = cifs_write_end, .set_page_dirty = __set_page_dirty_nobuffers, .releasepage = cifs_release_page, - .direct_IO = cifs_direct_io, .invalidatepage = cifs_invalidate_page, .launder_page = cifs_launder_page, }; diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index 8d4b7bc..b3258f3 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -27,7 +27,7 @@ void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) { server->fscache = fscache_acquire_cookie(cifs_fscache_netfs.primary_index, - &cifs_fscache_server_index_def, server, true); + &cifs_fscache_server_index_def, server); cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, server, server->fscache); } @@ -46,7 +46,7 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) tcon->fscache = fscache_acquire_cookie(server->fscache, - &cifs_fscache_super_index_def, tcon, true); + &cifs_fscache_super_index_def, tcon); cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, server->fscache, tcon->fscache); } @@ -69,7 +69,7 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode) if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) { cifsi->fscache = fscache_acquire_cookie(tcon->fscache, - &cifs_fscache_inode_object_def, cifsi, true); + &cifs_fscache_inode_object_def, cifsi); cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n", __func__, tcon->fscache, cifsi->fscache); } @@ -119,7 +119,7 @@ void cifs_fscache_reset_inode_cookie(struct inode *inode) cifsi->fscache = fscache_acquire_cookie( cifs_sb_master_tcon(cifs_sb)->fscache, &cifs_fscache_inode_object_def, - cifsi, true); + cifsi); cifs_dbg(FYI, "%s: new cookie 0x%p oldcookie 0x%p\n", __func__, cifsi->fscache, old); } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 36f9ebb..867b7cd 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -542,8 +542,7 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, /* Fill a cifs_fattr struct with info from FILE_ALL_INFO */ static void cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, - struct cifs_sb_info *cifs_sb, bool adjust_tz, - bool symlink) + struct cifs_sb_info *cifs_sb, bool adjust_tz) { struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); @@ -570,11 +569,7 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, fattr->cf_createtime = le64_to_cpu(info->CreationTime); fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); - - if (symlink) { - fattr->cf_mode = S_IFLNK; - fattr->cf_dtype = DT_LNK; - } else if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { + if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; fattr->cf_dtype = DT_DIR; /* @@ -583,6 +578,10 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, */ if (!tcon->unix_ext) fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK; + } else if (fattr->cf_cifsattrs & ATTR_REPARSE) { + fattr->cf_mode = S_IFLNK; + fattr->cf_dtype = DT_LNK; + fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); } else { fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; fattr->cf_dtype = DT_REG; @@ -627,8 +626,7 @@ cifs_get_file_info(struct file *filp) rc = server->ops->query_file_info(xid, tcon, &cfile->fid, &find_data); switch (rc) { case 0: - cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false, - false); + cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false); break; case -EREMOTE: cifs_create_dfs_fattr(&fattr, inode->i_sb); @@ -675,7 +673,6 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, bool adjust_tz = false; struct cifs_fattr fattr; struct cifs_search_info *srchinf = NULL; - bool symlink = false; tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) @@ -705,12 +702,12 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, } data = (FILE_ALL_INFO *)buf; rc = server->ops->query_path_info(xid, tcon, cifs_sb, full_path, - data, &adjust_tz, &symlink); + data, &adjust_tz); } if (!rc) { - cifs_all_info_to_fattr(&fattr, data, cifs_sb, adjust_tz, - symlink); + cifs_all_info_to_fattr(&fattr, (FILE_ALL_INFO *)data, cifs_sb, + adjust_tz); } else if (rc == -EREMOTE) { cifs_create_dfs_fattr(&fattr, sb); rc = 0; diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 7749230..3e08455 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -3,7 +3,7 @@ * * vfs operations that deal with io control * - * Copyright (C) International Business Machines Corp., 2005,2013 + * Copyright (C) International Business Machines Corp., 2005,2007 * Author(s): Steve French (sfrench@us.ibm.com) * * This library is free software; you can redistribute it and/or modify @@ -22,132 +22,25 @@ */ #include <linux/fs.h> -#include <linux/file.h> -#include <linux/mount.h> -#include <linux/mm.h> -#include <linux/pagemap.h> #include "cifspdu.h" #include "cifsglob.h" #include "cifsproto.h" #include "cifs_debug.h" #include "cifsfs.h" -#define CIFS_IOCTL_MAGIC 0xCF -#define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int) - -static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, - unsigned long srcfd, u64 off, u64 len, u64 destoff) -{ - int rc; - struct cifsFileInfo *smb_file_target = dst_file->private_data; - struct inode *target_inode = file_inode(dst_file); - struct cifs_tcon *target_tcon; - struct fd src_file; - struct cifsFileInfo *smb_file_src; - struct inode *src_inode; - struct cifs_tcon *src_tcon; - - cifs_dbg(FYI, "ioctl clone range\n"); - /* the destination must be opened for writing */ - if (!(dst_file->f_mode & FMODE_WRITE)) { - cifs_dbg(FYI, "file target not open for write\n"); - return -EINVAL; - } - - /* check if target volume is readonly and take reference */ - rc = mnt_want_write_file(dst_file); - if (rc) { - cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc); - return rc; - } - - src_file = fdget(srcfd); - if (!src_file.file) { - rc = -EBADF; - goto out_drop_write; - } - - if ((!src_file.file->private_data) || (!dst_file->private_data)) { - rc = -EBADF; - cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); - goto out_fput; - } - - rc = -EXDEV; - smb_file_target = dst_file->private_data; - smb_file_src = src_file.file->private_data; - src_tcon = tlink_tcon(smb_file_src->tlink); - target_tcon = tlink_tcon(smb_file_target->tlink); - - /* check if source and target are on same tree connection */ - if (src_tcon != target_tcon) { - cifs_dbg(VFS, "file copy src and target on different volume\n"); - goto out_fput; - } - - src_inode = src_file.file->f_dentry->d_inode; - - /* - * Note: cifs case is easier than btrfs since server responsible for - * checks for proper open modes and file type and if it wants - * server could even support copy of range where source = target - */ - - /* so we do not deadlock racing two ioctls on same files */ - if (target_inode < src_inode) { - mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_CHILD); - } - - /* determine range to clone */ - rc = -EINVAL; - if (off + len > src_inode->i_size || off + len < off) - goto out_unlock; - if (len == 0) - len = src_inode->i_size - off; - - cifs_dbg(FYI, "about to flush pages\n"); - /* should we flush first and last page first */ - truncate_inode_pages_range(&target_inode->i_data, destoff, - PAGE_CACHE_ALIGN(destoff + len)-1); - - if (target_tcon->ses->server->ops->clone_range) - rc = target_tcon->ses->server->ops->clone_range(xid, - smb_file_src, smb_file_target, off, len, destoff); - - /* force revalidate of size and timestamps of target file now - that target is updated on the server */ - CIFS_I(target_inode)->time = 0; -out_unlock: - /* although unlocking in the reverse order from locking is not - strictly necessary here it is a little cleaner to be consistent */ - if (target_inode < src_inode) { - mutex_unlock(&src_inode->i_mutex); - mutex_unlock(&target_inode->i_mutex); - } else { - mutex_unlock(&target_inode->i_mutex); - mutex_unlock(&src_inode->i_mutex); - } -out_fput: - fdput(src_file); -out_drop_write: - mnt_drop_write_file(dst_file); - return rc; -} - long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) { struct inode *inode = file_inode(filep); int rc = -ENOTTY; /* strange error - but the precedent */ unsigned int xid; struct cifs_sb_info *cifs_sb; +#ifdef CONFIG_CIFS_POSIX struct cifsFileInfo *pSMBFile = filep->private_data; struct cifs_tcon *tcon; __u64 ExtAttrBits = 0; + __u64 ExtAttrMask = 0; __u64 caps; +#endif /* CONFIG_CIFS_POSIX */ xid = get_xid(); @@ -156,14 +49,13 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) cifs_sb = CIFS_SB(inode->i_sb); switch (command) { +#ifdef CONFIG_CIFS_POSIX case FS_IOC_GETFLAGS: if (pSMBFile == NULL) break; tcon = tlink_tcon(pSMBFile->tlink); caps = le64_to_cpu(tcon->fsUnixInfo.Capability); -#ifdef CONFIG_CIFS_POSIX if (CIFS_UNIX_EXTATTR_CAP & caps) { - __u64 ExtAttrMask = 0; rc = CIFSGetExtAttr(xid, tcon, pSMBFile->fid.netfid, &ExtAttrBits, &ExtAttrMask); @@ -171,53 +63,29 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) rc = put_user(ExtAttrBits & FS_FL_USER_VISIBLE, (int __user *)arg); - if (rc != EOPNOTSUPP) - break; - } -#endif /* CONFIG_CIFS_POSIX */ - rc = 0; - if (CIFS_I(inode)->cifsAttrs & ATTR_COMPRESSED) { - /* add in the compressed bit */ - ExtAttrBits = FS_COMPR_FL; - rc = put_user(ExtAttrBits & FS_FL_USER_VISIBLE, - (int __user *)arg); } break; + case FS_IOC_SETFLAGS: if (pSMBFile == NULL) break; tcon = tlink_tcon(pSMBFile->tlink); caps = le64_to_cpu(tcon->fsUnixInfo.Capability); - - if (get_user(ExtAttrBits, (int __user *)arg)) { - rc = -EFAULT; - break; - } - - /* - * if (CIFS_UNIX_EXTATTR_CAP & caps) - * rc = CIFSSetExtAttr(xid, tcon, - * pSMBFile->fid.netfid, - * extAttrBits, - * &ExtAttrMask); - * if (rc != EOPNOTSUPP) - * break; - */ - - /* Currently only flag we can set is compressed flag */ - if ((ExtAttrBits & FS_COMPR_FL) == 0) - break; - - /* Try to set compress flag */ - if (tcon->ses->server->ops->set_compression) { - rc = tcon->ses->server->ops->set_compression( - xid, tcon, pSMBFile); - cifs_dbg(FYI, "set compress flag rc %d\n", rc); + if (CIFS_UNIX_EXTATTR_CAP & caps) { + if (get_user(ExtAttrBits, (int __user *)arg)) { + rc = -EFAULT; + break; + } + /* + * rc = CIFSGetExtAttr(xid, tcon, + * pSMBFile->fid.netfid, + * extAttrBits, + * &ExtAttrMask); + */ } + cifs_dbg(FYI, "set flags not implemented yet\n"); break; - case CIFS_IOC_COPYCHUNK_FILE: - rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0); - break; +#endif /* CONFIG_CIFS_POSIX */ default: cifs_dbg(FYI, "unsupported ioctl\n"); break; diff --git a/fs/cifs/link.c b/fs/cifs/link.c index cc02347..7e36ceb 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -621,3 +621,10 @@ symlink_exit: free_xid(xid); return rc; } + +void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie) +{ + char *p = nd_get_link(nd); + if (!IS_ERR(p)) + kfree(p); +} diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 2f9f379..138a011 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -278,7 +278,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , } static int -check_smb_hdr(struct smb_hdr *smb) +check_smb_hdr(struct smb_hdr *smb, __u16 mid) { /* does it have the right SMB "signature" ? */ if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) { @@ -287,6 +287,13 @@ check_smb_hdr(struct smb_hdr *smb) return 1; } + /* Make sure that message ids match */ + if (mid != smb->Mid) { + cifs_dbg(VFS, "Mids do not match. received=%u expected=%u\n", + smb->Mid, mid); + return 1; + } + /* if it's a response then accept */ if (smb->Flags & SMBFLG_RESPONSE) return 0; @@ -295,8 +302,7 @@ check_smb_hdr(struct smb_hdr *smb) if (smb->Command == SMB_COM_LOCKING_ANDX) return 0; - cifs_dbg(VFS, "Server sent request, not response. mid=%u\n", - get_mid(smb)); + cifs_dbg(VFS, "Server sent request, not response. mid=%u\n", smb->Mid); return 1; } @@ -304,6 +310,7 @@ int checkSMB(char *buf, unsigned int total_read) { struct smb_hdr *smb = (struct smb_hdr *)buf; + __u16 mid = smb->Mid; __u32 rfclen = be32_to_cpu(smb->smb_buf_length); __u32 clc_len; /* calculated length */ cifs_dbg(FYI, "checkSMB Length: 0x%x, smb_buf_length: 0x%x\n", @@ -341,7 +348,7 @@ checkSMB(char *buf, unsigned int total_read) } /* otherwise, there is enough to get to the BCC */ - if (check_smb_hdr(smb)) + if (check_smb_hdr(smb, mid)) return -EIO; clc_len = smbCalcSize(smb); @@ -352,7 +359,6 @@ checkSMB(char *buf, unsigned int total_read) } if (4 + rfclen != clc_len) { - __u16 mid = get_mid(smb); /* check if bcc wrapped around for large read responses */ if ((rfclen > 64 * 1024) && (rfclen > clc_len)) { /* check if lengths match mod 64K */ @@ -360,11 +366,11 @@ checkSMB(char *buf, unsigned int total_read) return 0; /* bcc wrapped */ } cifs_dbg(FYI, "Calculated size %u vs length %u mismatch for mid=%u\n", - clc_len, 4 + rfclen, mid); + clc_len, 4 + rfclen, smb->Mid); if (4 + rfclen < clc_len) { cifs_dbg(VFS, "RFC1001 size %u smaller than SMB for mid=%u\n", - rfclen, mid); + rfclen, smb->Mid); return -EIO; } else if (rfclen > clc_len + 512) { /* @@ -377,7 +383,7 @@ checkSMB(char *buf, unsigned int total_read) * data to 512 bytes. */ cifs_dbg(VFS, "RFC1001 size %u more than 512 bytes larger than SMB for mid=%u\n", - rfclen, mid); + rfclen, smb->Mid); return -EIO; } } diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 0498845..651a527 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -51,7 +51,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = { {ERRnoaccess, -EACCES}, {ERRbadfid, -EBADF}, {ERRbadmcb, -EIO}, - {ERRnomem, -EREMOTEIO}, + {ERRnomem, -ENOMEM}, {ERRbadmem, -EFAULT}, {ERRbadenv, -EFAULT}, {ERRbadformat, -EINVAL}, diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 5940eca..53a75f3 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -134,6 +134,22 @@ out: dput(dentry); } +/* + * Is it possible that this directory might turn out to be a DFS referral + * once we go to try and use it? + */ +static bool +cifs_dfs_is_possible(struct cifs_sb_info *cifs_sb) +{ +#ifdef CONFIG_CIFS_DFS_UPCALL + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + + if (tcon->Flags & SMB_SHARE_IS_IN_DFS) + return true; +#endif + return false; +} + static void cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) { @@ -143,19 +159,27 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; fattr->cf_dtype = DT_DIR; + /* + * Windows CIFS servers generally make DFS referrals look + * like directories in FIND_* responses with the reparse + * attribute flag also set (since DFS junctions are + * reparse points). We must revalidate at least these + * directory inodes before trying to use them (if + * they are DFS we will get PATH_NOT_COVERED back + * when queried directly and can then try to connect + * to the DFS target) + */ + if (cifs_dfs_is_possible(cifs_sb) && + (fattr->cf_cifsattrs & ATTR_REPARSE)) + fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; + } else if (fattr->cf_cifsattrs & ATTR_REPARSE) { + fattr->cf_mode = S_IFLNK; + fattr->cf_dtype = DT_LNK; } else { fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; fattr->cf_dtype = DT_REG; } - /* - * We need to revalidate it further to make a decision about whether it - * is a symbolic link, DFS referral or a reparse point with a direct - * access like junctions, deduplicated files, NFS symlinks. - */ - if (fattr->cf_cifsattrs & ATTR_REPARSE) - fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; - /* non-unix readdir doesn't provide nlink */ fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 5f5ba0d..8233b17 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -67,7 +67,7 @@ send_nt_cancel(struct TCP_Server_Info *server, void *buf, mutex_unlock(&server->srv_mutex); cifs_dbg(FYI, "issued NT_CANCEL for mid %u, rc = %d\n", - get_mid(in_buf), rc); + in_buf->Mid, rc); return rc; } @@ -101,7 +101,7 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer) spin_lock(&GlobalMid_Lock); list_for_each_entry(mid, &server->pending_mid_q, qhead) { - if (compare_mid(mid->mid, buf) && + if (mid->mid == buf->Mid && mid->mid_state == MID_REQUEST_SUBMITTED && le16_to_cpu(mid->command) == buf->Command) { spin_unlock(&GlobalMid_Lock); @@ -534,12 +534,10 @@ cifs_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, static int cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - FILE_ALL_INFO *data, bool *adjustTZ, bool *symlink) + FILE_ALL_INFO *data, bool *adjustTZ) { int rc; - *symlink = false; - /* could do find first instead but this returns more info */ rc = CIFSSMBQPathInfo(xid, tcon, full_path, data, 0 /* not legacy */, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & @@ -556,23 +554,6 @@ cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, CIFS_MOUNT_MAP_SPECIAL_CHR); *adjustTZ = true; } - - if (!rc && (le32_to_cpu(data->Attributes) & ATTR_REPARSE)) { - int tmprc; - int oplock = 0; - __u16 netfid; - - /* Need to check if this is a symbolic link or not */ - tmprc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, - FILE_READ_ATTRIBUTES, 0, &netfid, &oplock, - NULL, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if (tmprc == -EOPNOTSUPP) - *symlink = true; - else - CIFSSMBClose(xid, tcon, netfid); - } - return rc; } @@ -826,13 +807,6 @@ out: } static int -cifs_set_compression(const unsigned int xid, struct cifs_tcon *tcon, - struct cifsFileInfo *cfile) -{ - return CIFSSMB_set_compression(xid, tcon, cfile->fid.netfid); -} - -static int cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, const char *path, struct cifs_sb_info *cifs_sb, struct cifs_fid *fid, __u16 search_flags, @@ -982,7 +956,6 @@ struct smb_version_operations smb1_operations = { .set_path_size = CIFSSMBSetEOF, .set_file_size = CIFSSMBSetFileSize, .set_file_info = smb_set_file_info, - .set_compression = cifs_set_compression, .echo = CIFSSMBEcho, .mkdir = CIFSSMBMkDir, .mkdir_setinfo = cifs_mkdir_setinfo, diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 84c012a..78ff88c 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -123,13 +123,12 @@ move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src) int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - FILE_ALL_INFO *data, bool *adjust_tz, bool *symlink) + FILE_ALL_INFO *data, bool *adjust_tz) { int rc; struct smb2_file_all_info *smb2_data; *adjust_tz = false; - *symlink = false; smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, GFP_KERNEL); @@ -137,16 +136,9 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, return -ENOMEM; rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path, - FILE_READ_ATTRIBUTES, FILE_OPEN, 0, - smb2_data, SMB2_OP_QUERY_INFO); - if (rc == -EOPNOTSUPP) { - *symlink = true; - /* Failed on a symbolic link - query a reparse point info */ - rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path, - FILE_READ_ATTRIBUTES, FILE_OPEN, - OPEN_REPARSE_POINT, smb2_data, - SMB2_OP_QUERY_INFO); - } + FILE_READ_ATTRIBUTES, FILE_OPEN, + OPEN_REPARSE_POINT, smb2_data, + SMB2_OP_QUERY_INFO); if (rc) goto out; diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 94bd4fb..7c2f45c 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -306,7 +306,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_NONEXISTENT_SECTOR, -EIO, "STATUS_NONEXISTENT_SECTOR"}, {STATUS_MORE_PROCESSING_REQUIRED, -EIO, "STATUS_MORE_PROCESSING_REQUIRED"}, - {STATUS_NO_MEMORY, -EREMOTEIO, "STATUS_NO_MEMORY"}, + {STATUS_NO_MEMORY, -ENOMEM, "STATUS_NO_MEMORY"}, {STATUS_CONFLICTING_ADDRESSES, -EADDRINUSE, "STATUS_CONFLICTING_ADDRESSES"}, {STATUS_NOT_MAPPED_VIEW, -EIO, "STATUS_NOT_MAPPED_VIEW"}, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 757da3e..861b332 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -209,94 +209,6 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) return rsize; } -#ifdef CONFIG_CIFS_STATS2 -static int -SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) -{ - int rc; - unsigned int ret_data_len = 0; - struct network_interface_info_ioctl_rsp *out_buf; - - rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, - FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */, - NULL /* no data input */, 0 /* no data input */, - (char **)&out_buf, &ret_data_len); - - if ((rc == 0) && (ret_data_len > 0)) { - /* Dump info on first interface */ - cifs_dbg(FYI, "Adapter Capability 0x%x\t", - le32_to_cpu(out_buf->Capability)); - cifs_dbg(FYI, "Link Speed %lld\n", - le64_to_cpu(out_buf->LinkSpeed)); - } else - cifs_dbg(VFS, "error %d on ioctl to get interface list\n", rc); - - return rc; -} -#endif /* STATS2 */ - -static void -smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) -{ - int rc; - __le16 srch_path = 0; /* Null - open root of share */ - u8 oplock = SMB2_OPLOCK_LEVEL_NONE; - struct cifs_open_parms oparms; - struct cifs_fid fid; - - oparms.tcon = tcon; - oparms.desired_access = FILE_READ_ATTRIBUTES; - oparms.disposition = FILE_OPEN; - oparms.create_options = 0; - oparms.fid = &fid; - oparms.reconnect = false; - - rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL); - if (rc) - return; - -#ifdef CONFIG_CIFS_STATS2 - SMB3_request_interfaces(xid, tcon); -#endif /* STATS2 */ - - SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid, - FS_ATTRIBUTE_INFORMATION); - SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid, - FS_DEVICE_INFORMATION); - SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid, - FS_SECTOR_SIZE_INFORMATION); /* SMB3 specific */ - SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); - return; -} - -static void -smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) -{ - int rc; - __le16 srch_path = 0; /* Null - open root of share */ - u8 oplock = SMB2_OPLOCK_LEVEL_NONE; - struct cifs_open_parms oparms; - struct cifs_fid fid; - - oparms.tcon = tcon; - oparms.desired_access = FILE_READ_ATTRIBUTES; - oparms.disposition = FILE_OPEN; - oparms.create_options = 0; - oparms.fid = &fid; - oparms.reconnect = false; - - rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL); - if (rc) - return; - - SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid, - FS_ATTRIBUTE_INFORMATION); - SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid, - FS_DEVICE_INFORMATION); - SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); - return; -} - static int smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path) @@ -392,19 +304,7 @@ smb2_dump_share_caps(struct seq_file *m, struct cifs_tcon *tcon) seq_puts(m, " ASYMMETRIC,"); if (tcon->capabilities == 0) seq_puts(m, " None"); - if (tcon->ss_flags & SSINFO_FLAGS_ALIGNED_DEVICE) - seq_puts(m, " Aligned,"); - if (tcon->ss_flags & SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE) - seq_puts(m, " Partition Aligned,"); - if (tcon->ss_flags & SSINFO_FLAGS_NO_SEEK_PENALTY) - seq_puts(m, " SSD,"); - if (tcon->ss_flags & SSINFO_FLAGS_TRIM_ENABLED) - seq_puts(m, " TRIM-support,"); - seq_printf(m, "\tShare Flags: 0x%x", tcon->share_flags); - if (tcon->perf_sector_size) - seq_printf(m, "\tOptimal sector size: 0x%x", - tcon->perf_sector_size); } static void @@ -494,157 +394,6 @@ smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, } static int -SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, - struct copychunk_ioctl *pcchunk) -{ - int rc; - unsigned int ret_data_len; - struct resume_key_req *res_key; - - rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, - FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */, - NULL, 0 /* no input */, - (char **)&res_key, &ret_data_len); - - if (rc) { - cifs_dbg(VFS, "refcpy ioctl error %d getting resume key\n", rc); - goto req_res_key_exit; - } - if (ret_data_len < sizeof(struct resume_key_req)) { - cifs_dbg(VFS, "Invalid refcopy resume key length\n"); - rc = -EINVAL; - goto req_res_key_exit; - } - memcpy(pcchunk->SourceKey, res_key->ResumeKey, COPY_CHUNK_RES_KEY_SIZE); - -req_res_key_exit: - kfree(res_key); - return rc; -} - -static int -smb2_clone_range(const unsigned int xid, - struct cifsFileInfo *srcfile, - struct cifsFileInfo *trgtfile, u64 src_off, - u64 len, u64 dest_off) -{ - int rc; - unsigned int ret_data_len; - struct copychunk_ioctl *pcchunk; - struct copychunk_ioctl_rsp *retbuf = NULL; - struct cifs_tcon *tcon; - int chunks_copied = 0; - bool chunk_sizes_updated = false; - - pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL); - - if (pcchunk == NULL) - return -ENOMEM; - - cifs_dbg(FYI, "in smb2_clone_range - about to call request res key\n"); - /* Request a key from the server to identify the source of the copy */ - rc = SMB2_request_res_key(xid, tlink_tcon(srcfile->tlink), - srcfile->fid.persistent_fid, - srcfile->fid.volatile_fid, pcchunk); - - /* Note: request_res_key sets res_key null only if rc !=0 */ - if (rc) - goto cchunk_out; - - /* For now array only one chunk long, will make more flexible later */ - pcchunk->ChunkCount = __constant_cpu_to_le32(1); - pcchunk->Reserved = 0; - pcchunk->Reserved2 = 0; - - tcon = tlink_tcon(trgtfile->tlink); - - while (len > 0) { - pcchunk->SourceOffset = cpu_to_le64(src_off); - pcchunk->TargetOffset = cpu_to_le64(dest_off); - pcchunk->Length = - cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk)); - - /* Request server copy to target from src identified by key */ - rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, - trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, - true /* is_fsctl */, (char *)pcchunk, - sizeof(struct copychunk_ioctl), (char **)&retbuf, - &ret_data_len); - if (rc == 0) { - if (ret_data_len != - sizeof(struct copychunk_ioctl_rsp)) { - cifs_dbg(VFS, "invalid cchunk response size\n"); - rc = -EIO; - goto cchunk_out; - } - if (retbuf->TotalBytesWritten == 0) { - cifs_dbg(FYI, "no bytes copied\n"); - rc = -EIO; - goto cchunk_out; - } - /* - * Check if server claimed to write more than we asked - */ - if (le32_to_cpu(retbuf->TotalBytesWritten) > - le32_to_cpu(pcchunk->Length)) { - cifs_dbg(VFS, "invalid copy chunk response\n"); - rc = -EIO; - goto cchunk_out; - } - if (le32_to_cpu(retbuf->ChunksWritten) != 1) { - cifs_dbg(VFS, "invalid num chunks written\n"); - rc = -EIO; - goto cchunk_out; - } - chunks_copied++; - - src_off += le32_to_cpu(retbuf->TotalBytesWritten); - dest_off += le32_to_cpu(retbuf->TotalBytesWritten); - len -= le32_to_cpu(retbuf->TotalBytesWritten); - - cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %d\n", - le32_to_cpu(retbuf->ChunksWritten), - le32_to_cpu(retbuf->ChunkBytesWritten), - le32_to_cpu(retbuf->TotalBytesWritten)); - } else if (rc == -EINVAL) { - if (ret_data_len != sizeof(struct copychunk_ioctl_rsp)) - goto cchunk_out; - - cifs_dbg(FYI, "MaxChunks %d BytesChunk %d MaxCopy %d\n", - le32_to_cpu(retbuf->ChunksWritten), - le32_to_cpu(retbuf->ChunkBytesWritten), - le32_to_cpu(retbuf->TotalBytesWritten)); - - /* - * Check if this is the first request using these sizes, - * (ie check if copy succeed once with original sizes - * and check if the server gave us different sizes after - * we already updated max sizes on previous request). - * if not then why is the server returning an error now - */ - if ((chunks_copied != 0) || chunk_sizes_updated) - goto cchunk_out; - - /* Check that server is not asking us to grow size */ - if (le32_to_cpu(retbuf->ChunkBytesWritten) < - tcon->max_bytes_chunk) - tcon->max_bytes_chunk = - le32_to_cpu(retbuf->ChunkBytesWritten); - else - goto cchunk_out; /* server gave us bogus size */ - - /* No need to change MaxChunks since already set to 1 */ - chunk_sizes_updated = true; - } - } - -cchunk_out: - kfree(pcchunk); - return rc; -} - -static int smb2_flush_file(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid) { @@ -697,14 +446,6 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon, } static int -smb2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, - struct cifsFileInfo *cfile) -{ - return SMB2_set_compression(xid, tcon, cfile->fid.persistent_fid, - cfile->fid.volatile_fid); -} - -static int smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, const char *path, struct cifs_sb_info *cifs_sb, struct cifs_fid *fid, __u16 search_flags, @@ -1124,7 +865,6 @@ struct smb_version_operations smb20_operations = { .logoff = SMB2_logoff, .tree_connect = SMB2_tcon, .tree_disconnect = SMB2_tdis, - .qfs_tcon = smb2_qfs_tcon, .is_path_accessible = smb2_is_path_accessible, .can_echo = smb2_can_echo, .echo = SMB2_echo, @@ -1134,7 +874,6 @@ struct smb_version_operations smb20_operations = { .set_path_size = smb2_set_path_size, .set_file_size = smb2_set_file_size, .set_file_info = smb2_set_file_info, - .set_compression = smb2_set_compression, .mkdir = smb2_mkdir, .mkdir_setinfo = smb2_mkdir_setinfo, .rmdir = smb2_rmdir, @@ -1168,7 +907,6 @@ struct smb_version_operations smb20_operations = { .set_oplock_level = smb2_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, .parse_lease_buf = smb2_parse_lease_buf, - .clone_range = smb2_clone_range, }; struct smb_version_operations smb21_operations = { @@ -1198,7 +936,6 @@ struct smb_version_operations smb21_operations = { .logoff = SMB2_logoff, .tree_connect = SMB2_tcon, .tree_disconnect = SMB2_tdis, - .qfs_tcon = smb2_qfs_tcon, .is_path_accessible = smb2_is_path_accessible, .can_echo = smb2_can_echo, .echo = SMB2_echo, @@ -1208,7 +945,6 @@ struct smb_version_operations smb21_operations = { .set_path_size = smb2_set_path_size, .set_file_size = smb2_set_file_size, .set_file_info = smb2_set_file_info, - .set_compression = smb2_set_compression, .mkdir = smb2_mkdir, .mkdir_setinfo = smb2_mkdir_setinfo, .rmdir = smb2_rmdir, @@ -1242,7 +978,6 @@ struct smb_version_operations smb21_operations = { .set_oplock_level = smb21_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, .parse_lease_buf = smb2_parse_lease_buf, - .clone_range = smb2_clone_range, }; struct smb_version_operations smb30_operations = { @@ -1273,7 +1008,6 @@ struct smb_version_operations smb30_operations = { .logoff = SMB2_logoff, .tree_connect = SMB2_tcon, .tree_disconnect = SMB2_tdis, - .qfs_tcon = smb3_qfs_tcon, .is_path_accessible = smb2_is_path_accessible, .can_echo = smb2_can_echo, .echo = SMB2_echo, @@ -1283,7 +1017,6 @@ struct smb_version_operations smb30_operations = { .set_path_size = smb2_set_path_size, .set_file_size = smb2_set_file_size, .set_file_info = smb2_set_file_info, - .set_compression = smb2_set_compression, .mkdir = smb2_mkdir, .mkdir_setinfo = smb2_mkdir_setinfo, .rmdir = smb2_rmdir, @@ -1318,8 +1051,6 @@ struct smb_version_operations smb30_operations = { .set_oplock_level = smb3_set_oplock_level, .create_lease_buf = smb3_create_lease_buf, .parse_lease_buf = smb3_parse_lease_buf, - .clone_range = smb2_clone_range, - .validate_negotiate = smb3_validate_negotiate, }; struct smb_version_values smb20_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2013234..edccb52 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -454,81 +454,6 @@ neg_exit: return rc; } -int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) -{ - int rc = 0; - struct validate_negotiate_info_req vneg_inbuf; - struct validate_negotiate_info_rsp *pneg_rsp; - u32 rsplen; - - cifs_dbg(FYI, "validate negotiate\n"); - - /* - * validation ioctl must be signed, so no point sending this if we - * can not sign it. We could eventually change this to selectively - * sign just this, the first and only signed request on a connection. - * This is good enough for now since a user who wants better security - * would also enable signing on the mount. Having validation of - * negotiate info for signed connections helps reduce attack vectors - */ - if (tcon->ses->server->sign == false) - return 0; /* validation requires signing */ - - vneg_inbuf.Capabilities = - cpu_to_le32(tcon->ses->server->vals->req_capabilities); - memcpy(vneg_inbuf.Guid, cifs_client_guid, SMB2_CLIENT_GUID_SIZE); - - if (tcon->ses->sign) - vneg_inbuf.SecurityMode = - cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED); - else if (global_secflags & CIFSSEC_MAY_SIGN) - vneg_inbuf.SecurityMode = - cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED); - else - vneg_inbuf.SecurityMode = 0; - - vneg_inbuf.DialectCount = cpu_to_le16(1); - vneg_inbuf.Dialects[0] = - cpu_to_le16(tcon->ses->server->vals->protocol_id); - - rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, - FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, - (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req), - (char **)&pneg_rsp, &rsplen); - - if (rc != 0) { - cifs_dbg(VFS, "validate protocol negotiate failed: %d\n", rc); - return -EIO; - } - - if (rsplen != sizeof(struct validate_negotiate_info_rsp)) { - cifs_dbg(VFS, "invalid size of protocol negotiate response\n"); - return -EIO; - } - - /* check validate negotiate info response matches what we got earlier */ - if (pneg_rsp->Dialect != - cpu_to_le16(tcon->ses->server->vals->protocol_id)) - goto vneg_out; - - if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode)) - goto vneg_out; - - /* do not validate server guid because not saved at negprot time yet */ - - if ((le32_to_cpu(pneg_rsp->Capabilities) | SMB2_NT_FIND | - SMB2_LARGE_FILES) != tcon->ses->server->capabilities) - goto vneg_out; - - /* validate negotiate successful */ - cifs_dbg(FYI, "validate negotiate info successful\n"); - return 0; - -vneg_out: - cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n"); - return -EIO; -} - int SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, const struct nls_table *nls_cp) @@ -705,8 +630,6 @@ ssetup_ntlmssp_authenticate: goto ssetup_exit; ses->session_flags = le16_to_cpu(rsp->SessionFlags); - if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) - cifs_dbg(VFS, "SMB3 encryption not supported yet\n"); ssetup_exit: free_rsp_buf(resp_buftype, rsp); @@ -794,14 +717,6 @@ static inline void cifs_stats_fail_inc(struct cifs_tcon *tcon, uint16_t code) #define MAX_SHARENAME_LENGTH (255 /* server */ + 80 /* share */ + 1 /* NULL */) -/* These are similar values to what Windows uses */ -static inline void init_copy_chunk_defaults(struct cifs_tcon *tcon) -{ - tcon->max_chunks = 256; - tcon->max_bytes_chunk = 1048576; - tcon->max_bytes_copy = 16777216; -} - int SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, struct cifs_tcon *tcon, const struct nls_table *cp) @@ -903,9 +818,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) && ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) cifs_dbg(VFS, "DFS capability contradicts DFS flag\n"); - init_copy_chunk_defaults(tcon); - if (tcon->ses->server->ops->validate_negotiate) - rc = tcon->ses->server->ops->validate_negotiate(xid, tcon); + tcon_exit: free_rsp_buf(resp_buftype, rsp); kfree(unc_path); @@ -1224,7 +1137,6 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, cifs_dbg(FYI, "SMB2 IOCTL\n"); - *out_data = NULL; /* zero out returned data len, in case of error */ if (plen) *plen = 0; @@ -1270,38 +1182,19 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, req->Flags = 0; iov[0].iov_base = (char *)req; + /* 4 for rfc1002 length field */ + iov[0].iov_len = get_rfc1002_length(req) + 4; - /* - * If no input data, the size of ioctl struct in - * protocol spec still includes a 1 byte data buffer, - * but if input data passed to ioctl, we do not - * want to double count this, so we do not send - * the dummy one byte of data in iovec[0] if sending - * input data (in iovec[1]). We also must add 4 bytes - * in first iovec to allow for rfc1002 length field. - */ - - if (indatalen) { - iov[0].iov_len = get_rfc1002_length(req) + 4 - 1; - inc_rfc1001_len(req, indatalen - 1); - } else - iov[0].iov_len = get_rfc1002_length(req) + 4; - + if (indatalen) + inc_rfc1001_len(req, indatalen); rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base; - if ((rc != 0) && (rc != -EINVAL)) { + if (rc != 0) { if (tcon) cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); goto ioctl_exit; - } else if (rc == -EINVAL) { - if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) && - (opcode != FSCTL_SRV_COPYCHUNK)) { - if (tcon) - cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); - goto ioctl_exit; - } } /* check if caller wants to look at return data or just return rc */ @@ -1341,33 +1234,6 @@ ioctl_exit: return rc; } -/* - * Individual callers to ioctl worker function follow - */ - -int -SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid) -{ - int rc; - char *res_key = NULL; - struct compress_ioctl fsctl_input; - char *ret_data = NULL; - - fsctl_input.CompressionState = - __constant_cpu_to_le16(COMPRESSION_FORMAT_DEFAULT); - - rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, - FSCTL_SET_COMPRESSION, true /* is_fsctl */, - (char *)&fsctl_input /* data input */, - 2 /* in data len */, &ret_data /* out data */, NULL); - - cifs_dbg(FYI, "set compression rc %d\n", rc); - kfree(res_key); - - return rc; -} - int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid) @@ -2238,9 +2104,11 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, rc = SendReceive2(xid, ses, iov, num, &resp_buftype, 0); rsp = (struct smb2_set_info_rsp *)iov[0].iov_base; - if (rc != 0) + if (rc != 0) { cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE); - + goto out; + } +out: free_rsp_buf(resp_buftype, rsp); kfree(iov); return rc; @@ -2431,7 +2299,7 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, 0); if (rc) { cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); - goto qfsinf_exit; + goto qinf_exit; } rsp = (struct smb2_query_info_rsp *)iov.iov_base; @@ -2443,70 +2311,7 @@ SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, if (!rc) copy_fs_info_to_kstatfs(info, fsdata); -qfsinf_exit: - free_rsp_buf(resp_buftype, iov.iov_base); - return rc; -} - -int -SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid, int level) -{ - struct smb2_query_info_rsp *rsp = NULL; - struct kvec iov; - int rc = 0; - int resp_buftype, max_len, min_len; - struct cifs_ses *ses = tcon->ses; - unsigned int rsp_len, offset; - - if (level == FS_DEVICE_INFORMATION) { - max_len = sizeof(FILE_SYSTEM_DEVICE_INFO); - min_len = sizeof(FILE_SYSTEM_DEVICE_INFO); - } else if (level == FS_ATTRIBUTE_INFORMATION) { - max_len = sizeof(FILE_SYSTEM_ATTRIBUTE_INFO); - min_len = MIN_FS_ATTR_INFO_SIZE; - } else if (level == FS_SECTOR_SIZE_INFORMATION) { - max_len = sizeof(struct smb3_fs_ss_info); - min_len = sizeof(struct smb3_fs_ss_info); - } else { - cifs_dbg(FYI, "Invalid qfsinfo level %d\n", level); - return -EINVAL; - } - - rc = build_qfs_info_req(&iov, tcon, level, max_len, - persistent_fid, volatile_fid); - if (rc) - return rc; - - rc = SendReceive2(xid, ses, &iov, 1, &resp_buftype, 0); - if (rc) { - cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE); - goto qfsattr_exit; - } - rsp = (struct smb2_query_info_rsp *)iov.iov_base; - - rsp_len = le32_to_cpu(rsp->OutputBufferLength); - offset = le16_to_cpu(rsp->OutputBufferOffset); - rc = validate_buf(offset, rsp_len, &rsp->hdr, min_len); - if (rc) - goto qfsattr_exit; - - if (level == FS_ATTRIBUTE_INFORMATION) - memcpy(&tcon->fsAttrInfo, 4 /* RFC1001 len */ + offset - + (char *)&rsp->hdr, min_t(unsigned int, - rsp_len, max_len)); - else if (level == FS_DEVICE_INFORMATION) - memcpy(&tcon->fsDevInfo, 4 /* RFC1001 len */ + offset - + (char *)&rsp->hdr, sizeof(FILE_SYSTEM_DEVICE_INFO)); - else if (level == FS_SECTOR_SIZE_INFORMATION) { - struct smb3_fs_ss_info *ss_info = (struct smb3_fs_ss_info *) - (4 /* RFC1001 len */ + offset + (char *)&rsp->hdr); - tcon->ss_flags = le32_to_cpu(ss_info->Flags); - tcon->perf_sector_size = - le32_to_cpu(ss_info->PhysicalBytesPerSectorForPerf); - } - -qfsattr_exit: +qinf_exit: free_rsp_buf(resp_buftype, iov.iov_base); return rc; } diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 2022c54..b83d011 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -122,23 +122,6 @@ struct smb2_pdu { __le16 StructureSize2; /* size of wct area (varies, request specific) */ } __packed; -struct smb2_transform_hdr { - __be32 smb2_buf_length; /* big endian on wire */ - /* length is only two or three bytes - with - one or two byte type preceding it that MBZ */ - __u8 ProtocolId[4]; /* 0xFD 'S' 'M' 'B' */ - __u8 Signature[16]; - __u8 Nonce[11]; - __u8 Reserved[5]; - __le32 OriginalMessageSize; - __u16 Reserved1; - __le16 EncryptionAlgorithm; - __u64 SessionId; -} __packed; - -/* Encryption Algorithms */ -#define SMB2_ENCRYPTION_AES128_CCM __constant_cpu_to_le16(0x0001) - /* * SMB2 flag definitions */ @@ -254,7 +237,6 @@ struct smb2_sess_setup_req { /* Currently defined SessionFlags */ #define SMB2_SESSION_FLAG_IS_GUEST 0x0001 #define SMB2_SESSION_FLAG_IS_NULL 0x0002 -#define SMB2_SESSION_FLAG_ENCRYPT_DATA 0x0004 struct smb2_sess_setup_rsp { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 9 */ @@ -552,16 +534,9 @@ struct create_durable { } Data; } __packed; -#define COPY_CHUNK_RES_KEY_SIZE 24 -struct resume_key_req { - char ResumeKey[COPY_CHUNK_RES_KEY_SIZE]; - __le32 ContextLength; /* MBZ */ - char Context[0]; /* ignored, Windows sets to 4 bytes of zero */ -} __packed; - /* this goes in the ioctl buffer when doing a copychunk request */ struct copychunk_ioctl { - char SourceKey[COPY_CHUNK_RES_KEY_SIZE]; + char SourceKey[24]; __le32 ChunkCount; /* we are only sending 1 */ __le32 Reserved; /* array will only be one chunk long for us */ @@ -571,25 +546,13 @@ struct copychunk_ioctl { __u32 Reserved2; } __packed; -struct copychunk_ioctl_rsp { - __le32 ChunksWritten; - __le32 ChunkBytesWritten; - __le32 TotalBytesWritten; -} __packed; - -struct validate_negotiate_info_req { +/* Response and Request are the same format */ +struct validate_negotiate_info { __le32 Capabilities; __u8 Guid[SMB2_CLIENT_GUID_SIZE]; __le16 SecurityMode; __le16 DialectCount; - __le16 Dialects[1]; /* dialect (someday maybe list) client asked for */ -} __packed; - -struct validate_negotiate_info_rsp { - __le32 Capabilities; - __u8 Guid[SMB2_CLIENT_GUID_SIZE]; - __le16 SecurityMode; - __le16 Dialect; /* Dialect in use for the connection */ + __le16 Dialect[1]; } __packed; #define RSS_CAPABLE 0x00000001 @@ -606,10 +569,6 @@ struct network_interface_info_ioctl_rsp { #define NO_FILE_ID 0xFFFFFFFFFFFFFFFFULL /* general ioctls to srv not to file */ -struct compress_ioctl { - __le16 CompressionState; /* See cifspdu.h for possible flag values */ -} __packed; - struct smb2_ioctl_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 57 */ @@ -625,7 +584,7 @@ struct smb2_ioctl_req { __le32 MaxOutputResponse; __le32 Flags; __u32 Reserved2; - __u8 Buffer[0]; + char Buffer[0]; } __packed; struct smb2_ioctl_rsp { @@ -911,16 +870,14 @@ struct smb2_lease_ack { /* File System Information Classes */ #define FS_VOLUME_INFORMATION 1 /* Query */ -#define FS_LABEL_INFORMATION 2 /* Local only */ +#define FS_LABEL_INFORMATION 2 /* Set */ #define FS_SIZE_INFORMATION 3 /* Query */ #define FS_DEVICE_INFORMATION 4 /* Query */ #define FS_ATTRIBUTE_INFORMATION 5 /* Query */ #define FS_CONTROL_INFORMATION 6 /* Query, Set */ #define FS_FULL_SIZE_INFORMATION 7 /* Query */ #define FS_OBJECT_ID_INFORMATION 8 /* Query, Set */ -#define FS_DRIVER_PATH_INFORMATION 9 /* Local only */ -#define FS_VOLUME_FLAGS_INFORMATION 10 /* Local only */ -#define FS_SECTOR_SIZE_INFORMATION 11 /* SMB3 or later. Query */ +#define FS_DRIVER_PATH_INFORMATION 9 /* Query */ struct smb2_fs_full_size_info { __le64 TotalAllocationUnits; @@ -930,22 +887,6 @@ struct smb2_fs_full_size_info { __le32 BytesPerSector; } __packed; -#define SSINFO_FLAGS_ALIGNED_DEVICE 0x00000001 -#define SSINFO_FLAGS_PARTITION_ALIGNED_ON_DEVICE 0x00000002 -#define SSINFO_FLAGS_NO_SEEK_PENALTY 0x00000004 -#define SSINFO_FLAGS_TRIM_ENABLED 0x00000008 - -/* sector size info struct */ -struct smb3_fs_ss_info { - __le32 LogicalBytesPerSector; - __le32 PhysicalBytesPerSectorForAtomicity; - __le32 PhysicalBytesPerSectorForPerf; - __le32 FileSystemEffectivePhysicalBytesPerSectorForAtomicity; - __le32 Flags; - __le32 ByteOffsetForSectorAlignment; - __le32 ByteOffsetForPartitionAlignment; -} __packed; - /* partial list of QUERY INFO levels */ #define FILE_DIRECTORY_INFORMATION 1 #define FILE_FULL_DIRECTORY_INFORMATION 2 diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 93adc64..e3fb480 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -61,7 +61,7 @@ extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst, extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, FILE_ALL_INFO *data, - bool *adjust_tz, bool *symlink); + bool *adjust_tz); extern int smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, const char *full_path, __u64 size, struct cifs_sb_info *cifs_sb, bool set_alloc); @@ -142,16 +142,12 @@ extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, FILE_BASIC_INFO *buf); -extern int SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_fid, u64 volatile_fid); extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, const u64 persistent_fid, const u64 volatile_fid, const __u8 oplock_level); extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, struct kstatfs *FSData); -extern int SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon, - u64 persistent_file_id, u64 volatile_file_id, int lvl); extern int SMB2_lock(const unsigned int xid, struct cifs_tcon *tcon, const __u64 persist_fid, const __u64 volatile_fid, const __u32 pid, const __u64 length, const __u64 offset, @@ -162,6 +158,5 @@ extern int smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, struct smb2_lock_element *buf); extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, __u8 *lease_key, const __le32 lease_state); -extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *); #endif /* _SMB2PROTO_H */ diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 59c748c..340abca 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -466,7 +466,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) static inline void smb2_seq_num_into_buf(struct TCP_Server_Info *server, struct smb2_hdr *hdr) { - hdr->MessageId = get_next_mid64(server); + hdr->MessageId = get_next_mid(server); } static struct mid_q_entry * @@ -516,19 +516,13 @@ smb2_get_mid_entry(struct cifs_ses *ses, struct smb2_hdr *buf, return -EAGAIN; } - if (ses->status == CifsNew) { + if (ses->status != CifsGood) { + /* check if SMB2 session is bad because we are setting it up */ if ((buf->Command != SMB2_SESSION_SETUP) && (buf->Command != SMB2_NEGOTIATE)) return -EAGAIN; /* else ok - we are setting up session */ } - - if (ses->status == CifsExiting) { - if (buf->Command != SMB2_LOGOFF) - return -EAGAIN; - /* else ok - we are shutting down the session */ - } - *mid = smb2_mid_entry_alloc(buf, ses->server); if (*mid == NULL) return -ENOMEM; diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index 0e538b5..a4b2391f 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h @@ -90,7 +90,7 @@ #define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */ #define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */ #define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */ -#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 +#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 /* BB add struct */ /* Perform server-side data movement */ #define FSCTL_SRV_COPYCHUNK 0x001440F2 #define FSCTL_SRV_COPYCHUNK_WRITE 0x001480F2 diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index b375709..800b938 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -58,7 +58,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) return temp; else { memset(temp, 0, sizeof(struct mid_q_entry)); - temp->mid = get_mid(smb_buffer); + temp->mid = smb_buffer->Mid; /* always LE */ temp->pid = current->pid; temp->command = cpu_to_le16(smb_buffer->Command); cifs_dbg(FYI, "For smb_command %d\n", smb_buffer->Command); @@ -431,20 +431,13 @@ static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, return -EAGAIN; } - if (ses->status == CifsNew) { + if (ses->status != CifsGood) { + /* check if SMB session is bad because we are setting it up */ if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && (in_buf->Command != SMB_COM_NEGOTIATE)) return -EAGAIN; /* else ok - we are setting up session */ } - - if (ses->status == CifsExiting) { - /* check if SMB session is bad because we are setting it up */ - if (in_buf->Command != SMB_COM_LOGOFF_ANDX) - return -EAGAIN; - /* else ok - we are shutting down session */ - } - *ppmidQ = AllocMidQEntry(in_buf, ses->server); if (*ppmidQ == NULL) return -ENOMEM; diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h index e7550cb..cc0ea9f 100644 --- a/fs/coda/coda_linux.h +++ b/fs/coda/coda_linux.h @@ -40,7 +40,7 @@ extern const struct file_operations coda_ioctl_operations; int coda_open(struct inode *i, struct file *f); int coda_release(struct inode *i, struct file *f); int coda_permission(struct inode *inode, int mask); -int coda_revalidate_inode(struct inode *); +int coda_revalidate_inode(struct dentry *); int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); int coda_setattr(struct dentry *, struct iattr *); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 5efbb5e..190effc 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -387,6 +387,9 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx) BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; + if (!host_file->f_op) + return -ENOTDIR; + if (host_file->f_op->iterate) { struct inode *host_inode = file_inode(host_file); mutex_lock(&host_inode->i_mutex); @@ -563,12 +566,13 @@ static int coda_dentry_delete(const struct dentry * dentry) * cache manager Venus issues a downcall to the kernel when this * happens */ -int coda_revalidate_inode(struct inode *inode) +int coda_revalidate_inode(struct dentry *dentry) { struct coda_vattr attr; int error; int old_mode; ino_t old_ino; + struct inode *inode = dentry->d_inode; struct coda_inode_info *cii = ITOC(inode); if (!cii->c_flags) diff --git a/fs/coda/file.c b/fs/coda/file.c index 9e83b77..380b798 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -36,7 +36,7 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - if (!host_file->f_op->read) + if (!host_file->f_op || !host_file->f_op->read) return -EINVAL; return host_file->f_op->read(host_file, buf, count, ppos); @@ -75,7 +75,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - if (!host_file->f_op->write) + if (!host_file->f_op || !host_file->f_op->write) return -EINVAL; host_inode = file_inode(host_file); @@ -105,7 +105,7 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - if (!host_file->f_op->mmap) + if (!host_file->f_op || !host_file->f_op->mmap) return -ENODEV; coda_inode = file_inode(coda_file); diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 506de34..4dcc0d8 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -257,7 +257,7 @@ static void coda_evict_inode(struct inode *inode) int coda_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { - int err = coda_revalidate_inode(dentry->d_inode); + int err = coda_revalidate_inode(dentry); if (!err) generic_fillattr(dentry->d_inode, stat); return err; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index dc52e13..5d19acf 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1583,13 +1583,13 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, /*FALL THROUGH*/ default: - if (f.file->f_op->compat_ioctl) { + if (f.file->f_op && f.file->f_op->compat_ioctl) { error = f.file->f_op->compat_ioctl(f.file, cmd, arg); if (error != -ENOIOCTLCMD) goto out_fput; } - if (!f.file->f_op->unlocked_ioctl) + if (!f.file->f_op || !f.file->f_op->unlocked_ioctl) goto do_ioctl; break; } diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index e081acb..277bd1b 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -56,28 +56,29 @@ static void configfs_d_iput(struct dentry * dentry, struct configfs_dirent *sd = dentry->d_fsdata; if (sd) { + BUG_ON(sd->s_dentry != dentry); /* Coordinate with configfs_readdir */ spin_lock(&configfs_dirent_lock); - /* Coordinate with configfs_attach_attr where will increase - * sd->s_count and update sd->s_dentry to new allocated one. - * Only set sd->dentry to null when this dentry is the only - * sd owner. - * If not do so, configfs_d_iput may run just after - * configfs_attach_attr and set sd->s_dentry to null - * even it's still in use. - */ - if (atomic_read(&sd->s_count) <= 2) - sd->s_dentry = NULL; - + sd->s_dentry = NULL; spin_unlock(&configfs_dirent_lock); configfs_put(sd); } iput(inode); } +/* + * We _must_ delete our dentries on last dput, as the chain-to-parent + * behavior is required to clear the parents of default_groups. + */ +static int configfs_d_delete(const struct dentry *dentry) +{ + return 1; +} + const struct dentry_operations configfs_dentry_ops = { .d_iput = configfs_d_iput, - .d_delete = always_delete_dentry, + /* simple_delete_dentry() isn't exported */ + .d_delete = configfs_d_delete, }; #ifdef CONFIG_LOCKDEP @@ -425,11 +426,8 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den struct configfs_attribute * attr = sd->s_element; int error; - spin_lock(&configfs_dirent_lock); dentry->d_fsdata = configfs_get(sd); sd->s_dentry = dentry; - spin_unlock(&configfs_dirent_lock); - error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG, configfs_init_file); if (error) { diff --git a/fs/coredump.c b/fs/coredump.c index bc3fbcd..9bdeca1 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -485,7 +485,7 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) return err; } -void do_coredump(const siginfo_t *siginfo) +void do_coredump(siginfo_t *siginfo) { struct core_state core_state; struct core_name cn; @@ -645,7 +645,7 @@ void do_coredump(const siginfo_t *siginfo) */ if (!uid_eq(inode->i_uid, current_fsuid())) goto close_fail; - if (!cprm.file->f_op->write) + if (!cprm.file->f_op || !cprm.file->f_op->write) goto close_fail; if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file)) goto close_fail; @@ -685,55 +685,40 @@ fail: * do on a core-file: use only these functions to write out all the * necessary info. */ -int dump_emit(struct coredump_params *cprm, const void *addr, int nr) +int dump_write(struct file *file, const void *addr, int nr) { - struct file *file = cprm->file; - loff_t pos = file->f_pos; - ssize_t n; - if (cprm->written + nr > cprm->limit) - return 0; - while (nr) { - if (dump_interrupted()) - return 0; - n = __kernel_write(file, addr, nr, &pos); - if (n <= 0) - return 0; - file->f_pos = pos; - cprm->written += n; - nr -= n; - } - return 1; + return !dump_interrupted() && + access_ok(VERIFY_READ, addr, nr) && + file->f_op->write(file, addr, nr, &file->f_pos) == nr; } -EXPORT_SYMBOL(dump_emit); +EXPORT_SYMBOL(dump_write); -int dump_skip(struct coredump_params *cprm, size_t nr) +int dump_seek(struct file *file, loff_t off) { - static char zeroes[PAGE_SIZE]; - struct file *file = cprm->file; + int ret = 1; + if (file->f_op->llseek && file->f_op->llseek != no_llseek) { - if (cprm->written + nr > cprm->limit) - return 0; if (dump_interrupted() || - file->f_op->llseek(file, nr, SEEK_CUR) < 0) + file->f_op->llseek(file, off, SEEK_CUR) < 0) return 0; - cprm->written += nr; - return 1; } else { - while (nr > PAGE_SIZE) { - if (!dump_emit(cprm, zeroes, PAGE_SIZE)) - return 0; - nr -= PAGE_SIZE; + char *buf = (char *)get_zeroed_page(GFP_KERNEL); + + if (!buf) + return 0; + while (off > 0) { + unsigned long n = off; + + if (n > PAGE_SIZE) + n = PAGE_SIZE; + if (!dump_write(file, buf, n)) { + ret = 0; + break; + } + off -= n; } - return dump_emit(cprm, zeroes, nr); + free_page((unsigned long)buf); } + return ret; } -EXPORT_SYMBOL(dump_skip); - -int dump_align(struct coredump_params *cprm, int align) -{ - unsigned mod = cprm->written & (align - 1); - if (align & (align - 1)) - return 0; - return mod ? dump_skip(cprm, align - mod) : 1; -} -EXPORT_SYMBOL(dump_align); +EXPORT_SYMBOL(dump_seek); diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig index 11b29d4..cd06466 100644 --- a/fs/cramfs/Kconfig +++ b/fs/cramfs/Kconfig @@ -1,5 +1,5 @@ config CRAMFS - tristate "Compressed ROM file system support (cramfs) (OBSOLETE)" + tristate "Compressed ROM file system support (cramfs)" depends on BLOCK select ZLIB_INFLATE help @@ -16,7 +16,4 @@ config CRAMFS cramfs. Note that the root file system (the one containing the directory /) cannot be compiled as a module. - This filesystem is obsoleted by SquashFS, which is much better - in terms of performance and features. - If unsure, say N. diff --git a/fs/dcache.c b/fs/dcache.c index 4bdb300..ae6ebb8 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -88,6 +88,35 @@ EXPORT_SYMBOL(rename_lock); static struct kmem_cache *dentry_cache __read_mostly; +/** + * read_seqbegin_or_lock - begin a sequence number check or locking block + * @lock: sequence lock + * @seq : sequence number to be checked + * + * First try it once optimistically without taking the lock. If that fails, + * take the lock. The sequence number is also used as a marker for deciding + * whether to be a reader (even) or writer (odd). + * N.B. seq must be initialized to an even number to begin with. + */ +static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) +{ + if (!(*seq & 1)) /* Even */ + *seq = read_seqbegin(lock); + else /* Odd */ + read_seqlock_excl(lock); +} + +static inline int need_seqretry(seqlock_t *lock, int seq) +{ + return !(seq & 1) && read_seqretry(lock, seq); +} + +static inline void done_seqretry(seqlock_t *lock, int seq) +{ + if (seq & 1) + read_sequnlock_excl(lock); +} + /* * This is the single most critical data structure when it comes * to the dcache: the hashtable for lookups. Somebody should try @@ -96,6 +125,8 @@ static struct kmem_cache *dentry_cache __read_mostly; * This hash-function tries to avoid losing too many bits of hash * information, yet avoid using a prime hash-size or similar. */ +#define D_HASHBITS d_hash_shift +#define D_HASHMASK d_hash_mask static unsigned int d_hash_mask __read_mostly; static unsigned int d_hash_shift __read_mostly; @@ -106,8 +137,8 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent, unsigned int hash) { hash += (unsigned long) parent / L1_CACHE_BYTES; - hash = hash + (hash >> d_hash_shift); - return dentry_hashtable + (hash & d_hash_mask); + hash = hash + (hash >> D_HASHBITS); + return dentry_hashtable + (hash & D_HASHMASK); } /* Statistics gathering. */ @@ -312,7 +343,6 @@ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; - __d_clear_type(dentry); dentry->d_inode = NULL; hlist_del_init(&dentry->d_alias); dentry_rcuwalk_barrier(dentry); @@ -438,7 +468,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) { list_del(&dentry->d_u.d_child); /* - * Inform d_walk() that we are no longer attached to the + * Inform try_to_ascend() that we are no longer attached to the * dentry tree */ dentry->d_flags |= DCACHE_DENTRY_KILLED; @@ -453,6 +483,27 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) return parent; } +/* + * Unhash a dentry without inserting an RCU walk barrier or checking that + * dentry->d_lock is locked. The caller must take care of that, if + * appropriate. + */ +static void __d_shrink(struct dentry *dentry) +{ + if (!d_unhashed(dentry)) { + struct hlist_bl_head *b; + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) + b = &dentry->d_sb->s_anon; + else + b = d_hash(dentry->d_parent, dentry->d_name.hash); + + hlist_bl_lock(b); + __hlist_bl_del(&dentry->d_hash); + dentry->d_hash.pprev = NULL; + hlist_bl_unlock(b); + } +} + /** * d_drop - drop a dentry * @dentry: dentry to drop @@ -471,21 +522,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) void __d_drop(struct dentry *dentry) { if (!d_unhashed(dentry)) { - struct hlist_bl_head *b; - /* - * Hashed dentries are normally on the dentry hashtable, - * with the exception of those newly allocated by - * d_obtain_alias, which are always IS_ROOT: - */ - if (unlikely(IS_ROOT(dentry))) - b = &dentry->d_sb->s_anon; - else - b = d_hash(dentry->d_parent, dentry->d_name.hash); - - hlist_bl_lock(b); - __hlist_bl_del(&dentry->d_hash); - dentry->d_hash.pprev = NULL; - hlist_bl_unlock(b); + __d_shrink(dentry); dentry_rcuwalk_barrier(dentry); } } @@ -1038,6 +1075,144 @@ void shrink_dcache_sb(struct super_block *sb) } EXPORT_SYMBOL(shrink_dcache_sb); +/* + * destroy a single subtree of dentries for unmount + * - see the comments on shrink_dcache_for_umount() for a description of the + * locking + */ +static void shrink_dcache_for_umount_subtree(struct dentry *dentry) +{ + struct dentry *parent; + + BUG_ON(!IS_ROOT(dentry)); + + for (;;) { + /* descend to the first leaf in the current subtree */ + while (!list_empty(&dentry->d_subdirs)) + dentry = list_entry(dentry->d_subdirs.next, + struct dentry, d_u.d_child); + + /* consume the dentries from this leaf up through its parents + * until we find one with children or run out altogether */ + do { + struct inode *inode; + + /* + * inform the fs that this dentry is about to be + * unhashed and destroyed. + */ + if ((dentry->d_flags & DCACHE_OP_PRUNE) && + !d_unhashed(dentry)) + dentry->d_op->d_prune(dentry); + + dentry_lru_del(dentry); + __d_shrink(dentry); + + if (dentry->d_lockref.count != 0) { + printk(KERN_ERR + "BUG: Dentry %p{i=%lx,n=%s}" + " still in use (%d)" + " [unmount of %s %s]\n", + dentry, + dentry->d_inode ? + dentry->d_inode->i_ino : 0UL, + dentry->d_name.name, + dentry->d_lockref.count, + dentry->d_sb->s_type->name, + dentry->d_sb->s_id); + BUG(); + } + + if (IS_ROOT(dentry)) { + parent = NULL; + list_del(&dentry->d_u.d_child); + } else { + parent = dentry->d_parent; + parent->d_lockref.count--; + list_del(&dentry->d_u.d_child); + } + + inode = dentry->d_inode; + if (inode) { + dentry->d_inode = NULL; + hlist_del_init(&dentry->d_alias); + if (dentry->d_op && dentry->d_op->d_iput) + dentry->d_op->d_iput(dentry, inode); + else + iput(inode); + } + + d_free(dentry); + + /* finished when we fall off the top of the tree, + * otherwise we ascend to the parent and move to the + * next sibling if there is one */ + if (!parent) + return; + dentry = parent; + } while (list_empty(&dentry->d_subdirs)); + + dentry = list_entry(dentry->d_subdirs.next, + struct dentry, d_u.d_child); + } +} + +/* + * destroy the dentries attached to a superblock on unmounting + * - we don't need to use dentry->d_lock because: + * - the superblock is detached from all mountings and open files, so the + * dentry trees will not be rearranged by the VFS + * - s_umount is write-locked, so the memory pressure shrinker will ignore + * any dentries belonging to this superblock that it comes across + * - the filesystem itself is no longer permitted to rearrange the dentries + * in this superblock + */ +void shrink_dcache_for_umount(struct super_block *sb) +{ + struct dentry *dentry; + + if (down_read_trylock(&sb->s_umount)) + BUG(); + + dentry = sb->s_root; + sb->s_root = NULL; + dentry->d_lockref.count--; + shrink_dcache_for_umount_subtree(dentry); + + while (!hlist_bl_empty(&sb->s_anon)) { + dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash); + shrink_dcache_for_umount_subtree(dentry); + } +} + +/* + * This tries to ascend one level of parenthood, but + * we can race with renaming, so we need to re-check + * the parenthood after dropping the lock and check + * that the sequence number still matches. + */ +static struct dentry *try_to_ascend(struct dentry *old, unsigned seq) +{ + struct dentry *new = old->d_parent; + + rcu_read_lock(); + spin_unlock(&old->d_lock); + spin_lock(&new->d_lock); + + /* + * might go back up the wrong parent if we have had a rename + * or deletion + */ + if (new != old->d_parent || + (old->d_flags & DCACHE_DENTRY_KILLED) || + need_seqretry(&rename_lock, seq)) { + spin_unlock(&new->d_lock); + new = NULL; + } + rcu_read_unlock(); + return new; +} + /** * enum d_walk_ret - action to talke during tree walk * @D_WALK_CONTINUE: contrinue walk @@ -1126,24 +1301,9 @@ resume: */ if (this_parent != parent) { struct dentry *child = this_parent; - this_parent = child->d_parent; - - rcu_read_lock(); - spin_unlock(&child->d_lock); - spin_lock(&this_parent->d_lock); - - /* - * might go back up the wrong parent if we have had a rename - * or deletion - */ - if (this_parent != child->d_parent || - (child->d_flags & DCACHE_DENTRY_KILLED) || - need_seqretry(&rename_lock, seq)) { - spin_unlock(&this_parent->d_lock); - rcu_read_unlock(); + this_parent = try_to_ascend(this_parent, seq); + if (!this_parent) goto rename_retry; - } - rcu_read_unlock(); next = child->d_u.d_child.next; goto resume; } @@ -1318,91 +1478,6 @@ void shrink_dcache_parent(struct dentry *parent) } EXPORT_SYMBOL(shrink_dcache_parent); -static enum d_walk_ret umount_collect(void *_data, struct dentry *dentry) -{ - struct select_data *data = _data; - enum d_walk_ret ret = D_WALK_CONTINUE; - - if (dentry->d_lockref.count) { - dentry_lru_del(dentry); - if (likely(!list_empty(&dentry->d_subdirs))) - goto out; - if (dentry == data->start && dentry->d_lockref.count == 1) - goto out; - printk(KERN_ERR - "BUG: Dentry %p{i=%lx,n=%s}" - " still in use (%d)" - " [unmount of %s %s]\n", - dentry, - dentry->d_inode ? - dentry->d_inode->i_ino : 0UL, - dentry->d_name.name, - dentry->d_lockref.count, - dentry->d_sb->s_type->name, - dentry->d_sb->s_id); - BUG(); - } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { - /* - * We can't use d_lru_shrink_move() because we - * need to get the global LRU lock and do the - * LRU accounting. - */ - if (dentry->d_flags & DCACHE_LRU_LIST) - d_lru_del(dentry); - d_shrink_add(dentry, &data->dispose); - data->found++; - ret = D_WALK_NORETRY; - } -out: - if (data->found && need_resched()) - ret = D_WALK_QUIT; - return ret; -} - -/* - * destroy the dentries attached to a superblock on unmounting - */ -void shrink_dcache_for_umount(struct super_block *sb) -{ - struct dentry *dentry; - - if (down_read_trylock(&sb->s_umount)) - BUG(); - - dentry = sb->s_root; - sb->s_root = NULL; - for (;;) { - struct select_data data; - - INIT_LIST_HEAD(&data.dispose); - data.start = dentry; - data.found = 0; - - d_walk(dentry, &data, umount_collect, NULL); - if (!data.found) - break; - - shrink_dentry_list(&data.dispose); - cond_resched(); - } - d_drop(dentry); - dput(dentry); - - while (!hlist_bl_empty(&sb->s_anon)) { - struct select_data data; - dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash); - - INIT_LIST_HEAD(&data.dispose); - data.start = NULL; - data.found = 0; - - d_walk(dentry, &data, umount_collect, NULL); - if (data.found) - shrink_dentry_list(&data.dispose); - cond_resched(); - } -} - static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry) { struct select_data *data = _data; @@ -1563,17 +1638,12 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); -/** - * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems) - * @sb: the superblock - * @name: qstr of the name - * - * For a filesystem that just pins its dentries in memory and never - * performs lookups at all, return an unhashed IS_ROOT dentry. - */ struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) { - return __d_alloc(sb, name); + struct dentry *dentry = __d_alloc(sb, name); + if (dentry) + dentry->d_flags |= DCACHE_DISCONNECTED; + return dentry; } EXPORT_SYMBOL(d_alloc_pseudo); @@ -1615,42 +1685,14 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op) } EXPORT_SYMBOL(d_set_d_op); -static unsigned d_flags_for_inode(struct inode *inode) -{ - unsigned add_flags = DCACHE_FILE_TYPE; - - if (!inode) - return DCACHE_MISS_TYPE; - - if (S_ISDIR(inode->i_mode)) { - add_flags = DCACHE_DIRECTORY_TYPE; - if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) { - if (unlikely(!inode->i_op->lookup)) - add_flags = DCACHE_AUTODIR_TYPE; - else - inode->i_opflags |= IOP_LOOKUP; - } - } else if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { - if (unlikely(inode->i_op->follow_link)) - add_flags = DCACHE_SYMLINK_TYPE; - else - inode->i_opflags |= IOP_NOFOLLOW; - } - - if (unlikely(IS_AUTOMOUNT(inode))) - add_flags |= DCACHE_NEED_AUTOMOUNT; - return add_flags; -} - static void __d_instantiate(struct dentry *dentry, struct inode *inode) { - unsigned add_flags = d_flags_for_inode(inode); - spin_lock(&dentry->d_lock); - dentry->d_flags &= ~DCACHE_ENTRY_TYPE; - dentry->d_flags |= add_flags; - if (inode) + if (inode) { + if (unlikely(IS_AUTOMOUNT(inode))) + dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; hlist_add_head(&dentry->d_alias, &inode->i_dentry); + } dentry->d_inode = inode; dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); @@ -1759,33 +1801,6 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) EXPORT_SYMBOL(d_instantiate_unique); -/** - * d_instantiate_no_diralias - instantiate a non-aliased dentry - * @entry: dentry to complete - * @inode: inode to attach to this dentry - * - * Fill in inode information in the entry. If a directory alias is found, then - * return an error (and drop inode). Together with d_materialise_unique() this - * guarantees that a directory inode may never have more than one alias. - */ -int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode) -{ - BUG_ON(!hlist_unhashed(&entry->d_alias)); - - spin_lock(&inode->i_lock); - if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) { - spin_unlock(&inode->i_lock); - iput(inode); - return -EBUSY; - } - __d_instantiate(entry, inode); - spin_unlock(&inode->i_lock); - security_d_instantiate(entry, inode); - - return 0; -} -EXPORT_SYMBOL(d_instantiate_no_diralias); - struct dentry *d_make_root(struct inode *root_inode) { struct dentry *res = NULL; @@ -1855,7 +1870,6 @@ struct dentry *d_obtain_alias(struct inode *inode) static const struct qstr anonstring = QSTR_INIT("/", 1); struct dentry *tmp; struct dentry *res; - unsigned add_flags; if (!inode) return ERR_PTR(-ESTALE); @@ -1881,11 +1895,9 @@ struct dentry *d_obtain_alias(struct inode *inode) } /* attach a disconnected dentry */ - add_flags = d_flags_for_inode(inode) | DCACHE_DISCONNECTED; - spin_lock(&tmp->d_lock); tmp->d_inode = inode; - tmp->d_flags |= add_flags; + tmp->d_flags |= DCACHE_DISCONNECTED; hlist_add_head(&tmp->d_alias, &inode->i_dentry); hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); @@ -2562,7 +2574,7 @@ static void __d_move(struct dentry * dentry, struct dentry * target) dentry_lock_for_move(dentry, target); write_seqcount_begin(&dentry->d_seq); - write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); + write_seqcount_begin(&target->d_seq); /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ @@ -2694,7 +2706,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) dentry_lock_for_move(anon, dentry); write_seqcount_begin(&dentry->d_seq); - write_seqcount_begin_nested(&anon->d_seq, DENTRY_D_LOCK_NESTED); + write_seqcount_begin(&anon->d_seq); dparent = dentry->d_parent; @@ -2713,6 +2725,7 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) spin_unlock(&dentry->d_lock); /* anon->d_lock still locked, returns locked */ + anon->d_flags &= ~DCACHE_DISCONNECTED; } /** @@ -2868,36 +2881,27 @@ static int prepend_path(const struct path *path, const struct path *root, char **buffer, int *buflen) { - struct dentry *dentry; - struct vfsmount *vfsmnt; - struct mount *mnt; + struct dentry *dentry = path->dentry; + struct vfsmount *vfsmnt = path->mnt; + struct mount *mnt = real_mount(vfsmnt); int error = 0; - unsigned seq, m_seq = 0; + unsigned seq = 0; char *bptr; int blen; rcu_read_lock(); -restart_mnt: - read_seqbegin_or_lock(&mount_lock, &m_seq); - seq = 0; - rcu_read_lock(); restart: bptr = *buffer; blen = *buflen; - error = 0; - dentry = path->dentry; - vfsmnt = path->mnt; - mnt = real_mount(vfsmnt); read_seqbegin_or_lock(&rename_lock, &seq); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { - struct mount *parent = ACCESS_ONCE(mnt->mnt_parent); /* Global root? */ - if (mnt != parent) { - dentry = ACCESS_ONCE(mnt->mnt_mountpoint); - mnt = parent; + if (mnt_has_parent(mnt)) { + dentry = mnt->mnt_mountpoint; + mnt = mnt->mnt_parent; vfsmnt = &mnt->mnt; continue; } @@ -2932,14 +2936,6 @@ restart: } done_seqretry(&rename_lock, seq); - if (!(m_seq & 1)) - rcu_read_unlock(); - if (need_seqretry(&mount_lock, m_seq)) { - m_seq = 1; - goto restart_mnt; - } - done_seqretry(&mount_lock, m_seq); - if (error >= 0 && bptr == *buffer) { if (--blen < 0) error = -ENAMETOOLONG; @@ -2975,7 +2971,9 @@ char *__d_path(const struct path *path, int error; prepend(&res, &buflen, "\0", 1); + br_read_lock(&vfsmount_lock); error = prepend_path(path, root, &res, &buflen); + br_read_unlock(&vfsmount_lock); if (error < 0) return ERR_PTR(error); @@ -2992,7 +2990,9 @@ char *d_absolute_path(const struct path *path, int error; prepend(&res, &buflen, "\0", 1); + br_read_lock(&vfsmount_lock); error = prepend_path(path, &root, &res, &buflen); + br_read_unlock(&vfsmount_lock); if (error > 1) error = -EINVAL; @@ -3067,7 +3067,9 @@ char *d_path(const struct path *path, char *buf, int buflen) rcu_read_lock(); get_fs_root_rcu(current->fs, &root); + br_read_lock(&vfsmount_lock); error = path_with_deleted(path, &root, &res, &buflen); + br_read_unlock(&vfsmount_lock); rcu_read_unlock(); if (error < 0) @@ -3222,6 +3224,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) get_fs_root_and_pwd_rcu(current->fs, &root, &pwd); error = -ENOENT; + br_read_lock(&vfsmount_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; char *cwd = page + PATH_MAX; @@ -3229,6 +3232,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) prepend(&cwd, &buflen, "\0", 1); error = prepend_path(&pwd, &root, &cwd, &buflen); + br_read_unlock(&vfsmount_lock); rcu_read_unlock(); if (error < 0) @@ -3249,6 +3253,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) error = -EFAULT; } } else { + br_read_unlock(&vfsmount_lock); rcu_read_unlock(); } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 9c0444c..c7c83ff 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -566,7 +566,8 @@ void debugfs_remove_recursive(struct dentry *dentry) mutex_lock(&parent->d_inode->i_mutex); if (child != dentry) { - next = list_next_entry(child, d_u.d_child); + next = list_entry(child->d_u.d_child.next, struct dentry, + d_u.d_child); goto up; } diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index a726b9f..073d30b 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -498,7 +498,6 @@ static void devpts_kill_sb(struct super_block *sb) { struct pts_fs_info *fsi = DEVPTS_SB(sb); - ida_destroy(&fsi->allocated_ptys); kfree(fsi); kill_litter_super(sb); } diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index d5abafd..88556dc 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -706,7 +706,9 @@ static int lkb_idr_is_local(int id, void *p, void *data) { struct dlm_lkb *lkb = p; - return lkb->lkb_nodeid == 0 && lkb->lkb_grmode != DLM_LOCK_IV; + if (!lkb->lkb_nodeid) + return 1; + return 0; } static int lkb_idr_is_any(int id, void *p, void *data) diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index e7cfbaf..60a3278 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -74,16 +74,14 @@ static int user_cmd(struct sk_buff *skb, struct genl_info *info) return 0; } -static struct genl_ops dlm_nl_ops[] = { - { - .cmd = DLM_CMD_HELLO, - .doit = user_cmd, - }, +static struct genl_ops dlm_nl_ops = { + .cmd = DLM_CMD_HELLO, + .doit = user_cmd, }; int __init dlm_netlink_init(void) { - return genl_register_family_with_ops(&family, dlm_nl_ops); + return genl_register_family_with_ops(&family, &dlm_nl_ops, 1); } void dlm_netlink_exit(void) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 2f6735d..000eae2 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -392,7 +392,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, wait_for_completion(&ecr->completion); rc = ecr->rc; - reinit_completion(&ecr->completion); + INIT_COMPLETION(ecr->completion); } out: ablkcipher_request_free(req); diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index 4000f6b..bf12ba5 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -44,15 +44,15 @@ */ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) { - struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); - int rc; - - if (!(lower_dentry->d_flags & DCACHE_OP_REVALIDATE)) - return 1; + struct dentry *lower_dentry; + int rc = 1; if (flags & LOOKUP_RCU) return -ECHILD; + lower_dentry = ecryptfs_dentry_to_lower(dentry); + if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) + goto out; rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags); if (dentry->d_inode) { struct inode *lower_inode = @@ -60,17 +60,12 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) fsstack_copy_attr_all(dentry->d_inode, lower_inode); } +out: return rc; } struct kmem_cache *ecryptfs_dentry_info_cache; -static void ecryptfs_dentry_free_rcu(struct rcu_head *head) -{ - kmem_cache_free(ecryptfs_dentry_info_cache, - container_of(head, struct ecryptfs_dentry_info, rcu)); -} - /** * ecryptfs_d_release * @dentry: The ecryptfs dentry @@ -79,11 +74,15 @@ static void ecryptfs_dentry_free_rcu(struct rcu_head *head) */ static void ecryptfs_d_release(struct dentry *dentry) { - struct ecryptfs_dentry_info *p = dentry->d_fsdata; - if (p) { - path_put(&p->lower_path); - call_rcu(&p->rcu, ecryptfs_dentry_free_rcu); + if (ecryptfs_dentry_to_private(dentry)) { + if (ecryptfs_dentry_to_lower(dentry)) { + dput(ecryptfs_dentry_to_lower(dentry)); + mntput(ecryptfs_dentry_to_lower_mnt(dentry)); + } + kmem_cache_free(ecryptfs_dentry_info_cache, + ecryptfs_dentry_to_private(dentry)); } + return; } const struct dentry_operations ecryptfs_dops = { diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 90d1882..df19d34 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -261,10 +261,7 @@ struct ecryptfs_inode_info { * vfsmount too. */ struct ecryptfs_dentry_info { struct path lower_path; - union { - struct ecryptfs_crypt_stat *crypt_stat; - struct rcu_head rcu; - }; + struct ecryptfs_crypt_stat *crypt_stat; }; /** @@ -515,6 +512,13 @@ ecryptfs_dentry_to_lower(struct dentry *dentry) return ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry; } +static inline void +ecryptfs_set_dentry_lower(struct dentry *dentry, struct dentry *lower_dentry) +{ + ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.dentry = + lower_dentry; +} + static inline struct vfsmount * ecryptfs_dentry_to_lower_mnt(struct dentry *dentry) { @@ -527,6 +531,13 @@ ecryptfs_dentry_to_lower_path(struct dentry *dentry) return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path; } +static inline void +ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt) +{ + ((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path.mnt = + lower_mnt; +} + #define ecryptfs_printk(type, fmt, arg...) \ __ecryptfs_printk(type "%s: " fmt, __func__, ## arg); __printf(1, 2) diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index b1eaa7a..992cf95 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -271,7 +271,7 @@ static int ecryptfs_flush(struct file *file, fl_owner_t td) { struct file *lower_file = ecryptfs_file_to_lower(file); - if (lower_file->f_op->flush) { + if (lower_file->f_op && lower_file->f_op->flush) { filemap_write_and_wait(file->f_mapping); return lower_file->f_op->flush(lower_file, td); } @@ -305,7 +305,7 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag) struct file *lower_file = NULL; lower_file = ecryptfs_file_to_lower(file); - if (lower_file->f_op->fasync) + if (lower_file->f_op && lower_file->f_op->fasync) rc = lower_file->f_op->fasync(fd, lower_file, flag); return rc; } @@ -313,10 +313,12 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag) static long ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct file *lower_file = ecryptfs_file_to_lower(file); + struct file *lower_file = NULL; long rc = -ENOTTY; - if (lower_file->f_op->unlocked_ioctl) + if (ecryptfs_file_to_private(file)) + lower_file = ecryptfs_file_to_lower(file); + if (lower_file && lower_file->f_op && lower_file->f_op->unlocked_ioctl) rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); return rc; } @@ -325,10 +327,12 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static long ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct file *lower_file = ecryptfs_file_to_lower(file); + struct file *lower_file = NULL; long rc = -ENOIOCTLCMD; - if (lower_file->f_op && lower_file->f_op->compat_ioctl) + if (ecryptfs_file_to_private(file)) + lower_file = ecryptfs_file_to_lower(file); + if (lower_file && lower_file->f_op && lower_file->f_op->compat_ioctl) rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); return rc; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index c36c448..67e9b63 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -153,7 +153,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry, dget(lower_dentry); lower_dir_dentry = lock_parent(lower_dentry); - rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL); + rc = vfs_unlink(lower_dir_inode, lower_dentry); if (rc) { printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); goto out_unlock; @@ -208,7 +208,7 @@ ecryptfs_do_create(struct inode *directory_inode, inode = __ecryptfs_get_inode(lower_dentry->d_inode, directory_inode->i_sb); if (IS_ERR(inode)) { - vfs_unlink(lower_dir_dentry->d_inode, lower_dentry, NULL); + vfs_unlink(lower_dir_dentry->d_inode, lower_dentry); goto out_lock; } fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode); @@ -361,8 +361,8 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry, BUG_ON(!d_count(lower_dentry)); ecryptfs_set_dentry_private(dentry, dentry_info); - dentry_info->lower_path.mnt = lower_mnt; - dentry_info->lower_path.dentry = lower_dentry; + ecryptfs_set_dentry_lower(dentry, lower_dentry); + ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt); if (!lower_dentry->d_inode) { /* We want to add because we couldn't find in lower */ @@ -475,7 +475,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir, dget(lower_new_dentry); lower_dir_dentry = lock_parent(lower_new_dentry); rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode, - lower_new_dentry, NULL); + lower_new_dentry); if (rc || !lower_new_dentry->d_inode) goto out_lock; rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb); @@ -640,8 +640,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_lock; } rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, - lower_new_dir_dentry->d_inode, lower_new_dentry, - NULL); + lower_new_dir_dentry->d_inode, lower_new_dentry); if (rc) goto out_lock; if (target_inode) @@ -704,6 +703,16 @@ out: return NULL; } +static void +ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) +{ + char *buf = nd_get_link(nd); + if (!IS_ERR(buf)) { + /* Free the char* */ + kfree(buf); + } +} + /** * upper_size_to_lower_size * @crypt_stat: Crypt_stat associated with file @@ -882,7 +891,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); mutex_lock(&lower_dentry->d_inode->i_mutex); - rc = notify_change(lower_dentry, &lower_ia, NULL); + rc = notify_change(lower_dentry, &lower_ia); mutex_unlock(&lower_dentry->d_inode->i_mutex); } return rc; @@ -983,7 +992,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) lower_ia.ia_valid &= ~ATTR_MODE; mutex_lock(&lower_dentry->d_inode->i_mutex); - rc = notify_change(lower_dentry, &lower_ia, NULL); + rc = notify_change(lower_dentry, &lower_ia); mutex_unlock(&lower_dentry->d_inode->i_mutex); out: fsstack_copy_attr_all(inode, lower_inode); @@ -1112,7 +1121,7 @@ out: const struct inode_operations ecryptfs_symlink_iops = { .readlink = generic_readlink, .follow_link = ecryptfs_follow_link, - .put_link = kfree_put_link, + .put_link = ecryptfs_put_link, .permission = ecryptfs_permission, .setattr = ecryptfs_setattr, .getattr = ecryptfs_getattr_link, diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 1b119d3..eb1c597 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -585,7 +585,8 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags /* ->kill_sb() will take care of root_info */ ecryptfs_set_dentry_private(s->s_root, root_info); - root_info->lower_path = path; + ecryptfs_set_dentry_lower(s->s_root, path.dentry); + ecryptfs_set_dentry_lower_mnt(s->s_root, path.mnt); s->s_flags |= MS_ACTIVE; return dget(s->s_root); diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index becc725..a8766b8 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -83,10 +83,19 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr) return 0; } +/* + * Retaining negative dentries for an in-memory filesystem just wastes + * memory and lookup time: arrange for them to be deleted immediately. + */ +static int efivarfs_delete_dentry(const struct dentry *dentry) +{ + return 1; +} + static struct dentry_operations efivarfs_d_ops = { .d_compare = efivarfs_d_compare, .d_hash = efivarfs_d_hash, - .d_delete = always_delete_dentry, + .d_delete = efivarfs_delete_dentry, }; static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8b5e258..810c28f 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -41,7 +41,6 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/compat.h> -#include <linux/rculist.h> /* * LOCKING: @@ -134,12 +133,8 @@ struct nested_calls { * of these on a server and we do not want this to take another cache line. */ struct epitem { - union { - /* RB tree node links this structure to the eventpoll RB tree */ - struct rb_node rbn; - /* Used to free the struct epitem */ - struct rcu_head rcu; - }; + /* RB tree node used to link this structure to the eventpoll RB tree */ + struct rb_node rbn; /* List header used to link this structure to the eventpoll ready list */ struct list_head rdllink; @@ -585,14 +580,14 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi) * @sproc: Pointer to the scan callback. * @priv: Private opaque data passed to the @sproc callback. * @depth: The current depth of recursive f_op->poll calls. - * @ep_locked: caller already holds ep->mtx * * Returns: The same integer error code returned by the @sproc callback. */ static int ep_scan_ready_list(struct eventpoll *ep, int (*sproc)(struct eventpoll *, struct list_head *, void *), - void *priv, int depth, bool ep_locked) + void *priv, + int depth) { int error, pwake = 0; unsigned long flags; @@ -603,9 +598,7 @@ static int ep_scan_ready_list(struct eventpoll *ep, * We need to lock this because we could be hit by * eventpoll_release_file() and epoll_ctl(). */ - - if (!ep_locked) - mutex_lock_nested(&ep->mtx, depth); + mutex_lock_nested(&ep->mtx, depth); /* * Steal the ready list, and re-init the original one to the @@ -669,8 +662,7 @@ static int ep_scan_ready_list(struct eventpoll *ep, } spin_unlock_irqrestore(&ep->lock, flags); - if (!ep_locked) - mutex_unlock(&ep->mtx); + mutex_unlock(&ep->mtx); /* We have to call this outside the lock */ if (pwake) @@ -679,12 +671,6 @@ static int ep_scan_ready_list(struct eventpoll *ep, return error; } -static void epi_rcu_free(struct rcu_head *head) -{ - struct epitem *epi = container_of(head, struct epitem, rcu); - kmem_cache_free(epi_cache, epi); -} - /* * Removes a "struct epitem" from the eventpoll RB tree and deallocates * all the associated resources. Must be called with "mtx" held. @@ -706,7 +692,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) /* Remove the current item from the list of epoll hooks */ spin_lock(&file->f_lock); - list_del_rcu(&epi->fllink); + if (ep_is_linked(&epi->fllink)) + list_del_init(&epi->fllink); spin_unlock(&file->f_lock); rb_erase(&epi->rbn, &ep->rbr); @@ -717,14 +704,9 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) spin_unlock_irqrestore(&ep->lock, flags); wakeup_source_unregister(ep_wakeup_source(epi)); - /* - * At this point it is safe to free the eventpoll item. Use the union - * field epi->rcu, since we are trying to minimize the size of - * 'struct epitem'. The 'rbn' field is no longer in use. Protected by - * ep->mtx. The rcu read side, reverse_path_check_proc(), does not make - * use of the rbn field. - */ - call_rcu(&epi->rcu, epi_rcu_free); + + /* At this point it is safe to free the eventpoll item */ + kmem_cache_free(epi_cache, epi); atomic_long_dec(&ep->user->epoll_watches); @@ -825,34 +807,15 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, return 0; } -static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, - poll_table *pt); - -struct readyevents_arg { - struct eventpoll *ep; - bool locked; -}; - static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests) { - struct readyevents_arg *arg = priv; - - return ep_scan_ready_list(arg->ep, ep_read_events_proc, NULL, - call_nests + 1, arg->locked); + return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1); } static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) { int pollflags; struct eventpoll *ep = file->private_data; - struct readyevents_arg arg; - - /* - * During ep_insert() we already hold the ep->mtx for the tfile. - * Prevent re-aquisition. - */ - arg.locked = wait && (wait->_qproc == ep_ptable_queue_proc); - arg.ep = ep; /* Insert inside our poll wait queue */ poll_wait(file, &ep->poll_wait, wait); @@ -864,7 +827,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) * could re-enter here. */ pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS, - ep_poll_readyevents_proc, &arg, ep, current); + ep_poll_readyevents_proc, ep, ep, current); return pollflags != -1 ? pollflags : 0; } @@ -909,6 +872,7 @@ static const struct file_operations eventpoll_fops = { */ void eventpoll_release_file(struct file *file) { + struct list_head *lsthead = &file->f_ep_links; struct eventpoll *ep; struct epitem *epi; @@ -926,12 +890,17 @@ void eventpoll_release_file(struct file *file) * Besides, ep_remove() acquires the lock, so we can't hold it here. */ mutex_lock(&epmutex); - list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { + + while (!list_empty(lsthead)) { + epi = list_first_entry(lsthead, struct epitem, fllink); + ep = epi->ep; + list_del_init(&epi->fllink); mutex_lock_nested(&ep->mtx, 0); ep_remove(ep, epi); mutex_unlock(&ep->mtx); } + mutex_unlock(&epmutex); } @@ -1169,9 +1138,7 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) struct file *child_file; struct epitem *epi; - /* CTL_DEL can remove links here, but that can't increase our count */ - rcu_read_lock(); - list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) { + list_for_each_entry(epi, &file->f_ep_links, fllink) { child_file = epi->ep->file; if (is_file_epoll(child_file)) { if (list_empty(&child_file->f_ep_links)) { @@ -1193,7 +1160,6 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) "file is not an ep!\n"); } } - rcu_read_unlock(); return error; } @@ -1265,7 +1231,7 @@ static noinline void ep_destroy_wakeup_source(struct epitem *epi) * Must be called with "mtx" held. */ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, - struct file *tfile, int fd, int full_check) + struct file *tfile, int fd) { int error, revents, pwake = 0; unsigned long flags; @@ -1320,7 +1286,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* Add the current item to the list of active epoll hook for this file */ spin_lock(&tfile->f_lock); - list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links); + list_add_tail(&epi->fllink, &tfile->f_ep_links); spin_unlock(&tfile->f_lock); /* @@ -1331,7 +1297,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* now check if we've created too many backpaths */ error = -EINVAL; - if (full_check && reverse_path_check()) + if (reverse_path_check()) goto error_remove_epi; /* We have to drop the new item inside our item list to keep track of it */ @@ -1361,7 +1327,8 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, error_remove_epi: spin_lock(&tfile->f_lock); - list_del_rcu(&epi->fllink); + if (ep_is_linked(&epi->fllink)) + list_del_init(&epi->fllink); spin_unlock(&tfile->f_lock); rb_erase(&epi->rbn, &ep->rbr); @@ -1554,7 +1521,7 @@ static int ep_send_events(struct eventpoll *ep, esed.maxevents = maxevents; esed.events = events; - return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0, false); + return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0); } static inline struct timespec ep_set_mstimeout(long ms) @@ -1824,12 +1791,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event) { int error; - int full_check = 0; + int did_lock_epmutex = 0; struct fd f, tf; struct eventpoll *ep; struct epitem *epi; struct epoll_event epds; - struct eventpoll *tep = NULL; error = -EFAULT; if (ep_op_has_event(op) && @@ -1848,11 +1814,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, /* The target file descriptor must support poll */ error = -EPERM; - if (!tf.file->f_op->poll) + if (!tf.file->f_op || !tf.file->f_op->poll) goto error_tgt_fput; /* Check if EPOLLWAKEUP is allowed */ - ep_take_care_of_epollwakeup(&epds); + if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND)) + epds.events &= ~EPOLLWAKEUP; /* * We have to check that the file structure underneath the file descriptor @@ -1877,41 +1844,27 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, * and hang them on the tfile_check_list, so we can check that we * haven't created too many possible wakeup paths. * - * We do not need to take the global 'epumutex' on EPOLL_CTL_ADD when - * the epoll file descriptor is attaching directly to a wakeup source, - * unless the epoll file descriptor is nested. The purpose of taking the - * 'epmutex' on add is to prevent complex toplogies such as loops and - * deep wakeup paths from forming in parallel through multiple - * EPOLL_CTL_ADD operations. + * We need to hold the epmutex across both ep_insert and ep_remove + * b/c we want to make sure we are looking at a coherent view of + * epoll network. */ - mutex_lock_nested(&ep->mtx, 0); + if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) { + mutex_lock(&epmutex); + did_lock_epmutex = 1; + } if (op == EPOLL_CTL_ADD) { - if (!list_empty(&f.file->f_ep_links) || - is_file_epoll(tf.file)) { - full_check = 1; - mutex_unlock(&ep->mtx); - mutex_lock(&epmutex); - if (is_file_epoll(tf.file)) { - error = -ELOOP; - if (ep_loop_check(ep, tf.file) != 0) { - clear_tfile_check_list(); - goto error_tgt_fput; - } - } else - list_add(&tf.file->f_tfile_llink, - &tfile_check_list); - mutex_lock_nested(&ep->mtx, 0); - if (is_file_epoll(tf.file)) { - tep = tf.file->private_data; - mutex_lock_nested(&tep->mtx, 1); + if (is_file_epoll(tf.file)) { + error = -ELOOP; + if (ep_loop_check(ep, tf.file) != 0) { + clear_tfile_check_list(); + goto error_tgt_fput; } - } - } - if (op == EPOLL_CTL_DEL && is_file_epoll(tf.file)) { - tep = tf.file->private_data; - mutex_lock_nested(&tep->mtx, 1); + } else + list_add(&tf.file->f_tfile_llink, &tfile_check_list); } + mutex_lock_nested(&ep->mtx, 0); + /* * Try to lookup the file inside our RB tree, Since we grabbed "mtx" * above, we can be sure to be able to use the item looked up by @@ -1924,11 +1877,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, case EPOLL_CTL_ADD: if (!epi) { epds.events |= POLLERR | POLLHUP; - error = ep_insert(ep, &epds, tf.file, fd, full_check); + error = ep_insert(ep, &epds, tf.file, fd); } else error = -EEXIST; - if (full_check) - clear_tfile_check_list(); + clear_tfile_check_list(); break; case EPOLL_CTL_DEL: if (epi) @@ -1944,12 +1896,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, error = -ENOENT; break; } - if (tep != NULL) - mutex_unlock(&tep->mtx); mutex_unlock(&ep->mtx); error_tgt_fput: - if (full_check) + if (did_lock_epmutex) mutex_unlock(&epmutex); fdput(tf); @@ -106,7 +106,6 @@ static inline void put_binfmt(struct linux_binfmt * fmt) */ SYSCALL_DEFINE1(uselib, const char __user *, library) { - struct linux_binfmt *fmt; struct file *file; struct filename *tmp = getname(library); int error = PTR_ERR(tmp); @@ -137,21 +136,24 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) fsnotify_open(file); error = -ENOEXEC; + if(file->f_op) { + struct linux_binfmt * fmt; - read_lock(&binfmt_lock); - list_for_each_entry(fmt, &formats, lh) { - if (!fmt->load_shlib) - continue; - if (!try_module_get(fmt->module)) - continue; - read_unlock(&binfmt_lock); - error = fmt->load_shlib(file); read_lock(&binfmt_lock); - put_binfmt(fmt); - if (error != -ENOEXEC) - break; + list_for_each_entry(fmt, &formats, lh) { + if (!fmt->load_shlib) + continue; + if (!try_module_get(fmt->module)) + continue; + read_unlock(&binfmt_lock); + error = fmt->load_shlib(file); + read_lock(&binfmt_lock); + put_binfmt(fmt); + if (error != -ENOEXEC) + break; + } + read_unlock(&binfmt_lock); } - read_unlock(&binfmt_lock); exit: fput(file); out: @@ -1275,10 +1277,13 @@ static int check_unsafe_exec(struct linux_binprm *bprm) */ int prepare_binprm(struct linux_binprm *bprm) { - struct inode *inode = file_inode(bprm->file); - umode_t mode = inode->i_mode; + umode_t mode; + struct inode * inode = file_inode(bprm->file); int retval; + mode = inode->i_mode; + if (bprm->file->f_op == NULL) + return -EACCES; /* clear any previous set[ug]id data from a previous binary */ bprm->cred->euid = current_euid(); @@ -1380,6 +1385,10 @@ int search_binary_handler(struct linux_binprm *bprm) if (retval) return retval; + retval = audit_bprm(bprm); + if (retval) + return retval; + retval = -ENOENT; retry: read_lock(&binfmt_lock); @@ -1427,7 +1436,6 @@ static int exec_binprm(struct linux_binprm *bprm) ret = search_binary_handler(bprm); if (ret >= 0) { - audit_bprm(bprm); trace_sched_process_exec(current, old_pid, bprm); ptrace_event(PTRACE_EVENT_EXEC, old_vpid); current->did_exec = 1; @@ -1539,7 +1547,6 @@ static int do_execve_common(const char *filename, current->fs->in_exec = 0; current->in_execve = 0; acct_update_integrals(current); - task_numa_free(current); free_bprm(bprm); if (displaced) put_files_struct(displaced); @@ -1661,12 +1668,6 @@ int __get_dumpable(unsigned long mm_flags) return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret; } -/* - * This returns the actual value of the suid_dumpable flag. For things - * that are using this for checking for privilege transitions, it must - * test against SUID_DUMP_USER rather than treating it as a boolean - * value. - */ int get_dumpable(struct mm_struct *mm) { return __get_dumpable(mm->flags); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 48a359d..a235f00 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -69,162 +69,145 @@ find_acceptable_alias(struct dentry *result, return NULL; } -static bool dentry_connected(struct dentry *dentry) +/* + * Find root of a disconnected subtree and return a reference to it. + */ +static struct dentry * +find_disconnected_root(struct dentry *dentry) { dget(dentry); - while (dentry->d_flags & DCACHE_DISCONNECTED) { + while (!IS_ROOT(dentry)) { struct dentry *parent = dget_parent(dentry); - dput(dentry); - if (IS_ROOT(dentry)) { + if (!(parent->d_flags & DCACHE_DISCONNECTED)) { dput(parent); - return false; + break; } - dentry = parent; - } - dput(dentry); - return true; -} - -static void clear_disconnected(struct dentry *dentry) -{ - dget(dentry); - while (dentry->d_flags & DCACHE_DISCONNECTED) { - struct dentry *parent = dget_parent(dentry); - - WARN_ON_ONCE(IS_ROOT(dentry)); - - spin_lock(&dentry->d_lock); - dentry->d_flags &= ~DCACHE_DISCONNECTED; - spin_unlock(&dentry->d_lock); dput(dentry); dentry = parent; } - dput(dentry); -} - -/* - * Reconnect a directory dentry with its parent. - * - * This can return a dentry, or NULL, or an error. - * - * In the first case the returned dentry is the parent of the given - * dentry, and may itself need to be reconnected to its parent. - * - * In the NULL case, a concurrent VFS operation has either renamed or - * removed this directory. The concurrent operation has reconnected our - * dentry, so we no longer need to. - */ -static struct dentry *reconnect_one(struct vfsmount *mnt, - struct dentry *dentry, char *nbuf) -{ - struct dentry *parent; - struct dentry *tmp; - int err; - - parent = ERR_PTR(-EACCES); - mutex_lock(&dentry->d_inode->i_mutex); - if (mnt->mnt_sb->s_export_op->get_parent) - parent = mnt->mnt_sb->s_export_op->get_parent(dentry); - mutex_unlock(&dentry->d_inode->i_mutex); - - if (IS_ERR(parent)) { - dprintk("%s: get_parent of %ld failed, err %d\n", - __func__, dentry->d_inode->i_ino, PTR_ERR(parent)); - return parent; - } - - dprintk("%s: find name of %lu in %lu\n", __func__, - dentry->d_inode->i_ino, parent->d_inode->i_ino); - err = exportfs_get_name(mnt, parent, nbuf, dentry); - if (err == -ENOENT) - goto out_reconnected; - if (err) - goto out_err; - dprintk("%s: found name: %s\n", __func__, nbuf); - mutex_lock(&parent->d_inode->i_mutex); - tmp = lookup_one_len(nbuf, parent, strlen(nbuf)); - mutex_unlock(&parent->d_inode->i_mutex); - if (IS_ERR(tmp)) { - dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp)); - goto out_err; - } - if (tmp != dentry) { - dput(tmp); - goto out_reconnected; - } - dput(tmp); - if (IS_ROOT(dentry)) { - err = -ESTALE; - goto out_err; - } - return parent; - -out_err: - dput(parent); - return ERR_PTR(err); -out_reconnected: - dput(parent); - /* - * Someone must have renamed our entry into another parent, in - * which case it has been reconnected by the rename. - * - * Or someone removed it entirely, in which case filehandle - * lookup will succeed but the directory is now IS_DEAD and - * subsequent operations on it will fail. - * - * Alternatively, maybe there was no race at all, and the - * filesystem is just corrupt and gave us a parent that doesn't - * actually contain any entry pointing to this inode. So, - * double check that this worked and return -ESTALE if not: - */ - if (!dentry_connected(dentry)) - return ERR_PTR(-ESTALE); - return NULL; + return dentry; } /* * Make sure target_dir is fully connected to the dentry tree. * - * On successful return, DCACHE_DISCONNECTED will be cleared on - * target_dir, and target_dir->d_parent->...->d_parent will reach the - * root of the filesystem. - * - * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected. - * But the converse is not true: target_dir may have DCACHE_DISCONNECTED - * set but already be connected. In that case we'll verify the - * connection to root and then clear the flag. - * - * Note that target_dir could be removed by a concurrent operation. In - * that case reconnect_path may still succeed with target_dir fully - * connected, but further operations using the filehandle will fail when - * necessary (due to S_DEAD being set on the directory). + * It may already be, as the flag isn't always updated when connection happens. */ static int reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) { - struct dentry *dentry, *parent; + int noprogress = 0; + int err = -ESTALE; - dentry = dget(target_dir); - - while (dentry->d_flags & DCACHE_DISCONNECTED) { - BUG_ON(dentry == mnt->mnt_sb->s_root); + /* + * It is possible that a confused file system might not let us complete + * the path to the root. For example, if get_parent returns a directory + * in which we cannot find a name for the child. While this implies a + * very sick filesystem we don't want it to cause knfsd to spin. Hence + * the noprogress counter. If we go through the loop 10 times (2 is + * probably enough) without getting anywhere, we just give up + */ + while (target_dir->d_flags & DCACHE_DISCONNECTED && noprogress++ < 10) { + struct dentry *pd = find_disconnected_root(target_dir); + + if (!IS_ROOT(pd)) { + /* must have found a connected parent - great */ + spin_lock(&pd->d_lock); + pd->d_flags &= ~DCACHE_DISCONNECTED; + spin_unlock(&pd->d_lock); + noprogress = 0; + } else if (pd == mnt->mnt_sb->s_root) { + printk(KERN_ERR "export: Eeek filesystem root is not connected, impossible\n"); + spin_lock(&pd->d_lock); + pd->d_flags &= ~DCACHE_DISCONNECTED; + spin_unlock(&pd->d_lock); + noprogress = 0; + } else { + /* + * We have hit the top of a disconnected path, try to + * find parent and connect. + * + * Racing with some other process renaming a directory + * isn't much of a problem here. If someone renames + * the directory, it will end up properly connected, + * which is what we want + * + * Getting the parent can't be supported generically, + * the locking is too icky. + * + * Instead we just return EACCES. If server reboots + * or inodes get flushed, you lose + */ + struct dentry *ppd = ERR_PTR(-EACCES); + struct dentry *npd; + + mutex_lock(&pd->d_inode->i_mutex); + if (mnt->mnt_sb->s_export_op->get_parent) + ppd = mnt->mnt_sb->s_export_op->get_parent(pd); + mutex_unlock(&pd->d_inode->i_mutex); + + if (IS_ERR(ppd)) { + err = PTR_ERR(ppd); + dprintk("%s: get_parent of %ld failed, err %d\n", + __func__, pd->d_inode->i_ino, err); + dput(pd); + break; + } - if (IS_ROOT(dentry)) - parent = reconnect_one(mnt, dentry, nbuf); - else - parent = dget_parent(dentry); + dprintk("%s: find name of %lu in %lu\n", __func__, + pd->d_inode->i_ino, ppd->d_inode->i_ino); + err = exportfs_get_name(mnt, ppd, nbuf, pd); + if (err) { + dput(ppd); + dput(pd); + if (err == -ENOENT) + /* some race between get_parent and + * get_name? just try again + */ + continue; + break; + } + dprintk("%s: found name: %s\n", __func__, nbuf); + mutex_lock(&ppd->d_inode->i_mutex); + npd = lookup_one_len(nbuf, ppd, strlen(nbuf)); + mutex_unlock(&ppd->d_inode->i_mutex); + if (IS_ERR(npd)) { + err = PTR_ERR(npd); + dprintk("%s: lookup failed: %d\n", + __func__, err); + dput(ppd); + dput(pd); + break; + } + /* we didn't really want npd, we really wanted + * a side-effect of the lookup. + * hopefully, npd == pd, though it isn't really + * a problem if it isn't + */ + if (npd == pd) + noprogress = 0; + else + printk("%s: npd != pd\n", __func__); + dput(npd); + dput(ppd); + if (IS_ROOT(pd)) { + /* something went wrong, we have to give up */ + dput(pd); + break; + } + } + dput(pd); + } - if (!parent) - break; - dput(dentry); - if (IS_ERR(parent)) - return PTR_ERR(parent); - dentry = parent; + if (target_dir->d_flags & DCACHE_DISCONNECTED) { + /* something went wrong - oh-well */ + if (!err) + err = -ESTALE; + return err; } - dput(dentry); - clear_disconnected(target_dir); + return 0; } @@ -232,7 +215,7 @@ struct getdents_callback { struct dir_context ctx; char *name; /* name that was found. It already points to a buffer NAME_MAX+1 is size */ - u64 ino; /* the inum we are looking for */ + unsigned long ino; /* the inum we are looking for */ int found; /* inode matched? */ int sequence; /* sequence counter */ }; @@ -272,14 +255,10 @@ static int get_name(const struct path *path, char *name, struct dentry *child) struct inode *dir = path->dentry->d_inode; int error; struct file *file; - struct kstat stat; - struct path child_path = { - .mnt = path->mnt, - .dentry = child, - }; struct getdents_callback buffer = { .ctx.actor = filldir_one, .name = name, + .ino = child->d_inode->i_ino }; error = -ENOTDIR; @@ -289,16 +268,6 @@ static int get_name(const struct path *path, char *name, struct dentry *child) if (!dir->i_fop) goto out; /* - * inode->i_ino is unsigned long, kstat->ino is u64, so the - * former would be insufficient on 32-bit hosts when the - * filesystem supports 64-bit inode numbers. So we need to - * actually call ->getattr, not just read i_ino: - */ - error = vfs_getattr_nosec(&child_path, &stat); - if (error) - return error; - buffer.ino = stat.ino; - /* * Open the directory ... */ file = dentry_open(path, O_RDONLY, cred); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 8a33764..c260de6 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -632,8 +632,6 @@ static int ext2_get_blocks(struct inode *inode, int count = 0; ext2_fsblk_t first_block = 0; - BUG_ON(maxblocks == 0); - depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary); if (depth == 0) diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c index e98171a..1c33128 100644 --- a/fs/ext2/xip.c +++ b/fs/ext2/xip.c @@ -35,7 +35,6 @@ __ext2_get_block(struct inode *inode, pgoff_t pgoff, int create, int rc; memset(&tmp, 0, sizeof(struct buffer_head)); - tmp.b_size = 1 << inode->i_blkbits; rc = ext2_get_block(inode, pgoff, &tmp, create); *result = tmp.b_blocknr; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 37fd31e..c50c761 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2825,10 +2825,6 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) * bitmap, and an inode table. */ overhead += ngroups * (2 + sbi->s_itb_per_group); - - /* Add the journal blocks as well */ - overhead += sbi->s_journal->j_maxlen; - sbi->s_overhead_last = overhead; smp_wmb(); sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count); diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 6ea7b14..dc5d572 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -640,7 +640,6 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) struct ext4_group_desc *gdp; ext4_group_t i; ext4_group_t ngroups = ext4_get_groups_count(sb); - struct ext4_group_info *grp; #ifdef EXT4FS_DEBUG struct ext4_super_block *es; ext4_fsblk_t bitmap_count; @@ -656,11 +655,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; - grp = NULL; - if (EXT4_SB(sb)->s_group_info) - grp = ext4_get_group_info(sb, i); - if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) - desc_count += ext4_free_group_clusters(sb, gdp); + desc_count += ext4_free_group_clusters(sb, gdp); brelse(bitmap_bh); bitmap_bh = ext4_read_block_bitmap(sb, i); if (bitmap_bh == NULL) @@ -684,11 +679,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; - grp = NULL; - if (EXT4_SB(sb)->s_group_info) - grp = ext4_get_group_info(sb, i); - if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) - desc_count += ext4_free_group_clusters(sb, gdp); + desc_count += ext4_free_group_clusters(sb, gdp); } return desc_count; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index e618503..af815ea 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -29,7 +29,6 @@ #include <linux/wait.h> #include <linux/blockgroup_lock.h> #include <linux/percpu_counter.h> -#include <linux/ratelimit.h> #include <crypto/hash.h> #ifdef __KERNEL__ #include <linux/compat.h> @@ -1315,11 +1314,6 @@ struct ext4_sb_info { unsigned long s_es_last_sorted; struct percpu_counter s_extent_cache_cnt; spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; - - /* Ratelimit ext4 messages. */ - struct ratelimit_state s_err_ratelimit_state; - struct ratelimit_state s_warning_ratelimit_state; - struct ratelimit_state s_msg_ratelimit_state; }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) @@ -1402,18 +1396,7 @@ static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \ clear_bit(bit + (offset), &EXT4_I(inode)->i_##field); \ } -/* Add these declarations here only so that these functions can be - * found by name. Otherwise, they are very hard to locate. */ -static inline int ext4_test_inode_flag(struct inode *inode, int bit); -static inline void ext4_set_inode_flag(struct inode *inode, int bit); -static inline void ext4_clear_inode_flag(struct inode *inode, int bit); EXT4_INODE_BIT_FNS(flag, flags, 0) - -/* Add these declarations here only so that these functions can be - * found by name. Otherwise, they are very hard to locate. */ -static inline int ext4_test_inode_state(struct inode *inode, int bit); -static inline void ext4_set_inode_state(struct inode *inode, int bit); -static inline void ext4_clear_inode_state(struct inode *inode, int bit); #if (BITS_PER_LONG < 64) EXT4_INODE_BIT_FNS(state, state_flags, 0) @@ -2751,6 +2734,8 @@ extern void ext4_double_down_write_data_sem(struct inode *first, struct inode *second); extern void ext4_double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode); +void ext4_inode_double_lock(struct inode *inode1, struct inode *inode2); +void ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2); extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 start_orig, __u64 start_donor, __u64 len, __u64 *moved_len); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 35f65cf..54d52af 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1666,7 +1666,7 @@ int ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, struct ext4_extent *ex2) { - unsigned short ext1_ee_len, ext2_ee_len; + unsigned short ext1_ee_len, ext2_ee_len, max_len; /* * Make sure that both extents are initialized. We don't merge @@ -1677,6 +1677,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) return 0; + if (ext4_ext_is_uninitialized(ex1)) + max_len = EXT_UNINIT_MAX_LEN; + else + max_len = EXT_INIT_MAX_LEN; + ext1_ee_len = ext4_ext_get_actual_len(ex1); ext2_ee_len = ext4_ext_get_actual_len(ex2); @@ -1689,7 +1694,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, * as an RO_COMPAT feature, refuse to merge to extents if * this can result in the top bit of ee_len being set. */ - if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) + if (ext1_ee_len + ext2_ee_len > max_len) return 0; #ifdef AGGRESSIVE_TEST if (ext1_ee_len >= 4) @@ -1715,6 +1720,7 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, struct ext4_extent_header *eh; unsigned int depth, len; int merge_done = 0; + int uninitialized = 0; depth = ext_depth(inode); BUG_ON(path[depth].p_hdr == NULL); @@ -1724,8 +1730,12 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) break; /* merge with next extent! */ + if (ext4_ext_is_uninitialized(ex)) + uninitialized = 1; ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(ex + 1)); + if (uninitialized) + ext4_ext_mark_uninitialized(ex); if (ex + 1 < EXT_LAST_EXTENT(eh)) { len = (EXT_LAST_EXTENT(eh) - ex - 1) @@ -1880,6 +1890,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, struct ext4_ext_path *npath = NULL; int depth, len, err; ext4_lblk_t next; + unsigned uninitialized = 0; int mb_flags = 0; if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { @@ -1931,8 +1942,18 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, if (err) return err; + /* + * ext4_can_extents_be_merged should have checked + * that either both extents are uninitialized, or + * both aren't. Thus we need to check only one of + * them here. + */ + if (ext4_ext_is_uninitialized(ex)) + uninitialized = 1; ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(newext)); + if (uninitialized) + ext4_ext_mark_uninitialized(ex); eh = path[depth].p_hdr; nearex = ex; goto merge; @@ -1955,10 +1976,20 @@ prepend: if (err) return err; + /* + * ext4_can_extents_be_merged should have checked + * that either both extents are uninitialized, or + * both aren't. Thus we need to check only one of + * them here. + */ + if (ext4_ext_is_uninitialized(ex)) + uninitialized = 1; ex->ee_block = newext->ee_block; ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + ext4_ext_get_actual_len(newext)); + if (uninitialized) + ext4_ext_mark_uninitialized(ex); eh = path[depth].p_hdr; nearex = ex; goto merge; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 0ee59a6..137193f 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -432,7 +432,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, ext4fs_dirhash(qstr->name, qstr->len, &hinfo); grp = hinfo.hash; } else - grp = prandom_u32(); + get_random_bytes(&grp, sizeof(grp)); parent_group = (unsigned)grp % ngroups; for (i = 0; i < ngroups; i++) { g = (parent_group + i) % ngroups; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index bae9875..d9ecbf1 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -994,9 +994,11 @@ static int ext4_add_dirent_to_inline(handle_t *handle, struct inode *dir = dentry->d_parent->d_inode; const char *name = dentry->d_name.name; int namelen = dentry->d_name.len; + unsigned short reclen; int err; struct ext4_dir_entry_2 *de; + reclen = EXT4_DIR_REC_LEN(namelen); err = ext4_find_dest_de(dir, inode, iloc->bh, inline_start, inline_size, name, namelen, &de); @@ -1440,7 +1442,6 @@ int ext4_read_inline_dir(struct file *file, if (ret < 0) goto out; - ret = 0; sb = inode->i_sb; parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); offset = ctx->pos; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0757634..e274e9c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2178,9 +2178,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) * * @handle - handle for journal operations * @mpd - extent to map - * @give_up_on_write - we set this to true iff there is a fatal error and there - * is no hope of writing the data. The caller should discard - * dirty pages to avoid infinite loops. * * The function maps extent starting at mpd->lblk of length mpd->len. If it is * delayed, blocks are allocated, if it is unwritten, we may need to convert @@ -2298,7 +2295,6 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) struct address_space *mapping = mpd->inode->i_mapping; struct pagevec pvec; unsigned int nr_pages; - long left = mpd->wbc->nr_to_write; pgoff_t index = mpd->first_page; pgoff_t end = mpd->last_page; int tag; @@ -2334,17 +2330,6 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) if (page->index > end) goto out; - /* - * Accumulated enough dirty pages? This doesn't apply - * to WB_SYNC_ALL mode. For integrity sync we have to - * keep going because someone may be concurrently - * dirtying pages, and we might have synced a lot of - * newly appeared dirty pages, but have not synced all - * of the old dirty pages. - */ - if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0) - goto out; - /* If we can't merge this page, we are done. */ if (mpd->map.m_len > 0 && mpd->next_page != page->index) goto out; @@ -2379,7 +2364,19 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) if (err <= 0) goto out; err = 0; - left--; + + /* + * Accumulated enough dirty pages? This doesn't apply + * to WB_SYNC_ALL mode. For integrity sync we have to + * keep going because someone may be concurrently + * dirtying pages, and we might have synced a lot of + * newly appeared dirty pages, but have not synced all + * of the old dirty pages. + */ + if (mpd->wbc->sync_mode == WB_SYNC_NONE && + mpd->next_page - mpd->first_page >= + mpd->wbc->nr_to_write) + goto out; } pagevec_release(&pvec); cond_resched(); @@ -2423,15 +2420,16 @@ static int ext4_writepages(struct address_space *mapping, * because that could violate lock ordering on umount */ if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) - goto out_writepages; + return 0; if (ext4_should_journal_data(inode)) { struct blk_plug plug; + int ret; blk_start_plug(&plug); ret = write_cache_pages(mapping, wbc, __writepage, mapping); blk_finish_plug(&plug); - goto out_writepages; + return ret; } /* @@ -2444,10 +2442,8 @@ static int ext4_writepages(struct address_space *mapping, * *never* be called, so if that ever happens, we would want * the stack trace. */ - if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) { - ret = -EROFS; - goto out_writepages; - } + if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) + return -EROFS; if (ext4_should_dioread_nolock(inode)) { /* @@ -4694,15 +4690,6 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, generic_fillattr(inode, stat); /* - * If there is inline data in the inode, the inode will normally not - * have data blocks allocated (it may have an external xattr block). - * Report at least one sector for such files, so tools like tar, rsync, - * others doen't incorrectly think the file is completely sparse. - */ - if (unlikely(ext4_has_inline_data(inode))) - stat->blocks += (stat->size + 511) >> 9; - - /* * We can't update i_blocks if the block allocation is delayed * otherwise in the case of system crash before the real block * allocation is done, we will have i_blocks inconsistent with @@ -4713,8 +4700,9 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, * blocks for this file. */ delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), - EXT4_I(inode)->i_reserved_data_blocks); - stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits - 9); + EXT4_I(inode)->i_reserved_data_blocks); + + stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9); return 0; } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 60589b6..a569d33 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -130,7 +130,7 @@ static long swap_inode_boot_loader(struct super_block *sb, /* Protect orig inodes against a truncate and make sure, * that only 1 swap_inode_boot_loader is running. */ - lock_two_nondirectories(inode, inode_bl); + ext4_inode_double_lock(inode, inode_bl); truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(&inode_bl->i_data, 0); @@ -205,7 +205,7 @@ static long swap_inode_boot_loader(struct super_block *sb, ext4_inode_resume_unlocked_dio(inode); ext4_inode_resume_unlocked_dio(inode_bl); - unlock_two_nondirectories(inode, inode_bl); + ext4_inode_double_unlock(inode, inode_bl); iput(inode_bl); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4d113ef..a41e3ba 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4794,8 +4794,8 @@ do_more: " group:%d block:%d count:%lu failed" " with %d", block_group, bit, count, err); - } else - EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info); + } + ext4_lock_group(sb, block_group); mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 04434ad..214461e 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -259,7 +259,7 @@ static unsigned int mmp_new_seq(void) u32 new_seq; do { - new_seq = prandom_u32(); + get_random_bytes(&new_seq, sizeof(u32)); } while (new_seq > EXT4_MMP_SEQ_MAX); return new_seq; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 773b503..7fa4d85 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -1203,6 +1203,42 @@ mext_check_arguments(struct inode *orig_inode, } /** + * ext4_inode_double_lock - Lock i_mutex on both @inode1 and @inode2 + * + * @inode1: the inode structure + * @inode2: the inode structure + * + * Lock two inodes' i_mutex + */ +void +ext4_inode_double_lock(struct inode *inode1, struct inode *inode2) +{ + BUG_ON(inode1 == inode2); + if (inode1 < inode2) { + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); + } +} + +/** + * ext4_inode_double_unlock - Release i_mutex on both @inode1 and @inode2 + * + * @inode1: the inode that is released first + * @inode2: the inode that is released second + * + */ + +void +ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2) +{ + mutex_unlock(&inode1->i_mutex); + mutex_unlock(&inode2->i_mutex); +} + +/** * ext4_move_extents - Exchange the specified range of a file * * @o_filp: file structure of the original file @@ -1291,7 +1327,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, return -EINVAL; } /* Protect orig and donor inodes against a truncate */ - lock_two_nondirectories(orig_inode, donor_inode); + ext4_inode_double_lock(orig_inode, donor_inode); /* Wait for all existing dio workers */ ext4_inode_block_unlocked_dio(orig_inode); @@ -1499,7 +1535,7 @@ out: ext4_double_up_write_data_sem(orig_inode, donor_inode); ext4_inode_resume_unlocked_dio(orig_inode); ext4_inode_resume_unlocked_dio(donor_inode); - unlock_two_nondirectories(orig_inode, donor_inode); + ext4_inode_double_unlock(orig_inode, donor_inode); return ret; } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index d488f80..d7d0c7b 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -197,15 +197,14 @@ static void dump_completed_IO(struct inode *inode, struct list_head *head) static void ext4_add_complete_io(ext4_io_end_t *io_end) { struct ext4_inode_info *ei = EXT4_I(io_end->inode); - struct ext4_sb_info *sbi = EXT4_SB(io_end->inode->i_sb); struct workqueue_struct *wq; unsigned long flags; /* Only reserved conversions from writeback should enter here */ WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); - WARN_ON(!io_end->handle && sbi->s_journal); + WARN_ON(!io_end->handle); spin_lock_irqsave(&ei->i_completed_io_lock, flags); - wq = sbi->rsv_conversion_wq; + wq = EXT4_SB(io_end->inode->i_sb)->rsv_conversion_wq; if (list_empty(&ei->i_rsv_conversion_list)) queue_work(wq, &ei->i_rsv_conversion_work); list_add_tail(&io_end->list, &ei->i_rsv_conversion_list); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c977f4e..2c2e6cb 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -411,26 +411,20 @@ static void ext4_handle_error(struct super_block *sb) sb->s_id); } -#define ext4_error_ratelimit(sb) \ - ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \ - "EXT4-fs error") - void __ext4_error(struct super_block *sb, const char *function, unsigned int line, const char *fmt, ...) { struct va_format vaf; va_list args; - if (ext4_error_ratelimit(sb)) { - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - printk(KERN_CRIT - "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", - sb->s_id, function, line, current->comm, &vaf); - va_end(args); - } + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", + sb->s_id, function, line, current->comm, &vaf); + va_end(args); save_error_info(sb, function, line); + ext4_handle_error(sb); } @@ -444,23 +438,22 @@ void __ext4_error_inode(struct inode *inode, const char *function, es->s_last_error_ino = cpu_to_le32(inode->i_ino); es->s_last_error_block = cpu_to_le64(block); - if (ext4_error_ratelimit(inode->i_sb)) { - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - if (block) - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " - "inode #%lu: block %llu: comm %s: %pV\n", - inode->i_sb->s_id, function, line, inode->i_ino, - block, current->comm, &vaf); - else - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " - "inode #%lu: comm %s: %pV\n", - inode->i_sb->s_id, function, line, inode->i_ino, - current->comm, &vaf); - va_end(args); - } save_error_info(inode->i_sb, function, line); + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + if (block) + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " + "inode #%lu: block %llu: comm %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + block, current->comm, &vaf); + else + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " + "inode #%lu: comm %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + current->comm, &vaf); + va_end(args); + ext4_handle_error(inode->i_sb); } @@ -476,28 +469,27 @@ void __ext4_error_file(struct file *file, const char *function, es = EXT4_SB(inode->i_sb)->s_es; es->s_last_error_ino = cpu_to_le32(inode->i_ino); - if (ext4_error_ratelimit(inode->i_sb)) { - path = d_path(&(file->f_path), pathname, sizeof(pathname)); - if (IS_ERR(path)) - path = "(unknown)"; - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - if (block) - printk(KERN_CRIT - "EXT4-fs error (device %s): %s:%d: inode #%lu: " - "block %llu: comm %s: path %s: %pV\n", - inode->i_sb->s_id, function, line, inode->i_ino, - block, current->comm, path, &vaf); - else - printk(KERN_CRIT - "EXT4-fs error (device %s): %s:%d: inode #%lu: " - "comm %s: path %s: %pV\n", - inode->i_sb->s_id, function, line, inode->i_ino, - current->comm, path, &vaf); - va_end(args); - } save_error_info(inode->i_sb, function, line); + path = d_path(&(file->f_path), pathname, sizeof(pathname)); + if (IS_ERR(path)) + path = "(unknown)"; + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + if (block) + printk(KERN_CRIT + "EXT4-fs error (device %s): %s:%d: inode #%lu: " + "block %llu: comm %s: path %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + block, current->comm, path, &vaf); + else + printk(KERN_CRIT + "EXT4-fs error (device %s): %s:%d: inode #%lu: " + "comm %s: path %s: %pV\n", + inode->i_sb->s_id, function, line, inode->i_ino, + current->comm, path, &vaf); + va_end(args); + ext4_handle_error(inode->i_sb); } @@ -551,13 +543,11 @@ void __ext4_std_error(struct super_block *sb, const char *function, (sb->s_flags & MS_RDONLY)) return; - if (ext4_error_ratelimit(sb)) { - errstr = ext4_decode_error(sb, errno, nbuf); - printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", - sb->s_id, function, line, errstr); - } - + errstr = ext4_decode_error(sb, errno, nbuf); + printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", + sb->s_id, function, line, errstr); save_error_info(sb, function, line); + ext4_handle_error(sb); } @@ -607,9 +597,6 @@ void __ext4_msg(struct super_block *sb, struct va_format vaf; va_list args; - if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs")) - return; - va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; @@ -623,10 +610,6 @@ void __ext4_warning(struct super_block *sb, const char *function, struct va_format vaf; va_list args; - if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), - "EXT4-fs warning")) - return; - va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; @@ -650,20 +633,18 @@ __acquires(bitlock) es->s_last_error_block = cpu_to_le64(block); __save_error_info(sb, function, line); - if (ext4_error_ratelimit(sb)) { - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", - sb->s_id, function, line, grp); - if (ino) - printk(KERN_CONT "inode %lu: ", ino); - if (block) - printk(KERN_CONT "block %llu:", - (unsigned long long) block); - printk(KERN_CONT "%pV\n", &vaf); - va_end(args); - } + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", + sb->s_id, function, line, grp); + if (ino) + printk(KERN_CONT "inode %lu: ", ino); + if (block) + printk(KERN_CONT "block %llu:", (unsigned long long) block); + printk(KERN_CONT "%pV\n", &vaf); + va_end(args); if (test_opt(sb, ERRORS_CONT)) { ext4_commit_super(sb, 0); @@ -2625,12 +2606,6 @@ EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); -EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); -EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); -EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval); -EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); -EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); -EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); static struct attribute *ext4_attrs[] = { ATTR_LIST(delayed_allocation_blocks), @@ -2648,12 +2623,6 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(max_writeback_mb_bump), ATTR_LIST(extent_max_zeroout_kb), ATTR_LIST(trigger_fs_error), - ATTR_LIST(err_ratelimit_interval_ms), - ATTR_LIST(err_ratelimit_burst), - ATTR_LIST(warning_ratelimit_interval_ms), - ATTR_LIST(warning_ratelimit_burst), - ATTR_LIST(msg_ratelimit_interval_ms), - ATTR_LIST(msg_ratelimit_burst), NULL, }; @@ -3068,6 +3037,7 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_li_request *elr; + unsigned long rnd; elr = kzalloc(sizeof(*elr), GFP_KERNEL); if (!elr) @@ -3082,8 +3052,10 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, * spread the inode table initialization requests * better. */ - elr->lr_next_sched = jiffies + (prandom_u32() % - (EXT4_DEF_LI_MAX_START_DELAY * HZ)); + get_random_bytes(&rnd, sizeof(rnd)); + elr->lr_next_sched = jiffies + (unsigned long)rnd % + (EXT4_DEF_LI_MAX_START_DELAY * HZ); + return elr; } @@ -4146,11 +4118,6 @@ no_journal: if (es->s_error_count) mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ - /* Enable message ratelimiting. Default is 10 messages per 5 secs. */ - ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10); - ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10); - ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10); - kfree(orig_data); return 0; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 1423c48..03e9beb 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1352,7 +1352,6 @@ retry: new_extra_isize = s_min_extra_isize; kfree(is); is = NULL; kfree(bs); bs = NULL; - brelse(bh); goto retry; } error = -1; diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 214fe10..e06e099 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -63,11 +63,3 @@ config F2FS_FS_SECURITY the extended attribute support in advance. If you are not using a security module, say N. - -config F2FS_CHECK_FS - bool "F2FS consistency checking feature" - depends on F2FS_FS - help - Enables BUG_ONs which check the file system consistency in runtime. - - If you want to improve the performance, say N. diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index d0fc287..b7826ec 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -205,8 +205,7 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) return acl; } -static int f2fs_set_acl(struct inode *inode, int type, - struct posix_acl *acl, struct page *ipage) +static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_inode_info *fi = F2FS_I(inode); @@ -251,7 +250,7 @@ static int f2fs_set_acl(struct inode *inode, int type, } } - error = f2fs_setxattr(inode, name_index, "", value, size, ipage); + error = f2fs_setxattr(inode, name_index, "", value, size, NULL); kfree(value); if (!error) @@ -261,10 +260,10 @@ static int f2fs_set_acl(struct inode *inode, int type, return error; } -int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage) +int f2fs_init_acl(struct inode *inode, struct inode *dir) { - struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct posix_acl *acl = NULL; + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); int error = 0; if (!S_ISLNK(inode->i_mode)) { @@ -277,19 +276,19 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage) inode->i_mode &= ~current_umask(); } - if (!test_opt(sbi, POSIX_ACL) || !acl) - goto cleanup; + if (test_opt(sbi, POSIX_ACL) && acl) { - if (S_ISDIR(inode->i_mode)) { - error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl, ipage); - if (error) - goto cleanup; + if (S_ISDIR(inode->i_mode)) { + error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl); + if (error) + goto cleanup; + } + error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); + if (error < 0) + return error; + if (error > 0) + error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); } - error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode); - if (error < 0) - return error; - if (error > 0) - error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, ipage); cleanup: posix_acl_release(acl); return error; @@ -314,8 +313,7 @@ int f2fs_acl_chmod(struct inode *inode) error = posix_acl_chmod(&acl, GFP_KERNEL, mode); if (error) return error; - - error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, NULL); + error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl); posix_acl_release(acl); return error; } @@ -390,7 +388,7 @@ static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name, acl = NULL; } - error = f2fs_set_acl(inode, type, acl, NULL); + error = f2fs_set_acl(inode, type, acl); release_and_out: posix_acl_release(acl); diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index 4963313..80f4306 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -36,9 +36,9 @@ struct f2fs_acl_header { #ifdef CONFIG_F2FS_FS_POSIX_ACL -extern struct posix_acl *f2fs_get_acl(struct inode *, int); -extern int f2fs_acl_chmod(struct inode *); -extern int f2fs_init_acl(struct inode *, struct inode *, struct page *); +extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type); +extern int f2fs_acl_chmod(struct inode *inode); +extern int f2fs_init_acl(struct inode *inode, struct inode *dir); #else #define f2fs_check_acl NULL #define f2fs_get_acl NULL @@ -49,8 +49,7 @@ static inline int f2fs_acl_chmod(struct inode *inode) return 0; } -static inline int f2fs_init_acl(struct inode *inode, struct inode *dir, - struct page *page) +static inline int f2fs_init_acl(struct inode *inode, struct inode *dir) { return 0; } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5716e5e..bb31220 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -81,7 +81,7 @@ static int f2fs_write_meta_page(struct page *page, struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); /* Should not write any meta pages, if any IO error was occurred */ - if (wbc->for_reclaim || sbi->por_doing || + if (wbc->for_reclaim || is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) { dec_page_count(sbi, F2FS_DIRTY_META); wbc->pages_skipped++; @@ -142,8 +142,8 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; lock_page(page); - f2fs_bug_on(page->mapping != mapping); - f2fs_bug_on(!PageDirty(page)); + BUG_ON(page->mapping != mapping); + BUG_ON(!PageDirty(page)); clear_page_dirty_for_io(page); if (f2fs_write_meta_page(page, &wbc)) { unlock_page(page); @@ -167,8 +167,6 @@ static int f2fs_set_meta_page_dirty(struct page *page) struct address_space *mapping = page->mapping; struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); - trace_f2fs_set_page_dirty(page, META); - SetPageUptodate(page); if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); @@ -208,7 +206,6 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) void release_orphan_inode(struct f2fs_sb_info *sbi) { mutex_lock(&sbi->orphan_inode_mutex); - f2fs_bug_on(sbi->n_orphans == 0); sbi->n_orphans--; mutex_unlock(&sbi->orphan_inode_mutex); } @@ -228,8 +225,12 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) break; orphan = NULL; } - - new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); +retry: + new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); + if (!new) { + cond_resched(); + goto retry; + } new->ino = ino; /* add new_oentry into list which is sorted by inode number */ @@ -252,7 +253,6 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) if (orphan->ino == ino) { list_del(&orphan->list); kmem_cache_free(orphan_entry_slab, orphan); - f2fs_bug_on(sbi->n_orphans == 0); sbi->n_orphans--; break; } @@ -263,7 +263,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { struct inode *inode = f2fs_iget(sbi->sb, ino); - f2fs_bug_on(IS_ERR(inode)); + BUG_ON(IS_ERR(inode)); clear_nlink(inode); /* truncate all the data during iput */ @@ -277,7 +277,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) return 0; - sbi->por_doing = true; + sbi->por_doing = 1; start_blk = __start_cp_addr(sbi) + 1; orphan_blkaddr = __start_sum_addr(sbi) - 1; @@ -294,7 +294,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) } /* clear Orphan Flag */ clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); - sbi->por_doing = false; + sbi->por_doing = 0; return 0; } @@ -469,7 +469,9 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) return -EEXIST; } list_add_tail(&new->list, head); - stat_inc_dirty_dir(sbi); +#ifdef CONFIG_F2FS_STAT_FS + sbi->n_dirty_dirs++; +#endif return 0; } @@ -480,8 +482,12 @@ void set_dirty_dir_page(struct inode *inode, struct page *page) if (!S_ISDIR(inode->i_mode)) return; - - new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); +retry: + new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); + if (!new) { + cond_resched(); + goto retry; + } new->inode = inode; INIT_LIST_HEAD(&new->list); @@ -498,9 +504,13 @@ void set_dirty_dir_page(struct inode *inode, struct page *page) void add_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct dir_inode_entry *new = - f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); - + struct dir_inode_entry *new; +retry: + new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); + if (!new) { + cond_resched(); + goto retry; + } new->inode = inode; INIT_LIST_HEAD(&new->list); @@ -531,7 +541,9 @@ void remove_dirty_dir_inode(struct inode *inode) if (entry->inode == inode) { list_del(&entry->list); kmem_cache_free(inode_entry_slab, entry); - stat_dec_dirty_dir(sbi); +#ifdef CONFIG_F2FS_STAT_FS + sbi->n_dirty_dirs--; +#endif break; } } @@ -605,10 +617,11 @@ static void block_operations(struct f2fs_sb_info *sbi) blk_start_plug(&plug); retry_flush_dents: - f2fs_lock_all(sbi); + mutex_lock_all(sbi); + /* write all the dirty dentry pages */ if (get_pages(sbi, F2FS_DIRTY_DENTS)) { - f2fs_unlock_all(sbi); + mutex_unlock_all(sbi); sync_dirty_dir_inodes(sbi); goto retry_flush_dents; } @@ -631,22 +644,7 @@ retry_flush_nodes: static void unblock_operations(struct f2fs_sb_info *sbi) { mutex_unlock(&sbi->node_write); - f2fs_unlock_all(sbi); -} - -static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) -{ - DEFINE_WAIT(wait); - - for (;;) { - prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); - - if (!get_pages(sbi, F2FS_WRITEBACK)) - break; - - io_schedule(); - } - finish_wait(&sbi->cp_wait, &wait); + mutex_unlock_all(sbi); } static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) @@ -758,7 +756,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) f2fs_put_page(cp_page, 1); /* wait for previous submitted node/meta pages writeback */ - wait_on_all_pages_writeback(sbi); + while (get_pages(sbi, F2FS_WRITEBACK)) + congestion_wait(BLK_RW_ASYNC, HZ / 50); filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX); filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index aa3438c..941f9b9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -68,6 +68,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, struct buffer_head *bh_result) { struct f2fs_inode_info *fi = F2FS_I(inode); +#ifdef CONFIG_F2FS_STAT_FS + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +#endif pgoff_t start_fofs, end_fofs; block_t start_blkaddr; @@ -77,8 +80,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, return 0; } - stat_inc_total_hit(inode->i_sb); - +#ifdef CONFIG_F2FS_STAT_FS + sbi->total_hit_ext++; +#endif start_fofs = fi->ext.fofs; end_fofs = fi->ext.fofs + fi->ext.len - 1; start_blkaddr = fi->ext.blk_addr; @@ -96,7 +100,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, else bh_result->b_size = UINT_MAX; - stat_inc_read_hit(inode->i_sb); +#ifdef CONFIG_F2FS_STAT_FS + sbi->read_hit_ext++; +#endif read_unlock(&fi->ext.ext_lock); return 1; } @@ -110,7 +116,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) pgoff_t fofs, start_fofs, end_fofs; block_t start_blkaddr, end_blkaddr; - f2fs_bug_on(blk_addr == NEW_ADDR); + BUG_ON(blk_addr == NEW_ADDR); fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + dn->ofs_in_node; @@ -436,7 +442,7 @@ static int get_data_block_ro(struct inode *inode, sector_t iblock, } /* It does not support data allocation */ - f2fs_bug_on(create); + BUG_ON(create); if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) { int i; @@ -554,9 +560,9 @@ write: inode_dec_dirty_dents(inode); err = do_write_data_page(page); } else { - f2fs_lock_op(sbi); + int ilock = mutex_lock_op(sbi); err = do_write_data_page(page); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); need_balance_fs = true; } if (err == -ENOENT) @@ -635,6 +641,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; struct dnode_of_data dn; int err = 0; + int ilock; f2fs_balance_fs(sbi); repeat: @@ -643,7 +650,7 @@ repeat: return -ENOMEM; *pagep = page; - f2fs_lock_op(sbi); + ilock = mutex_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, index, ALLOC_NODE); @@ -657,7 +664,7 @@ repeat: if (err) goto err; - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) return 0; @@ -693,7 +700,7 @@ out: return 0; err: - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); f2fs_put_page(page, 1); return err; } @@ -756,8 +763,6 @@ static int f2fs_set_data_page_dirty(struct page *page) struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; - trace_f2fs_set_page_dirty(page, DATA); - SetPageUptodate(page); if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 594fc1b..384c6da 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -139,7 +139,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, bool room = false; int max_slots = 0; - f2fs_bug_on(level > MAX_DIR_HASH_DEPTH); + BUG_ON(level > MAX_DIR_HASH_DEPTH); nbucket = dir_buckets(level); nblock = bucket_blocks(level); @@ -346,7 +346,7 @@ static struct page *init_inode_metadata(struct inode *inode, goto error; } - err = f2fs_init_acl(inode, dir, page); + err = f2fs_init_acl(inode, dir); if (err) goto error; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 89dc750..608f0df 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -18,13 +18,6 @@ #include <linux/crc32.h> #include <linux/magic.h> #include <linux/kobject.h> -#include <linux/sched.h> - -#ifdef CONFIG_F2FS_CHECK_FS -#define f2fs_bug_on(condition) BUG_ON(condition) -#else -#define f2fs_bug_on(condition) -#endif /* * For mount options @@ -305,9 +298,6 @@ struct f2fs_sm_info { unsigned int main_segments; /* # of segments in main area */ unsigned int reserved_segments; /* # of reserved segments */ unsigned int ovp_segments; /* # of overprovision segments */ - - /* a threshold to reclaim prefree segments */ - unsigned int rec_prefree_segments; }; /* @@ -328,6 +318,14 @@ enum count_type { }; /* + * Uses as sbi->fs_lock[NR_GLOBAL_LOCKS]. + * The checkpoint procedure blocks all the locks in this fs_lock array. + * Some FS operations grab free locks, and if there is no free lock, + * then wait to grab a lock in a round-robin manner. + */ +#define NR_GLOBAL_LOCKS 8 + +/* * The below are the page types of bios used in submti_bio(). * The available types are: * DATA User data pages. It operates as async mode. @@ -367,12 +365,12 @@ struct f2fs_sb_info { struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ struct inode *meta_inode; /* cache meta blocks */ struct mutex cp_mutex; /* checkpoint procedure lock */ - struct rw_semaphore cp_rwsem; /* blocking FS operations */ + struct mutex fs_lock[NR_GLOBAL_LOCKS]; /* blocking FS operations */ struct mutex node_write; /* locking node writes */ struct mutex writepages; /* mutex for writepages() */ - bool por_doing; /* recovery is doing or not */ - bool on_build_free_nids; /* build_free_nids is doing */ - wait_queue_head_t cp_wait; + unsigned char next_lock_num; /* round-robin global locks */ + int por_doing; /* recovery is doing or not */ + int on_build_free_nids; /* build_free_nids is doing */ /* for orphan inode management */ struct list_head orphan_inode_list; /* orphan inode list */ @@ -522,24 +520,48 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) cp->ckpt_flags = cpu_to_le32(ckpt_flags); } -static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) +static inline void mutex_lock_all(struct f2fs_sb_info *sbi) { - down_read(&sbi->cp_rwsem); + int i; + + for (i = 0; i < NR_GLOBAL_LOCKS; i++) { + /* + * This is the only time we take multiple fs_lock[] + * instances; the order is immaterial since we + * always hold cp_mutex, which serializes multiple + * such operations. + */ + mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex); + } } -static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) +static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) { - up_read(&sbi->cp_rwsem); + int i = 0; + for (; i < NR_GLOBAL_LOCKS; i++) + mutex_unlock(&sbi->fs_lock[i]); } -static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) +static inline int mutex_lock_op(struct f2fs_sb_info *sbi) { - down_write_nest_lock(&sbi->cp_rwsem, &sbi->cp_mutex); + unsigned char next_lock = sbi->next_lock_num % NR_GLOBAL_LOCKS; + int i = 0; + + for (; i < NR_GLOBAL_LOCKS; i++) + if (mutex_trylock(&sbi->fs_lock[i])) + return i; + + mutex_lock(&sbi->fs_lock[next_lock]); + sbi->next_lock_num++; + return next_lock; } -static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) +static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock) { - up_write(&sbi->cp_rwsem); + if (ilock < 0) + return; + BUG_ON(ilock >= NR_GLOBAL_LOCKS); + mutex_unlock(&sbi->fs_lock[ilock]); } /* @@ -590,8 +612,8 @@ static inline int dec_valid_block_count(struct f2fs_sb_info *sbi, blkcnt_t count) { spin_lock(&sbi->stat_lock); - f2fs_bug_on(sbi->total_valid_block_count < (block_t) count); - f2fs_bug_on(inode->i_blocks < count); + BUG_ON(sbi->total_valid_block_count < (block_t) count); + BUG_ON(inode->i_blocks < count); inode->i_blocks -= count; sbi->total_valid_block_count -= (block_t)count; spin_unlock(&sbi->stat_lock); @@ -723,9 +745,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, { spin_lock(&sbi->stat_lock); - f2fs_bug_on(sbi->total_valid_block_count < count); - f2fs_bug_on(sbi->total_valid_node_count < count); - f2fs_bug_on(inode->i_blocks < count); + BUG_ON(sbi->total_valid_block_count < count); + BUG_ON(sbi->total_valid_node_count < count); + BUG_ON(inode->i_blocks < count); inode->i_blocks -= count; sbi->total_valid_node_count -= count; @@ -746,7 +768,7 @@ static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) { spin_lock(&sbi->stat_lock); - f2fs_bug_on(sbi->total_valid_inode_count == sbi->total_node_count); + BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count); sbi->total_valid_inode_count++; spin_unlock(&sbi->stat_lock); } @@ -754,7 +776,7 @@ static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi) { spin_lock(&sbi->stat_lock); - f2fs_bug_on(!sbi->total_valid_inode_count); + BUG_ON(!sbi->total_valid_inode_count); sbi->total_valid_inode_count--; spin_unlock(&sbi->stat_lock); return 0; @@ -775,7 +797,7 @@ static inline void f2fs_put_page(struct page *page, int unlock) return; if (unlock) { - f2fs_bug_on(!PageLocked(page)); + BUG_ON(!PageLocked(page)); unlock_page(page); } page_cache_release(page); @@ -797,20 +819,6 @@ static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); } -static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, - gfp_t flags) -{ - void *entry; -retry: - entry = kmem_cache_alloc(cachep, flags); - if (!entry) { - cond_resched(); - goto retry; - } - - return entry; -} - #define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) static inline bool IS_INODE(struct page *page) @@ -971,7 +979,6 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); */ void f2fs_set_inode_flags(struct inode *); struct inode *f2fs_iget(struct super_block *, unsigned long); -int try_to_free_nats(struct f2fs_sb_info *, int); void update_inode(struct inode *, struct page *); int update_inode_page(struct inode *); int f2fs_write_inode(struct inode *, struct writeback_control *); @@ -1026,7 +1033,6 @@ void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); int truncate_inode_blocks(struct inode *, pgoff_t); int truncate_xattr_node(struct inode *, struct page *); -int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); int remove_inode_page(struct inode *); struct page *new_inode_page(struct inode *, const struct qstr *); struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); @@ -1053,7 +1059,6 @@ void destroy_node_manager_caches(void); * segment.c */ void f2fs_balance_fs(struct f2fs_sb_info *); -void f2fs_balance_fs_bg(struct f2fs_sb_info *); void invalidate_blocks(struct f2fs_sb_info *, block_t); void clear_prefree_segments(struct f2fs_sb_info *); int npages_for_summary_flush(struct f2fs_sb_info *); @@ -1167,16 +1172,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) return (struct f2fs_stat_info*)sbi->stat_info; } -#define stat_inc_call_count(si) ((si)->call_count++) -#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) -#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) -#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--) -#define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++) -#define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++) -#define stat_inc_seg_type(sbi, curseg) \ - ((sbi)->segment_count[(curseg)->alloc_type]++) -#define stat_inc_block_count(sbi, curseg) \ - ((sbi)->block_count[(curseg)->alloc_type]++) +#define stat_inc_call_count(si) ((si)->call_count++) #define stat_inc_seg_count(sbi, type) \ do { \ @@ -1211,13 +1207,6 @@ void __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); #else #define stat_inc_call_count(si) -#define stat_inc_bggc_count(si) -#define stat_inc_dirty_dir(sbi) -#define stat_dec_dirty_dir(sbi) -#define stat_inc_total_hit(sb) -#define stat_inc_read_hit(sb) -#define stat_inc_seg_type(sbi, curseg) -#define stat_inc_block_count(sbi, curseg) #define stat_inc_seg_count(si, type) #define stat_inc_tot_blk_count(si, blks) #define stat_inc_data_blk_count(si, blks) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7d714f4..02c9069 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -35,18 +35,18 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); block_t old_blk_addr; struct dnode_of_data dn; - int err; + int err, ilock; f2fs_balance_fs(sbi); sb_start_pagefault(inode->i_sb); /* block allocation */ - f2fs_lock_op(sbi); + ilock = mutex_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, page->index, ALLOC_NODE); if (err) { - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); goto out; } @@ -56,12 +56,12 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, err = reserve_new_block(&dn); if (err) { f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); goto out; } } f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); file_update_time(vma->vm_file); lock_page(page); @@ -88,7 +88,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, set_page_dirty(page); SetPageUptodate(page); - trace_f2fs_vm_page_mkwrite(page, DATA); mapped: /* fill the page */ wait_on_page_writeback(page); @@ -189,9 +188,8 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (ret) goto out; } - ret = wait_on_node_pages_writeback(sbi, inode->i_ino); - if (ret) - goto out; + filemap_fdatawait_range(sbi->node_inode->i_mapping, + 0, LONG_MAX); ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); } out: @@ -272,7 +270,7 @@ static int truncate_blocks(struct inode *inode, u64 from) unsigned int blocksize = inode->i_sb->s_blocksize; struct dnode_of_data dn; pgoff_t free_from; - int count = 0; + int count = 0, ilock = -1; int err; trace_f2fs_truncate_blocks_enter(inode, from); @@ -280,13 +278,13 @@ static int truncate_blocks(struct inode *inode, u64 from) free_from = (pgoff_t) ((from + blocksize - 1) >> (sbi->log_blocksize)); - f2fs_lock_op(sbi); + ilock = mutex_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); if (err) { if (err == -ENOENT) goto free_next; - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); trace_f2fs_truncate_blocks_exit(inode, err); return err; } @@ -297,7 +295,7 @@ static int truncate_blocks(struct inode *inode, u64 from) count = ADDRS_PER_BLOCK; count -= dn.ofs_in_node; - f2fs_bug_on(count < 0); + BUG_ON(count < 0); if (dn.ofs_in_node || IS_INODE(dn.node_page)) { truncate_data_blocks_range(&dn, count); @@ -307,7 +305,7 @@ static int truncate_blocks(struct inode *inode, u64 from) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); /* lastly zero out the first data page */ truncate_partial_data_page(inode, from); @@ -418,15 +416,16 @@ static void fill_zero(struct inode *inode, pgoff_t index, { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct page *page; + int ilock; if (!len) return; f2fs_balance_fs(sbi); - f2fs_lock_op(sbi); + ilock = mutex_lock_op(sbi); page = get_new_data_page(inode, NULL, index, false); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); if (!IS_ERR(page)) { wait_on_page_writeback(page); @@ -485,6 +484,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) struct address_space *mapping = inode->i_mapping; loff_t blk_start, blk_end; struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ilock; f2fs_balance_fs(sbi); @@ -493,9 +493,9 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) truncate_inode_pages_range(mapping, blk_start, blk_end - 1); - f2fs_lock_op(sbi); + ilock = mutex_lock_op(sbi); ret = truncate_hole(inode, pg_start, pg_end); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); } } @@ -529,12 +529,13 @@ static int expand_inode_data(struct inode *inode, loff_t offset, for (index = pg_start; index <= pg_end; index++) { struct dnode_of_data dn; + int ilock; - f2fs_lock_op(sbi); + ilock = mutex_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); ret = get_dnode_of_data(&dn, index, ALLOC_NODE); if (ret) { - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); break; } @@ -542,12 +543,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset, ret = reserve_new_block(&dn); if (ret) { f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); break; } } f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); if (pg_start == pg_end) new_size = offset + len; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index b7ad1ec..2f157e8 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -77,15 +77,13 @@ static int gc_thread_func(void *data) else wait_ms = increase_sleep_time(gc_th, wait_ms); - stat_inc_bggc_count(sbi); +#ifdef CONFIG_F2FS_STAT_FS + sbi->bg_gc++; +#endif /* if return value is not zero, no victim was selected */ if (f2fs_gc(sbi)) wait_ms = gc_th->no_gc_sleep_time; - - /* balancing f2fs's metadata periodically */ - f2fs_balance_fs_bg(sbi); - } while (!kthread_should_stop()); return 0; } @@ -238,8 +236,8 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); } -static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, - unsigned int segno, struct victim_sel_policy *p) +static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, + struct victim_sel_policy *p) { if (p->alloc_mode == SSR) return get_seg_entry(sbi, segno)->ckpt_valid_blocks; @@ -295,11 +293,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, } break; } - - p.offset = segno + p.ofs_unit; - if (p.ofs_unit > 1) - p.offset -= segno % p.ofs_unit; - + p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit; secno = GET_SECNO(sbi, segno); if (sec_usage_check(sbi, secno)) @@ -312,10 +306,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, if (p.min_cost > cost) { p.min_segno = segno; p.min_cost = cost; - } else if (unlikely(cost == max_cost)) { - continue; } + if (cost == max_cost) + continue; + if (nsearched++ >= p.max_search) { sbi->last_victim[p.gc_mode] = segno; break; @@ -363,8 +358,12 @@ static void add_gc_inode(struct inode *inode, struct list_head *ilist) iput(inode); return; } - - new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); +repeat: + new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); + if (!new_ie) { + cond_resched(); + goto repeat; + } new_ie->inode = inode; list_add_tail(&new_ie->list, ilist); } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index d0eaa9f..9339cd2 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -37,31 +37,6 @@ void f2fs_set_inode_flags(struct inode *inode) inode->i_flags |= S_DIRSYNC; } -static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) -{ - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || - S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { - if (ri->i_addr[0]) - inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0])); - else - inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1])); - } -} - -static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) -{ - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { - if (old_valid_dev(inode->i_rdev)) { - ri->i_addr[0] = cpu_to_le32(old_encode_dev(inode->i_rdev)); - ri->i_addr[1] = 0; - } else { - ri->i_addr[0] = 0; - ri->i_addr[1] = cpu_to_le32(new_encode_dev(inode->i_rdev)); - ri->i_addr[2] = 0; - } - } -} - static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); @@ -98,6 +73,10 @@ static int do_read_inode(struct inode *inode) inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec); inode->i_generation = le32_to_cpu(ri->i_generation); + if (ri->i_addr[0]) + inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0])); + else + inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1])); fi->i_current_depth = le32_to_cpu(ri->i_current_depth); fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); @@ -105,13 +84,8 @@ static int do_read_inode(struct inode *inode) fi->flags = 0; fi->i_advise = ri->i_advise; fi->i_pino = le32_to_cpu(ri->i_pino); - get_extent_info(&fi->ext, ri->i_ext); get_inline_info(fi, ri); - - /* get rdev by using inline_info */ - __get_inode_rdev(inode, ri); - f2fs_put_page(node_page, 1); return 0; } @@ -205,10 +179,21 @@ void update_inode(struct inode *inode, struct page *node_page) ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); ri->i_generation = cpu_to_le32(inode->i_generation); - __set_inode_rdev(inode, ri); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { + if (old_valid_dev(inode->i_rdev)) { + ri->i_addr[0] = + cpu_to_le32(old_encode_dev(inode->i_rdev)); + ri->i_addr[1] = 0; + } else { + ri->i_addr[0] = 0; + ri->i_addr[1] = + cpu_to_le32(new_encode_dev(inode->i_rdev)); + ri->i_addr[2] = 0; + } + } + set_cold_node(inode, node_page); set_page_dirty(node_page); - clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); } @@ -229,7 +214,7 @@ int update_inode_page(struct inode *inode) int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - int ret; + int ret, ilock; if (inode->i_ino == F2FS_NODE_INO(sbi) || inode->i_ino == F2FS_META_INO(sbi)) @@ -242,9 +227,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) * We need to lock here to prevent from producing dirty node pages * during the urgent cleaning time when runing out of free sections. */ - f2fs_lock_op(sbi); + ilock = mutex_lock_op(sbi); ret = update_inode_page(inode); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); if (wbc) f2fs_balance_fs(sbi); @@ -258,6 +243,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) void f2fs_evict_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + int ilock; trace_f2fs_evict_inode(inode); truncate_inode_pages(&inode->i_data, 0); @@ -266,7 +252,7 @@ void f2fs_evict_inode(struct inode *inode) inode->i_ino == F2FS_META_INO(sbi)) goto no_delete; - f2fs_bug_on(atomic_read(&F2FS_I(inode)->dirty_dents)); + BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents)); remove_dirty_dir_inode(inode); if (inode->i_nlink || is_bad_inode(inode)) @@ -279,9 +265,9 @@ void f2fs_evict_inode(struct inode *inode) if (F2FS_HAS_BLOCKS(inode)) f2fs_truncate(inode); - f2fs_lock_op(sbi); + ilock = mutex_lock_op(sbi); remove_inode_page(inode); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); sb_end_intwrite(inode->i_sb); no_delete: diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 575adac..2a5359c 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -27,19 +27,19 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_t ino; struct inode *inode; bool nid_free = false; - int err; + int err, ilock; inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); - f2fs_lock_op(sbi); + ilock = mutex_lock_op(sbi); if (!alloc_nid(sbi, &ino)) { - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); err = -ENOSPC; goto fail; } - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); inode->i_uid = current_fsuid(); @@ -115,7 +115,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct f2fs_sb_info *sbi = F2FS_SB(sb); struct inode *inode; nid_t ino = 0; - int err; + int err, ilock; f2fs_balance_fs(sbi); @@ -131,9 +131,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, inode->i_mapping->a_ops = &f2fs_dblock_aops; ino = inode->i_ino; - f2fs_lock_op(sbi); + ilock = mutex_lock_op(sbi); err = f2fs_add_link(dentry, inode); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); if (err) goto out; @@ -157,7 +157,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, struct inode *inode = old_dentry->d_inode; struct super_block *sb = dir->i_sb; struct f2fs_sb_info *sbi = F2FS_SB(sb); - int err; + int err, ilock; f2fs_balance_fs(sbi); @@ -165,9 +165,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, ihold(inode); set_inode_flag(F2FS_I(inode), FI_INC_LINK); - f2fs_lock_op(sbi); + ilock = mutex_lock_op(sbi); err = f2fs_add_link(dentry, inode); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); if (err) goto out; @@ -220,6 +220,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) struct f2fs_dir_entry *de; struct page *page; int err = -ENOENT; + int ilock; trace_f2fs_unlink_enter(dir, dentry); f2fs_balance_fs(sbi); @@ -228,16 +229,16 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) if (!de) goto fail; - f2fs_lock_op(sbi); err = acquire_orphan_inode(sbi); if (err) { - f2fs_unlock_op(sbi); kunmap(page); f2fs_put_page(page, 0); goto fail; } + + ilock = mutex_lock_op(sbi); f2fs_delete_entry(de, page, inode); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); /* In order to evict this inode, we set it dirty */ mark_inode_dirty(inode); @@ -253,7 +254,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, struct f2fs_sb_info *sbi = F2FS_SB(sb); struct inode *inode; size_t symlen = strlen(symname) + 1; - int err; + int err, ilock; f2fs_balance_fs(sbi); @@ -264,9 +265,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - f2fs_lock_op(sbi); + ilock = mutex_lock_op(sbi); err = f2fs_add_link(dentry, inode); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); if (err) goto out; @@ -289,7 +290,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct inode *inode; - int err; + int err, ilock; f2fs_balance_fs(sbi); @@ -303,9 +304,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); set_inode_flag(F2FS_I(inode), FI_INC_LINK); - f2fs_lock_op(sbi); + ilock = mutex_lock_op(sbi); err = f2fs_add_link(dentry, inode); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); if (err) goto out_fail; @@ -341,6 +342,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, struct f2fs_sb_info *sbi = F2FS_SB(sb); struct inode *inode; int err = 0; + int ilock; if (!new_valid_dev(rdev)) return -EINVAL; @@ -354,9 +356,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &f2fs_special_inode_operations; - f2fs_lock_op(sbi); + ilock = mutex_lock_op(sbi); err = f2fs_add_link(dentry, inode); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); if (err) goto out; @@ -385,7 +387,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *old_dir_entry = NULL; struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; - int err = -ENOENT; + int err = -ENOENT, ilock = -1; f2fs_balance_fs(sbi); @@ -400,7 +402,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_old; } - f2fs_lock_op(sbi); + ilock = mutex_lock_op(sbi); if (new_inode) { @@ -465,7 +467,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, update_inode_page(old_dir); } - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); return 0; put_out_dir: @@ -475,7 +477,7 @@ out_dir: kunmap(old_dir_page); f2fs_put_page(old_dir_page, 0); } - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); out_old: kunmap(old_page); f2fs_put_page(old_page, 0); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4ac4150..51ef278 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -204,7 +204,7 @@ retry: } e->ni = *ni; e->checkpointed = true; - f2fs_bug_on(ni->blk_addr == NEW_ADDR); + BUG_ON(ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { /* * when nid is reallocated, @@ -212,19 +212,19 @@ retry: * So, reinitialize it with new information. */ e->ni = *ni; - f2fs_bug_on(ni->blk_addr != NULL_ADDR); + BUG_ON(ni->blk_addr != NULL_ADDR); } if (new_blkaddr == NEW_ADDR) e->checkpointed = false; /* sanity check */ - f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr); - f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR && + BUG_ON(nat_get_blkaddr(e) != ni->blk_addr); + BUG_ON(nat_get_blkaddr(e) == NULL_ADDR && new_blkaddr == NULL_ADDR); - f2fs_bug_on(nat_get_blkaddr(e) == NEW_ADDR && + BUG_ON(nat_get_blkaddr(e) == NEW_ADDR && new_blkaddr == NEW_ADDR); - f2fs_bug_on(nat_get_blkaddr(e) != NEW_ADDR && + BUG_ON(nat_get_blkaddr(e) != NEW_ADDR && nat_get_blkaddr(e) != NULL_ADDR && new_blkaddr == NEW_ADDR); @@ -240,7 +240,7 @@ retry: write_unlock(&nm_i->nat_tree_lock); } -int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) +static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -495,10 +495,10 @@ static void truncate_node(struct dnode_of_data *dn) get_node_info(sbi, dn->nid, &ni); if (dn->inode->i_blocks == 0) { - f2fs_bug_on(ni.blk_addr != NULL_ADDR); + BUG_ON(ni.blk_addr != NULL_ADDR); goto invalidate; } - f2fs_bug_on(ni.blk_addr == NULL_ADDR); + BUG_ON(ni.blk_addr == NULL_ADDR); /* Deallocate node address */ invalidate_blocks(sbi, ni.blk_addr); @@ -822,7 +822,7 @@ int remove_inode_page(struct inode *inode) } /* 0 is possible, after f2fs_new_inode() is failed */ - f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); + BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1); set_new_dnode(&dn, inode, page, page, ino); truncate_node(&dn); return 0; @@ -863,7 +863,7 @@ struct page *new_node_page(struct dnode_of_data *dn, get_node_info(sbi, dn->nid, &old_ni); /* Reinitialize old_ni with new node page */ - f2fs_bug_on(old_ni.blk_addr != NULL_ADDR); + BUG_ON(old_ni.blk_addr != NULL_ADDR); new_ni = old_ni; new_ni.ino = dn->inode->i_ino; set_node_addr(sbi, &new_ni, NEW_ADDR); @@ -969,7 +969,7 @@ repeat: goto repeat; } got_it: - f2fs_bug_on(nid != nid_of_node(page)); + BUG_ON(nid != nid_of_node(page)); mark_page_accessed(page); return page; } @@ -1148,47 +1148,6 @@ continue_unlock: return nwritten; } -int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) -{ - struct address_space *mapping = sbi->node_inode->i_mapping; - pgoff_t index = 0, end = LONG_MAX; - struct pagevec pvec; - int nr_pages; - int ret2 = 0, ret = 0; - - pagevec_init(&pvec, 0); - while ((index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_WRITEBACK, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { - unsigned i; - - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - - /* until radix tree lookup accepts end_index */ - if (page->index > end) - continue; - - if (ino && ino_of_node(page) == ino) { - wait_on_page_writeback(page); - if (TestClearPageError(page)) - ret = -EIO; - } - } - pagevec_release(&pvec); - cond_resched(); - } - - if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) - ret2 = -ENOSPC; - if (test_and_clear_bit(AS_EIO, &mapping->flags)) - ret2 = -EIO; - if (!ret) - ret = ret2; - return ret; -} - static int f2fs_write_node_page(struct page *page, struct writeback_control *wbc) { @@ -1197,14 +1156,11 @@ static int f2fs_write_node_page(struct page *page, block_t new_addr; struct node_info ni; - if (sbi->por_doing) - goto redirty_out; - wait_on_page_writeback(page); /* get old block addr of this node page */ nid = nid_of_node(page); - f2fs_bug_on(page->index != nid); + BUG_ON(page->index != nid); get_node_info(sbi, nid, &ni); @@ -1215,8 +1171,12 @@ static int f2fs_write_node_page(struct page *page, return 0; } - if (wbc->for_reclaim) - goto redirty_out; + if (wbc->for_reclaim) { + dec_page_count(sbi, F2FS_DIRTY_NODES); + wbc->pages_skipped++; + set_page_dirty(page); + return AOP_WRITEPAGE_ACTIVATE; + } mutex_lock(&sbi->node_write); set_page_writeback(page); @@ -1226,12 +1186,6 @@ static int f2fs_write_node_page(struct page *page, mutex_unlock(&sbi->node_write); unlock_page(page); return 0; - -redirty_out: - dec_page_count(sbi, F2FS_DIRTY_NODES); - wbc->pages_skipped++; - set_page_dirty(page); - return AOP_WRITEPAGE_ACTIVATE; } /* @@ -1246,8 +1200,11 @@ static int f2fs_write_node_pages(struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); long nr_to_write = wbc->nr_to_write; - /* balancing f2fs's metadata in background */ - f2fs_balance_fs_bg(sbi); + /* First check balancing cached NAT entries */ + if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { + f2fs_sync_fs(sbi->sb, true); + return 0; + } /* collect a number of dirty node pages and write together */ if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES) @@ -1266,8 +1223,6 @@ static int f2fs_set_node_page_dirty(struct page *page) struct address_space *mapping = page->mapping; struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); - trace_f2fs_set_page_dirty(page, NODE); - SetPageUptodate(page); if (!PageDirty(page)) { __set_page_dirty_nobuffers(page); @@ -1336,18 +1291,23 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) if (nid == 0) return 0; - if (build) { - /* do not add allocated nids */ - read_lock(&nm_i->nat_tree_lock); - ne = __lookup_nat_cache(nm_i, nid); - if (ne && nat_get_blkaddr(ne) != NULL_ADDR) - allocated = true; - read_unlock(&nm_i->nat_tree_lock); - if (allocated) - return 0; - } + if (!build) + goto retry; - i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); + /* do not add allocated nids */ + read_lock(&nm_i->nat_tree_lock); + ne = __lookup_nat_cache(nm_i, nid); + if (ne && nat_get_blkaddr(ne) != NULL_ADDR) + allocated = true; + read_unlock(&nm_i->nat_tree_lock); + if (allocated) + return 0; +retry: + i = kmem_cache_alloc(free_nid_slab, GFP_NOFS); + if (!i) { + cond_resched(); + goto retry; + } i->nid = nid; i->state = NID_NEW; @@ -1390,7 +1350,7 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i, break; blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); - f2fs_bug_on(blk_addr == NEW_ADDR); + BUG_ON(blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) { if (add_free_nid(nm_i, start_nid, true) < 0) break; @@ -1461,14 +1421,14 @@ retry: /* We should not use stale free nids created by build_free_nids */ if (nm_i->fcnt && !sbi->on_build_free_nids) { - f2fs_bug_on(list_empty(&nm_i->free_nid_list)); + BUG_ON(list_empty(&nm_i->free_nid_list)); list_for_each(this, &nm_i->free_nid_list) { i = list_entry(this, struct free_nid, list); if (i->state == NID_NEW) break; } - f2fs_bug_on(i->state != NID_NEW); + BUG_ON(i->state != NID_NEW); *nid = i->nid; i->state = NID_ALLOC; nm_i->fcnt--; @@ -1479,9 +1439,9 @@ retry: /* Let's scan nat pages and its caches to get free nids */ mutex_lock(&nm_i->build_lock); - sbi->on_build_free_nids = true; + sbi->on_build_free_nids = 1; build_free_nids(sbi); - sbi->on_build_free_nids = false; + sbi->on_build_free_nids = 0; mutex_unlock(&nm_i->build_lock); goto retry; } @@ -1496,7 +1456,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->free_nid_list_lock); i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); - f2fs_bug_on(!i || i->state != NID_ALLOC); + BUG_ON(!i || i->state != NID_ALLOC); __del_from_free_nid_list(i); spin_unlock(&nm_i->free_nid_list_lock); } @@ -1514,7 +1474,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->free_nid_list_lock); i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); - f2fs_bug_on(!i || i->state != NID_ALLOC); + BUG_ON(!i || i->state != NID_ALLOC); if (nm_i->fcnt > 2 * MAX_FREE_NIDS) { __del_from_free_nid_list(i); } else { @@ -1717,7 +1677,7 @@ to_nat_page: nat_blk = page_address(page); } - f2fs_bug_on(!nat_blk); + BUG_ON(!nat_blk); raw_ne = nat_blk->entries[nid - start_nid]; flush_now: new_blkaddr = nat_get_blkaddr(ne); @@ -1821,11 +1781,11 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) /* destroy free nid list */ spin_lock(&nm_i->free_nid_list_lock); list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { - f2fs_bug_on(i->state == NID_ALLOC); + BUG_ON(i->state == NID_ALLOC); __del_from_free_nid_list(i); nm_i->fcnt--; } - f2fs_bug_on(nm_i->fcnt); + BUG_ON(nm_i->fcnt); spin_unlock(&nm_i->free_nid_list_lock); /* destroy nat cache */ @@ -1839,7 +1799,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) __del_from_nat_cache(nm_i, e); } } - f2fs_bug_on(nm_i->nat_cnt); + BUG_ON(nm_i->nat_cnt); write_unlock(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index fdc8116..51ef5ee 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -64,31 +64,24 @@ static int recover_dentry(struct page *ipage, struct inode *inode) name.name = raw_inode->i_name; retry: de = f2fs_find_entry(dir, &name, &page); - if (de && inode->i_ino == le32_to_cpu(de->ino)) - goto out_unmap_put; + if (de && inode->i_ino == le32_to_cpu(de->ino)) { + kunmap(page); + f2fs_put_page(page, 0); + goto out; + } if (de) { einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); if (IS_ERR(einode)) { WARN_ON(1); if (PTR_ERR(einode) == -ENOENT) err = -EEXIST; - goto out_unmap_put; - } - err = acquire_orphan_inode(F2FS_SB(inode->i_sb)); - if (err) { - iput(einode); - goto out_unmap_put; + goto out; } f2fs_delete_entry(de, page, einode); iput(einode); goto retry; } err = __f2fs_add_link(dir, &name, inode); - goto out; - -out_unmap_put: - kunmap(page); - f2fs_put_page(page, 0); out: f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " "ino = %x, name = %s, dir = %lx, err = %d", @@ -292,6 +285,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct f2fs_summary sum; struct node_info ni; int err = 0, recovered = 0; + int ilock; start = start_bidx_of_node(ofs_of_node(page), fi); if (IS_INODE(page)) @@ -299,20 +293,20 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, else end = start + ADDRS_PER_BLOCK; - f2fs_lock_op(sbi); + ilock = mutex_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, start, ALLOC_NODE); if (err) { - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); return err; } wait_on_page_writeback(dn.node_page); get_node_info(sbi, dn.nid, &ni); - f2fs_bug_on(ni.ino != ino_of_node(page)); - f2fs_bug_on(ofs_of_node(dn.node_page) != ofs_of_node(page)); + BUG_ON(ni.ino != ino_of_node(page)); + BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page)); for (; start < end; start++) { block_t src, dest; @@ -322,9 +316,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) { if (src == NULL_ADDR) { - err = reserve_new_block(&dn); + int err = reserve_new_block(&dn); /* We should not get -ENOSPC */ - f2fs_bug_on(err); + BUG_ON(err); } /* Check the previous node page having this index */ @@ -355,7 +349,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); err: f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); + mutex_unlock_op(sbi, ilock); f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, " "recovered_data = %d blocks, err = %d", @@ -425,7 +419,6 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) { struct list_head inode_list; int err; - bool need_writecp = false; fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", sizeof(struct fsync_inode_entry), NULL); @@ -435,7 +428,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&inode_list); /* step #1: find fsynced inode numbers */ - sbi->por_doing = true; + sbi->por_doing = 1; err = find_fsync_dnodes(sbi, &inode_list); if (err) goto out; @@ -443,16 +436,14 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) if (list_empty(&inode_list)) goto out; - need_writecp = true; - /* step #2: recover data */ err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); - f2fs_bug_on(!list_empty(&inode_list)); + BUG_ON(!list_empty(&inode_list)); out: destroy_fsync_dnodes(&inode_list); kmem_cache_destroy(fsync_entry_slab); - sbi->por_doing = false; - if (!err && need_writecp) + sbi->por_doing = 0; + if (!err) write_checkpoint(sbi, false); return err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fa284d3..09af9c7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -36,14 +36,6 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi) } } -void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) -{ - /* check the # of cached NAT entries and prefree segments */ - if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || - excess_prefree_segs(sbi)) - f2fs_sync_fs(sbi->sb, true); -} - static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, enum dirty_type dirty_type) { @@ -58,10 +50,20 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, if (dirty_type == DIRTY) { struct seg_entry *sentry = get_seg_entry(sbi, segno); - enum dirty_type t = sentry->type; + enum dirty_type t = DIRTY_HOT_DATA; + + dirty_type = sentry->type; + + if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type])) + dirty_i->nr_dirty[dirty_type]++; - if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t])) - dirty_i->nr_dirty[t]++; + /* Only one bitmap should be set */ + for (; t <= DIRTY_COLD_NODE; t++) { + if (t == dirty_type) + continue; + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]--; + } } } @@ -74,11 +76,12 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, dirty_i->nr_dirty[dirty_type]--; if (dirty_type == DIRTY) { - struct seg_entry *sentry = get_seg_entry(sbi, segno); - enum dirty_type t = sentry->type; + enum dirty_type t = DIRTY_HOT_DATA; - if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) - dirty_i->nr_dirty[t]--; + /* clear all the bitmaps */ + for (; t <= DIRTY_COLD_NODE; t++) + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) + dirty_i->nr_dirty[t]--; if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) clear_bit(GET_SECNO(sbi, segno), @@ -139,33 +142,27 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; + unsigned int segno = -1; unsigned int total_segs = TOTAL_SEGS(sbi); - unsigned int start = 0, end = -1; mutex_lock(&dirty_i->seglist_lock); - while (1) { - int i; - start = find_next_bit(prefree_map, total_segs, end + 1); - if (start >= total_segs) + segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, + segno + 1); + if (segno >= total_segs) break; - end = find_next_zero_bit(prefree_map, total_segs, start + 1); - - for (i = start; i < end; i++) - clear_bit(i, prefree_map); - dirty_i->nr_dirty[PRE] -= end - start; - - if (!test_opt(sbi, DISCARD)) - continue; - - blkdev_issue_discard(sbi->sb->s_bdev, - START_BLOCK(sbi, start) << - sbi->log_sectors_per_block, - (1 << (sbi->log_sectors_per_block + - sbi->log_blocks_per_seg)) * (end - start), - GFP_NOFS, 0); + if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) + dirty_i->nr_dirty[PRE]--; + + /* Let's use trim */ + if (test_opt(sbi, DISCARD)) + blkdev_issue_discard(sbi->sb->s_bdev, + START_BLOCK(sbi, segno) << + sbi->log_sectors_per_block, + 1 << (sbi->log_sectors_per_block + + sbi->log_blocks_per_seg), + GFP_NOFS, 0); } mutex_unlock(&dirty_i->seglist_lock); } @@ -198,7 +195,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) new_vblocks = se->valid_blocks + del; offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); - f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) || + BUG_ON((new_vblocks >> (sizeof(unsigned short) << 3) || (new_vblocks > sbi->blocks_per_seg))); se->valid_blocks = new_vblocks; @@ -238,7 +235,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) unsigned int segno = GET_SEGNO(sbi, addr); struct sit_info *sit_i = SIT_I(sbi); - f2fs_bug_on(addr == NULL_ADDR); + BUG_ON(addr == NULL_ADDR); if (addr == NEW_ADDR) return; @@ -270,8 +267,9 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, */ int npages_for_summary_flush(struct f2fs_sb_info *sbi) { + int total_size_bytes = 0; int valid_sum_count = 0; - int i, sum_in_page; + int i, sum_space; for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { if (sbi->ckpt->alloc_type[i] == SSR) @@ -280,12 +278,13 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi) valid_sum_count += curseg_blkoff(sbi, i); } - sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE - - SUM_FOOTER_SIZE) / SUMMARY_SIZE; - if (valid_sum_count <= sum_in_page) + total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1) + + sizeof(struct nat_journal) + 2 + + sizeof(struct sit_journal) + 2; + sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE; + if (total_size_bytes < sum_space) return 1; - else if ((valid_sum_count - sum_in_page) <= - (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE) + else if (total_size_bytes < 2 * sum_space) return 2; return 3; } @@ -351,7 +350,7 @@ find_other_zone: if (dir == ALLOC_RIGHT) { secno = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), 0); - f2fs_bug_on(secno >= TOTAL_SECS(sbi)); + BUG_ON(secno >= TOTAL_SECS(sbi)); } else { go_left = 1; left_start = hint - 1; @@ -367,7 +366,7 @@ find_other_zone: } left_start = find_next_zero_bit(free_i->free_secmap, TOTAL_SECS(sbi), 0); - f2fs_bug_on(left_start >= TOTAL_SECS(sbi)); + BUG_ON(left_start >= TOTAL_SECS(sbi)); break; } secno = left_start; @@ -406,7 +405,7 @@ skip_left: } got_it: /* set it as dirty segment in free segmap */ - f2fs_bug_on(test_bit(segno, free_i->free_segmap)); + BUG_ON(test_bit(segno, free_i->free_segmap)); __set_inuse(sbi, segno); *newseg = segno; write_unlock(&free_i->segmap_lock); @@ -551,8 +550,9 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, change_curseg(sbi, type, true); else new_curseg(sbi, type, false); - - stat_inc_seg_type(sbi, curseg); +#ifdef CONFIG_F2FS_STAT_FS + sbi->segment_count[curseg->alloc_type]++; +#endif } void allocate_new_segments(struct f2fs_sb_info *sbi) @@ -597,11 +597,6 @@ static void f2fs_end_io_write(struct bio *bio, int err) if (p->is_sync) complete(p->wait); - - if (!get_pages(p->sbi, F2FS_WRITEBACK) && - !list_empty(&p->sbi->cp_wait.task_list)) - wake_up(&p->sbi->cp_wait); - kfree(p); bio_put(bio); } @@ -662,7 +657,6 @@ static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, block_t blk_addr, enum page_type type) { struct block_device *bdev = sbi->sb->s_bdev; - int bio_blocks; verify_block_addr(sbi, blk_addr); @@ -682,8 +676,7 @@ retry: goto retry; } - bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); - sbi->bio[type] = f2fs_bio_alloc(bdev, bio_blocks); + sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi)); sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); sbi->bio[type]->bi_private = priv; /* @@ -778,7 +771,7 @@ static int __get_segment_type(struct page *page, enum page_type p_type) return __get_segment_type_4(page, p_type); } /* NR_CURSEG_TYPE(6) logs by default */ - f2fs_bug_on(sbi->active_logs != NR_CURSEG_TYPE); + BUG_ON(sbi->active_logs != NR_CURSEG_TYPE); return __get_segment_type_6(page, p_type); } @@ -808,8 +801,9 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, mutex_lock(&sit_i->sentry_lock); __refresh_next_blkoff(sbi, curseg); - - stat_inc_block_count(sbi, curseg); +#ifdef CONFIG_F2FS_STAT_FS + sbi->block_count[curseg->alloc_type]++; +#endif /* * SIT information should be updated before segment allocation, @@ -855,7 +849,7 @@ void write_data_page(struct inode *inode, struct page *page, struct f2fs_summary sum; struct node_info ni; - f2fs_bug_on(old_blkaddr == NULL_ADDR); + BUG_ON(old_blkaddr == NULL_ADDR); get_node_info(sbi, dn->nid, &ni); set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); @@ -1128,6 +1122,8 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) SUM_JOURNAL_SIZE); written_size += SUM_JOURNAL_SIZE; + set_page_dirty(page); + /* Step 3: write summary entries */ for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { unsigned short blkoff; @@ -1146,20 +1142,18 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) summary = (struct f2fs_summary *)(kaddr + written_size); *summary = seg_i->sum_blk->entries[j]; written_size += SUMMARY_SIZE; + set_page_dirty(page); if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) continue; - set_page_dirty(page); f2fs_put_page(page, 1); page = NULL; } } - if (page) { - set_page_dirty(page); + if (page) f2fs_put_page(page, 1); - } } static void write_normal_summaries(struct f2fs_sb_info *sbi, @@ -1245,7 +1239,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, /* get current sit block page without lock */ src_page = get_meta_page(sbi, src_off); dst_page = grab_meta_page(sbi, dst_off); - f2fs_bug_on(PageDirty(src_page)); + BUG_ON(PageDirty(src_page)); src_addr = page_address(src_page); dst_addr = page_address(dst_page); @@ -1277,9 +1271,9 @@ static bool flush_sits_in_journal(struct f2fs_sb_info *sbi) __mark_sit_entry_dirty(sbi, segno); } update_sits_in_cursum(sum, -sits_in_cursum(sum)); - return true; + return 1; } - return false; + return 0; } /* @@ -1643,7 +1637,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); - sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS; err = build_sit_info(sbi); if (err) @@ -1751,8 +1744,6 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) void destroy_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_sm_info *sm_info = SM_I(sbi); - if (!sm_info) - return; destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 269f690..bdd10ea 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -14,8 +14,6 @@ #define NULL_SEGNO ((unsigned int)(~0)) #define NULL_SECNO ((unsigned int)(~0)) -#define DEF_RECLAIM_PREFREE_SEGMENTS 100 /* 200MB of prefree segments */ - /* L: Logical segment # in volume, R: Relative segment # in main area */ #define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) #define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) @@ -92,8 +90,6 @@ (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) #define SECTOR_TO_BLOCK(sbi, sectors) \ (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) -#define MAX_BIO_BLOCKS(max_hw_blocks) \ - (min((int)max_hw_blocks, BIO_MAX_PAGES)) /* during checkpoint, bio_private is used to synchronize the last bio */ struct bio_private { @@ -474,11 +470,6 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) reserved_sections(sbi))); } -static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) -{ - return (prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments); -} - static inline int utilization(struct f2fs_sb_info *sbi) { return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count); @@ -522,13 +513,16 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type) return curseg->next_blkoff; } -#ifdef CONFIG_F2FS_CHECK_FS static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) { unsigned int end_segno = SM_I(sbi)->segment_count - 1; BUG_ON(segno > end_segno); } +/* + * This function is used for only debugging. + * NOTE: In future, we have to remove this function. + */ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) { struct f2fs_sm_info *sm_info = SM_I(sbi); @@ -547,9 +541,8 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, { struct f2fs_sm_info *sm_info = SM_I(sbi); unsigned int end_segno = sm_info->segment_count - 1; - bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false; int valid_blocks = 0; - int cur_pos = 0, next_pos; + int i; /* check segment usage */ BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); @@ -558,26 +551,11 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, BUG_ON(segno > end_segno); /* check bitmap with valid block count */ - do { - if (is_valid) { - next_pos = find_next_zero_bit_le(&raw_sit->valid_map, - sbi->blocks_per_seg, - cur_pos); - valid_blocks += next_pos - cur_pos; - } else - next_pos = find_next_bit_le(&raw_sit->valid_map, - sbi->blocks_per_seg, - cur_pos); - cur_pos = next_pos; - is_valid = !is_valid; - } while (cur_pos < sbi->blocks_per_seg); + for (i = 0; i < sbi->blocks_per_seg; i++) + if (f2fs_test_bit(i, raw_sit->valid_map)) + valid_blocks++; BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); } -#else -#define check_seg_range(sbi, segno) -#define verify_block_addr(sbi, blk_addr) -#define check_block_count(sbi, segno, raw_sit) -#endif static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, unsigned int start) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bafff72..13d0a0f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -43,9 +43,7 @@ enum { Opt_disable_roll_forward, Opt_discard, Opt_noheap, - Opt_user_xattr, Opt_nouser_xattr, - Opt_acl, Opt_noacl, Opt_active_logs, Opt_disable_ext_identify, @@ -58,9 +56,7 @@ static match_table_t f2fs_tokens = { {Opt_disable_roll_forward, "disable_roll_forward"}, {Opt_discard, "discard"}, {Opt_noheap, "no_heap"}, - {Opt_user_xattr, "user_xattr"}, {Opt_nouser_xattr, "nouser_xattr"}, - {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, {Opt_active_logs, "active_logs=%u"}, {Opt_disable_ext_identify, "disable_ext_identify"}, @@ -69,40 +65,24 @@ static match_table_t f2fs_tokens = { }; /* Sysfs support for f2fs */ -enum { - GC_THREAD, /* struct f2fs_gc_thread */ - SM_INFO, /* struct f2fs_sm_info */ -}; - struct f2fs_attr { struct attribute attr; ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, const char *, size_t); - int struct_type; int offset; }; -static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) -{ - if (struct_type == GC_THREAD) - return (unsigned char *)sbi->gc_thread; - else if (struct_type == SM_INFO) - return (unsigned char *)SM_I(sbi); - return NULL; -} - static ssize_t f2fs_sbi_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - unsigned char *ptr = NULL; + struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; unsigned int *ui; - ptr = __struct_ptr(sbi, a->struct_type); - if (!ptr) + if (!gc_kth) return -EINVAL; - ui = (unsigned int *)(ptr + a->offset); + ui = (unsigned int *)(((char *)gc_kth) + a->offset); return snprintf(buf, PAGE_SIZE, "%u\n", *ui); } @@ -111,16 +91,15 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, struct f2fs_sb_info *sbi, const char *buf, size_t count) { - unsigned char *ptr; + struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; unsigned long t; unsigned int *ui; ssize_t ret; - ptr = __struct_ptr(sbi, a->struct_type); - if (!ptr) + if (!gc_kth) return -EINVAL; - ui = (unsigned int *)(ptr + a->offset); + ui = (unsigned int *)(((char *)gc_kth) + a->offset); ret = kstrtoul(skip_spaces(buf), 0, &t); if (ret < 0) @@ -156,25 +135,21 @@ static void f2fs_sb_release(struct kobject *kobj) complete(&sbi->s_kobj_unregister); } -#define F2FS_ATTR_OFFSET(_struct_type, _name, _mode, _show, _store, _offset) \ +#define F2FS_ATTR_OFFSET(_name, _mode, _show, _store, _elname) \ static struct f2fs_attr f2fs_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ .show = _show, \ .store = _store, \ - .struct_type = _struct_type, \ - .offset = _offset \ + .offset = offsetof(struct f2fs_gc_kthread, _elname), \ } -#define F2FS_RW_ATTR(struct_type, struct_name, name, elname) \ - F2FS_ATTR_OFFSET(struct_type, name, 0644, \ - f2fs_sbi_show, f2fs_sbi_store, \ - offsetof(struct struct_name, elname)) +#define F2FS_RW_ATTR(name, elname) \ + F2FS_ATTR_OFFSET(name, 0644, f2fs_sbi_show, f2fs_sbi_store, elname) -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); -F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); -F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); +F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time); +F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time); +F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); +F2FS_RW_ATTR(gc_idle, gc_idle); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -182,7 +157,6 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_max_sleep_time), ATTR_LIST(gc_no_gc_sleep_time), ATTR_LIST(gc_idle), - ATTR_LIST(reclaim_segments), NULL, }; @@ -263,9 +237,6 @@ static int parse_options(struct super_block *sb, char *options) set_opt(sbi, NOHEAP); break; #ifdef CONFIG_F2FS_FS_XATTR - case Opt_user_xattr: - set_opt(sbi, XATTR_USER); - break; case Opt_nouser_xattr: clear_opt(sbi, XATTR_USER); break; @@ -273,10 +244,6 @@ static int parse_options(struct super_block *sb, char *options) set_opt(sbi, INLINE_XATTR); break; #else - case Opt_user_xattr: - f2fs_msg(sb, KERN_INFO, - "user_xattr options not supported"); - break; case Opt_nouser_xattr: f2fs_msg(sb, KERN_INFO, "nouser_xattr options not supported"); @@ -287,16 +254,10 @@ static int parse_options(struct super_block *sb, char *options) break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL - case Opt_acl: - set_opt(sbi, POSIX_ACL); - break; case Opt_noacl: clear_opt(sbi, POSIX_ACL); break; #else - case Opt_acl: - f2fs_msg(sb, KERN_INFO, "acl options not supported"); - break; case Opt_noacl: f2fs_msg(sb, KERN_INFO, "noacl options not supported"); break; @@ -394,9 +355,7 @@ static void f2fs_put_super(struct super_block *sb) f2fs_destroy_stats(sbi); stop_gc_thread(sbi); - /* We don't need to do checkpoint when it's clean */ - if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES)) - write_checkpoint(sbi, true); + write_checkpoint(sbi, true); iput(sbi->node_inode); iput(sbi->meta_inode); @@ -768,47 +727,30 @@ static void init_sb_info(struct f2fs_sb_info *sbi) atomic_set(&sbi->nr_pages[i], 0); } -/* - * Read f2fs raw super block. - * Because we have two copies of super block, so read the first one at first, - * if the first one is invalid, move to read the second one. - */ -static int read_raw_super_block(struct super_block *sb, - struct f2fs_super_block **raw_super, - struct buffer_head **raw_super_buf) +static int validate_superblock(struct super_block *sb, + struct f2fs_super_block **raw_super, + struct buffer_head **raw_super_buf, sector_t block) { - int block = 0; + const char *super = (block == 0 ? "first" : "second"); -retry: + /* read f2fs raw super block */ *raw_super_buf = sb_bread(sb, block); if (!*raw_super_buf) { - f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock", - block + 1); - if (block == 0) { - block++; - goto retry; - } else { - return -EIO; - } + f2fs_msg(sb, KERN_ERR, "unable to read %s superblock", + super); + return -EIO; } *raw_super = (struct f2fs_super_block *) ((char *)(*raw_super_buf)->b_data + F2FS_SUPER_OFFSET); /* sanity checking of raw super */ - if (sanity_check_raw_super(sb, *raw_super)) { - brelse(*raw_super_buf); - f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " - "in %dth superblock", block + 1); - if(block == 0) { - block++; - goto retry; - } else { - return -EINVAL; - } - } + if (!sanity_check_raw_super(sb, *raw_super)) + return 0; - return 0; + f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " + "in %s superblock", super); + return -EINVAL; } static int f2fs_fill_super(struct super_block *sb, void *data, int silent) @@ -818,6 +760,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) struct buffer_head *raw_super_buf; struct inode *root; long err = -EINVAL; + int i; /* allocate memory for f2fs-specific super block info */ sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); @@ -830,10 +773,14 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_sbi; } - err = read_raw_super_block(sb, &raw_super, &raw_super_buf); - if (err) - goto free_sbi; - + err = validate_superblock(sb, &raw_super, &raw_super_buf, 0); + if (err) { + brelse(raw_super_buf); + /* check secondary superblock when primary failed */ + err = validate_superblock(sb, &raw_super, &raw_super_buf, 1); + if (err) + goto free_sb_buf; + } sb->s_fs_info = sbi; /* init some FS parameters */ sbi->active_logs = NR_CURSEG_TYPE; @@ -871,12 +818,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) mutex_init(&sbi->gc_mutex); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); + for (i = 0; i < NR_GLOBAL_LOCKS; i++) + mutex_init(&sbi->fs_lock[i]); mutex_init(&sbi->node_write); - sbi->por_doing = false; + sbi->por_doing = 0; spin_lock_init(&sbi->stat_lock); init_rwsem(&sbi->bio_sem); - init_rwsem(&sbi->cp_rwsem); - init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); /* get an inode for meta space */ @@ -975,12 +922,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* After POR, we can run background GC thread.*/ err = start_gc_thread(sbi); if (err) - goto free_gc; + goto fail; } err = f2fs_build_stats(sbi); if (err) - goto free_gc; + goto fail; if (f2fs_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); @@ -1006,12 +953,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) return 0; fail: - if (sbi->s_proc) { - remove_proc_entry("segment_info", sbi->s_proc); - remove_proc_entry(sb->s_id, f2fs_proc_root); - } - f2fs_destroy_stats(sbi); -free_gc: stop_gc_thread(sbi); free_root_inode: dput(sb->s_root); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index aa7a3f1..1ac8a5f 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -154,9 +154,6 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, } #ifdef CONFIG_F2FS_FS_SECURITY -static int __f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, - struct page *ipage); static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, void *page) { @@ -164,7 +161,7 @@ static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, int err = 0; for (xattr = xattr_array; xattr->name != NULL; xattr++) { - err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, + err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, xattr->name, xattr->value, xattr->value_len, (struct page *)page); if (err < 0) @@ -372,7 +369,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, alloc_nid_failed(sbi, new_nid); return PTR_ERR(xpage); } - f2fs_bug_on(new_nid); + BUG_ON(new_nid); } else { struct dnode_of_data dn; set_new_dnode(&dn, inode, NULL, NULL, new_nid); @@ -472,15 +469,16 @@ cleanup: return error; } -static int __f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, - struct page *ipage) +int f2fs_setxattr(struct inode *inode, int name_index, const char *name, + const void *value, size_t value_len, struct page *ipage) { + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_xattr_entry *here, *last; void *base_addr; int found, newsize; size_t name_len; + int ilock; __u32 new_hsize; int error = -ENOMEM; @@ -495,6 +493,10 @@ static int __f2fs_setxattr(struct inode *inode, int name_index, if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode)) return -ERANGE; + f2fs_balance_fs(sbi); + + ilock = mutex_lock_op(sbi); + base_addr = read_all_xattrs(inode, ipage); if (!base_addr) goto exit; @@ -520,7 +522,7 @@ static int __f2fs_setxattr(struct inode *inode, int name_index, */ free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); if (found) - free = free + ENTRY_SIZE(here); + free = free - ENTRY_SIZE(here); if (free < newsize) { error = -ENOSPC; @@ -576,21 +578,7 @@ static int __f2fs_setxattr(struct inode *inode, int name_index, else update_inode_page(inode); exit: + mutex_unlock_op(sbi, ilock); kzfree(base_addr); return error; } - -int f2fs_setxattr(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len, struct page *ipage) -{ - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - int err; - - f2fs_balance_fs(sbi); - - f2fs_lock_op(sbi); - err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); - f2fs_unlock_op(sbi); - - return err; -} diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 7c31f4b..4241e6f 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -102,7 +102,6 @@ struct msdos_sb_info { struct hlist_head dir_hashtable[FAT_HASH_SIZE]; unsigned int dirty; /* fs state before mount */ - struct rcu_head rcu; }; #define FAT_CACHE_VALID 0 /* special case for valid cache */ diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 854b578..0062da2 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -548,16 +548,6 @@ static void fat_set_state(struct super_block *sb, brelse(bh); } -static void delayed_free(struct rcu_head *p) -{ - struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu); - unload_nls(sbi->nls_disk); - unload_nls(sbi->nls_io); - if (sbi->options.iocharset != fat_default_iocharset) - kfree(sbi->options.iocharset); - kfree(sbi); -} - static void fat_put_super(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); @@ -567,7 +557,14 @@ static void fat_put_super(struct super_block *sb) iput(sbi->fsinfo_inode); iput(sbi->fat_inode); - call_rcu(&sbi->rcu, delayed_free); + unload_nls(sbi->nls_disk); + unload_nls(sbi->nls_io); + + if (sbi->options.iocharset != fat_default_iocharset) + kfree(sbi->options.iocharset); + + sb->s_fs_info = NULL; + kfree(sbi); } static struct kmem_cache *fat_inode_cachep; @@ -56,7 +56,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg) return -EINVAL; } - if (filp->f_op->check_flags) + if (filp->f_op && filp->f_op->check_flags) error = filp->f_op->check_flags(arg); if (error) return error; @@ -64,7 +64,8 @@ static int setfl(int fd, struct file * filp, unsigned long arg) /* * ->fasync() is responsible for setting the FASYNC bit. */ - if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) { + if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op && + filp->f_op->fasync) { error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); if (error < 0) goto out; diff --git a/fs/file_table.c b/fs/file_table.c index 5fff903..e900ca5 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -36,6 +36,8 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; +DEFINE_STATIC_LGLOCK(files_lglock); + /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -132,6 +134,7 @@ struct file *get_empty_filp(void) return ERR_PTR(error); } + INIT_LIST_HEAD(&f->f_u.fu_list); atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); spin_lock_init(&f->f_lock); @@ -237,11 +240,11 @@ static void __fput(struct file *file) locks_remove_flock(file); if (unlikely(file->f_flags & FASYNC)) { - if (file->f_op->fasync) + if (file->f_op && file->f_op->fasync) file->f_op->fasync(-1, file, 0); } ima_file_free(file); - if (file->f_op->release) + if (file->f_op && file->f_op->release) file->f_op->release(inode, file); security_file_free(file); if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && @@ -301,6 +304,7 @@ void fput(struct file *file) if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; + file_sb_list_del(file); if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { init_task_work(&file->f_u.fu_rcuhead, ____fput); if (!task_work_add(task, &file->f_u.fu_rcuhead, true)) @@ -329,6 +333,7 @@ void __fput_sync(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; + file_sb_list_del(file); BUG_ON(!(task->flags & PF_KTHREAD)); __fput(file); } @@ -340,10 +345,129 @@ void put_filp(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); + file_sb_list_del(file); file_free(file); } } +static inline int file_list_cpu(struct file *file) +{ +#ifdef CONFIG_SMP + return file->f_sb_list_cpu; +#else + return smp_processor_id(); +#endif +} + +/* helper for file_sb_list_add to reduce ifdefs */ +static inline void __file_sb_list_add(struct file *file, struct super_block *sb) +{ + struct list_head *list; +#ifdef CONFIG_SMP + int cpu; + cpu = smp_processor_id(); + file->f_sb_list_cpu = cpu; + list = per_cpu_ptr(sb->s_files, cpu); +#else + list = &sb->s_files; +#endif + list_add(&file->f_u.fu_list, list); +} + +/** + * file_sb_list_add - add a file to the sb's file list + * @file: file to add + * @sb: sb to add it to + * + * Use this function to associate a file with the superblock of the inode it + * refers to. + */ +void file_sb_list_add(struct file *file, struct super_block *sb) +{ + if (likely(!(file->f_mode & FMODE_WRITE))) + return; + if (!S_ISREG(file_inode(file)->i_mode)) + return; + lg_local_lock(&files_lglock); + __file_sb_list_add(file, sb); + lg_local_unlock(&files_lglock); +} + +/** + * file_sb_list_del - remove a file from the sb's file list + * @file: file to remove + * @sb: sb to remove it from + * + * Use this function to remove a file from its superblock. + */ +void file_sb_list_del(struct file *file) +{ + if (!list_empty(&file->f_u.fu_list)) { + lg_local_lock_cpu(&files_lglock, file_list_cpu(file)); + list_del_init(&file->f_u.fu_list); + lg_local_unlock_cpu(&files_lglock, file_list_cpu(file)); + } +} + +#ifdef CONFIG_SMP + +/* + * These macros iterate all files on all CPUs for a given superblock. + * files_lglock must be held globally. + */ +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + int i; \ + for_each_possible_cpu(i) { \ + struct list_head *list; \ + list = per_cpu_ptr((__sb)->s_files, i); \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ + } \ +} + +#else + +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + struct list_head *list; \ + list = &(sb)->s_files; \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ +} + +#endif + +/** + * mark_files_ro - mark all files read-only + * @sb: superblock in question + * + * All files are marked read-only. We don't care about pending + * delete files so this should be used in 'force' mode only. + */ +void mark_files_ro(struct super_block *sb) +{ + struct file *f; + + lg_global_lock(&files_lglock); + do_file_list_for_each_entry(sb, f) { + if (!file_count(f)) + continue; + if (!(f->f_mode & FMODE_WRITE)) + continue; + spin_lock(&f->f_lock); + f->f_mode &= ~FMODE_WRITE; + spin_unlock(&f->f_lock); + if (file_check_writeable(f) != 0) + continue; + __mnt_drop_write(f->f_path.mnt); + file_release_write(f); + } while_file_list_for_each_entry; + lg_global_unlock(&files_lglock); +} + void __init files_init(unsigned long mempages) { unsigned long n; @@ -359,5 +483,6 @@ void __init files_init(unsigned long mempages) n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); files_defer_init(); + lg_lock_init(&files_lglock, "files_lglock"); percpu_counter_init(&nr_files, 0); } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 1f4a10e..9f4935b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -26,7 +26,6 @@ #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/tracepoint.h> -#include <linux/device.h> #include "internal.h" /* @@ -40,18 +39,13 @@ struct wb_writeback_work { long nr_pages; struct super_block *sb; - /* - * Write only inodes dirtied before this time. Don't forget to set - * older_than_this_is_set when you set this. - */ - unsigned long older_than_this; + unsigned long *older_than_this; enum writeback_sync_modes sync_mode; unsigned int tagged_writepages:1; unsigned int for_kupdate:1; unsigned int range_cyclic:1; unsigned int for_background:1; unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ - unsigned int older_than_this_is_set:1; enum wb_reason reason; /* why was writeback initiated? */ struct list_head list; /* pending work list */ @@ -252,10 +246,10 @@ static int move_expired_inodes(struct list_head *delaying_queue, int do_sb_sort = 0; int moved = 0; - WARN_ON_ONCE(!work->older_than_this_is_set); while (!list_empty(delaying_queue)) { inode = wb_inode(delaying_queue->prev); - if (inode_dirtied_after(inode, work->older_than_this)) + if (work->older_than_this && + inode_dirtied_after(inode, *work->older_than_this)) break; list_move(&inode->i_wb_list, &tmp); moved++; @@ -739,8 +733,6 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, .sync_mode = WB_SYNC_NONE, .range_cyclic = 1, .reason = reason, - .older_than_this = jiffies, - .older_than_this_is_set = 1, }; spin_lock(&wb->list_lock); @@ -799,13 +791,12 @@ static long wb_writeback(struct bdi_writeback *wb, { unsigned long wb_start = jiffies; long nr_pages = work->nr_pages; + unsigned long oldest_jif; struct inode *inode; long progress; - if (!work->older_than_this_is_set) { - work->older_than_this = jiffies; - work->older_than_this_is_set = 1; - } + oldest_jif = jiffies; + work->older_than_this = &oldest_jif; spin_lock(&wb->list_lock); for (;;) { @@ -839,10 +830,10 @@ static long wb_writeback(struct bdi_writeback *wb, * safe. */ if (work->for_kupdate) { - work->older_than_this = jiffies - + oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10); } else if (work->for_background) - work->older_than_this = jiffies; + oldest_jif = jiffies; trace_writeback_start(wb->bdi, work); if (list_empty(&wb->b_io)) @@ -1354,21 +1345,18 @@ EXPORT_SYMBOL(try_to_writeback_inodes_sb); /** * sync_inodes_sb - sync sb inode pages - * @sb: the superblock - * @older_than_this: timestamp + * @sb: the superblock * * This function writes and waits on any dirty inode belonging to this - * superblock that has been dirtied before given timestamp. + * super_block. */ -void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this) +void sync_inodes_sb(struct super_block *sb) { DECLARE_COMPLETION_ONSTACK(done); struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_ALL, .nr_pages = LONG_MAX, - .older_than_this = older_than_this, - .older_than_this_is_set = 1, .range_cyclic = 0, .done = &done, .reason = WB_REASON_SYNC, diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 7dca743..d8ac61d 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -161,6 +161,6 @@ EXPORT_SYMBOL(current_umask); struct fs_struct init_fs = { .users = 1, .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), - .seq = SEQCNT_ZERO(init_fs.seq), + .seq = SEQCNT_ZERO, .umask = 0022, }; diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 29d7feb..b2a86e3 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -58,16 +58,15 @@ void fscache_cookie_init_once(void *_cookie) struct fscache_cookie *__fscache_acquire_cookie( struct fscache_cookie *parent, const struct fscache_cookie_def *def, - void *netfs_data, - bool enable) + void *netfs_data) { struct fscache_cookie *cookie; BUG_ON(!def); - _enter("{%s},{%s},%p,%u", + _enter("{%s},{%s},%p", parent ? (char *) parent->def->name : "<no-parent>", - def->name, netfs_data, enable); + def->name, netfs_data); fscache_stat(&fscache_n_acquires); @@ -107,7 +106,7 @@ struct fscache_cookie *__fscache_acquire_cookie( cookie->def = def; cookie->parent = parent; cookie->netfs_data = netfs_data; - cookie->flags = (1 << FSCACHE_COOKIE_NO_DATA_YET); + cookie->flags = 0; /* radix tree insertion won't use the preallocation pool unless it's * told it may not wait */ @@ -125,22 +124,16 @@ struct fscache_cookie *__fscache_acquire_cookie( break; } - if (enable) { - /* if the object is an index then we need do nothing more here - * - we create indices on disk when we need them as an index - * may exist in multiple caches */ - if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { - if (fscache_acquire_non_index_cookie(cookie) == 0) { - set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); - } else { - atomic_dec(&parent->n_children); - __fscache_cookie_put(cookie); - fscache_stat(&fscache_n_acquires_nobufs); - _leave(" = NULL"); - return NULL; - } - } else { - set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); + /* if the object is an index then we need do nothing more here - we + * create indices on disk when we need them as an index may exist in + * multiple caches */ + if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { + if (fscache_acquire_non_index_cookie(cookie) < 0) { + atomic_dec(&parent->n_children); + __fscache_cookie_put(cookie); + fscache_stat(&fscache_n_acquires_nobufs); + _leave(" = NULL"); + return NULL; } } @@ -151,39 +144,6 @@ struct fscache_cookie *__fscache_acquire_cookie( EXPORT_SYMBOL(__fscache_acquire_cookie); /* - * Enable a cookie to permit it to accept new operations. - */ -void __fscache_enable_cookie(struct fscache_cookie *cookie, - bool (*can_enable)(void *data), - void *data) -{ - _enter("%p", cookie); - - wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK, - fscache_wait_bit, TASK_UNINTERRUPTIBLE); - - if (test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags)) - goto out_unlock; - - if (can_enable && !can_enable(data)) { - /* The netfs decided it didn't want to enable after all */ - } else if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { - /* Wait for outstanding disablement to complete */ - __fscache_wait_on_invalidate(cookie); - - if (fscache_acquire_non_index_cookie(cookie) == 0) - set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); - } else { - set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); - } - -out_unlock: - clear_bit_unlock(FSCACHE_COOKIE_ENABLEMENT_LOCK, &cookie->flags); - wake_up_bit(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK); -} -EXPORT_SYMBOL(__fscache_enable_cookie); - -/* * acquire a non-index cookie * - this must make sure the index chain is instantiated and instantiate the * object representation too @@ -197,7 +157,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie) _enter(""); - set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); + cookie->flags = 1 << FSCACHE_COOKIE_UNAVAILABLE; /* now we need to see whether the backing objects for this cookie yet * exist, if not there'll be nothing to search */ @@ -220,7 +180,9 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie) _debug("cache %s", cache->tag->name); - set_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); + cookie->flags = + (1 << FSCACHE_COOKIE_LOOKING_UP) | + (1 << FSCACHE_COOKIE_NO_DATA_YET); /* ask the cache to allocate objects for this cookie and its parent * chain */ @@ -436,8 +398,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie) if (!hlist_empty(&cookie->backing_objects)) { spin_lock(&cookie->lock); - if (fscache_cookie_enabled(cookie) && - !hlist_empty(&cookie->backing_objects) && + if (!hlist_empty(&cookie->backing_objects) && !test_and_set_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { object = hlist_entry(cookie->backing_objects.first, @@ -491,14 +452,10 @@ void __fscache_update_cookie(struct fscache_cookie *cookie) spin_lock(&cookie->lock); - if (fscache_cookie_enabled(cookie)) { - /* update the index entry on disk in each cache backing this - * cookie. - */ - hlist_for_each_entry(object, - &cookie->backing_objects, cookie_link) { - fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); - } + /* update the index entry on disk in each cache backing this cookie */ + hlist_for_each_entry(object, + &cookie->backing_objects, cookie_link) { + fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); } spin_unlock(&cookie->lock); @@ -507,80 +464,15 @@ void __fscache_update_cookie(struct fscache_cookie *cookie) EXPORT_SYMBOL(__fscache_update_cookie); /* - * Disable a cookie to stop it from accepting new requests from the netfs. - */ -void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) -{ - struct fscache_object *object; - bool awaken = false; - - _enter("%p,%u", cookie, invalidate); - - ASSERTCMP(atomic_read(&cookie->n_active), >, 0); - - if (atomic_read(&cookie->n_children) != 0) { - printk(KERN_ERR "FS-Cache: Cookie '%s' still has children\n", - cookie->def->name); - BUG(); - } - - wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK, - fscache_wait_bit, TASK_UNINTERRUPTIBLE); - if (!test_and_clear_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags)) - goto out_unlock_enable; - - /* If the cookie is being invalidated, wait for that to complete first - * so that we can reuse the flag. - */ - __fscache_wait_on_invalidate(cookie); - - /* Dispose of the backing objects */ - set_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags); - - spin_lock(&cookie->lock); - if (!hlist_empty(&cookie->backing_objects)) { - hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { - if (invalidate) - set_bit(FSCACHE_OBJECT_RETIRED, &object->flags); - fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL); - } - } else { - if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) - awaken = true; - } - spin_unlock(&cookie->lock); - if (awaken) - wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); - - /* Wait for cessation of activity requiring access to the netfs (when - * n_active reaches 0). This makes sure outstanding reads and writes - * have completed. - */ - if (!atomic_dec_and_test(&cookie->n_active)) - wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t, - TASK_UNINTERRUPTIBLE); - - /* Reset the cookie state if it wasn't relinquished */ - if (!test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags)) { - atomic_inc(&cookie->n_active); - set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); - } - -out_unlock_enable: - clear_bit_unlock(FSCACHE_COOKIE_ENABLEMENT_LOCK, &cookie->flags); - wake_up_bit(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK); - _leave(""); -} -EXPORT_SYMBOL(__fscache_disable_cookie); - -/* * release a cookie back to the cache * - the object will be marked as recyclable on disk if retire is true * - all dependents of this cookie must have already been unregistered * (indices/files/pages) */ -void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire) +void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) { + struct fscache_object *object; + fscache_stat(&fscache_n_relinquishes); if (retire) fscache_stat(&fscache_n_relinquishes_retire); @@ -595,10 +487,31 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire) cookie, cookie->def->name, cookie->netfs_data, atomic_read(&cookie->n_active), retire); + ASSERTCMP(atomic_read(&cookie->n_active), >, 0); + + if (atomic_read(&cookie->n_children) != 0) { + printk(KERN_ERR "FS-Cache: Cookie '%s' still has children\n", + cookie->def->name); + BUG(); + } + /* No further netfs-accessing operations on this cookie permitted */ set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags); + if (retire) + set_bit(FSCACHE_COOKIE_RETIRED, &cookie->flags); - __fscache_disable_cookie(cookie, retire); + spin_lock(&cookie->lock); + hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { + fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL); + } + spin_unlock(&cookie->lock); + + /* Wait for cessation of activity requiring access to the netfs (when + * n_active reaches 0). + */ + if (!atomic_dec_and_test(&cookie->n_active)) + wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t, + TASK_UNINTERRUPTIBLE); /* Clear pointers back to the netfs */ cookie->netfs_data = NULL; @@ -655,7 +568,6 @@ int __fscache_check_consistency(struct fscache_cookie *cookie) { struct fscache_operation *op; struct fscache_object *object; - bool wake_cookie = false; int ret; _enter("%p,", cookie); @@ -679,8 +591,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie) spin_lock(&cookie->lock); - if (!fscache_cookie_enabled(cookie) || - hlist_empty(&cookie->backing_objects)) + if (hlist_empty(&cookie->backing_objects)) goto inconsistent; object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); @@ -689,7 +600,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie) op->debug_id = atomic_inc_return(&fscache_op_debug_id); - __fscache_use_cookie(cookie); + atomic_inc(&cookie->n_active); if (fscache_submit_op(object, op) < 0) goto submit_failed; @@ -711,11 +622,9 @@ int __fscache_check_consistency(struct fscache_cookie *cookie) return ret; submit_failed: - wake_cookie = __fscache_unuse_cookie(cookie); + atomic_dec(&cookie->n_active); inconsistent: spin_unlock(&cookie->lock); - if (wake_cookie) - __fscache_wake_unused_cookie(cookie); kfree(op); _leave(" = -ESTALE"); return -ESTALE; diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c index 5a117df..10a2ade 100644 --- a/fs/fscache/fsdef.c +++ b/fs/fscache/fsdef.c @@ -59,7 +59,6 @@ struct fscache_cookie fscache_fsdef_index = { .lock = __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock), .backing_objects = HLIST_HEAD_INIT, .def = &fscache_fsdef_index_def, - .flags = 1 << FSCACHE_COOKIE_ENABLED, }; EXPORT_SYMBOL(fscache_fsdef_index); diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c index 989f394..b1bb611 100644 --- a/fs/fscache/netfs.c +++ b/fs/fscache/netfs.c @@ -45,7 +45,6 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) netfs->primary_index->def = &fscache_fsdef_netfs_def; netfs->primary_index->parent = &fscache_fsdef_index; netfs->primary_index->netfs_data = netfs; - netfs->primary_index->flags = 1 << FSCACHE_COOKIE_ENABLED; atomic_inc(&netfs->primary_index->parent->usage); atomic_inc(&netfs->primary_index->parent->n_children); diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 53d35c5..86d75a6 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -495,7 +495,6 @@ void fscache_object_lookup_negative(struct fscache_object *object) * returning ENODATA. */ set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); - clear_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); _debug("wake up lookup %p", &cookie->flags); clear_bit_unlock(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); @@ -528,7 +527,6 @@ void fscache_obtained_object(struct fscache_object *object) /* We do (presumably) have data */ clear_bit_unlock(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); - clear_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); /* Allow write requests to begin stacking up and read requests * to begin shovelling data. @@ -681,8 +679,7 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob */ spin_lock(&cookie->lock); hlist_del_init(&object->cookie_link); - if (hlist_empty(&cookie->backing_objects) && - test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) + if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) awaken = true; spin_unlock(&cookie->lock); @@ -799,7 +796,7 @@ void fscache_enqueue_object(struct fscache_object *object) */ bool fscache_object_sleep_till_congested(signed long *timeoutp) { - wait_queue_head_t *cong_wq = this_cpu_ptr(&fscache_object_cong_wait); + wait_queue_head_t *cong_wq = &__get_cpu_var(fscache_object_cong_wait); DEFINE_WAIT(wait); if (fscache_object_congested()) @@ -930,7 +927,7 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj */ if (!fscache_use_cookie(object)) { ASSERT(object->cookie->stores.rnode == NULL); - set_bit(FSCACHE_OBJECT_RETIRED, &object->flags); + set_bit(FSCACHE_COOKIE_RETIRED, &cookie->flags); _leave(" [no cookie]"); return transit_to(KILL_OBJECT); } diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 7f5c658..73899c1 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -163,10 +163,12 @@ static void fscache_attr_changed_op(struct fscache_operation *op) fscache_stat(&fscache_n_attr_changed_calls); - if (fscache_object_is_active(object)) { + if (fscache_object_is_active(object) && + fscache_use_cookie(object)) { fscache_stat(&fscache_n_cop_attr_changed); ret = object->cache->ops->attr_changed(object); fscache_stat_d(&fscache_n_cop_attr_changed); + fscache_unuse_cookie(object); if (ret < 0) fscache_abort_object(object); } @@ -182,7 +184,6 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) { struct fscache_operation *op; struct fscache_object *object; - bool wake_cookie; _enter("%p", cookie); @@ -198,19 +199,15 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) } fscache_operation_init(op, fscache_attr_changed_op, NULL); - op->flags = FSCACHE_OP_ASYNC | - (1 << FSCACHE_OP_EXCLUSIVE) | - (1 << FSCACHE_OP_UNUSE_COOKIE); + op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); spin_lock(&cookie->lock); - if (!fscache_cookie_enabled(cookie) || - hlist_empty(&cookie->backing_objects)) + if (hlist_empty(&cookie->backing_objects)) goto nobufs; object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); - __fscache_use_cookie(cookie); if (fscache_submit_exclusive_op(object, op) < 0) goto nobufs; spin_unlock(&cookie->lock); @@ -220,11 +217,8 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) return 0; nobufs: - wake_cookie = __fscache_unuse_cookie(cookie); spin_unlock(&cookie->lock); kfree(op); - if (wake_cookie) - __fscache_wake_unused_cookie(cookie); fscache_stat(&fscache_n_attr_changed_nobufs); _leave(" = %d", -ENOBUFS); return -ENOBUFS; @@ -269,6 +263,7 @@ static struct fscache_retrieval *fscache_alloc_retrieval( } fscache_operation_init(&op->op, NULL, fscache_release_retrieval_op); + atomic_inc(&cookie->n_active); op->op.flags = FSCACHE_OP_MYTHREAD | (1UL << FSCACHE_OP_WAITING) | (1UL << FSCACHE_OP_UNUSE_COOKIE); @@ -389,7 +384,6 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, { struct fscache_retrieval *op; struct fscache_object *object; - bool wake_cookie = false; int ret; _enter("%p,%p,,,", cookie, page); @@ -411,7 +405,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, return -ERESTARTSYS; op = fscache_alloc_retrieval(cookie, page->mapping, - end_io_func, context); + end_io_func,context); if (!op) { _leave(" = -ENOMEM"); return -ENOMEM; @@ -420,15 +414,13 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, spin_lock(&cookie->lock); - if (!fscache_cookie_enabled(cookie) || - hlist_empty(&cookie->backing_objects)) + if (hlist_empty(&cookie->backing_objects)) goto nobufs_unlock; object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); ASSERT(test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)); - __fscache_use_cookie(cookie); atomic_inc(&object->n_reads); __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); @@ -483,11 +475,9 @@ error: nobufs_unlock_dec: atomic_dec(&object->n_reads); - wake_cookie = __fscache_unuse_cookie(cookie); nobufs_unlock: spin_unlock(&cookie->lock); - if (wake_cookie) - __fscache_wake_unused_cookie(cookie); + atomic_dec(&cookie->n_active); kfree(op); nobufs: fscache_stat(&fscache_n_retrievals_nobufs); @@ -524,7 +514,6 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, { struct fscache_retrieval *op; struct fscache_object *object; - bool wake_cookie = false; int ret; _enter("%p,,%d,,,", cookie, *nr_pages); @@ -553,13 +542,11 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, spin_lock(&cookie->lock); - if (!fscache_cookie_enabled(cookie) || - hlist_empty(&cookie->backing_objects)) + if (hlist_empty(&cookie->backing_objects)) goto nobufs_unlock; object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); - __fscache_use_cookie(cookie); atomic_inc(&object->n_reads); __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); @@ -614,12 +601,10 @@ error: nobufs_unlock_dec: atomic_dec(&object->n_reads); - wake_cookie = __fscache_unuse_cookie(cookie); nobufs_unlock: spin_unlock(&cookie->lock); + atomic_dec(&cookie->n_active); kfree(op); - if (wake_cookie) - __fscache_wake_unused_cookie(cookie); nobufs: fscache_stat(&fscache_n_retrievals_nobufs); _leave(" = -ENOBUFS"); @@ -641,7 +626,6 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, { struct fscache_retrieval *op; struct fscache_object *object; - bool wake_cookie = false; int ret; _enter("%p,%p,,,", cookie, page); @@ -669,15 +653,13 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, spin_lock(&cookie->lock); - if (!fscache_cookie_enabled(cookie) || - hlist_empty(&cookie->backing_objects)) + if (hlist_empty(&cookie->backing_objects)) goto nobufs_unlock; object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); - __fscache_use_cookie(cookie); if (fscache_submit_op(object, &op->op) < 0) - goto nobufs_unlock_dec; + goto nobufs_unlock; spin_unlock(&cookie->lock); fscache_stat(&fscache_n_alloc_ops); @@ -707,13 +689,10 @@ error: _leave(" = %d", ret); return ret; -nobufs_unlock_dec: - wake_cookie = __fscache_unuse_cookie(cookie); nobufs_unlock: spin_unlock(&cookie->lock); + atomic_dec(&cookie->n_active); kfree(op); - if (wake_cookie) - __fscache_wake_unused_cookie(cookie); nobufs: fscache_stat(&fscache_n_allocs_nobufs); _leave(" = -ENOBUFS"); @@ -910,7 +889,6 @@ int __fscache_write_page(struct fscache_cookie *cookie, { struct fscache_storage *op; struct fscache_object *object; - bool wake_cookie = false; int ret; _enter("%p,%x,", cookie, (u32) page->flags); @@ -942,8 +920,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, ret = -ENOBUFS; spin_lock(&cookie->lock); - if (!fscache_cookie_enabled(cookie) || - hlist_empty(&cookie->backing_objects)) + if (hlist_empty(&cookie->backing_objects)) goto nobufs; object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); @@ -980,7 +957,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, op->op.debug_id = atomic_inc_return(&fscache_op_debug_id); op->store_limit = object->store_limit; - __fscache_use_cookie(cookie); + atomic_inc(&cookie->n_active); if (fscache_submit_op(object, &op->op) < 0) goto submit_failed; @@ -1007,10 +984,10 @@ already_pending: return 0; submit_failed: + atomic_dec(&cookie->n_active); spin_lock(&cookie->stores_lock); radix_tree_delete(&cookie->stores, page->index); spin_unlock(&cookie->stores_lock); - wake_cookie = __fscache_unuse_cookie(cookie); page_cache_release(page); ret = -ENOBUFS; goto nobufs; @@ -1022,8 +999,6 @@ nobufs: spin_unlock(&cookie->lock); radix_tree_preload_end(); kfree(op); - if (wake_cookie) - __fscache_wake_unused_cookie(cookie); fscache_stat(&fscache_n_stores_nobufs); _leave(" = -ENOBUFS"); return -ENOBUFS; diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index b96a49b..adbfd66 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -473,7 +473,7 @@ err: static void cuse_fc_release(struct fuse_conn *fc) { struct cuse_conn *cc = fc_to_cc(fc); - kfree_rcu(cc, fc.rcu); + kfree(cc); } /** @@ -589,14 +589,11 @@ static struct attribute *cuse_class_dev_attrs[] = { ATTRIBUTE_GROUPS(cuse_class_dev); static struct miscdevice cuse_miscdev = { - .minor = CUSE_MINOR, + .minor = MISC_DYNAMIC_MINOR, .name = "cuse", .fops = &cuse_channel_fops, }; -MODULE_ALIAS_MISCDEV(CUSE_MINOR); -MODULE_ALIAS("devname:cuse"); - static int __init cuse_init(void) { int i, rc; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c3eb2c4..b7989f2 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -342,6 +342,24 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, return err; } +static struct dentry *fuse_materialise_dentry(struct dentry *dentry, + struct inode *inode) +{ + struct dentry *newent; + + if (inode && S_ISDIR(inode->i_mode)) { + struct fuse_conn *fc = get_fuse_conn(inode); + + mutex_lock(&fc->inst_mutex); + newent = d_materialise_unique(dentry, inode); + mutex_unlock(&fc->inst_mutex); + } else { + newent = d_materialise_unique(dentry, inode); + } + + return newent; +} + static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, unsigned int flags) { @@ -364,7 +382,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, if (inode && get_node_id(inode) == FUSE_ROOT_ID) goto out_iput; - newent = d_materialise_unique(entry, inode); + newent = fuse_materialise_dentry(entry, inode); err = PTR_ERR(newent); if (IS_ERR(newent)) goto out_err; @@ -583,9 +601,21 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, } kfree(forget); - err = d_instantiate_no_diralias(entry, inode); - if (err) - return err; + if (S_ISDIR(inode->i_mode)) { + struct dentry *alias; + mutex_lock(&fc->inst_mutex); + alias = d_find_alias(inode); + if (alias) { + /* New directory must have moved since mkdir */ + mutex_unlock(&fc->inst_mutex); + dput(alias); + iput(inode); + return -EBUSY; + } + d_instantiate(entry, inode); + mutex_unlock(&fc->inst_mutex); + } else + d_instantiate(entry, inode); fuse_change_entry_timeout(entry, &outarg); fuse_invalidate_attr(dir); @@ -1254,7 +1284,7 @@ static int fuse_direntplus_link(struct file *file, if (!inode) goto out; - alias = d_materialise_unique(dentry, inode); + alias = fuse_materialise_dentry(dentry, inode); err = PTR_ERR(alias); if (IS_ERR(alias)) goto out; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 7e70506..4598345 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -334,8 +334,7 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) BUG_ON(req->inode != inode); curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; - if (curr_index <= index && - index < curr_index + req->num_pages) { + if (curr_index == index) { found = true; break; } @@ -1410,13 +1409,8 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) { - int i; - - for (i = 0; i < req->num_pages; i++) - __free_page(req->pages[i]); - - if (req->ff) - fuse_file_put(req->ff, false); + __free_page(req->pages[0]); + fuse_file_put(req->ff, false); } static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) @@ -1424,34 +1418,30 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) struct inode *inode = req->inode; struct fuse_inode *fi = get_fuse_inode(inode); struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; - int i; list_del(&req->writepages_entry); - for (i = 0; i < req->num_pages; i++) { - dec_bdi_stat(bdi, BDI_WRITEBACK); - dec_zone_page_state(req->pages[i], NR_WRITEBACK_TEMP); - bdi_writeout_inc(bdi); - } + dec_bdi_stat(bdi, BDI_WRITEBACK); + dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP); + bdi_writeout_inc(bdi); wake_up(&fi->page_waitq); } /* Called under fc->lock, may release and reacquire it */ -static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req, - loff_t size) +static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) __releases(fc->lock) __acquires(fc->lock) { struct fuse_inode *fi = get_fuse_inode(req->inode); + loff_t size = i_size_read(req->inode); struct fuse_write_in *inarg = &req->misc.write.in; - __u64 data_size = req->num_pages * PAGE_CACHE_SIZE; if (!fc->connected) goto out_free; - if (inarg->offset + data_size <= size) { - inarg->size = data_size; + if (inarg->offset + PAGE_CACHE_SIZE <= size) { + inarg->size = PAGE_CACHE_SIZE; } else if (inarg->offset < size) { - inarg->size = size - inarg->offset; + inarg->size = size & (PAGE_CACHE_SIZE - 1); } else { /* Got truncated off completely */ goto out_free; @@ -1482,13 +1472,12 @@ __acquires(fc->lock) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); - size_t crop = i_size_read(inode); struct fuse_req *req; while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { req = list_entry(fi->queued_writes.next, struct fuse_req, list); list_del_init(&req->list); - fuse_send_writepage(fc, req, crop); + fuse_send_writepage(fc, req); } } @@ -1499,62 +1488,12 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) mapping_set_error(inode->i_mapping, req->out.h.error); spin_lock(&fc->lock); - while (req->misc.write.next) { - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_write_in *inarg = &req->misc.write.in; - struct fuse_req *next = req->misc.write.next; - req->misc.write.next = next->misc.write.next; - next->misc.write.next = NULL; - next->ff = fuse_file_get(req->ff); - list_add(&next->writepages_entry, &fi->writepages); - - /* - * Skip fuse_flush_writepages() to make it easy to crop requests - * based on primary request size. - * - * 1st case (trivial): there are no concurrent activities using - * fuse_set/release_nowrite. Then we're on safe side because - * fuse_flush_writepages() would call fuse_send_writepage() - * anyway. - * - * 2nd case: someone called fuse_set_nowrite and it is waiting - * now for completion of all in-flight requests. This happens - * rarely and no more than once per page, so this should be - * okay. - * - * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle - * of fuse_set_nowrite..fuse_release_nowrite section. The fact - * that fuse_set_nowrite returned implies that all in-flight - * requests were completed along with all of their secondary - * requests. Further primary requests are blocked by negative - * writectr. Hence there cannot be any in-flight requests and - * no invocations of fuse_writepage_end() while we're in - * fuse_set_nowrite..fuse_release_nowrite section. - */ - fuse_send_writepage(fc, next, inarg->offset + inarg->size); - } fi->writectr--; fuse_writepage_finish(fc, req); spin_unlock(&fc->lock); fuse_writepage_free(fc, req); } -static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc, - struct fuse_inode *fi) -{ - struct fuse_file *ff = NULL; - - spin_lock(&fc->lock); - if (!WARN_ON(list_empty(&fi->write_files))) { - ff = list_entry(fi->write_files.next, struct fuse_file, - write_entry); - fuse_file_get(ff); - } - spin_unlock(&fc->lock); - - return ff; -} - static int fuse_writepage_locked(struct page *page) { struct address_space *mapping = page->mapping; @@ -1562,8 +1501,8 @@ static int fuse_writepage_locked(struct page *page) struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_req *req; + struct fuse_file *ff; struct page *tmp_page; - int error = -ENOMEM; set_page_writeback(page); @@ -1576,16 +1515,16 @@ static int fuse_writepage_locked(struct page *page) if (!tmp_page) goto err_free; - error = -EIO; - req->ff = fuse_write_file_get(fc, fi); - if (!req->ff) - goto err_free; + spin_lock(&fc->lock); + BUG_ON(list_empty(&fi->write_files)); + ff = list_entry(fi->write_files.next, struct fuse_file, write_entry); + req->ff = fuse_file_get(ff); + spin_unlock(&fc->lock); - fuse_write_fill(req, req->ff, page_offset(page), 0); + fuse_write_fill(req, ff, page_offset(page), 0); copy_highpage(tmp_page, page); req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; - req->misc.write.next = NULL; req->in.argpages = 1; req->num_pages = 1; req->pages[0] = tmp_page; @@ -1611,263 +1550,19 @@ err_free: fuse_request_free(req); err: end_page_writeback(page); - return error; + return -ENOMEM; } static int fuse_writepage(struct page *page, struct writeback_control *wbc) { int err; - if (fuse_page_is_writeback(page->mapping->host, page->index)) { - /* - * ->writepages() should be called for sync() and friends. We - * should only get here on direct reclaim and then we are - * allowed to skip a page which is already in flight - */ - WARN_ON(wbc->sync_mode == WB_SYNC_ALL); - - redirty_page_for_writepage(wbc, page); - return 0; - } - err = fuse_writepage_locked(page); unlock_page(page); return err; } -struct fuse_fill_wb_data { - struct fuse_req *req; - struct fuse_file *ff; - struct inode *inode; - struct page **orig_pages; -}; - -static void fuse_writepages_send(struct fuse_fill_wb_data *data) -{ - struct fuse_req *req = data->req; - struct inode *inode = data->inode; - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_inode *fi = get_fuse_inode(inode); - int num_pages = req->num_pages; - int i; - - req->ff = fuse_file_get(data->ff); - spin_lock(&fc->lock); - list_add_tail(&req->list, &fi->queued_writes); - fuse_flush_writepages(inode); - spin_unlock(&fc->lock); - - for (i = 0; i < num_pages; i++) - end_page_writeback(data->orig_pages[i]); -} - -static bool fuse_writepage_in_flight(struct fuse_req *new_req, - struct page *page) -{ - struct fuse_conn *fc = get_fuse_conn(new_req->inode); - struct fuse_inode *fi = get_fuse_inode(new_req->inode); - struct fuse_req *tmp; - struct fuse_req *old_req; - bool found = false; - pgoff_t curr_index; - - BUG_ON(new_req->num_pages != 0); - - spin_lock(&fc->lock); - list_del(&new_req->writepages_entry); - list_for_each_entry(old_req, &fi->writepages, writepages_entry) { - BUG_ON(old_req->inode != new_req->inode); - curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT; - if (curr_index <= page->index && - page->index < curr_index + old_req->num_pages) { - found = true; - break; - } - } - if (!found) { - list_add(&new_req->writepages_entry, &fi->writepages); - goto out_unlock; - } - - new_req->num_pages = 1; - for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) { - BUG_ON(tmp->inode != new_req->inode); - curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT; - if (tmp->num_pages == 1 && - curr_index == page->index) { - old_req = tmp; - } - } - - if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT || - old_req->state == FUSE_REQ_PENDING)) { - struct backing_dev_info *bdi = page->mapping->backing_dev_info; - - copy_highpage(old_req->pages[0], page); - spin_unlock(&fc->lock); - - dec_bdi_stat(bdi, BDI_WRITEBACK); - dec_zone_page_state(page, NR_WRITEBACK_TEMP); - bdi_writeout_inc(bdi); - fuse_writepage_free(fc, new_req); - fuse_request_free(new_req); - goto out; - } else { - new_req->misc.write.next = old_req->misc.write.next; - old_req->misc.write.next = new_req; - } -out_unlock: - spin_unlock(&fc->lock); -out: - return found; -} - -static int fuse_writepages_fill(struct page *page, - struct writeback_control *wbc, void *_data) -{ - struct fuse_fill_wb_data *data = _data; - struct fuse_req *req = data->req; - struct inode *inode = data->inode; - struct fuse_conn *fc = get_fuse_conn(inode); - struct page *tmp_page; - bool is_writeback; - int err; - - if (!data->ff) { - err = -EIO; - data->ff = fuse_write_file_get(fc, get_fuse_inode(inode)); - if (!data->ff) - goto out_unlock; - } - - /* - * Being under writeback is unlikely but possible. For example direct - * read to an mmaped fuse file will set the page dirty twice; once when - * the pages are faulted with get_user_pages(), and then after the read - * completed. - */ - is_writeback = fuse_page_is_writeback(inode, page->index); - - if (req && req->num_pages && - (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ || - (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write || - data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) { - fuse_writepages_send(data); - data->req = NULL; - } - err = -ENOMEM; - tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); - if (!tmp_page) - goto out_unlock; - - /* - * The page must not be redirtied until the writeout is completed - * (i.e. userspace has sent a reply to the write request). Otherwise - * there could be more than one temporary page instance for each real - * page. - * - * This is ensured by holding the page lock in page_mkwrite() while - * checking fuse_page_is_writeback(). We already hold the page lock - * since clear_page_dirty_for_io() and keep it held until we add the - * request to the fi->writepages list and increment req->num_pages. - * After this fuse_page_is_writeback() will indicate that the page is - * under writeback, so we can release the page lock. - */ - if (data->req == NULL) { - struct fuse_inode *fi = get_fuse_inode(inode); - - err = -ENOMEM; - req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ); - if (!req) { - __free_page(tmp_page); - goto out_unlock; - } - - fuse_write_fill(req, data->ff, page_offset(page), 0); - req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; - req->misc.write.next = NULL; - req->in.argpages = 1; - req->background = 1; - req->num_pages = 0; - req->end = fuse_writepage_end; - req->inode = inode; - - spin_lock(&fc->lock); - list_add(&req->writepages_entry, &fi->writepages); - spin_unlock(&fc->lock); - - data->req = req; - } - set_page_writeback(page); - - copy_highpage(tmp_page, page); - req->pages[req->num_pages] = tmp_page; - req->page_descs[req->num_pages].offset = 0; - req->page_descs[req->num_pages].length = PAGE_SIZE; - - inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK); - inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); - - err = 0; - if (is_writeback && fuse_writepage_in_flight(req, page)) { - end_page_writeback(page); - data->req = NULL; - goto out_unlock; - } - data->orig_pages[req->num_pages] = page; - - /* - * Protected by fc->lock against concurrent access by - * fuse_page_is_writeback(). - */ - spin_lock(&fc->lock); - req->num_pages++; - spin_unlock(&fc->lock); - -out_unlock: - unlock_page(page); - - return err; -} - -static int fuse_writepages(struct address_space *mapping, - struct writeback_control *wbc) -{ - struct inode *inode = mapping->host; - struct fuse_fill_wb_data data; - int err; - - err = -EIO; - if (is_bad_inode(inode)) - goto out; - - data.inode = inode; - data.req = NULL; - data.ff = NULL; - - err = -ENOMEM; - data.orig_pages = kzalloc(sizeof(struct page *) * - FUSE_MAX_PAGES_PER_REQ, - GFP_NOFS); - if (!data.orig_pages) - goto out; - - err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data); - if (data.req) { - /* Ignore errors if we can write at least one page */ - BUG_ON(!data.req->num_pages); - fuse_writepages_send(&data); - err = 0; - } - if (data.ff) - fuse_file_put(data.ff, false); - - kfree(data.orig_pages); -out: - return err; -} - static int fuse_launder_page(struct page *page) { int err = 0; @@ -1907,17 +1602,14 @@ static void fuse_vma_close(struct vm_area_struct *vma) static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = file_inode(vma->vm_file); - - file_update_time(vma->vm_file); - lock_page(page); - if (page->mapping != inode->i_mapping) { - unlock_page(page); - return VM_FAULT_NOPAGE; - } + /* + * Don't use page->mapping as it may become NULL from a + * concurrent truncate. + */ + struct inode *inode = vma->vm_file->f_mapping->host; fuse_wait_on_page_writeback(inode, page->index); - return VM_FAULT_LOCKED; + return 0; } static const struct vm_operations_struct fuse_file_vm_ops = { @@ -2889,7 +2581,6 @@ static const struct file_operations fuse_direct_io_file_operations = { static const struct address_space_operations fuse_file_aops = { .readpage = fuse_readpage, .writepage = fuse_writepage, - .writepages = fuse_writepages, .launder_page = fuse_launder_page, .readpages = fuse_readpages, .set_page_dirty = __set_page_dirty_nobuffers, diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 7d27309..5b9e6f3 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -321,7 +321,6 @@ struct fuse_req { struct { struct fuse_write_in in; struct fuse_write_out out; - struct fuse_req *next; } write; struct fuse_notify_retrieve_in retrieve_in; struct fuse_lk_in lk_in; @@ -375,11 +374,12 @@ struct fuse_conn { /** Lock protecting accessess to members of this structure */ spinlock_t lock; + /** Mutex protecting against directory alias creation */ + struct mutex inst_mutex; + /** Refcount */ atomic_t count; - struct rcu_head rcu; - /** The user id for this mount */ kuid_t user_id; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d468643..a8ce6da 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -565,6 +565,7 @@ void fuse_conn_init(struct fuse_conn *fc) { memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); + mutex_init(&fc->inst_mutex); init_rwsem(&fc->killsb); atomic_set(&fc->count, 1); init_waitqueue_head(&fc->waitq); @@ -595,6 +596,7 @@ void fuse_conn_put(struct fuse_conn *fc) if (atomic_dec_and_test(&fc->count)) { if (fc->destroy_req) fuse_request_free(fc->destroy_req); + mutex_destroy(&fc->inst_mutex); fc->release(fc); } } @@ -918,7 +920,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) static void fuse_free_conn(struct fuse_conn *fc) { - kfree_rcu(fc, rcu); + kfree(fc); } static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index b7fc035..1f7d805 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -611,14 +611,12 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); if (alloc_required) { - struct gfs2_alloc_parms ap = { .aflags = 0, }; error = gfs2_quota_lock_check(ip); if (error) goto out_unlock; requested = data_blocks + ind_blocks; - ap.target = requested; - error = gfs2_inplace_reserve(ip, &ap); + error = gfs2_inplace_reserve(ip, requested, 0); if (error) goto out_qunlock; } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index fe0500c..62a65fc 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1216,7 +1216,6 @@ static int do_grow(struct inode *inode, u64 size) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - struct gfs2_alloc_parms ap = { .target = 1, }; struct buffer_head *dibh; int error; int unstuff = 0; @@ -1227,7 +1226,7 @@ static int do_grow(struct inode *inode, u64 size) if (error) return error; - error = gfs2_inplace_reserve(ip, &ap); + error = gfs2_inplace_reserve(ip, 1, 0); if (error) goto do_grow_qunlock; unstuff = 1; @@ -1280,7 +1279,6 @@ do_grow_qunlock: int gfs2_setattr_size(struct inode *inode, u64 newsize) { - struct gfs2_inode *ip = GFS2_I(inode); int ret; u64 oldsize; @@ -1296,7 +1294,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) inode_dio_wait(inode); - ret = gfs2_rs_alloc(ip); + ret = gfs2_rs_alloc(GFS2_I(inode)); if (ret) goto out; @@ -1306,7 +1304,6 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) goto out; } - gfs2_rs_deltree(ip->i_res); ret = do_shrink(inode, oldsize, newsize); out: put_write_access(inode); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index efc078f..0621b46 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -383,7 +383,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = file_inode(vma->vm_file); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned long last_index; u64 pos = page->index << PAGE_CACHE_SHIFT; unsigned int data_blocks, ind_blocks, rblocks; @@ -431,8 +430,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) goto out_unlock; gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); - ap.target = data_blocks + ind_blocks; - ret = gfs2_inplace_reserve(ip, &ap); + ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); if (ret) goto out_quota_unlock; @@ -622,7 +620,7 @@ static int gfs2_release(struct inode *inode, struct file *file) if (!(file->f_mode & FMODE_WRITE)) return 0; - gfs2_rs_delete(ip, &inode->i_writecount); + gfs2_rs_delete(ip); return 0; } @@ -802,7 +800,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, struct inode *inode = file_inode(file); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned int data_blocks = 0, ind_blocks = 0, rblocks; loff_t bytes, max_bytes; int error; @@ -853,8 +850,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, retry: gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); - ap.target = data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip, &ap); + error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); if (error) { if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { bytes >>= 1; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c8420f7..c2f41b4 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -31,7 +31,6 @@ #include <linux/bit_spinlock.h> #include <linux/percpu.h> #include <linux/list_sort.h> -#include <linux/lockref.h> #include "gfs2.h" #include "incore.h" @@ -130,10 +129,10 @@ void gfs2_glock_free(struct gfs2_glock *gl) * */ -static void gfs2_glock_hold(struct gfs2_glock *gl) +void gfs2_glock_hold(struct gfs2_glock *gl) { - GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); - lockref_get(&gl->gl_lockref); + GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0); + atomic_inc(&gl->gl_ref); } /** @@ -188,6 +187,20 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) } /** + * gfs2_glock_put_nolock() - Decrement reference count on glock + * @gl: The glock to put + * + * This function should only be used if the caller has its own reference + * to the glock, in addition to the one it is dropping. + */ + +void gfs2_glock_put_nolock(struct gfs2_glock *gl) +{ + if (atomic_dec_and_test(&gl->gl_ref)) + GLOCK_BUG_ON(gl, 1); +} + +/** * gfs2_glock_put() - Decrement reference count on glock * @gl: The glock to put * @@ -198,22 +211,17 @@ void gfs2_glock_put(struct gfs2_glock *gl) struct gfs2_sbd *sdp = gl->gl_sbd; struct address_space *mapping = gfs2_glock2aspace(gl); - if (lockref_put_or_lock(&gl->gl_lockref)) - return; - - lockref_mark_dead(&gl->gl_lockref); - - spin_lock(&lru_lock); - __gfs2_glock_remove_from_lru(gl); - spin_unlock(&lru_lock); - spin_unlock(&gl->gl_lockref.lock); - spin_lock_bucket(gl->gl_hash); - hlist_bl_del_rcu(&gl->gl_list); - spin_unlock_bucket(gl->gl_hash); - GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); - GLOCK_BUG_ON(gl, mapping && mapping->nrpages); - trace_gfs2_glock_put(gl); - sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); + if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) { + __gfs2_glock_remove_from_lru(gl); + spin_unlock(&lru_lock); + spin_lock_bucket(gl->gl_hash); + hlist_bl_del_rcu(&gl->gl_list); + spin_unlock_bucket(gl->gl_hash); + GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); + GLOCK_BUG_ON(gl, mapping && mapping->nrpages); + trace_gfs2_glock_put(gl); + sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); + } } /** @@ -236,7 +244,7 @@ static struct gfs2_glock *search_bucket(unsigned int hash, continue; if (gl->gl_sbd != sdp) continue; - if (lockref_get_not_dead(&gl->gl_lockref)) + if (atomic_inc_not_zero(&gl->gl_ref)) return gl; } @@ -388,11 +396,10 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) held2 = (new_state != LM_ST_UNLOCKED); if (held1 != held2) { - GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); if (held2) - gl->gl_lockref.count++; + gfs2_glock_hold(gl); else - gl->gl_lockref.count--; + gfs2_glock_put_nolock(gl); } if (held1 && held2 && list_empty(&gl->gl_holders)) clear_bit(GLF_QUEUED, &gl->gl_flags); @@ -619,9 +626,9 @@ out: out_sched: clear_bit(GLF_LOCK, &gl->gl_flags); smp_mb__after_clear_bit(); - gl->gl_lockref.count++; + gfs2_glock_hold(gl); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) - gl->gl_lockref.count--; + gfs2_glock_put_nolock(gl); return; out_unlock: @@ -747,7 +754,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_sbd = sdp; gl->gl_flags = 0; gl->gl_name = name; - gl->gl_lockref.count = 1; + atomic_set(&gl->gl_ref, 1); gl->gl_state = LM_ST_UNLOCKED; gl->gl_target = LM_ST_UNLOCKED; gl->gl_demote_state = LM_ST_EXCLUSIVE; @@ -1349,10 +1356,10 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) } } - gl->gl_lockref.count++; - set_bit(GLF_REPLY_PENDING, &gl->gl_flags); spin_unlock(&gl->gl_spin); - + set_bit(GLF_REPLY_PENDING, &gl->gl_flags); + smp_wmb(); + gfs2_glock_hold(gl); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); } @@ -1397,19 +1404,15 @@ __acquires(&lru_lock) while(!list_empty(list)) { gl = list_entry(list->next, struct gfs2_glock, gl_lru); list_del_init(&gl->gl_lru); - if (!spin_trylock(&gl->gl_spin)) { - list_add(&gl->gl_lru, &lru_list); - atomic_inc(&lru_count); - continue; - } clear_bit(GLF_LRU, &gl->gl_flags); + gfs2_glock_hold(gl); spin_unlock(&lru_lock); - gl->gl_lockref.count++; + spin_lock(&gl->gl_spin); if (demote_ok(gl)) handle_callback(gl, LM_ST_UNLOCKED, 0, false); WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags)); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) - gl->gl_lockref.count--; + gfs2_glock_put_nolock(gl); spin_unlock(&gl->gl_spin); spin_lock(&lru_lock); } @@ -1490,7 +1493,7 @@ static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp, rcu_read_lock(); hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) { - if ((gl->gl_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref)) + if ((gl->gl_sbd == sdp) && atomic_inc_not_zero(&gl->gl_ref)) examiner(gl); } rcu_read_unlock(); @@ -1743,7 +1746,7 @@ int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) state2str(gl->gl_demote_state), dtime, atomic_read(&gl->gl_ail_count), atomic_read(&gl->gl_revokes), - (int)gl->gl_lockref.count, gl->gl_hold_time); + atomic_read(&gl->gl_ref), gl->gl_hold_time); list_for_each_entry(gh, &gl->gl_holders, gh_list) { error = dump_holder(seq, gh); @@ -1899,8 +1902,7 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) gi->nhash = 0; } /* Skip entries for other sb and dead entries */ - } while (gi->sdp != gi->gl->gl_sbd || - __lockref_is_dead(&gi->gl->gl_lockref)); + } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0); return 0; } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 6647d77..69f66e3 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -181,6 +181,8 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp); +extern void gfs2_glock_hold(struct gfs2_glock *gl); +extern void gfs2_glock_put_nolock(struct gfs2_glock *gl); extern void gfs2_glock_put(struct gfs2_glock *gl); extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, struct gfs2_holder *gh); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index db908f6..e2e0a90 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -525,9 +525,9 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) if (gl->gl_demote_state == LM_ST_UNLOCKED && gl->gl_state == LM_ST_SHARED && ip) { - gl->gl_lockref.count++; + gfs2_glock_hold(gl); if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) - gl->gl_lockref.count--; + gfs2_glock_put_nolock(gl); } } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index ba1ea67..26aabd7 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -21,7 +21,6 @@ #include <linux/rbtree.h> #include <linux/ktime.h> #include <linux/percpu.h> -#include <linux/lockref.h> #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 @@ -72,7 +71,6 @@ struct gfs2_bitmap { u32 bi_offset; u32 bi_start; u32 bi_len; - u32 bi_blocks; }; struct gfs2_rgrpd { @@ -103,25 +101,19 @@ struct gfs2_rgrpd { struct gfs2_rbm { struct gfs2_rgrpd *rgd; + struct gfs2_bitmap *bi; /* Bitmap must belong to the rgd */ u32 offset; /* The offset is bitmap relative */ - int bii; /* Bitmap index */ }; -static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm) -{ - return rbm->rgd->rd_bits + rbm->bii; -} - static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) { - return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) + - rbm->offset; + return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset; } static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1, const struct gfs2_rbm *rbm2) { - return (rbm1->rgd == rbm2->rgd) && (rbm1->bii == rbm2->bii) && + return (rbm1->rgd == rbm2->rgd) && (rbm1->bi == rbm2->bi) && (rbm1->offset == rbm2->offset); } @@ -286,20 +278,6 @@ struct gfs2_blkreserv { unsigned int rs_qa_qd_num; }; -/* - * Allocation parameters - * @target: The number of blocks we'd ideally like to allocate - * @aflags: The flags (e.g. Orlov flag) - * - * The intent is to gradually expand this structure over time in - * order to give more information, e.g. alignment, min extent size - * to the allocation code. - */ -struct gfs2_alloc_parms { - u32 target; - u32 aflags; -}; - enum { GLF_LOCK = 1, GLF_DEMOTE = 3, @@ -322,9 +300,9 @@ struct gfs2_glock { struct gfs2_sbd *gl_sbd; unsigned long gl_flags; /* GLF_... */ struct lm_lockname gl_name; + atomic_t gl_ref; - struct lockref gl_lockref; -#define gl_spin gl_lockref.lock + spinlock_t gl_spin; /* State fields protected by gl_spin */ unsigned int gl_state:2, /* Current state */ @@ -420,10 +398,11 @@ enum { struct gfs2_quota_data { struct list_head qd_list; - struct kqid qd_id; - struct lockref qd_lockref; - struct list_head qd_lru; + struct list_head qd_reclaim; + atomic_t qd_count; + + struct kqid qd_id; unsigned long qd_flags; /* QDF_... */ s64 qd_change; @@ -537,6 +516,7 @@ struct gfs2_tune { unsigned int gt_logd_secs; + unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */ unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */ unsigned int gt_quota_scale_num; /* Numerator */ unsigned int gt_quota_scale_den; /* Denominator */ @@ -714,7 +694,6 @@ struct gfs2_sbd { struct list_head sd_quota_list; atomic_t sd_quota_count; struct mutex sd_quota_mutex; - struct mutex sd_quota_sync_mutex; wait_queue_head_t sd_quota_wait; struct list_head sd_trunc_list; spinlock_t sd_trunc_lock; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 7119504..ced3257 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -379,7 +379,6 @@ static void munge_mode_uid_gid(const struct gfs2_inode *dip, static int alloc_dinode(struct gfs2_inode *ip, u32 flags) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc_parms ap = { .target = RES_DINODE, .aflags = flags, }; int error; int dblocks = 1; @@ -387,7 +386,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags) if (error) goto out; - error = gfs2_inplace_reserve(ip, &ap); + error = gfs2_inplace_reserve(ip, RES_DINODE, flags); if (error) goto out_quota; @@ -473,7 +472,6 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, struct gfs2_inode *ip, int arq) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_alloc_parms ap = { .target = sdp->sd_max_dirres, }; int error; if (arq) { @@ -481,7 +479,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, if (error) goto fail_quota_locks; - error = gfs2_inplace_reserve(dip, &ap); + error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); if (error) goto fail_quota_locks; @@ -586,17 +584,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!IS_ERR(inode)) { d = d_splice_alias(inode, dentry); error = 0; - if (file) { - if (S_ISREG(inode->i_mode)) { - WARN_ON(d != NULL); - error = finish_open(file, dentry, gfs2_open_common, opened); - } else { + if (file && !IS_ERR(d)) { + if (d == NULL) + d = dentry; + if (S_ISREG(inode->i_mode)) + error = finish_open(file, d, gfs2_open_common, opened); + else error = finish_no_open(file, d); - } - } else { - dput(d); } gfs2_glock_dq_uninit(ghs); + if (IS_ERR(d)) + return PTR_ERR(d); return error; } else if (error != -ENOENT) { goto fail_gunlock; @@ -715,7 +713,7 @@ fail_gunlock2: fail_free_inode: if (ip->i_gl) gfs2_glock_put(ip->i_gl); - gfs2_rs_delete(ip, NULL); + gfs2_rs_delete(ip); free_inode_nonrcu(inode); inode = NULL; fail_gunlock: @@ -783,10 +781,8 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, error = finish_open(file, dentry, gfs2_open_common, opened); gfs2_glock_dq_uninit(&gh); - if (error) { - dput(d); + if (error) return ERR_PTR(error); - } return d; } @@ -878,12 +874,11 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, error = 0; if (alloc_required) { - struct gfs2_alloc_parms ap = { .target = sdp->sd_max_dirres, }; error = gfs2_quota_lock_check(dip); if (error) goto out_gunlock; - error = gfs2_inplace_reserve(dip, &ap); + error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); if (error) goto out_gunlock_q; @@ -1168,19 +1163,14 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, d = __gfs2_lookup(dir, dentry, file, opened); if (IS_ERR(d)) return PTR_ERR(d); - if (d != NULL) - dentry = d; - if (dentry->d_inode) { - if (!(*opened & FILE_OPENED)) { - if (d == NULL) - dget(dentry); - return finish_no_open(file, dentry); - } - dput(d); + if (d == NULL) + d = dentry; + if (d->d_inode) { + if (!(*opened & FILE_OPENED)) + return finish_no_open(file, d); return 0; } - BUG_ON(d != NULL); if (!(flags & O_CREAT)) return -ENOENT; @@ -1395,12 +1385,11 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_gunlock; if (alloc_required) { - struct gfs2_alloc_parms ap = { .target = sdp->sd_max_dirres, }; error = gfs2_quota_lock_check(ndip); if (error) goto out_gunlock; - error = gfs2_inplace_reserve(ndip, &ap); + error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres, 0); if (error) goto out_gunlock_q; @@ -1517,6 +1506,13 @@ out: return NULL; } +static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p) +{ + char *s = nd_get_link(nd); + if (!IS_ERR(s)) + kfree(s); +} + /** * gfs2_permission - * @inode: The inode @@ -1868,7 +1864,7 @@ const struct inode_operations gfs2_dir_iops = { const struct inode_operations gfs2_symlink_iops = { .readlink = generic_readlink, .follow_link = gfs2_follow_link, - .put_link = kfree_put_link, + .put_link = gfs2_put_link, .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 2a6ba06..c8423d6 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -466,19 +466,19 @@ static void gdlm_cancel(struct gfs2_glock *gl) static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen, char *lvb_bits) { - __le32 gen; + uint32_t gen; memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE); - memcpy(&gen, lvb_bits, sizeof(__le32)); + memcpy(&gen, lvb_bits, sizeof(uint32_t)); *lvb_gen = le32_to_cpu(gen); } static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, char *lvb_bits) { - __le32 gen; + uint32_t gen; memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE); gen = cpu_to_le32(lvb_gen); - memcpy(ls->ls_control_lvb, &gen, sizeof(__le32)); + memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t)); } static int all_jid_bits_clear(char *lvb) diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 0650db2..351586e 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -31,6 +31,12 @@ struct workqueue_struct *gfs2_control_wq; +static struct shrinker qd_shrinker = { + .count_objects = gfs2_qd_shrink_count, + .scan_objects = gfs2_qd_shrink_scan, + .seeks = DEFAULT_SEEKS, +}; + static void gfs2_init_inode_once(void *foo) { struct gfs2_inode *ip = foo; @@ -81,10 +87,6 @@ static int __init init_gfs2_fs(void) if (error) return error; - error = list_lru_init(&gfs2_qd_lru); - if (error) - goto fail_lru; - error = gfs2_glock_init(); if (error) goto fail; @@ -137,7 +139,7 @@ static int __init init_gfs2_fs(void) if (!gfs2_rsrv_cachep) goto fail; - register_shrinker(&gfs2_qd_shrinker); + register_shrinker(&qd_shrinker); error = register_filesystem(&gfs2_fs_type); if (error) @@ -177,9 +179,7 @@ fail_wq: fail_unregister: unregister_filesystem(&gfs2_fs_type); fail: - list_lru_destroy(&gfs2_qd_lru); -fail_lru: - unregister_shrinker(&gfs2_qd_shrinker); + unregister_shrinker(&qd_shrinker); gfs2_glock_exit(); if (gfs2_rsrv_cachep) @@ -214,14 +214,13 @@ fail_lru: static void __exit exit_gfs2_fs(void) { - unregister_shrinker(&gfs2_qd_shrinker); + unregister_shrinker(&qd_shrinker); gfs2_glock_exit(); gfs2_unregister_debugfs(); unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); destroy_workqueue(gfs_recovery_wq); destroy_workqueue(gfs2_control_wq); - list_lru_destroy(&gfs2_qd_lru); rcu_barrier(); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 82303b4..19ff5e8 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -51,6 +51,7 @@ static void gfs2_tune_init(struct gfs2_tune *gt) { spin_lock_init(>->gt_spin); + gt->gt_quota_simul_sync = 64; gt->gt_quota_warn_period = 10; gt->gt_quota_scale_num = 1; gt->gt_quota_scale_den = 1; @@ -93,7 +94,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) INIT_LIST_HEAD(&sdp->sd_quota_list); mutex_init(&sdp->sd_quota_mutex); - mutex_init(&sdp->sd_quota_sync_mutex); init_waitqueue_head(&sdp->sd_quota_wait); INIT_LIST_HEAD(&sdp->sd_trunc_list); spin_lock_init(&sdp->sd_trunc_lock); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 98236d0..db44135 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -50,8 +50,6 @@ #include <linux/freezer.h> #include <linux/quota.h> #include <linux/dqblk_xfs.h> -#include <linux/lockref.h> -#include <linux/list_lru.h> #include "gfs2.h" #include "incore.h" @@ -73,25 +71,29 @@ struct gfs2_quota_change_host { struct kqid qc_id; }; -/* Lock order: qd_lock -> qd->lockref.lock -> lru lock */ -static DEFINE_SPINLOCK(qd_lock); -struct list_lru gfs2_qd_lru; +static LIST_HEAD(qd_lru_list); +static atomic_t qd_lru_count = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(qd_lru_lock); -static void gfs2_qd_dispose(struct list_head *list) +unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) { struct gfs2_quota_data *qd; struct gfs2_sbd *sdp; + int nr_to_scan = sc->nr_to_scan; + long freed = 0; - while (!list_empty(list)) { - qd = list_entry(list->next, struct gfs2_quota_data, qd_lru); - sdp = qd->qd_gl->gl_sbd; + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; - list_del(&qd->qd_lru); + spin_lock(&qd_lru_lock); + while (nr_to_scan && !list_empty(&qd_lru_list)) { + qd = list_entry(qd_lru_list.next, + struct gfs2_quota_data, qd_reclaim); + sdp = qd->qd_gl->gl_sbd; /* Free from the filesystem-specific list */ - spin_lock(&qd_lock); list_del(&qd->qd_list); - spin_unlock(&qd_lock); gfs2_assert_warn(sdp, !qd->qd_change); gfs2_assert_warn(sdp, !qd->qd_slot_count); @@ -101,59 +103,24 @@ static void gfs2_qd_dispose(struct list_head *list) atomic_dec(&sdp->sd_quota_count); /* Delete it from the common reclaim list */ + list_del_init(&qd->qd_reclaim); + atomic_dec(&qd_lru_count); + spin_unlock(&qd_lru_lock); kmem_cache_free(gfs2_quotad_cachep, qd); + spin_lock(&qd_lru_lock); + nr_to_scan--; + freed++; } -} - - -static enum lru_status gfs2_qd_isolate(struct list_head *item, spinlock_t *lock, void *arg) -{ - struct list_head *dispose = arg; - struct gfs2_quota_data *qd = list_entry(item, struct gfs2_quota_data, qd_lru); - - if (!spin_trylock(&qd->qd_lockref.lock)) - return LRU_SKIP; - - if (qd->qd_lockref.count == 0) { - lockref_mark_dead(&qd->qd_lockref); - list_move(&qd->qd_lru, dispose); - } - - spin_unlock(&qd->qd_lockref.lock); - return LRU_REMOVED; -} - -static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, - struct shrink_control *sc) -{ - LIST_HEAD(dispose); - unsigned long freed; - - if (!(sc->gfp_mask & __GFP_FS)) - return SHRINK_STOP; - - freed = list_lru_walk_node(&gfs2_qd_lru, sc->nid, gfs2_qd_isolate, - &dispose, &sc->nr_to_scan); - - gfs2_qd_dispose(&dispose); - + spin_unlock(&qd_lru_lock); return freed; } -static unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, - struct shrink_control *sc) +unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) { - return vfs_pressure_ratio(list_lru_count_node(&gfs2_qd_lru, sc->nid)); + return vfs_pressure_ratio(atomic_read(&qd_lru_count)); } -struct shrinker gfs2_qd_shrinker = { - .count_objects = gfs2_qd_shrink_count, - .scan_objects = gfs2_qd_shrink_scan, - .seeks = DEFAULT_SEEKS, - .flags = SHRINKER_NUMA_AWARE, -}; - - static u64 qd2index(struct gfs2_quota_data *qd) { struct kqid qid = qd->qd_id; @@ -181,11 +148,10 @@ static int qd_alloc(struct gfs2_sbd *sdp, struct kqid qid, if (!qd) return -ENOMEM; - qd->qd_lockref.count = 1; - spin_lock_init(&qd->qd_lockref.lock); + atomic_set(&qd->qd_count, 1); qd->qd_id = qid; qd->qd_slot = -1; - INIT_LIST_HEAD(&qd->qd_lru); + INIT_LIST_HEAD(&qd->qd_reclaim); error = gfs2_glock_get(sdp, qd2index(qd), &gfs2_quota_glops, CREATE, &qd->qd_gl); @@ -211,11 +177,16 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, for (;;) { found = 0; - spin_lock(&qd_lock); + spin_lock(&qd_lru_lock); list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { - if (qid_eq(qd->qd_id, qid) && - lockref_get_not_dead(&qd->qd_lockref)) { - list_lru_del(&gfs2_qd_lru, &qd->qd_lru); + if (qid_eq(qd->qd_id, qid)) { + if (!atomic_read(&qd->qd_count) && + !list_empty(&qd->qd_reclaim)) { + /* Remove it from reclaim list */ + list_del_init(&qd->qd_reclaim); + atomic_dec(&qd_lru_count); + } + atomic_inc(&qd->qd_count); found = 1; break; } @@ -231,7 +202,7 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, new_qd = NULL; } - spin_unlock(&qd_lock); + spin_unlock(&qd_lru_lock); if (qd) { if (new_qd) { @@ -251,19 +222,18 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, static void qd_hold(struct gfs2_quota_data *qd) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - gfs2_assert(sdp, !__lockref_is_dead(&qd->qd_lockref)); - lockref_get(&qd->qd_lockref); + gfs2_assert(sdp, atomic_read(&qd->qd_count)); + atomic_inc(&qd->qd_count); } static void qd_put(struct gfs2_quota_data *qd) { - if (lockref_put_or_lock(&qd->qd_lockref)) - return; - - qd->qd_lockref.count = 0; - list_lru_add(&gfs2_qd_lru, &qd->qd_lru); - spin_unlock(&qd->qd_lockref.lock); - + if (atomic_dec_and_lock(&qd->qd_count, &qd_lru_lock)) { + /* Add to the reclaim list */ + list_add_tail(&qd->qd_reclaim, &qd_lru_list); + atomic_inc(&qd_lru_count); + spin_unlock(&qd_lru_lock); + } } static int slot_get(struct gfs2_quota_data *qd) @@ -272,10 +242,10 @@ static int slot_get(struct gfs2_quota_data *qd) unsigned int c, o = 0, b; unsigned char byte = 0; - spin_lock(&qd_lock); + spin_lock(&qd_lru_lock); if (qd->qd_slot_count++) { - spin_unlock(&qd_lock); + spin_unlock(&qd_lru_lock); return 0; } @@ -299,13 +269,13 @@ found: sdp->sd_quota_bitmap[c][o] |= 1 << b; - spin_unlock(&qd_lock); + spin_unlock(&qd_lru_lock); return 0; fail: qd->qd_slot_count--; - spin_unlock(&qd_lock); + spin_unlock(&qd_lru_lock); return -ENOSPC; } @@ -313,43 +283,23 @@ static void slot_hold(struct gfs2_quota_data *qd) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - spin_lock(&qd_lock); + spin_lock(&qd_lru_lock); gfs2_assert(sdp, qd->qd_slot_count); qd->qd_slot_count++; - spin_unlock(&qd_lock); -} - -static void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, - unsigned int bit, int new_value) -{ - unsigned int c, o, b = bit; - int old_value; - - c = b / (8 * PAGE_SIZE); - b %= 8 * PAGE_SIZE; - o = b / 8; - b %= 8; - - old_value = (bitmap[c][o] & (1 << b)); - gfs2_assert_withdraw(sdp, !old_value != !new_value); - - if (new_value) - bitmap[c][o] |= 1 << b; - else - bitmap[c][o] &= ~(1 << b); + spin_unlock(&qd_lru_lock); } static void slot_put(struct gfs2_quota_data *qd) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - spin_lock(&qd_lock); + spin_lock(&qd_lru_lock); gfs2_assert(sdp, qd->qd_slot_count); if (!--qd->qd_slot_count) { gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0); qd->qd_slot = -1; } - spin_unlock(&qd_lock); + spin_unlock(&qd_lru_lock); } static int bh_get(struct gfs2_quota_data *qd) @@ -413,25 +363,6 @@ static void bh_put(struct gfs2_quota_data *qd) mutex_unlock(&sdp->sd_quota_mutex); } -static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, - u64 *sync_gen) -{ - if (test_bit(QDF_LOCKED, &qd->qd_flags) || - !test_bit(QDF_CHANGE, &qd->qd_flags) || - (sync_gen && (qd->qd_sync_gen >= *sync_gen))) - return 0; - - if (!lockref_get_not_dead(&qd->qd_lockref)) - return 0; - - list_move_tail(&qd->qd_list, &sdp->sd_quota_list); - set_bit(QDF_LOCKED, &qd->qd_flags); - qd->qd_change_sync = qd->qd_change; - gfs2_assert_warn(sdp, qd->qd_slot_count); - qd->qd_slot_count++; - return 1; -} - static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) { struct gfs2_quota_data *qd = NULL; @@ -443,18 +374,31 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) if (sdp->sd_vfs->s_flags & MS_RDONLY) return 0; - spin_lock(&qd_lock); + spin_lock(&qd_lru_lock); list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { - found = qd_check_sync(sdp, qd, &sdp->sd_quota_sync_gen); - if (found) - break; + if (test_bit(QDF_LOCKED, &qd->qd_flags) || + !test_bit(QDF_CHANGE, &qd->qd_flags) || + qd->qd_sync_gen >= sdp->sd_quota_sync_gen) + continue; + + list_move_tail(&qd->qd_list, &sdp->sd_quota_list); + + set_bit(QDF_LOCKED, &qd->qd_flags); + gfs2_assert_warn(sdp, atomic_read(&qd->qd_count)); + atomic_inc(&qd->qd_count); + qd->qd_change_sync = qd->qd_change; + gfs2_assert_warn(sdp, qd->qd_slot_count); + qd->qd_slot_count++; + found = 1; + + break; } if (!found) qd = NULL; - spin_unlock(&qd_lock); + spin_unlock(&qd_lru_lock); if (qd) { gfs2_assert_warn(sdp, qd->qd_change_sync); @@ -472,6 +416,43 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) return 0; } +static int qd_trylock(struct gfs2_quota_data *qd) +{ + struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + + if (sdp->sd_vfs->s_flags & MS_RDONLY) + return 0; + + spin_lock(&qd_lru_lock); + + if (test_bit(QDF_LOCKED, &qd->qd_flags) || + !test_bit(QDF_CHANGE, &qd->qd_flags)) { + spin_unlock(&qd_lru_lock); + return 0; + } + + list_move_tail(&qd->qd_list, &sdp->sd_quota_list); + + set_bit(QDF_LOCKED, &qd->qd_flags); + gfs2_assert_warn(sdp, atomic_read(&qd->qd_count)); + atomic_inc(&qd->qd_count); + qd->qd_change_sync = qd->qd_change; + gfs2_assert_warn(sdp, qd->qd_slot_count); + qd->qd_slot_count++; + + spin_unlock(&qd_lru_lock); + + gfs2_assert_warn(sdp, qd->qd_change_sync); + if (bh_get(qd)) { + clear_bit(QDF_LOCKED, &qd->qd_flags); + slot_put(qd); + qd_put(qd); + return 0; + } + + return 1; +} + static void qd_unlock(struct gfs2_quota_data *qd) { gfs2_assert_warn(qd->qd_gl->gl_sbd, @@ -621,9 +602,9 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) x = be64_to_cpu(qc->qc_change) + change; qc->qc_change = cpu_to_be64(x); - spin_lock(&qd_lock); + spin_lock(&qd_lru_lock); qd->qd_change = x; - spin_unlock(&qd_lock); + spin_unlock(&qd_lru_lock); if (!x) { gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags)); @@ -667,7 +648,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, struct buffer_head *bh; struct page *page; void *kaddr, *ptr; - struct gfs2_quota q; + struct gfs2_quota q, *qp; int err, nbytes; u64 size; @@ -683,25 +664,28 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, return err; err = -EIO; - be64_add_cpu(&q.qu_value, change); - qd->qd_qb.qb_value = q.qu_value; + qp = &q; + qp->qu_value = be64_to_cpu(qp->qu_value); + qp->qu_value += change; + qp->qu_value = cpu_to_be64(qp->qu_value); + qd->qd_qb.qb_value = qp->qu_value; if (fdq) { if (fdq->d_fieldmask & FS_DQ_BSOFT) { - q.qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_warn = q.qu_warn; + qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_warn = qp->qu_warn; } if (fdq->d_fieldmask & FS_DQ_BHARD) { - q.qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_limit = q.qu_limit; + qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_limit = qp->qu_limit; } if (fdq->d_fieldmask & FS_DQ_BCOUNT) { - q.qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_value = q.qu_value; + qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_value = qp->qu_value; } } /* Write the quota into the quota file on disk */ - ptr = &q; + ptr = qp; nbytes = sizeof(struct gfs2_quota); get_a_page: page = find_or_create_page(mapping, index, GFP_NOFS); @@ -779,7 +763,6 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) { struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); - struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned int data_blocks, ind_blocks; struct gfs2_holder *ghs, i_gh; unsigned int qx, x; @@ -832,8 +815,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; reserved = 1 + (nalloc * (data_blocks + ind_blocks)); - ap.target = reserved; - error = gfs2_inplace_reserve(ip, &ap); + error = gfs2_inplace_reserve(ip, reserved, 0); if (error) goto out_alloc; @@ -992,9 +974,9 @@ static int need_sync(struct gfs2_quota_data *qd) if (!qd->qd_qb.qb_limit) return 0; - spin_lock(&qd_lock); + spin_lock(&qd_lru_lock); value = qd->qd_change; - spin_unlock(&qd_lock); + spin_unlock(&qd_lru_lock); spin_lock(>->gt_spin); num = gt->gt_quota_scale_num; @@ -1019,11 +1001,9 @@ static int need_sync(struct gfs2_quota_data *qd) void gfs2_quota_unlock(struct gfs2_inode *ip) { - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_quota_data *qda[4]; unsigned int count = 0; unsigned int x; - int found; if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) goto out; @@ -1036,25 +1016,9 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) sync = need_sync(qd); gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]); - if (!sync) - continue; - - spin_lock(&qd_lock); - found = qd_check_sync(sdp, qd, NULL); - spin_unlock(&qd_lock); - - if (!found) - continue; - gfs2_assert_warn(sdp, qd->qd_change_sync); - if (bh_get(qd)) { - clear_bit(QDF_LOCKED, &qd->qd_flags); - slot_put(qd); - qd_put(qd); - continue; - } - - qda[count++] = qd; + if (sync && qd_trylock(qd)) + qda[count++] = qd; } if (count) { @@ -1103,9 +1067,9 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) continue; value = (s64)be64_to_cpu(qd->qd_qb.qb_value); - spin_lock(&qd_lock); + spin_lock(&qd_lru_lock); value += qd->qd_change; - spin_unlock(&qd_lock); + spin_unlock(&qd_lru_lock); if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { print_message(qd, "exceeded"); @@ -1154,18 +1118,17 @@ int gfs2_quota_sync(struct super_block *sb, int type) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_data **qda; - unsigned int max_qd = PAGE_SIZE/sizeof(struct gfs2_holder); + unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync); unsigned int num_qd; unsigned int x; int error = 0; + sdp->sd_quota_sync_gen++; + qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL); if (!qda) return -ENOMEM; - mutex_lock(&sdp->sd_quota_sync_mutex); - sdp->sd_quota_sync_gen++; - do { num_qd = 0; @@ -1190,7 +1153,6 @@ int gfs2_quota_sync(struct super_block *sb, int type) } } while (!error && num_qd == max_qd); - mutex_unlock(&sdp->sd_quota_sync_mutex); kfree(qda); return error; @@ -1296,11 +1258,11 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) qd->qd_slot = slot; qd->qd_slot_count = 1; - spin_lock(&qd_lock); + spin_lock(&qd_lru_lock); gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1); list_add(&qd->qd_list, &sdp->sd_quota_list); atomic_inc(&sdp->sd_quota_count); - spin_unlock(&qd_lock); + spin_unlock(&qd_lru_lock); found++; } @@ -1326,34 +1288,30 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) struct gfs2_quota_data *qd; unsigned int x; - spin_lock(&qd_lock); + spin_lock(&qd_lru_lock); while (!list_empty(head)) { qd = list_entry(head->prev, struct gfs2_quota_data, qd_list); - /* - * To be removed in due course... we should be able to - * ensure that all refs to the qd have done by this point - * so that this rather odd test is not required - */ - spin_lock(&qd->qd_lockref.lock); - if (qd->qd_lockref.count > 1 || - (qd->qd_lockref.count && !test_bit(QDF_CHANGE, &qd->qd_flags))) { - spin_unlock(&qd->qd_lockref.lock); + if (atomic_read(&qd->qd_count) > 1 || + (atomic_read(&qd->qd_count) && + !test_bit(QDF_CHANGE, &qd->qd_flags))) { list_move(&qd->qd_list, head); - spin_unlock(&qd_lock); + spin_unlock(&qd_lru_lock); schedule(); - spin_lock(&qd_lock); + spin_lock(&qd_lru_lock); continue; } - spin_unlock(&qd->qd_lockref.lock); list_del(&qd->qd_list); /* Also remove if this qd exists in the reclaim list */ - list_lru_del(&gfs2_qd_lru, &qd->qd_lru); + if (!list_empty(&qd->qd_reclaim)) { + list_del_init(&qd->qd_reclaim); + atomic_dec(&qd_lru_count); + } atomic_dec(&sdp->sd_quota_count); - spin_unlock(&qd_lock); + spin_unlock(&qd_lru_lock); - if (!qd->qd_lockref.count) { + if (!atomic_read(&qd->qd_count)) { gfs2_assert_warn(sdp, !qd->qd_change); gfs2_assert_warn(sdp, !qd->qd_slot_count); } else @@ -1363,9 +1321,9 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) gfs2_glock_put(qd->qd_gl); kmem_cache_free(gfs2_quotad_cachep, qd); - spin_lock(&qd_lock); + spin_lock(&qd_lru_lock); } - spin_unlock(&qd_lock); + spin_unlock(&qd_lru_lock); gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count)); @@ -1504,7 +1462,7 @@ static int gfs2_quota_get_xstate(struct super_block *sb, } fqs->qs_uquota.qfs_nextents = 1; /* unsupported */ fqs->qs_gquota = fqs->qs_uquota; /* its the same inode in both cases */ - fqs->qs_incoredqs = list_lru_count(&gfs2_qd_lru); + fqs->qs_incoredqs = atomic_read(&qd_lru_count); return 0; } @@ -1615,12 +1573,10 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, if (gfs2_is_stuffed(ip)) alloc_required = 1; if (alloc_required) { - struct gfs2_alloc_parms ap = { .aflags = 0, }; gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), &data_blocks, &ind_blocks); blocks = 1 + data_blocks + ind_blocks; - ap.target = blocks; - error = gfs2_inplace_reserve(ip, &ap); + error = gfs2_inplace_reserve(ip, blocks, 0); if (error) goto out_i; blocks += gfs2_rg_blocks(ip, blocks); diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 96e4f34..0f64d9d 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -10,10 +10,9 @@ #ifndef __QUOTA_DOT_H__ #define __QUOTA_DOT_H__ -#include <linux/list_lru.h> - struct gfs2_inode; struct gfs2_sbd; +struct shrink_control; #define NO_UID_QUOTA_CHANGE INVALID_UID #define NO_GID_QUOTA_CHANGE INVALID_GID @@ -54,8 +53,10 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) return ret; } +extern unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, + struct shrink_control *sc); +extern unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc); extern const struct quotactl_ops gfs2_quotactl_ops; -extern struct shrinker gfs2_qd_shrinker; -extern struct list_lru gfs2_qd_lru; #endif /* __QUOTA_DOT_H__ */ diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c8d6161..6931743 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -81,12 +81,11 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, unsigned char new_state) { unsigned char *byte1, *byte2, *end, cur_state; - struct gfs2_bitmap *bi = rbm_bi(rbm); - unsigned int buflen = bi->bi_len; + unsigned int buflen = rbm->bi->bi_len; const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; - byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY); - end = bi->bi_bh->b_data + bi->bi_offset + buflen; + byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); + end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen; BUG_ON(byte1 >= end); @@ -96,17 +95,18 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, " "new_state=%d\n", rbm->offset, cur_state, new_state); printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n", - (unsigned long long)rbm->rgd->rd_addr, bi->bi_start); + (unsigned long long)rbm->rgd->rd_addr, + rbm->bi->bi_start); printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n", - bi->bi_offset, bi->bi_len); + rbm->bi->bi_offset, rbm->bi->bi_len); dump_stack(); gfs2_consist_rgrpd(rbm->rgd); return; } *byte1 ^= (cur_state ^ new_state) << bit; - if (do_clone && bi->bi_clone) { - byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY); + if (do_clone && rbm->bi->bi_clone) { + byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; *byte2 ^= (cur_state ^ new_state) << bit; } @@ -121,8 +121,7 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) { - struct gfs2_bitmap *bi = rbm_bi(rbm); - const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset; + const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset; const u8 *byte; unsigned int bit; @@ -253,53 +252,29 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) { u64 rblock = block - rbm->rgd->rd_data0; + u32 x; if (WARN_ON_ONCE(rblock > UINT_MAX)) return -EINVAL; if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) return -E2BIG; - rbm->bii = 0; + rbm->bi = rbm->rgd->rd_bits; rbm->offset = (u32)(rblock); /* Check if the block is within the first block */ - if (rbm->offset < rbm_bi(rbm)->bi_blocks) + if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) return 0; /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */ rbm->offset += (sizeof(struct gfs2_rgrp) - sizeof(struct gfs2_meta_header)) * GFS2_NBBY; - rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap; - rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap; + x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap; + rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap; + rbm->bi += x; return 0; } /** - * gfs2_rbm_incr - increment an rbm structure - * @rbm: The rbm with rgd already set correctly - * - * This function takes an existing rbm structure and increments it to the next - * viable block offset. - * - * Returns: If incrementing the offset would cause the rbm to go past the - * end of the rgrp, true is returned, otherwise false. - * - */ - -static bool gfs2_rbm_incr(struct gfs2_rbm *rbm) -{ - if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */ - rbm->offset++; - return false; - } - if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */ - return true; - - rbm->offset = 0; - rbm->bii++; - return false; -} - -/** * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned * @rbm: Position to search (value/result) * @n_unaligned: Number of unaligned blocks to check @@ -310,6 +285,7 @@ static bool gfs2_rbm_incr(struct gfs2_rbm *rbm) static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) { + u64 block; u32 n; u8 res; @@ -320,7 +296,8 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le (*len)--; if (*len == 0) return true; - if (gfs2_rbm_incr(rbm)) + block = gfs2_rbm_to_block(rbm); + if (gfs2_rbm_from_block(rbm, block + 1)) return true; } @@ -351,7 +328,6 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) u32 chunk_size; u8 *ptr, *start, *end; u64 block; - struct gfs2_bitmap *bi; if (n_unaligned && gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) @@ -360,12 +336,11 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) n_unaligned = len & 3; /* Start is now byte aligned */ while (len > 3) { - bi = rbm_bi(&rbm); - start = bi->bi_bh->b_data; - if (bi->bi_clone) - start = bi->bi_clone; - end = start + bi->bi_bh->b_size; - start += bi->bi_offset; + start = rbm.bi->bi_bh->b_data; + if (rbm.bi->bi_clone) + start = rbm.bi->bi_clone; + end = start + rbm.bi->bi_bh->b_size; + start += rbm.bi->bi_offset; BUG_ON(rbm.offset & 3); start += (rbm.offset / GFS2_NBBY); bytes = min_t(u32, len / GFS2_NBBY, (end - start)); @@ -630,13 +605,11 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) RB_CLEAR_NODE(&rs->rs_node); if (rs->rs_free) { - struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm); - /* return reserved blocks to the rgrp */ BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; rs->rs_free = 0; - clear_bit(GBF_FULL, &bi->bi_flags); + clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags); smp_mb__after_clear_bit(); } } @@ -661,13 +634,14 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) /** * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation - * @wcount: The inode's write count, or NULL * */ -void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) +void gfs2_rs_delete(struct gfs2_inode *ip) { + struct inode *inode = &ip->i_inode; + down_write(&ip->i_rw_mutex); - if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) { + if (ip->i_res && atomic_read(&inode->i_writecount) <= 1) { gfs2_rs_deltree(ip->i_res); BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); @@ -769,21 +743,18 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) bi->bi_offset = sizeof(struct gfs2_rgrp); bi->bi_start = 0; bi->bi_len = bytes; - bi->bi_blocks = bytes * GFS2_NBBY; /* header block */ } else if (x == 0) { bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp); bi->bi_offset = sizeof(struct gfs2_rgrp); bi->bi_start = 0; bi->bi_len = bytes; - bi->bi_blocks = bytes * GFS2_NBBY; /* last block */ } else if (x + 1 == length) { bytes = bytes_left; bi->bi_offset = sizeof(struct gfs2_meta_header); bi->bi_start = rgd->rd_bitbytes - bytes_left; bi->bi_len = bytes; - bi->bi_blocks = bytes * GFS2_NBBY; /* other blocks */ } else { bytes = sdp->sd_sb.sb_bsize - @@ -791,7 +762,6 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) bi->bi_offset = sizeof(struct gfs2_meta_header); bi->bi_start = rgd->rd_bitbytes - bytes_left; bi->bi_len = bytes; - bi->bi_blocks = bytes * GFS2_NBBY; } bytes_left -= bytes; @@ -1127,7 +1097,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; } - if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { + if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); @@ -1161,7 +1131,7 @@ int update_rgrp_lvb(struct gfs2_rgrpd *rgd) if (rgd->rd_flags & GFS2_RDF_UPTODATE) return 0; - if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) + if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) return gfs2_rgrp_bh_get(rgd); rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); @@ -1422,12 +1392,12 @@ static void rs_insert(struct gfs2_inode *ip) * rg_mblk_search - find a group of multiple free blocks to form a reservation * @rgd: the resource group descriptor * @ip: pointer to the inode for which we're reserving blocks - * @ap: the allocation parameters + * @requested: number of blocks required for this allocation * */ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, - const struct gfs2_alloc_parms *ap) + unsigned requested) { struct gfs2_rbm rbm = { .rgd = rgd, }; u64 goal; @@ -1440,7 +1410,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, if (S_ISDIR(inode->i_mode)) extlen = 1; else { - extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target); + extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested); extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks); } if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) @@ -1584,14 +1554,14 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, const struct gfs2_inode *ip, bool nowrap) { struct buffer_head *bh; - int initial_bii; + struct gfs2_bitmap *initial_bi; u32 initial_offset; u32 offset; u8 *buffer; + int index; int n = 0; int iters = rbm->rgd->rd_length; int ret; - struct gfs2_bitmap *bi; /* If we are not starting at the beginning of a bitmap, then we * need to add one to the bitmap count to ensure that we search @@ -1601,53 +1571,52 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, iters++; while(1) { - bi = rbm_bi(rbm); - if (test_bit(GBF_FULL, &bi->bi_flags) && + if (test_bit(GBF_FULL, &rbm->bi->bi_flags) && (state == GFS2_BLKST_FREE)) goto next_bitmap; - bh = bi->bi_bh; - buffer = bh->b_data + bi->bi_offset; + bh = rbm->bi->bi_bh; + buffer = bh->b_data + rbm->bi->bi_offset; WARN_ON(!buffer_uptodate(bh)); - if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) - buffer = bi->bi_clone + bi->bi_offset; + if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone) + buffer = rbm->bi->bi_clone + rbm->bi->bi_offset; initial_offset = rbm->offset; - offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state); + offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state); if (offset == BFITNOENT) goto bitmap_full; rbm->offset = offset; if (ip == NULL) return 0; - initial_bii = rbm->bii; + initial_bi = rbm->bi; ret = gfs2_reservation_check_and_update(rbm, ip, minext); if (ret == 0) return 0; if (ret > 0) { - n += (rbm->bii - initial_bii); + n += (rbm->bi - initial_bi); goto next_iter; } if (ret == -E2BIG) { - rbm->bii = 0; + index = 0; rbm->offset = 0; - n += (rbm->bii - initial_bii); + n += (rbm->bi - initial_bi); goto res_covered_end_of_rgrp; } return ret; bitmap_full: /* Mark bitmap as full and fall through */ - if ((state == GFS2_BLKST_FREE) && initial_offset == 0) { - struct gfs2_bitmap *bi = rbm_bi(rbm); - set_bit(GBF_FULL, &bi->bi_flags); - } + if ((state == GFS2_BLKST_FREE) && initial_offset == 0) + set_bit(GBF_FULL, &rbm->bi->bi_flags); next_bitmap: /* Find next bitmap in the rgrp */ rbm->offset = 0; - rbm->bii++; - if (rbm->bii == rbm->rgd->rd_length) - rbm->bii = 0; + index = rbm->bi - rbm->rgd->rd_bits; + index++; + if (index == rbm->rgd->rd_length) + index = 0; res_covered_end_of_rgrp: - if ((rbm->bii == 0) && nowrap) + rbm->bi = &rbm->rgd->rd_bits[index]; + if ((index == 0) && nowrap) break; n++; next_iter: @@ -1676,7 +1645,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip struct gfs2_inode *ip; int error; int found = 0; - struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 }; + struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 }; while (1) { down_write(&sdp->sd_log_flush_lock); @@ -1831,12 +1800,12 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for - * @ap: the allocation parameters + * @requested: the number of blocks to be reserved * * Returns: errno */ -int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap) +int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; @@ -1848,16 +1817,17 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - if (gfs2_assert_warn(sdp, ap->target)) + if (gfs2_assert_warn(sdp, requested)) return -EINVAL; if (gfs2_rs_active(rs)) { begin = rs->rs_rbm.rgd; + flags = 0; /* Yoda: Do or do not. There is no try */ } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { rs->rs_rbm.rgd = begin = ip->i_rgd; } else { rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); } - if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV)) + if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV)) skip = gfs2_orlov_skip(ip); if (rs->rs_rbm.rgd == NULL) return -EBADSLT; @@ -1899,14 +1869,14 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a /* Get a reservation if we don't already have one */ if (!gfs2_rs_active(rs)) - rg_mblk_search(rs->rs_rbm.rgd, ip, ap); + rg_mblk_search(rs->rs_rbm.rgd, ip, requested); /* Skip rgrps when we can't get a reservation on first pass */ if (!gfs2_rs_active(rs) && (loops < 1)) goto check_rgrp; /* If rgrp has enough free space, use it */ - if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) { + if (rs->rs_rbm.rgd->rd_free_clone >= requested) { ip->i_rgd = rs->rs_rbm.rgd; return 0; } @@ -2003,14 +1973,14 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, *n = 1; block = gfs2_rbm_to_block(rbm); - gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh); + gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm->bi->bi_bh); gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); block++; while (*n < elen) { ret = gfs2_rbm_from_block(&pos, block); if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) break; - gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh); + gfs2_trans_add_meta(pos.rgd->rd_gl, pos.bi->bi_bh); gfs2_setbit(&pos, true, GFS2_BLKST_USED); (*n)++; block++; @@ -2031,7 +2001,6 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, u32 blen, unsigned char new_state) { struct gfs2_rbm rbm; - struct gfs2_bitmap *bi; rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1); if (!rbm.rgd) { @@ -2042,15 +2011,15 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, while (blen--) { gfs2_rbm_from_block(&rbm, bstart); - bi = rbm_bi(&rbm); bstart++; - if (!bi->bi_clone) { - bi->bi_clone = kmalloc(bi->bi_bh->b_size, - GFP_NOFS | __GFP_NOFAIL); - memcpy(bi->bi_clone + bi->bi_offset, - bi->bi_bh->b_data + bi->bi_offset, bi->bi_len); + if (!rbm.bi->bi_clone) { + rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size, + GFP_NOFS | __GFP_NOFAIL); + memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset, + rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, + rbm.bi->bi_len); } - gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh); + gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.bi->bi_bh); gfs2_setbit(&rbm, false, new_state); } @@ -2134,35 +2103,6 @@ out: } /** - * gfs2_set_alloc_start - Set starting point for block allocation - * @rbm: The rbm which will be set to the required location - * @ip: The gfs2 inode - * @dinode: Flag to say if allocation includes a new inode - * - * This sets the starting point from the reservation if one is active - * otherwise it falls back to guessing a start point based on the - * inode's goal block or the last allocation point in the rgrp. - */ - -static void gfs2_set_alloc_start(struct gfs2_rbm *rbm, - const struct gfs2_inode *ip, bool dinode) -{ - u64 goal; - - if (gfs2_rs_active(ip->i_res)) { - *rbm = ip->i_res->rs_rbm; - return; - } - - if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal)) - goal = ip->i_goal; - else - goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0; - - gfs2_rbm_from_block(rbm, goal); -} - -/** * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode * @ip: the inode to allocate the block for * @bn: Used to return the starting block number @@ -2180,14 +2120,22 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, struct buffer_head *dibh; struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; unsigned int ndata; + u64 goal; u64 block; /* block, within the file system scope */ int error; - gfs2_set_alloc_start(&rbm, ip, dinode); + if (gfs2_rs_active(ip->i_res)) + goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm); + else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) + goal = ip->i_goal; + else + goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; + + gfs2_rbm_from_block(&rbm, goal); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false); if (error == -ENOSPC) { - gfs2_set_alloc_start(&rbm, ip, dinode); + gfs2_rbm_from_block(&rbm, goal); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false); } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 3a10d2f..5b3f4a8 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -40,7 +40,7 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); #define GFS2_AF_ORLOV 1 -extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap); +extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 flags); extern void gfs2_inplace_release(struct gfs2_inode *ip); extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, @@ -48,7 +48,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, extern int gfs2_rs_alloc(struct gfs2_inode *ip); extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); -extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount); +extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 35da5b1..e5639de 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1526,7 +1526,7 @@ out_unlock: out: /* Case 3 starts here */ truncate_inode_pages(&inode->i_data, 0); - gfs2_rs_delete(ip, NULL); + gfs2_rs_delete(ip); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index d09f6ed..aa5c480 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -587,6 +587,7 @@ TUNE_ATTR(max_readahead, 0); TUNE_ATTR(complain_secs, 0); TUNE_ATTR(statfs_slow, 0); TUNE_ATTR(new_files_jdata, 0); +TUNE_ATTR(quota_simul_sync, 1); TUNE_ATTR(statfs_quantum, 1); TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); @@ -596,6 +597,7 @@ static struct attribute *tune_attrs[] = { &tune_attr_max_readahead.attr, &tune_attr_complain_secs.attr, &tune_attr_statfs_slow.attr, + &tune_attr_quota_simul_sync.attr, &tune_attr_statfs_quantum.attr, &tune_attr_quota_scale.attr, &tune_attr_new_files_jdata.attr, diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index f7109f6..6402fb6 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -268,3 +268,23 @@ int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, return rv; } +void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, + unsigned int bit, int new_value) +{ + unsigned int c, o, b = bit; + int old_value; + + c = b / (8 * PAGE_SIZE); + b %= 8 * PAGE_SIZE; + o = b / 8; + b %= 8; + + old_value = (bitmap[c][o] & (1 << b)); + gfs2_assert_withdraw(sdp, !old_value != !new_value); + + if (new_value) + bitmap[c][o] |= 1 << b; + else + bitmap[c][o] &= ~(1 << b); +} + diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index b7ffb09..8053573 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -164,6 +164,8 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, #define gfs2_tune_get(sdp, field) \ gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) +void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, + unsigned int bit, int new_value); int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...); #endif /* __UTIL_DOT_H__ */ diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 8c6a6f6..ecd37f3 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -723,7 +723,6 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, unsigned int blks, ea_skeleton_call_t skeleton_call, void *private) { - struct gfs2_alloc_parms ap = { .target = blks }; struct buffer_head *dibh; int error; @@ -735,7 +734,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, if (error) return error; - error = gfs2_inplace_reserve(ip, &ap); + error = gfs2_inplace_reserve(ip, blks, 0); if (error) goto out_gunlock_q; diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h index f6bd266..2a1d712 100644 --- a/fs/hfs/btree.h +++ b/fs/hfs/btree.h @@ -153,6 +153,11 @@ struct hfs_btree_header_rec { u32 reserved3[16]; } __packed; +#define HFS_NODE_INDEX 0x00 /* An internal (index) node */ +#define HFS_NODE_HEADER 0x01 /* The tree header node (node 0) */ +#define HFS_NODE_MAP 0x02 /* Holds part of the bitmap of used nodes */ +#define HFS_NODE_LEAF 0xFF /* A leaf (ndNHeight==1) node */ + #define BTREE_ATTR_BADCLOSE 0x00000001 /* b-tree not closed properly. not used by hfsplus. */ #define HFS_TREE_BIGKEYS 0x00000002 /* key length is u16 instead of u8. diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 0fcec8b..0c6540c 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -15,118 +15,6 @@ #include "hfsplus_fs.h" #include "hfsplus_raw.h" -/* - * Initial source code of clump size calculation is gotten - * from http://opensource.apple.com/tarballs/diskdev_cmds/ - */ -#define CLUMP_ENTRIES 15 - -static short clumptbl[CLUMP_ENTRIES * 3] = { -/* - * Volume Attributes Catalog Extents - * Size Clump (MB) Clump (MB) Clump (MB) - */ - /* 1GB */ 4, 4, 4, - /* 2GB */ 6, 6, 4, - /* 4GB */ 8, 8, 4, - /* 8GB */ 11, 11, 5, - /* - * For volumes 16GB and larger, we want to make sure that a full OS - * install won't require fragmentation of the Catalog or Attributes - * B-trees. We do this by making the clump sizes sufficiently large, - * and by leaving a gap after the B-trees for them to grow into. - * - * For SnowLeopard 10A298, a FullNetInstall with all packages selected - * results in: - * Catalog B-tree Header - * nodeSize: 8192 - * totalNodes: 31616 - * freeNodes: 1978 - * (used = 231.55 MB) - * Attributes B-tree Header - * nodeSize: 8192 - * totalNodes: 63232 - * freeNodes: 958 - * (used = 486.52 MB) - * - * We also want Time Machine backup volumes to have a sufficiently - * large clump size to reduce fragmentation. - * - * The series of numbers for Catalog and Attribute form a geometric - * series. For Catalog (16GB to 512GB), each term is 8**(1/5) times - * the previous term. For Attributes (16GB to 512GB), each term is - * 4**(1/5) times the previous term. For 1TB to 16TB, each term is - * 2**(1/5) times the previous term. - */ - /* 16GB */ 64, 32, 5, - /* 32GB */ 84, 49, 6, - /* 64GB */ 111, 74, 7, - /* 128GB */ 147, 111, 8, - /* 256GB */ 194, 169, 9, - /* 512GB */ 256, 256, 11, - /* 1TB */ 294, 294, 14, - /* 2TB */ 338, 338, 16, - /* 4TB */ 388, 388, 20, - /* 8TB */ 446, 446, 25, - /* 16TB */ 512, 512, 32 -}; - -u32 hfsplus_calc_btree_clump_size(u32 block_size, u32 node_size, - u64 sectors, int file_id) -{ - u32 mod = max(node_size, block_size); - u32 clump_size; - int column; - int i; - - /* Figure out which column of the above table to use for this file. */ - switch (file_id) { - case HFSPLUS_ATTR_CNID: - column = 0; - break; - case HFSPLUS_CAT_CNID: - column = 1; - break; - default: - column = 2; - break; - } - - /* - * The default clump size is 0.8% of the volume size. And - * it must also be a multiple of the node and block size. - */ - if (sectors < 0x200000) { - clump_size = sectors << 2; /* 0.8 % */ - if (clump_size < (8 * node_size)) - clump_size = 8 * node_size; - } else { - /* turn exponent into table index... */ - for (i = 0, sectors = sectors >> 22; - sectors && (i < CLUMP_ENTRIES - 1); - ++i, sectors = sectors >> 1) { - /* empty body */ - } - - clump_size = clumptbl[column + (i) * 3] * 1024 * 1024; - } - - /* - * Round the clump size to a multiple of node and block size. - * NOTE: This rounds down. - */ - clump_size /= mod; - clump_size *= mod; - - /* - * Rounding down could have rounded down to 0 if the block size was - * greater than the clump size. If so, just use one block or node. - */ - if (clump_size == 0) - clump_size = mod; - - return clump_size; -} /* Get a reference to a B*Tree and do some initial checks */ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 08846425b..2b9cd01 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -127,14 +127,6 @@ struct hfs_bnode { #define HFS_BNODE_DELETED 4 /* - * Attributes file states - */ -#define HFSPLUS_EMPTY_ATTR_TREE 0 -#define HFSPLUS_CREATING_ATTR_TREE 1 -#define HFSPLUS_VALID_ATTR_TREE 2 -#define HFSPLUS_FAILED_ATTR_TREE 3 - -/* * HFS+ superblock info (built from Volume Header on disk) */ @@ -149,7 +141,6 @@ struct hfsplus_sb_info { struct hfs_btree *ext_tree; struct hfs_btree *cat_tree; struct hfs_btree *attr_tree; - atomic_t attr_tree_state; struct inode *alloc_file; struct inode *hidden_dir; struct nls_table *nls; @@ -389,7 +380,6 @@ int hfsplus_block_allocate(struct super_block *, u32, u32, u32 *); int hfsplus_block_free(struct super_block *, u32, u32); /* btree.c */ -u32 hfsplus_calc_btree_clump_size(u32, u32, u64, int); struct hfs_btree *hfs_btree_open(struct super_block *, u32); void hfs_btree_close(struct hfs_btree *); int hfs_btree_write(struct hfs_btree *); diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index 8ffb3a8..452ede0 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h @@ -156,10 +156,10 @@ struct hfs_bnode_desc { } __packed; /* HFS+ BTree node types */ -#define HFS_NODE_INDEX 0x00 /* An internal (index) node */ -#define HFS_NODE_HEADER 0x01 /* The tree header node (node 0) */ -#define HFS_NODE_MAP 0x02 /* Holds part of the bitmap of used nodes */ -#define HFS_NODE_LEAF 0xFF /* A leaf (ndNHeight==1) node */ +#define HFS_NODE_INDEX 0x00 +#define HFS_NODE_HEADER 0x01 +#define HFS_NODE_MAP 0x02 +#define HFS_NODE_LEAF 0xFF /* HFS+ BTree header */ struct hfs_btree_header_rec { @@ -187,9 +187,6 @@ struct hfs_btree_header_rec { /* HFS+ BTree misc info */ #define HFSPLUS_TREE_HEAD 0 #define HFSPLUS_NODE_MXSZ 32768 -#define HFSPLUS_ATTR_TREE_NODE_SIZE 8192 -#define HFSPLUS_BTREE_HDR_NODE_RECS_COUNT 3 -#define HFSPLUS_BTREE_HDR_USER_BYTES 128 /* Some special File ID numbers (stolen from hfs.h) */ #define HFSPLUS_POR_CNID 1 /* Parent Of the Root */ diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 80875aa..4c4d142 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -474,14 +474,12 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) pr_err("failed to load catalog file\n"); goto out_close_ext_tree; } - atomic_set(&sbi->attr_tree_state, HFSPLUS_EMPTY_ATTR_TREE); if (vhdr->attr_file.total_blocks != 0) { sbi->attr_tree = hfs_btree_open(sb, HFSPLUS_ATTR_CNID); if (!sbi->attr_tree) { pr_err("failed to load attributes file\n"); goto out_close_cat_tree; } - atomic_set(&sbi->attr_tree_state, HFSPLUS_VALID_ATTR_TREE); } sb->s_xattr = hfsplus_xattr_handlers; diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index e9a97a0..b51a607 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -24,6 +24,13 @@ struct hfsplus_wd { u16 embed_count; }; +static void hfsplus_end_io_sync(struct bio *bio, int err) +{ + if (err) + clear_bit(BIO_UPTODATE, &bio->bi_flags); + complete(bio->bi_private); +} + /* * hfsplus_submit_bio - Perfrom block I/O * @sb: super block of volume for I/O @@ -46,6 +53,7 @@ struct hfsplus_wd { int hfsplus_submit_bio(struct super_block *sb, sector_t sector, void *buf, void **data, int rw) { + DECLARE_COMPLETION_ONSTACK(wait); struct bio *bio; int ret = 0; u64 io_size; @@ -65,6 +73,8 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, bio = bio_alloc(GFP_NOIO, 1); bio->bi_sector = sector; bio->bi_bdev = sb->s_bdev; + bio->bi_end_io = hfsplus_end_io_sync; + bio->bi_private = &wait; if (!(rw & WRITE) && data) *data = (u8 *)buf + offset; @@ -83,7 +93,12 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, buf = (u8 *)buf + len; } - ret = submit_bio_wait(rw, bio); + submit_bio(rw, bio); + wait_for_completion(&wait); + + if (!bio_flagged(bio, BIO_UPTODATE)) + ret = -EIO; + out: bio_put(bio); return ret < 0 ? ret : 0; diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index 3c6136f..bd8471f 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -127,211 +127,6 @@ static int can_set_xattr(struct inode *inode, const char *name, return 0; } -static void hfsplus_init_header_node(struct inode *attr_file, - u32 clump_size, - char *buf, u16 node_size) -{ - struct hfs_bnode_desc *desc; - struct hfs_btree_header_rec *head; - u16 offset; - __be16 *rec_offsets; - u32 hdr_node_map_rec_bits; - char *bmp; - u32 used_nodes; - u32 used_bmp_bytes; - loff_t tmp; - - hfs_dbg(ATTR_MOD, "init_hdr_attr_file: clump %u, node_size %u\n", - clump_size, node_size); - - /* The end of the node contains list of record offsets */ - rec_offsets = (__be16 *)(buf + node_size); - - desc = (struct hfs_bnode_desc *)buf; - desc->type = HFS_NODE_HEADER; - desc->num_recs = cpu_to_be16(HFSPLUS_BTREE_HDR_NODE_RECS_COUNT); - offset = sizeof(struct hfs_bnode_desc); - *--rec_offsets = cpu_to_be16(offset); - - head = (struct hfs_btree_header_rec *)(buf + offset); - head->node_size = cpu_to_be16(node_size); - tmp = i_size_read(attr_file); - do_div(tmp, node_size); - head->node_count = cpu_to_be32(tmp); - head->free_nodes = cpu_to_be32(be32_to_cpu(head->node_count) - 1); - head->clump_size = cpu_to_be32(clump_size); - head->attributes |= cpu_to_be32(HFS_TREE_BIGKEYS | HFS_TREE_VARIDXKEYS); - head->max_key_len = cpu_to_be16(HFSPLUS_ATTR_KEYLEN - sizeof(u16)); - offset += sizeof(struct hfs_btree_header_rec); - *--rec_offsets = cpu_to_be16(offset); - offset += HFSPLUS_BTREE_HDR_USER_BYTES; - *--rec_offsets = cpu_to_be16(offset); - - hdr_node_map_rec_bits = 8 * (node_size - offset - (4 * sizeof(u16))); - if (be32_to_cpu(head->node_count) > hdr_node_map_rec_bits) { - u32 map_node_bits; - u32 map_nodes; - - desc->next = cpu_to_be32(be32_to_cpu(head->leaf_tail) + 1); - map_node_bits = 8 * (node_size - sizeof(struct hfs_bnode_desc) - - (2 * sizeof(u16)) - 2); - map_nodes = (be32_to_cpu(head->node_count) - - hdr_node_map_rec_bits + - (map_node_bits - 1)) / map_node_bits; - be32_add_cpu(&head->free_nodes, 0 - map_nodes); - } - - bmp = buf + offset; - used_nodes = - be32_to_cpu(head->node_count) - be32_to_cpu(head->free_nodes); - used_bmp_bytes = used_nodes / 8; - if (used_bmp_bytes) { - memset(bmp, 0xFF, used_bmp_bytes); - bmp += used_bmp_bytes; - used_nodes %= 8; - } - *bmp = ~(0xFF >> used_nodes); - offset += hdr_node_map_rec_bits / 8; - *--rec_offsets = cpu_to_be16(offset); -} - -static int hfsplus_create_attributes_file(struct super_block *sb) -{ - int err = 0; - struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); - struct inode *attr_file; - struct hfsplus_inode_info *hip; - u32 clump_size; - u16 node_size = HFSPLUS_ATTR_TREE_NODE_SIZE; - char *buf; - int index, written; - struct address_space *mapping; - struct page *page; - int old_state = HFSPLUS_EMPTY_ATTR_TREE; - - hfs_dbg(ATTR_MOD, "create_attr_file: ino %d\n", HFSPLUS_ATTR_CNID); - -check_attr_tree_state_again: - switch (atomic_read(&sbi->attr_tree_state)) { - case HFSPLUS_EMPTY_ATTR_TREE: - if (old_state != atomic_cmpxchg(&sbi->attr_tree_state, - old_state, - HFSPLUS_CREATING_ATTR_TREE)) - goto check_attr_tree_state_again; - break; - case HFSPLUS_CREATING_ATTR_TREE: - /* - * This state means that another thread is in process - * of AttributesFile creation. Theoretically, it is - * possible to be here. But really __setxattr() method - * first of all calls hfs_find_init() for lookup in - * B-tree of CatalogFile. This method locks mutex of - * CatalogFile's B-tree. As a result, if some thread - * is inside AttributedFile creation operation then - * another threads will be waiting unlocking of - * CatalogFile's B-tree's mutex. However, if code will - * change then we will return error code (-EAGAIN) from - * here. Really, it means that first try to set of xattr - * fails with error but second attempt will have success. - */ - return -EAGAIN; - case HFSPLUS_VALID_ATTR_TREE: - return 0; - case HFSPLUS_FAILED_ATTR_TREE: - return -EOPNOTSUPP; - default: - BUG(); - } - - attr_file = hfsplus_iget(sb, HFSPLUS_ATTR_CNID); - if (IS_ERR(attr_file)) { - pr_err("failed to load attributes file\n"); - return PTR_ERR(attr_file); - } - - BUG_ON(i_size_read(attr_file) != 0); - - hip = HFSPLUS_I(attr_file); - - clump_size = hfsplus_calc_btree_clump_size(sb->s_blocksize, - node_size, - sbi->sect_count, - HFSPLUS_ATTR_CNID); - - mutex_lock(&hip->extents_lock); - hip->clump_blocks = clump_size >> sbi->alloc_blksz_shift; - mutex_unlock(&hip->extents_lock); - - if (sbi->free_blocks <= (hip->clump_blocks << 1)) { - err = -ENOSPC; - goto end_attr_file_creation; - } - - while (hip->alloc_blocks < hip->clump_blocks) { - err = hfsplus_file_extend(attr_file); - if (unlikely(err)) { - pr_err("failed to extend attributes file\n"); - goto end_attr_file_creation; - } - hip->phys_size = attr_file->i_size = - (loff_t)hip->alloc_blocks << sbi->alloc_blksz_shift; - hip->fs_blocks = hip->alloc_blocks << sbi->fs_shift; - inode_set_bytes(attr_file, attr_file->i_size); - } - - buf = kzalloc(node_size, GFP_NOFS); - if (!buf) { - pr_err("failed to allocate memory for header node\n"); - err = -ENOMEM; - goto end_attr_file_creation; - } - - hfsplus_init_header_node(attr_file, clump_size, buf, node_size); - - mapping = attr_file->i_mapping; - - index = 0; - written = 0; - for (; written < node_size; index++, written += PAGE_CACHE_SIZE) { - void *kaddr; - - page = read_mapping_page(mapping, index, NULL); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto failed_header_node_init; - } - - kaddr = kmap_atomic(page); - memcpy(kaddr, buf + written, - min_t(size_t, PAGE_CACHE_SIZE, node_size - written)); - kunmap_atomic(kaddr); - - set_page_dirty(page); - page_cache_release(page); - } - - hfsplus_mark_inode_dirty(attr_file, HFSPLUS_I_ATTR_DIRTY); - - sbi->attr_tree = hfs_btree_open(sb, HFSPLUS_ATTR_CNID); - if (!sbi->attr_tree) - pr_err("failed to load attributes file\n"); - -failed_header_node_init: - kfree(buf); - -end_attr_file_creation: - iput(attr_file); - - if (!err) - atomic_set(&sbi->attr_tree_state, HFSPLUS_VALID_ATTR_TREE); - else if (err == -ENOSPC) - atomic_set(&sbi->attr_tree_state, HFSPLUS_EMPTY_ATTR_TREE); - else - atomic_set(&sbi->attr_tree_state, HFSPLUS_FAILED_ATTR_TREE); - - return err; -} - int __hfsplus_setxattr(struct inode *inode, const char *name, const void *value, size_t size, int flags) { @@ -416,9 +211,8 @@ int __hfsplus_setxattr(struct inode *inode, const char *name, } if (!HFSPLUS_SB(inode->i_sb)->attr_tree) { - err = hfsplus_create_attributes_file(inode->i_sb); - if (unlikely(err)) - goto end_setxattr; + err = -EOPNOTSUPP; + goto end_setxattr; } if (hfsplus_attr_exists(inode, name)) { diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index db23ce1..2543728 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -33,6 +33,15 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) #define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file)) +static int hostfs_d_delete(const struct dentry *dentry) +{ + return 1; +} + +static const struct dentry_operations hostfs_dentry_ops = { + .d_delete = hostfs_d_delete, +}; + /* Changed in hostfs_args before the kernel starts running */ static char *root_ino = ""; static int append = 0; @@ -916,7 +925,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) sb->s_blocksize_bits = 10; sb->s_magic = HOSTFS_SUPER_MAGIC; sb->s_op = &hostfs_sbops; - sb->s_d_op = &simple_dentry_operations; + sb->s_d_op = &hostfs_dentry_ops; sb->s_maxbytes = MAX_LFS_FILESIZE; /* NULL is printed as <NULL> by sprintf: avoid that. */ diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 6797bf8..1b39863 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -80,7 +80,6 @@ struct hpfs_sb_info { unsigned sb_c_bitmap; /* current bitmap */ unsigned sb_max_fwd_alloc; /* max forwad allocation */ int sb_timeshift; - struct rcu_head rcu; }; /* Four 512-byte buffers and the 2k block obtained by concatenating them */ diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 1b39afd..345713d 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -407,7 +407,7 @@ again: /*printk("HPFS: truncating file before delete.\n");*/ newattrs.ia_size = 0; newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; - err = notify_change(dentry, &newattrs, NULL); + err = notify_change(dentry, &newattrs); put_write_access(inode); if (!err) goto again; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index b8d01ef..4334cda 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -101,24 +101,18 @@ int hpfs_stop_cycles(struct super_block *s, int key, int *c1, int *c2, return 0; } -static void free_sbi(struct hpfs_sb_info *sbi) -{ - kfree(sbi->sb_cp_table); - kfree(sbi->sb_bmp_dir); - kfree(sbi); -} - -static void lazy_free_sbi(struct rcu_head *rcu) -{ - free_sbi(container_of(rcu, struct hpfs_sb_info, rcu)); -} - static void hpfs_put_super(struct super_block *s) { + struct hpfs_sb_info *sbi = hpfs_sb(s); + hpfs_lock(s); unmark_dirty(s); hpfs_unlock(s); - call_rcu(&hpfs_sb(s)->rcu, lazy_free_sbi); + + kfree(sbi->sb_cp_table); + kfree(sbi->sb_bmp_dir); + s->s_fs_info = NULL; + kfree(sbi); } unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) @@ -491,6 +485,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) } s->s_fs_info = sbi; + sbi->sb_bmp_dir = NULL; + sbi->sb_cp_table = NULL; + mutex_init(&sbi->hpfs_mutex); hpfs_lock(s); @@ -682,7 +679,10 @@ bail2: brelse(bh0); bail1: bail0: hpfs_unlock(s); - free_sbi(sbi); + kfree(sbi->sb_bmp_dir); + kfree(sbi->sb_cp_table); + s->s_fs_info = NULL; + kfree(sbi); return -EINVAL; } @@ -773,11 +773,15 @@ static struct inode *find_inode(struct super_block *sb, repeat: hlist_for_each_entry(inode, head, i_hash) { - if (inode->i_sb != sb) + spin_lock(&inode->i_lock); + if (inode->i_sb != sb) { + spin_unlock(&inode->i_lock); continue; - if (!test(inode, data)) + } + if (!test(inode, data)) { + spin_unlock(&inode->i_lock); continue; - spin_lock(&inode->i_lock); + } if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; @@ -800,11 +804,15 @@ static struct inode *find_inode_fast(struct super_block *sb, repeat: hlist_for_each_entry(inode, head, i_hash) { - if (inode->i_ino != ino) + spin_lock(&inode->i_lock); + if (inode->i_ino != ino) { + spin_unlock(&inode->i_lock); continue; - if (inode->i_sb != sb) + } + if (inode->i_sb != sb) { + spin_unlock(&inode->i_lock); continue; - spin_lock(&inode->i_lock); + } if (inode->i_state & (I_FREEING|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; @@ -943,42 +951,6 @@ void unlock_new_inode(struct inode *inode) EXPORT_SYMBOL(unlock_new_inode); /** - * lock_two_nondirectories - take two i_mutexes on non-directory objects - * @inode1: first inode to lock - * @inode2: second inode to lock - */ -void lock_two_nondirectories(struct inode *inode1, struct inode *inode2) -{ - WARN_ON_ONCE(S_ISDIR(inode1->i_mode)); - if (inode1 == inode2 || !inode2) { - mutex_lock(&inode1->i_mutex); - return; - } - WARN_ON_ONCE(S_ISDIR(inode2->i_mode)); - if (inode1 < inode2) { - mutex_lock(&inode1->i_mutex); - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2); - } else { - mutex_lock(&inode2->i_mutex); - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_NONDIR2); - } -} -EXPORT_SYMBOL(lock_two_nondirectories); - -/** - * unlock_two_nondirectories - release locks from lock_two_nondirectories() - * @inode1: first inode to unlock - * @inode2: second inode to unlock - */ -void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2) -{ - mutex_unlock(&inode1->i_mutex); - if (inode2 && inode2 != inode1) - mutex_unlock(&inode2->i_mutex); -} -EXPORT_SYMBOL(unlock_two_nondirectories); - -/** * iget5_locked - obtain an inode from a mounted file system * @sb: super block of file system * @hashval: hash value (usually inode number) to get @@ -1603,11 +1575,7 @@ static int __remove_suid(struct dentry *dentry, int kill) struct iattr newattrs; newattrs.ia_valid = ATTR_FORCE | kill; - /* - * Note we call this on write, so notify_change will not - * encounter any conflicting delegations: - */ - return notify_change(dentry, &newattrs, NULL); + return notify_change(dentry, &newattrs); } int file_remove_suid(struct file *file) diff --git a/fs/internal.h b/fs/internal.h index 4657424..513e0d8 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -9,6 +9,8 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/lglock.h> + struct super_block; struct file_system_type; struct linux_binprm; @@ -60,6 +62,8 @@ extern int sb_prepare_remount_readonly(struct super_block *); extern void __init mnt_init(void); +extern struct lglock vfsmount_lock; + extern int __mnt_want_write(struct vfsmount *); extern int __mnt_want_write_file(struct file *); extern void __mnt_drop_write(struct vfsmount *); @@ -73,6 +77,9 @@ extern void chroot_fs_refs(const struct path *, const struct path *); /* * file_table.c */ +extern void file_sb_list_add(struct file *f, struct super_block *sb); +extern void file_sb_list_del(struct file *f); +extern void mark_files_ro(struct super_block *); extern struct file *get_empty_filp(void); /* @@ -37,7 +37,7 @@ static long vfs_ioctl(struct file *filp, unsigned int cmd, { int error = -ENOTTY; - if (!filp->f_op->unlocked_ioctl) + if (!filp->f_op || !filp->f_op->unlocked_ioctl) goto out; error = filp->f_op->unlocked_ioctl(filp, cmd, arg); @@ -501,7 +501,7 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp, /* Did FASYNC state change ? */ if ((flag ^ filp->f_flags) & FASYNC) { - if (filp->f_op->fasync) + if (filp->f_op && filp->f_op->fasync) /* fasync() adjusts filp->f_flags */ error = filp->f_op->fasync(fd, filp, on); else diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 4a9e10e..e5d408a 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -181,7 +181,7 @@ struct iso9660_options{ * Compute the hash for the isofs name corresponding to the dentry. */ static int -isofs_hash_common(struct qstr *qstr, int ms) +isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms) { const char *name; int len; @@ -202,7 +202,7 @@ isofs_hash_common(struct qstr *qstr, int ms) * Compute the hash for the isofs name corresponding to the dentry. */ static int -isofs_hashi_common(struct qstr *qstr, int ms) +isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms) { const char *name; int len; @@ -259,13 +259,13 @@ static int isofs_dentry_cmp_common( static int isofs_hash(const struct dentry *dentry, struct qstr *qstr) { - return isofs_hash_common(qstr, 0); + return isofs_hash_common(dentry, qstr, 0); } static int isofs_hashi(const struct dentry *dentry, struct qstr *qstr) { - return isofs_hashi_common(qstr, 0); + return isofs_hashi_common(dentry, qstr, 0); } static int @@ -286,13 +286,13 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, static int isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr) { - return isofs_hash_common(qstr, 1); + return isofs_hash_common(dentry, qstr, 1); } static int isofs_hashi_ms(const struct dentry *dentry, struct qstr *qstr) { - return isofs_hashi_common(qstr, 1); + return isofs_hashi_common(dentry, qstr, 1); } static int diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index aa603e0..be0c39b 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -26,6 +26,7 @@ #include <linux/mm.h> #include <linux/highmem.h> #include <linux/hrtimer.h> +#include <linux/backing-dev.h> static void __journal_temp_unlink_buffer(struct journal_head *jh); @@ -99,11 +100,10 @@ static int start_this_handle(journal_t *journal, handle_t *handle) alloc_transaction: if (!journal->j_running_transaction) { - new_transaction = kzalloc(sizeof(*new_transaction), - GFP_NOFS|__GFP_NOFAIL); + new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS); if (!new_transaction) { - ret = -ENOMEM; - goto out; + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto alloc_transaction; } } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 09b3ed4..fe3c052 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -515,10 +515,6 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) c = JFFS2_SB_INFO(sb); - /* Do not support the MLC nand */ - if (c->mtd->type == MTD_MLCNANDFLASH) - return -EINVAL; - #ifndef CONFIG_JFFS2_FS_WRITEBUFFER if (c->mtd->type == MTD_NANDFLASH) { pr_err("Cannot operate on NAND flash unless jffs2 NAND support is compiled in\n"); @@ -10,7 +10,6 @@ #include <linux/vfs.h> #include <linux/quotaops.h> #include <linux/mutex.h> -#include <linux/namei.h> #include <linux/exportfs.h> #include <linux/writeback.h> #include <linux/buffer_head.h> /* sync_mapping_buffers */ @@ -32,7 +31,6 @@ int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9); return 0; } -EXPORT_SYMBOL(simple_getattr); int simple_statfs(struct dentry *dentry, struct kstatfs *buf) { @@ -41,22 +39,15 @@ int simple_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = NAME_MAX; return 0; } -EXPORT_SYMBOL(simple_statfs); /* * Retaining negative dentries for an in-memory filesystem just wastes * memory and lookup time: arrange for them to be deleted immediately. */ -int always_delete_dentry(const struct dentry *dentry) +static int simple_delete_dentry(const struct dentry *dentry) { return 1; } -EXPORT_SYMBOL(always_delete_dentry); - -const struct dentry_operations simple_dentry_operations = { - .d_delete = always_delete_dentry, -}; -EXPORT_SYMBOL(simple_dentry_operations); /* * Lookup the data. This is trivial - if the dentry didn't already @@ -64,6 +55,10 @@ EXPORT_SYMBOL(simple_dentry_operations); */ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { + static const struct dentry_operations simple_dentry_operations = { + .d_delete = simple_delete_dentry, + }; + if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); if (!dentry->d_sb->s_d_op) @@ -71,7 +66,6 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned d_add(dentry, NULL); return NULL; } -EXPORT_SYMBOL(simple_lookup); int dcache_dir_open(struct inode *inode, struct file *file) { @@ -81,14 +75,12 @@ int dcache_dir_open(struct inode *inode, struct file *file) return file->private_data ? 0 : -ENOMEM; } -EXPORT_SYMBOL(dcache_dir_open); int dcache_dir_close(struct inode *inode, struct file *file) { dput(file->private_data); return 0; } -EXPORT_SYMBOL(dcache_dir_close); loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) { @@ -131,7 +123,6 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) mutex_unlock(&dentry->d_inode->i_mutex); return offset; } -EXPORT_SYMBOL(dcache_dir_lseek); /* Relationship between i_mode and the DT_xxx types */ static inline unsigned char dt_type(struct inode *inode) @@ -181,13 +172,11 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) spin_unlock(&dentry->d_lock); return 0; } -EXPORT_SYMBOL(dcache_readdir); ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos) { return -EISDIR; } -EXPORT_SYMBOL(generic_read_dir); const struct file_operations simple_dir_operations = { .open = dcache_dir_open, @@ -197,12 +186,10 @@ const struct file_operations simple_dir_operations = { .iterate = dcache_readdir, .fsync = noop_fsync, }; -EXPORT_SYMBOL(simple_dir_operations); const struct inode_operations simple_dir_inode_operations = { .lookup = simple_lookup, }; -EXPORT_SYMBOL(simple_dir_inode_operations); static const struct super_operations simple_super_operations = { .statfs = simple_statfs, @@ -257,7 +244,6 @@ Enomem: deactivate_locked_super(s); return ERR_PTR(-ENOMEM); } -EXPORT_SYMBOL(mount_pseudo); int simple_open(struct inode *inode, struct file *file) { @@ -265,7 +251,6 @@ int simple_open(struct inode *inode, struct file *file) file->private_data = inode->i_private; return 0; } -EXPORT_SYMBOL(simple_open); int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { @@ -278,7 +263,6 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den d_instantiate(dentry, inode); return 0; } -EXPORT_SYMBOL(simple_link); int simple_empty(struct dentry *dentry) { @@ -299,7 +283,6 @@ out: spin_unlock(&dentry->d_lock); return ret; } -EXPORT_SYMBOL(simple_empty); int simple_unlink(struct inode *dir, struct dentry *dentry) { @@ -310,7 +293,6 @@ int simple_unlink(struct inode *dir, struct dentry *dentry) dput(dentry); return 0; } -EXPORT_SYMBOL(simple_unlink); int simple_rmdir(struct inode *dir, struct dentry *dentry) { @@ -322,7 +304,6 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry) drop_nlink(dir); return 0; } -EXPORT_SYMBOL(simple_rmdir); int simple_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) @@ -349,7 +330,6 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry, return 0; } -EXPORT_SYMBOL(simple_rename); /** * simple_setattr - setattr for simple filesystem @@ -390,7 +370,6 @@ int simple_readpage(struct file *file, struct page *page) unlock_page(page); return 0; } -EXPORT_SYMBOL(simple_readpage); int simple_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, @@ -414,7 +393,6 @@ int simple_write_begin(struct file *file, struct address_space *mapping, } return 0; } -EXPORT_SYMBOL(simple_write_begin); /** * simple_write_end - .write_end helper for non-block-device FSes @@ -466,7 +444,6 @@ int simple_write_end(struct file *file, struct address_space *mapping, return copied; } -EXPORT_SYMBOL(simple_write_end); /* * the inodes created here are not hashed. If you use iunique to generate @@ -535,7 +512,6 @@ out: dput(root); return -ENOMEM; } -EXPORT_SYMBOL(simple_fill_super); static DEFINE_SPINLOCK(pin_fs_lock); @@ -558,7 +534,6 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c mntput(mnt); return 0; } -EXPORT_SYMBOL(simple_pin_fs); void simple_release_fs(struct vfsmount **mount, int *count) { @@ -570,7 +545,6 @@ void simple_release_fs(struct vfsmount **mount, int *count) spin_unlock(&pin_fs_lock); mntput(mnt); } -EXPORT_SYMBOL(simple_release_fs); /** * simple_read_from_buffer - copy data from the buffer to user space @@ -605,7 +579,6 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, *ppos = pos + count; return count; } -EXPORT_SYMBOL(simple_read_from_buffer); /** * simple_write_to_buffer - copy data from user space to the buffer @@ -640,7 +613,6 @@ ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, *ppos = pos + count; return count; } -EXPORT_SYMBOL(simple_write_to_buffer); /** * memory_read_from_buffer - copy data from the buffer @@ -672,7 +644,6 @@ ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, return count; } -EXPORT_SYMBOL(memory_read_from_buffer); /* * Transaction based IO. @@ -694,7 +665,6 @@ void simple_transaction_set(struct file *file, size_t n) smp_mb(); ar->size = n; } -EXPORT_SYMBOL(simple_transaction_set); char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) { @@ -726,7 +696,6 @@ char *simple_transaction_get(struct file *file, const char __user *buf, size_t s return ar->data; } -EXPORT_SYMBOL(simple_transaction_get); ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) { @@ -736,14 +705,12 @@ ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size return 0; return simple_read_from_buffer(buf, size, pos, ar->data, ar->size); } -EXPORT_SYMBOL(simple_transaction_read); int simple_transaction_release(struct inode *inode, struct file *file) { free_page((unsigned long)file->private_data); return 0; } -EXPORT_SYMBOL(simple_transaction_release); /* Simple attribute files */ @@ -779,14 +746,12 @@ int simple_attr_open(struct inode *inode, struct file *file, return nonseekable_open(inode, file); } -EXPORT_SYMBOL_GPL(simple_attr_open); int simple_attr_release(struct inode *inode, struct file *file) { kfree(file->private_data); return 0; } -EXPORT_SYMBOL_GPL(simple_attr_release); /* GPL-only? This? Really? */ /* read from the buffer that is filled with the get function */ ssize_t simple_attr_read(struct file *file, char __user *buf, @@ -822,7 +787,6 @@ out: mutex_unlock(&attr->mutex); return ret; } -EXPORT_SYMBOL_GPL(simple_attr_read); /* interpret the buffer as a number to call the set function with */ ssize_t simple_attr_write(struct file *file, const char __user *buf, @@ -855,7 +819,6 @@ out: mutex_unlock(&attr->mutex); return ret; } -EXPORT_SYMBOL_GPL(simple_attr_write); /** * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation @@ -994,56 +957,39 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync) { return 0; } -EXPORT_SYMBOL(noop_fsync); - -void kfree_put_link(struct dentry *dentry, struct nameidata *nd, - void *cookie) -{ - char *s = nd_get_link(nd); - if (!IS_ERR(s)) - kfree(s); -} -EXPORT_SYMBOL(kfree_put_link); - -/* - * nop .set_page_dirty method so that people can use .page_mkwrite on - * anon inodes. - */ -static int anon_set_page_dirty(struct page *page) -{ - return 0; -}; -/* - * A single inode exists for all anon_inode files. Contrary to pipes, - * anon_inode inodes have no associated per-instance data, so we need - * only allocate one of them. - */ -struct inode *alloc_anon_inode(struct super_block *s) -{ - static const struct address_space_operations anon_aops = { - .set_page_dirty = anon_set_page_dirty, - }; - struct inode *inode = new_inode_pseudo(s); - - if (!inode) - return ERR_PTR(-ENOMEM); - - inode->i_ino = get_next_ino(); - inode->i_mapping->a_ops = &anon_aops; - - /* - * Mark the inode dirty from the very beginning, - * that way it will never be moved to the dirty - * list because mark_inode_dirty() will think - * that it already _is_ on the dirty list. - */ - inode->i_state = I_DIRTY; - inode->i_mode = S_IRUSR | S_IWUSR; - inode->i_uid = current_fsuid(); - inode->i_gid = current_fsgid(); - inode->i_flags |= S_PRIVATE; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - return inode; -} -EXPORT_SYMBOL(alloc_anon_inode); +EXPORT_SYMBOL(dcache_dir_close); +EXPORT_SYMBOL(dcache_dir_lseek); +EXPORT_SYMBOL(dcache_dir_open); +EXPORT_SYMBOL(dcache_readdir); +EXPORT_SYMBOL(generic_read_dir); +EXPORT_SYMBOL(mount_pseudo); +EXPORT_SYMBOL(simple_write_begin); +EXPORT_SYMBOL(simple_write_end); +EXPORT_SYMBOL(simple_dir_inode_operations); +EXPORT_SYMBOL(simple_dir_operations); +EXPORT_SYMBOL(simple_empty); +EXPORT_SYMBOL(simple_fill_super); +EXPORT_SYMBOL(simple_getattr); +EXPORT_SYMBOL(simple_open); +EXPORT_SYMBOL(simple_link); +EXPORT_SYMBOL(simple_lookup); +EXPORT_SYMBOL(simple_pin_fs); +EXPORT_SYMBOL(simple_readpage); +EXPORT_SYMBOL(simple_release_fs); +EXPORT_SYMBOL(simple_rename); +EXPORT_SYMBOL(simple_rmdir); +EXPORT_SYMBOL(simple_statfs); +EXPORT_SYMBOL(noop_fsync); +EXPORT_SYMBOL(simple_unlink); +EXPORT_SYMBOL(simple_read_from_buffer); +EXPORT_SYMBOL(simple_write_to_buffer); +EXPORT_SYMBOL(memory_read_from_buffer); +EXPORT_SYMBOL(simple_transaction_set); +EXPORT_SYMBOL(simple_transaction_get); +EXPORT_SYMBOL(simple_transaction_read); +EXPORT_SYMBOL(simple_transaction_release); +EXPORT_SYMBOL_GPL(simple_attr_open); +EXPORT_SYMBOL_GPL(simple_attr_release); +EXPORT_SYMBOL_GPL(simple_attr_read); +EXPORT_SYMBOL_GPL(simple_attr_write); @@ -134,7 +134,7 @@ #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) -#define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) +#define IS_LEASE(fl) (fl->fl_flags & FL_LEASE) static bool lease_breaking(struct file_lock *fl) { @@ -1292,40 +1292,28 @@ static void time_out_leases(struct inode *inode) } } -static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) -{ - if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) - return false; - return locks_conflict(breaker, lease); -} - /** * __break_lease - revoke all outstanding leases on file * @inode: the inode of the file to return - * @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR: - * break all leases - * @type: FL_LEASE: break leases and delegations; FL_DELEG: break - * only delegations + * @mode: the open mode (read or write) * * break_lease (inlined for speed) has checked there already is at least * some kind of lock (maybe a lease) on this file. Leases are broken on * a call to open() or truncate(). This function can sleep unless you * specified %O_NONBLOCK to your open(). */ -int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) +int __break_lease(struct inode *inode, unsigned int mode) { int error = 0; struct file_lock *new_fl, *flock; struct file_lock *fl; unsigned long break_time; int i_have_this_lease = 0; - bool lease_conflict = false; int want_write = (mode & O_ACCMODE) != O_RDONLY; new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); if (IS_ERR(new_fl)) return PTR_ERR(new_fl); - new_fl->fl_flags = type; spin_lock(&inode->i_lock); @@ -1335,16 +1323,13 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) if ((flock == NULL) || !IS_LEASE(flock)) goto out; - for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { - if (leases_conflict(fl, new_fl)) { - lease_conflict = true; - if (fl->fl_owner == current->files) - i_have_this_lease = 1; - } - } - if (!lease_conflict) + if (!locks_conflict(flock, new_fl)) goto out; + for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) + if (fl->fl_owner == current->files) + i_have_this_lease = 1; + break_time = 0; if (lease_break_time > 0) { break_time = jiffies + lease_break_time * HZ; @@ -1353,8 +1338,6 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) } for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { - if (!leases_conflict(fl, new_fl)) - continue; if (want_write) { if (fl->fl_flags & FL_UNLOCK_PENDING) continue; @@ -1396,7 +1379,7 @@ restart: */ for (flock = inode->i_flock; flock && IS_LEASE(flock); flock = flock->fl_next) { - if (leases_conflict(new_fl, flock)) + if (locks_conflict(new_fl, flock)) goto restart; } error = 0; @@ -1477,27 +1460,9 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp struct file_lock *fl, **before, **my_before = NULL, *lease; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; - bool is_deleg = (*flp)->fl_flags & FL_DELEG; int error; lease = *flp; - /* - * In the delegation case we need mutual exclusion with - * a number of operations that take the i_mutex. We trylock - * because delegations are an optional optimization, and if - * there's some chance of a conflict--we'd rather not - * bother, maybe that's a sign this just isn't a good file to - * hand out a delegation on. - */ - if (is_deleg && !mutex_trylock(&inode->i_mutex)) - return -EAGAIN; - - if (is_deleg && arg == F_WRLCK) { - /* Write delegations are not currently supported: */ - mutex_unlock(&inode->i_mutex); - WARN_ON_ONCE(1); - return -EINVAL; - } error = -EAGAIN; if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) @@ -1549,10 +1514,9 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp goto out; locks_insert_lock(before, lease); - error = 0; + return 0; + out: - if (is_deleg) - mutex_unlock(&inode->i_mutex); return error; } @@ -1615,7 +1579,7 @@ EXPORT_SYMBOL(generic_setlease); static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) { - if (filp->f_op->setlease) + if (filp->f_op && filp->f_op->setlease) return filp->f_op->setlease(filp, arg, lease); else return generic_setlease(filp, arg, lease); @@ -1807,7 +1771,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) if (error) goto out_free; - if (f.file->f_op->flock) + if (f.file->f_op && f.file->f_op->flock) error = f.file->f_op->flock(f.file, (can_sleep) ? F_SETLKW : F_SETLK, lock); @@ -1833,7 +1797,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) */ int vfs_test_lock(struct file *filp, struct file_lock *fl) { - if (filp->f_op->lock) + if (filp->f_op && filp->f_op->lock) return filp->f_op->lock(filp, F_GETLK, fl); posix_test_lock(filp, fl); return 0; @@ -1945,7 +1909,7 @@ out: */ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) { - if (filp->f_op->lock) + if (filp->f_op && filp->f_op->lock) return filp->f_op->lock(filp, cmd, fl); else return posix_lock_file(filp, fl, conf); @@ -2218,7 +2182,7 @@ void locks_remove_flock(struct file *filp) if (!inode->i_flock) return; - if (filp->f_op->flock) { + if (filp->f_op && filp->f_op->flock) { struct file_lock fl = { .fl_pid = current->tgid, .fl_file = filp, @@ -2282,7 +2246,7 @@ EXPORT_SYMBOL(posix_unblock_lock); */ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) { - if (filp->f_op->lock) + if (filp->f_op && filp->f_op->lock) return filp->f_op->lock(filp, F_CANCELLK, fl); return 0; } diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index 0f95f0d..550475c 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -14,10 +14,16 @@ #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) +static void request_complete(struct bio *bio, int err) +{ + complete((struct completion *)bio->bi_private); +} + static int sync_request(struct page *page, struct block_device *bdev, int rw) { struct bio bio; struct bio_vec bio_vec; + struct completion complete; bio_init(&bio); bio.bi_max_vecs = 1; @@ -29,8 +35,13 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw) bio.bi_size = PAGE_SIZE; bio.bi_bdev = bdev; bio.bi_sector = page->index * (PAGE_SIZE >> 9); + init_completion(&complete); + bio.bi_private = &complete; + bio.bi_end_io = request_complete; - return submit_bio_wait(rw, &bio); + submit_bio(rw, &bio); + wait_for_completion(&complete); + return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO; } static int bdev_readpage(void *_sb, struct page *page) diff --git a/fs/minix/Kconfig b/fs/minix/Kconfig index f2a0cfc..6624684 100644 --- a/fs/minix/Kconfig +++ b/fs/minix/Kconfig @@ -18,7 +18,7 @@ config MINIX_FS config MINIX_FS_NATIVE_ENDIAN def_bool MINIX_FS - depends on M32R || MICROBLAZE || MIPS || S390 || SUPERH || SPARC || XTENSA || (M68K && !MMU) + depends on H8300 || M32R || MICROBLAZE || MIPS || S390 || SUPERH || SPARC || XTENSA || (M68K && !MMU) config MINIX_FS_BIG_ENDIAN_16BIT_INDEXED def_bool MINIX_FS @@ -29,7 +29,6 @@ struct mount { struct mount *mnt_parent; struct dentry *mnt_mountpoint; struct vfsmount mnt; - struct rcu_head mnt_rcu; #ifdef CONFIG_SMP struct mnt_pcp __percpu *mnt_pcp; #else @@ -56,7 +55,7 @@ struct mount { int mnt_group_id; /* peer group identifier */ int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; - struct path mnt_ex_mountpoint; + int mnt_ghosts; }; #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ @@ -77,28 +76,13 @@ static inline int is_mounted(struct vfsmount *mnt) return !IS_ERR_OR_NULL(real_mount(mnt)); } -extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *); -extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *); - -extern bool legitimize_mnt(struct vfsmount *, unsigned); +extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int); static inline void get_mnt_ns(struct mnt_namespace *ns) { atomic_inc(&ns->count); } -extern seqlock_t mount_lock; - -static inline void lock_mount_hash(void) -{ - write_seqlock(&mount_lock); -} - -static inline void unlock_mount_hash(void) -{ - write_sequnlock(&mount_lock); -} - struct proc_mounts { struct seq_file m; struct mnt_namespace *ns; @@ -482,6 +482,18 @@ EXPORT_SYMBOL(path_put); * to restart the path walk from the beginning in ref-walk mode. */ +static inline void lock_rcu_walk(void) +{ + br_read_lock(&vfsmount_lock); + rcu_read_lock(); +} + +static inline void unlock_rcu_walk(void) +{ + rcu_read_unlock(); + br_read_unlock(&vfsmount_lock); +} + /** * unlazy_walk - try to switch to ref-walk mode. * @nd: nameidata pathwalk data @@ -500,21 +512,25 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) BUG_ON(!(nd->flags & LOOKUP_RCU)); /* - * After legitimizing the bastards, terminate_walk() - * will do the right thing for non-RCU mode, and all our - * subsequent exit cases should rcu_read_unlock() - * before returning. Do vfsmount first; if dentry - * can't be legitimized, just set nd->path.dentry to NULL - * and rely on dput(NULL) being a no-op. + * Get a reference to the parent first: we're + * going to make "path_put(nd->path)" valid in + * non-RCU context for "terminate_walk()". + * + * If this doesn't work, return immediately with + * RCU walking still active (and then we will do + * the RCU walk cleanup in terminate_walk()). */ - if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) + if (!lockref_get_not_dead(&parent->d_lockref)) return -ECHILD; - nd->flags &= ~LOOKUP_RCU; - if (!lockref_get_not_dead(&parent->d_lockref)) { - nd->path.dentry = NULL; - goto out; - } + /* + * After the mntget(), we terminate_walk() will do + * the right thing for non-RCU mode, and all our + * subsequent exit cases should unlock_rcu_walk() + * before returning. + */ + mntget(nd->path.mnt); + nd->flags &= ~LOOKUP_RCU; /* * For a negative lookup, the lookup sequence point is the parents @@ -550,17 +566,17 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) spin_unlock(&fs->lock); } - rcu_read_unlock(); + unlock_rcu_walk(); return 0; unlock_and_drop_dentry: spin_unlock(&fs->lock); drop_dentry: - rcu_read_unlock(); + unlock_rcu_walk(); dput(dentry); goto drop_root_mnt; out: - rcu_read_unlock(); + unlock_rcu_walk(); drop_root_mnt: if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; @@ -592,22 +608,17 @@ static int complete_walk(struct nameidata *nd) if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) { - rcu_read_unlock(); - return -ECHILD; - } if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) { - rcu_read_unlock(); - mntput(nd->path.mnt); + unlock_rcu_walk(); return -ECHILD; } if (read_seqcount_retry(&dentry->d_seq, nd->seq)) { - rcu_read_unlock(); + unlock_rcu_walk(); dput(dentry); - mntput(nd->path.mnt); return -ECHILD; } - rcu_read_unlock(); + mntget(nd->path.mnt); + unlock_rcu_walk(); } if (likely(!(nd->flags & LOOKUP_JUMPED))) @@ -898,15 +909,15 @@ int follow_up(struct path *path) struct mount *parent; struct dentry *mountpoint; - read_seqlock_excl(&mount_lock); + br_read_lock(&vfsmount_lock); parent = mnt->mnt_parent; if (parent == mnt) { - read_sequnlock_excl(&mount_lock); + br_read_unlock(&vfsmount_lock); return 0; } mntget(&parent->mnt); mountpoint = dget(mnt->mnt_mountpoint); - read_sequnlock_excl(&mount_lock); + br_read_unlock(&vfsmount_lock); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -1037,8 +1048,8 @@ static int follow_managed(struct path *path, unsigned flags) /* Something is mounted on this dentry in another * namespace and/or whatever was mounted there in this - * namespace got unmounted before lookup_mnt() could - * get it */ + * namespace got unmounted before we managed to get the + * vfsmount_lock */ } /* Handle an automount point */ @@ -1100,7 +1111,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, if (!d_mountpoint(path->dentry)) break; - mounted = __lookup_mnt(path->mnt, path->dentry); + mounted = __lookup_mnt(path->mnt, path->dentry, 1); if (!mounted) break; path->mnt = &mounted->mnt; @@ -1121,7 +1132,7 @@ static void follow_mount_rcu(struct nameidata *nd) { while (d_mountpoint(nd->path.dentry)) { struct mount *mounted; - mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); + mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1); if (!mounted) break; nd->path.mnt = &mounted->mnt; @@ -1163,7 +1174,7 @@ failed: nd->flags &= ~LOOKUP_RCU; if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - rcu_read_unlock(); + unlock_rcu_walk(); return -ECHILD; } @@ -1297,8 +1308,8 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, } /* - * Call i_op->lookup on the dentry. The dentry must be negative and - * unhashed. + * Call i_op->lookup on the dentry. The dentry must be negative but may be + * hashed if it was pouplated with DCACHE_NEED_LOOKUP. * * dir->d_inode->i_mutex must be held */ @@ -1490,7 +1501,7 @@ static void terminate_walk(struct nameidata *nd) nd->flags &= ~LOOKUP_RCU; if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - rcu_read_unlock(); + unlock_rcu_walk(); } } @@ -1500,9 +1511,18 @@ static void terminate_walk(struct nameidata *nd) * so we keep a cache of "no, this doesn't need follow_link" * for the common case. */ -static inline int should_follow_link(struct dentry *dentry, int follow) +static inline int should_follow_link(struct inode *inode, int follow) { - return unlikely(d_is_symlink(dentry)) ? follow : 0; + if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) { + if (likely(inode->i_op->follow_link)) + return follow; + + /* This gets set once for the inode lifetime */ + spin_lock(&inode->i_lock); + inode->i_opflags |= IOP_NOFOLLOW; + spin_unlock(&inode->i_lock); + } + return 0; } static inline int walk_component(struct nameidata *nd, struct path *path, @@ -1532,7 +1552,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path, if (!inode) goto out_path_put; - if (should_follow_link(path->dentry, follow)) { + if (should_follow_link(inode, follow)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { err = -ECHILD; @@ -1591,6 +1611,26 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd) } /* + * We really don't want to look at inode->i_op->lookup + * when we don't have to. So we keep a cache bit in + * the inode ->i_opflags field that says "yes, we can + * do lookup on this inode". + */ +static inline int can_lookup(struct inode *inode) +{ + if (likely(inode->i_opflags & IOP_LOOKUP)) + return 1; + if (likely(!inode->i_op->lookup)) + return 0; + + /* We do this once for the lifetime of the inode */ + spin_lock(&inode->i_lock); + inode->i_opflags |= IOP_LOOKUP; + spin_unlock(&inode->i_lock); + return 1; +} + +/* * We can do the critical dentry name comparison and hashing * operations one word at a time, but we are limited to: * @@ -1793,7 +1833,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (err) return err; } - if (!d_is_directory(nd->path.dentry)) { + if (!can_lookup(nd->inode)) { err = -ENOTDIR; break; } @@ -1811,10 +1851,9 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->flags = flags | LOOKUP_JUMPED; nd->depth = 0; if (flags & LOOKUP_ROOT) { - struct dentry *root = nd->root.dentry; - struct inode *inode = root->d_inode; + struct inode *inode = nd->root.dentry->d_inode; if (*name) { - if (!d_is_directory(root)) + if (!can_lookup(inode)) return -ENOTDIR; retval = inode_permission(inode, MAY_EXEC); if (retval) @@ -1823,9 +1862,8 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { - rcu_read_lock(); + lock_rcu_walk(); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - nd->m_seq = read_seqbegin(&mount_lock); } else { path_get(&nd->path); } @@ -1834,10 +1872,9 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->root.mnt = NULL; - nd->m_seq = read_seqbegin(&mount_lock); if (*name=='/') { if (flags & LOOKUP_RCU) { - rcu_read_lock(); + lock_rcu_walk(); set_root_rcu(nd); } else { set_root(nd); @@ -1849,7 +1886,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct fs_struct *fs = current->fs; unsigned seq; - rcu_read_lock(); + lock_rcu_walk(); do { seq = read_seqcount_begin(&fs->seq); @@ -1870,7 +1907,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, dentry = f.file->f_path.dentry; if (*name) { - if (!d_is_directory(dentry)) { + if (!can_lookup(dentry->d_inode)) { fdput(f); return -ENOTDIR; } @@ -1881,7 +1918,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (f.need_put) *fp = f.file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - rcu_read_lock(); + lock_rcu_walk(); } else { path_get(&nd->path); fdput(f); @@ -1952,7 +1989,7 @@ static int path_lookupat(int dfd, const char *name, err = complete_walk(nd); if (!err && nd->flags & LOOKUP_DIRECTORY) { - if (!d_is_directory(nd->path.dentry)) { + if (!can_lookup(nd->inode)) { path_put(&nd->path); err = -ENOTDIR; } @@ -2244,7 +2281,7 @@ done: } path->dentry = dentry; path->mnt = mntget(nd->path.mnt); - if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW)) + if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW)) return 1; follow_mount(path); error = 0; @@ -2389,14 +2426,12 @@ static inline int check_sticky(struct inode *dir, struct inode *inode) * 10. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). */ -static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) +static int may_delete(struct inode *dir,struct dentry *victim,int isdir) { - struct inode *inode = victim->d_inode; int error; - if (d_is_negative(victim)) + if (!victim->d_inode) return -ENOENT; - BUG_ON(!inode); BUG_ON(victim->d_parent->d_inode != dir); audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); @@ -2406,16 +2441,15 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) return error; if (IS_APPEND(dir)) return -EPERM; - - if (check_sticky(dir, inode) || IS_APPEND(inode) || - IS_IMMUTABLE(inode) || IS_SWAPFILE(inode)) + if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| + IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) return -EPERM; if (isdir) { - if (!d_is_directory(victim) && !d_is_autodir(victim)) + if (!S_ISDIR(victim->d_inode->i_mode)) return -ENOTDIR; if (IS_ROOT(victim)) return -EBUSY; - } else if (d_is_directory(victim) || d_is_autodir(victim)) + } else if (S_ISDIR(victim->d_inode->i_mode)) return -EISDIR; if (IS_DEADDIR(dir)) return -ENOENT; @@ -2434,7 +2468,6 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) */ static inline int may_create(struct inode *dir, struct dentry *child) { - audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE); if (child->d_inode) return -EEXIST; if (IS_DEADDIR(dir)) @@ -2950,7 +2983,7 @@ retry_lookup: /* * create/update audit record if it already exists. */ - if (d_is_positive(path->dentry)) + if (path->dentry->d_inode) audit_inode(name, path->dentry, 0); /* @@ -2979,12 +3012,12 @@ retry_lookup: finish_lookup: /* we _can_ be in RCU mode here */ error = -ENOENT; - if (d_is_negative(path->dentry)) { + if (!inode) { path_to_nameidata(path, nd); goto out; } - if (should_follow_link(path->dentry, !symlink_ok)) { + if (should_follow_link(inode, !symlink_ok)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { error = -ECHILD; @@ -3013,11 +3046,10 @@ finish_open: } audit_inode(name, nd->path.dentry, 0); error = -EISDIR; - if ((open_flag & O_CREAT) && - (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry))) + if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) goto out; error = -ENOTDIR; - if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry)) + if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode)) goto out; if (!S_ISREG(nd->inode->i_mode)) will_truncate = false; @@ -3243,7 +3275,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, nd.root.mnt = mnt; nd.root.dentry = dentry; - if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN) + if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN) return ERR_PTR(-ELOOP); file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU); @@ -3293,9 +3325,8 @@ struct dentry *kern_path_create(int dfd, const char *pathname, goto unlock; error = -EEXIST; - if (d_is_positive(dentry)) + if (dentry->d_inode) goto fail; - /* * Special case - lookup gave negative, but... we had foo/bar/ * From the vfs_mknod() POV we just have a negative dentry - @@ -3616,27 +3647,8 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname) return do_rmdir(AT_FDCWD, pathname); } -/** - * vfs_unlink - unlink a filesystem object - * @dir: parent directory - * @dentry: victim - * @delegated_inode: returns victim inode, if the inode is delegated. - * - * The caller must hold dir->i_mutex. - * - * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and - * return a reference to the inode in delegated_inode. The caller - * should then break the delegation on that inode and retry. Because - * breaking a delegation may take a long time, the caller should drop - * dir->i_mutex before doing so. - * - * Alternatively, a caller may pass NULL for delegated_inode. This may - * be appropriate for callers that expect the underlying filesystem not - * to be NFS exported. - */ -int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) +int vfs_unlink(struct inode *dir, struct dentry *dentry) { - struct inode *target = dentry->d_inode; int error = may_delete(dir, dentry, 0); if (error) @@ -3645,26 +3657,22 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate if (!dir->i_op->unlink) return -EPERM; - mutex_lock(&target->i_mutex); + mutex_lock(&dentry->d_inode->i_mutex); if (d_mountpoint(dentry)) error = -EBUSY; else { error = security_inode_unlink(dir, dentry); if (!error) { - error = try_break_deleg(target, delegated_inode); - if (error) - goto out; error = dir->i_op->unlink(dir, dentry); if (!error) dont_mount(dentry); } } -out: - mutex_unlock(&target->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); /* We don't d_delete() NFS sillyrenamed files--they still exist. */ if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { - fsnotify_link_count(target); + fsnotify_link_count(dentry->d_inode); d_delete(dentry); } @@ -3684,7 +3692,6 @@ static long do_unlinkat(int dfd, const char __user *pathname) struct dentry *dentry; struct nameidata nd; struct inode *inode = NULL; - struct inode *delegated_inode = NULL; unsigned int lookup_flags = 0; retry: name = user_path_parent(dfd, pathname, &nd, lookup_flags); @@ -3699,7 +3706,7 @@ retry: error = mnt_want_write(nd.path.mnt); if (error) goto exit1; -retry_deleg: + mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); @@ -3708,25 +3715,19 @@ retry_deleg: if (nd.last.name[nd.last.len]) goto slashes; inode = dentry->d_inode; - if (d_is_negative(dentry)) + if (!inode) goto slashes; ihold(inode); error = security_path_unlink(&nd.path, dentry); if (error) goto exit2; - error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode); + error = vfs_unlink(nd.path.dentry->d_inode, dentry); exit2: dput(dentry); } mutex_unlock(&nd.path.dentry->d_inode->i_mutex); if (inode) iput(inode); /* truncate the inode here */ - inode = NULL; - if (delegated_inode) { - error = break_deleg_wait(&delegated_inode); - if (!error) - goto retry_deleg; - } mnt_drop_write(nd.path.mnt); exit1: path_put(&nd.path); @@ -3739,12 +3740,8 @@ exit1: return error; slashes: - if (d_is_negative(dentry)) - error = -ENOENT; - else if (d_is_directory(dentry) || d_is_autodir(dentry)) - error = -EISDIR; - else - error = -ENOTDIR; + error = !dentry->d_inode ? -ENOENT : + S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; goto exit2; } @@ -3820,26 +3817,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn return sys_symlinkat(oldname, AT_FDCWD, newname); } -/** - * vfs_link - create a new link - * @old_dentry: object to be linked - * @dir: new parent - * @new_dentry: where to create the new link - * @delegated_inode: returns inode needing a delegation break - * - * The caller must hold dir->i_mutex - * - * If vfs_link discovers a delegation on the to-be-linked file in need - * of breaking, it will return -EWOULDBLOCK and return a reference to the - * inode in delegated_inode. The caller should then break the delegation - * and retry. Because breaking a delegation may take a long time, the - * caller should drop the i_mutex before doing so. - * - * Alternatively, a caller may pass NULL for delegated_inode. This may - * be appropriate for callers that expect the underlying filesystem not - * to be NFS exported. - */ -int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) +int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) { struct inode *inode = old_dentry->d_inode; unsigned max_links = dir->i_sb->s_max_links; @@ -3875,11 +3853,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de error = -ENOENT; else if (max_links && inode->i_nlink >= max_links) error = -EMLINK; - else { - error = try_break_deleg(inode, delegated_inode); - if (!error) - error = dir->i_op->link(old_dentry, dir, new_dentry); - } + else + error = dir->i_op->link(old_dentry, dir, new_dentry); if (!error && (inode->i_state & I_LINKABLE)) { spin_lock(&inode->i_lock); @@ -3906,7 +3881,6 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, { struct dentry *new_dentry; struct path old_path, new_path; - struct inode *delegated_inode = NULL; int how = 0; int error; @@ -3945,14 +3919,9 @@ retry: error = security_path_link(old_path.dentry, &new_path, new_dentry); if (error) goto out_dput; - error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode); + error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry); out_dput: done_path_create(&new_path, new_dentry); - if (delegated_inode) { - error = break_deleg_wait(&delegated_inode); - if (!error) - goto retry; - } if (retry_estale(error, how)) { how |= LOOKUP_REVAL; goto retry; @@ -3977,8 +3946,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * That's where 4.4 screws up. Current fix: serialization on * sb->s_vfs_rename_mutex. We might be more accurate, but that's another * story. - * c) we have to lock _four_ objects - parents and victim (if it exists), - * and source (if it is not a directory). + * c) we have to lock _three_ objects - parents and victim (if it exists). * And that - after we got ->i_mutex on parents (until then we don't know * whether the target exists). Solution: try to be smart with locking * order for inodes. We rely on the fact that tree topology may change @@ -4051,11 +4019,9 @@ out: } static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - struct inode **delegated_inode) + struct inode *new_dir, struct dentry *new_dentry) { struct inode *target = new_dentry->d_inode; - struct inode *source = old_dentry->d_inode; int error; error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); @@ -4063,20 +4029,13 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, return error; dget(new_dentry); - lock_two_nondirectories(source, target); + if (target) + mutex_lock(&target->i_mutex); error = -EBUSY; if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) goto out; - error = try_break_deleg(source, delegated_inode); - if (error) - goto out; - if (target) { - error = try_break_deleg(target, delegated_inode); - if (error) - goto out; - } error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); if (error) goto out; @@ -4086,38 +4045,17 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) d_move(old_dentry, new_dentry); out: - unlock_two_nondirectories(source, target); + if (target) + mutex_unlock(&target->i_mutex); dput(new_dentry); return error; } -/** - * vfs_rename - rename a filesystem object - * @old_dir: parent of source - * @old_dentry: source - * @new_dir: parent of destination - * @new_dentry: destination - * @delegated_inode: returns an inode needing a delegation break - * - * The caller must hold multiple mutexes--see lock_rename()). - * - * If vfs_rename discovers a delegation in need of breaking at either - * the source or destination, it will return -EWOULDBLOCK and return a - * reference to the inode in delegated_inode. The caller should then - * break the delegation and retry. Because breaking a delegation may - * take a long time, the caller should drop all locks before doing - * so. - * - * Alternatively, a caller may pass NULL for delegated_inode. This may - * be appropriate for callers that expect the underlying filesystem not - * to be NFS exported. - */ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - struct inode **delegated_inode) + struct inode *new_dir, struct dentry *new_dentry) { int error; - int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry); + int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); const unsigned char *old_name; if (old_dentry->d_inode == new_dentry->d_inode) @@ -4142,7 +4080,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (is_dir) error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); else - error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode); + error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); if (!error) fsnotify_move(old_dir, new_dir, old_name, is_dir, new_dentry->d_inode, old_dentry); @@ -4158,7 +4096,6 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, struct dentry *old_dentry, *new_dentry; struct dentry *trap; struct nameidata oldnd, newnd; - struct inode *delegated_inode = NULL; struct filename *from; struct filename *to; unsigned int lookup_flags = 0; @@ -4198,7 +4135,6 @@ retry: newnd.flags &= ~LOOKUP_PARENT; newnd.flags |= LOOKUP_RENAME_TARGET; -retry_deleg: trap = lock_rename(new_dir, old_dir); old_dentry = lookup_hash(&oldnd); @@ -4207,10 +4143,10 @@ retry_deleg: goto exit3; /* source must exist */ error = -ENOENT; - if (d_is_negative(old_dentry)) + if (!old_dentry->d_inode) goto exit4; /* unless the source is a directory trailing slashes give -ENOTDIR */ - if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) { + if (!S_ISDIR(old_dentry->d_inode->i_mode)) { error = -ENOTDIR; if (oldnd.last.name[oldnd.last.len]) goto exit4; @@ -4235,19 +4171,13 @@ retry_deleg: if (error) goto exit5; error = vfs_rename(old_dir->d_inode, old_dentry, - new_dir->d_inode, new_dentry, - &delegated_inode); + new_dir->d_inode, new_dentry); exit5: dput(new_dentry); exit4: dput(old_dentry); exit3: unlock_rename(new_dir, old_dir); - if (delegated_inode) { - error = break_deleg_wait(&delegated_inode); - if (!error) - goto retry_deleg; - } mnt_drop_write(oldnd.path.mnt); exit2: if (retry_estale(error, lookup_flags)) diff --git a/fs/namespace.c b/fs/namespace.c index ac2ce8a..da5c494 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -39,7 +39,7 @@ static int mnt_group_start = 1; static struct list_head *mount_hashtable __read_mostly; static struct list_head *mountpoint_hashtable __read_mostly; static struct kmem_cache *mnt_cache __read_mostly; -static DECLARE_RWSEM(namespace_sem); +static struct rw_semaphore namespace_sem; /* /sys/fs */ struct kobject *fs_kobj; @@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(fs_kobj); * It should be taken for write in all cases where the vfsmount * tree or hash is modified or when a vfsmount structure is modified. */ -__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); +DEFINE_BRLOCK(vfsmount_lock); static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { @@ -63,6 +63,8 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) return tmp & (HASH_SIZE - 1); } +#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) + /* * allocation is serialized by namespace_sem, but we need the spinlock to * serialize with freeing. @@ -456,7 +458,7 @@ static int mnt_make_readonly(struct mount *mnt) { int ret = 0; - lock_mount_hash(); + br_write_lock(&vfsmount_lock); mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. This store @@ -490,15 +492,15 @@ static int mnt_make_readonly(struct mount *mnt) */ smp_wmb(); mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); return ret; } static void __mnt_unmake_readonly(struct mount *mnt) { - lock_mount_hash(); + br_write_lock(&vfsmount_lock); mnt->mnt.mnt_flags &= ~MNT_READONLY; - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); } int sb_prepare_remount_readonly(struct super_block *sb) @@ -510,7 +512,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) if (atomic_long_read(&sb->s_remove_count)) return -EBUSY; - lock_mount_hash(); + br_write_lock(&vfsmount_lock); list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; @@ -532,7 +534,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; } - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); return err; } @@ -547,56 +549,30 @@ static void free_vfsmnt(struct mount *mnt) kmem_cache_free(mnt_cache, mnt); } -/* call under rcu_read_lock */ -bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) -{ - struct mount *mnt; - if (read_seqretry(&mount_lock, seq)) - return false; - if (bastard == NULL) - return true; - mnt = real_mount(bastard); - mnt_add_count(mnt, 1); - if (likely(!read_seqretry(&mount_lock, seq))) - return true; - if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { - mnt_add_count(mnt, -1); - return false; - } - rcu_read_unlock(); - mntput(bastard); - rcu_read_lock(); - return false; -} - /* - * find the first mount at @dentry on vfsmount @mnt. - * call under rcu_read_lock() + * find the first or last mount at @dentry on vfsmount @mnt depending on + * @dir. If @dir is set return the first mount else return the last mount. + * vfsmount_lock must be held for read or write. */ -struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) +struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, + int dir) { struct list_head *head = mount_hashtable + hash(mnt, dentry); - struct mount *p; - - list_for_each_entry_rcu(p, head, mnt_hash) - if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) - return p; - return NULL; -} + struct list_head *tmp = head; + struct mount *p, *found = NULL; -/* - * find the last mount at @dentry on vfsmount @mnt. - * mount_lock must be held. - */ -struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) -{ - struct list_head *head = mount_hashtable + hash(mnt, dentry); - struct mount *p; - - list_for_each_entry_reverse(p, head, mnt_hash) - if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) - return p; - return NULL; + for (;;) { + tmp = dir ? tmp->next : tmp->prev; + p = NULL; + if (tmp == head) + break; + p = list_entry(tmp, struct mount, mnt_hash); + if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) { + found = p; + break; + } + } + return found; } /* @@ -618,17 +594,17 @@ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) struct vfsmount *lookup_mnt(struct path *path) { struct mount *child_mnt; - struct vfsmount *m; - unsigned seq; - rcu_read_lock(); - do { - seq = read_seqbegin(&mount_lock); - child_mnt = __lookup_mnt(path->mnt, path->dentry); - m = child_mnt ? &child_mnt->mnt : NULL; - } while (!legitimize_mnt(m, seq)); - rcu_read_unlock(); - return m; + br_read_lock(&vfsmount_lock); + child_mnt = __lookup_mnt(path->mnt, path->dentry, 1); + if (child_mnt) { + mnt_add_count(child_mnt, 1); + br_read_unlock(&vfsmount_lock); + return &child_mnt->mnt; + } else { + br_read_unlock(&vfsmount_lock); + return NULL; + } } static struct mountpoint *new_mountpoint(struct dentry *dentry) @@ -820,9 +796,9 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void mnt->mnt.mnt_sb = root->d_sb; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - lock_mount_hash(); + br_write_lock(&vfsmount_lock); list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); return &mnt->mnt; } EXPORT_SYMBOL_GPL(vfs_kern_mount); @@ -863,9 +839,9 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, mnt->mnt.mnt_root = dget(root); mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - lock_mount_hash(); + br_write_lock(&vfsmount_lock); list_add_tail(&mnt->mnt_instance, &sb->s_mounts); - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); if ((flag & CL_SLAVE) || ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) { @@ -896,66 +872,64 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, return ERR_PTR(err); } -static void delayed_free(struct rcu_head *head) +static inline void mntfree(struct mount *mnt) { - struct mount *mnt = container_of(head, struct mount, mnt_rcu); - kfree(mnt->mnt_devname); -#ifdef CONFIG_SMP - free_percpu(mnt->mnt_pcp); -#endif - kmem_cache_free(mnt_cache, mnt); + struct vfsmount *m = &mnt->mnt; + struct super_block *sb = m->mnt_sb; + + /* + * This probably indicates that somebody messed + * up a mnt_want/drop_write() pair. If this + * happens, the filesystem was probably unable + * to make r/w->r/o transitions. + */ + /* + * The locking used to deal with mnt_count decrement provides barriers, + * so mnt_get_writers() below is safe. + */ + WARN_ON(mnt_get_writers(mnt)); + fsnotify_vfsmount_delete(m); + dput(m->mnt_root); + free_vfsmnt(mnt); + deactivate_super(sb); } static void mntput_no_expire(struct mount *mnt) { put_again: - rcu_read_lock(); - mnt_add_count(mnt, -1); - if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */ - rcu_read_unlock(); +#ifdef CONFIG_SMP + br_read_lock(&vfsmount_lock); + if (likely(mnt->mnt_ns)) { + /* shouldn't be the last one */ + mnt_add_count(mnt, -1); + br_read_unlock(&vfsmount_lock); return; } - lock_mount_hash(); + br_read_unlock(&vfsmount_lock); + + br_write_lock(&vfsmount_lock); + mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { - rcu_read_unlock(); - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); return; } +#else + mnt_add_count(mnt, -1); + if (likely(mnt_get_count(mnt))) + return; + br_write_lock(&vfsmount_lock); +#endif if (unlikely(mnt->mnt_pinned)) { mnt_add_count(mnt, mnt->mnt_pinned + 1); mnt->mnt_pinned = 0; - rcu_read_unlock(); - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); acct_auto_close_mnt(&mnt->mnt); goto put_again; } - if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { - rcu_read_unlock(); - unlock_mount_hash(); - return; - } - mnt->mnt.mnt_flags |= MNT_DOOMED; - rcu_read_unlock(); list_del(&mnt->mnt_instance); - unlock_mount_hash(); - - /* - * This probably indicates that somebody messed - * up a mnt_want/drop_write() pair. If this - * happens, the filesystem was probably unable - * to make r/w->r/o transitions. - */ - /* - * The locking used to deal with mnt_count decrement provides barriers, - * so mnt_get_writers() below is safe. - */ - WARN_ON(mnt_get_writers(mnt)); - fsnotify_vfsmount_delete(&mnt->mnt); - dput(mnt->mnt.mnt_root); - deactivate_super(mnt->mnt.mnt_sb); - mnt_free_id(mnt); - call_rcu(&mnt->mnt_rcu, delayed_free); + br_write_unlock(&vfsmount_lock); + mntfree(mnt); } void mntput(struct vfsmount *mnt) @@ -980,21 +954,21 @@ EXPORT_SYMBOL(mntget); void mnt_pin(struct vfsmount *mnt) { - lock_mount_hash(); + br_write_lock(&vfsmount_lock); real_mount(mnt)->mnt_pinned++; - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); } EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *m) { struct mount *mnt = real_mount(m); - lock_mount_hash(); + br_write_lock(&vfsmount_lock); if (mnt->mnt_pinned) { mnt_add_count(mnt, 1); mnt->mnt_pinned--; } - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); } EXPORT_SYMBOL(mnt_unpin); @@ -1111,12 +1085,12 @@ int may_umount_tree(struct vfsmount *m) BUG_ON(!m); /* write lock needed for mnt_get_count */ - lock_mount_hash(); + br_write_lock(&vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += mnt_get_count(p); minimum_refs += 2; } - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -1143,10 +1117,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - lock_mount_hash(); + br_write_lock(&vfsmount_lock); if (propagate_mount_busy(real_mount(mnt), 2)) ret = 0; - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1168,13 +1142,23 @@ static void namespace_unlock(void) list_splice_init(&unmounted, &head); up_write(&namespace_sem); - synchronize_rcu(); - while (!list_empty(&head)) { mnt = list_first_entry(&head, struct mount, mnt_hash); list_del_init(&mnt->mnt_hash); - if (mnt->mnt_ex_mountpoint.mnt) - path_put(&mnt->mnt_ex_mountpoint); + if (mnt_has_parent(mnt)) { + struct dentry *dentry; + struct mount *m; + + br_write_lock(&vfsmount_lock); + dentry = mnt->mnt_mountpoint; + m = mnt->mnt_parent; + mnt->mnt_mountpoint = mnt->mnt.mnt_root; + mnt->mnt_parent = mnt; + m->mnt_ghosts--; + br_write_unlock(&vfsmount_lock); + dput(dentry); + mntput(&m->mnt); + } mntput(&mnt->mnt); } } @@ -1185,13 +1169,10 @@ static inline void namespace_lock(void) } /* - * mount_lock must be held + * vfsmount lock must be held for write * namespace_sem must be held for write - * how = 0 => just this tree, don't propagate - * how = 1 => propagate; we know that nobody else has reference to any victims - * how = 2 => lazy umount */ -void umount_tree(struct mount *mnt, int how) +void umount_tree(struct mount *mnt, int propagate) { LIST_HEAD(tmp_list); struct mount *p; @@ -1199,7 +1180,7 @@ void umount_tree(struct mount *mnt, int how) for (p = mnt; p; p = next_mnt(p, mnt)) list_move(&p->mnt_hash, &tmp_list); - if (how) + if (propagate) propagate_umount(&tmp_list); list_for_each_entry(p, &tmp_list, mnt_hash) { @@ -1207,16 +1188,10 @@ void umount_tree(struct mount *mnt, int how) list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; - if (how < 2) - p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; list_del_init(&p->mnt_child); if (mnt_has_parent(p)) { + p->mnt_parent->mnt_ghosts++; put_mountpoint(p->mnt_mp); - /* move the reference to mountpoint into ->mnt_ex_mountpoint */ - p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint; - p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt; - p->mnt_mountpoint = p->mnt.mnt_root; - p->mnt_parent = p; p->mnt_mp = NULL; } change_mnt_propagation(p, MS_PRIVATE); @@ -1250,12 +1225,12 @@ static int do_umount(struct mount *mnt, int flags) * probably don't strictly need the lock here if we examined * all race cases, but it's a slowpath. */ - lock_mount_hash(); + br_write_lock(&vfsmount_lock); if (mnt_get_count(mnt) != 2) { - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); return -EBUSY; } - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1297,23 +1272,19 @@ static int do_umount(struct mount *mnt, int flags) } namespace_lock(); - lock_mount_hash(); + br_write_lock(&vfsmount_lock); event++; - if (flags & MNT_DETACH) { + if (!(flags & MNT_DETACH)) + shrink_submounts(mnt); + + retval = -EBUSY; + if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { if (!list_empty(&mnt->mnt_list)) - umount_tree(mnt, 2); + umount_tree(mnt, 1); retval = 0; - } else { - shrink_submounts(mnt); - retval = -EBUSY; - if (!propagate_mount_busy(mnt, 2)) { - if (!list_empty(&mnt->mnt_list)) - umount_tree(mnt, 1); - retval = 0; - } } - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); namespace_unlock(); return retval; } @@ -1456,18 +1427,18 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, q = clone_mnt(p, p->mnt.mnt_root, flag); if (IS_ERR(q)) goto out; - lock_mount_hash(); + br_write_lock(&vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, parent, p->mnt_mp); - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); } } return res; out: if (res) { - lock_mount_hash(); + br_write_lock(&vfsmount_lock); umount_tree(res, 0); - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); } return q; } @@ -1489,9 +1460,9 @@ struct vfsmount *collect_mounts(struct path *path) void drop_collected_mounts(struct vfsmount *mnt) { namespace_lock(); - lock_mount_hash(); + br_write_lock(&vfsmount_lock); umount_tree(real_mount(mnt), 0); - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); namespace_unlock(); } @@ -1618,7 +1589,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, if (err) goto out_cleanup_ids; - lock_mount_hash(); + br_write_lock(&vfsmount_lock); if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = next_mnt(p, source_mnt)) @@ -1637,7 +1608,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, list_del_init(&child->mnt_hash); commit_tree(child); } - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); return 0; @@ -1739,10 +1710,10 @@ static int do_change_type(struct path *path, int flag) goto out_unlock; } - lock_mount_hash(); + br_write_lock(&vfsmount_lock); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); out_unlock: namespace_unlock(); @@ -1814,9 +1785,9 @@ static int do_loopback(struct path *path, const char *old_name, err = graft_tree(mnt, parent, mp); if (err) { - lock_mount_hash(); + br_write_lock(&vfsmount_lock); umount_tree(mnt, 0); - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); } out2: unlock_mount(mp); @@ -1875,13 +1846,17 @@ static int do_remount(struct path *path, int flags, int mnt_flags, else err = do_remount_sb(sb, flags, data, 0); if (!err) { - lock_mount_hash(); + br_write_lock(&vfsmount_lock); mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; mnt->mnt.mnt_flags = mnt_flags; - touch_mnt_namespace(mnt->mnt_ns); - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); } up_write(&sb->s_umount); + if (!err) { + br_write_lock(&vfsmount_lock); + touch_mnt_namespace(mnt->mnt_ns); + br_write_unlock(&vfsmount_lock); + } return err; } @@ -1997,7 +1972,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) struct mount *parent; int err; - mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT); + mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); mp = lock_mount(path); if (IS_ERR(mp)) @@ -2102,7 +2077,9 @@ fail: /* remove m from any expiration list it may be on */ if (!list_empty(&mnt->mnt_expire)) { namespace_lock(); + br_write_lock(&vfsmount_lock); list_del_init(&mnt->mnt_expire); + br_write_unlock(&vfsmount_lock); namespace_unlock(); } mntput(m); @@ -2118,9 +2095,11 @@ fail: void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) { namespace_lock(); + br_write_lock(&vfsmount_lock); list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); + br_write_unlock(&vfsmount_lock); namespace_unlock(); } EXPORT_SYMBOL(mnt_set_expiry); @@ -2139,7 +2118,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) return; namespace_lock(); - lock_mount_hash(); + br_write_lock(&vfsmount_lock); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -2158,7 +2137,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1); } - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); namespace_unlock(); } @@ -2214,7 +2193,7 @@ resume: * process a list of expirable mountpoints with the intent of discarding any * submounts of a specific parent mountpoint * - * mount_lock must be held for write + * vfsmount_lock must be held for write */ static void shrink_submounts(struct mount *mnt) { @@ -2435,25 +2414,20 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) return new_ns; } -struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, - struct user_namespace *user_ns, struct fs_struct *new_fs) +/* + * Allocate a new namespace structure and populate it with contents + * copied from the namespace of the passed in task structure. + */ +static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, + struct user_namespace *user_ns, struct fs_struct *fs) { struct mnt_namespace *new_ns; struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; struct mount *p, *q; - struct mount *old; + struct mount *old = mnt_ns->root; struct mount *new; int copy_flags; - BUG_ON(!ns); - - if (likely(!(flags & CLONE_NEWNS))) { - get_mnt_ns(ns); - return ns; - } - - old = ns->root; - new_ns = alloc_mnt_ns(user_ns); if (IS_ERR(new_ns)) return new_ns; @@ -2461,7 +2435,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, namespace_lock(); /* First pass: copy the tree topology */ copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; - if (user_ns != ns->user_ns) + if (user_ns != mnt_ns->user_ns) copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; new = copy_tree(old, old->mnt.mnt_root, copy_flags); if (IS_ERR(new)) { @@ -2470,7 +2444,9 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, return ERR_CAST(new); } new_ns->root = new; + br_write_lock(&vfsmount_lock); list_add_tail(&new_ns->list, &new->mnt_list); + br_write_unlock(&vfsmount_lock); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -2481,13 +2457,13 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, q = new; while (p) { q->mnt_ns = new_ns; - if (new_fs) { - if (&p->mnt == new_fs->root.mnt) { - new_fs->root.mnt = mntget(&q->mnt); + if (fs) { + if (&p->mnt == fs->root.mnt) { + fs->root.mnt = mntget(&q->mnt); rootmnt = &p->mnt; } - if (&p->mnt == new_fs->pwd.mnt) { - new_fs->pwd.mnt = mntget(&q->mnt); + if (&p->mnt == fs->pwd.mnt) { + fs->pwd.mnt = mntget(&q->mnt); pwdmnt = &p->mnt; } } @@ -2508,6 +2484,23 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, return new_ns; } +struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, + struct user_namespace *user_ns, struct fs_struct *new_fs) +{ + struct mnt_namespace *new_ns; + + BUG_ON(!ns); + get_mnt_ns(ns); + + if (!(flags & CLONE_NEWNS)) + return ns; + + new_ns = dup_mnt_ns(ns, user_ns, new_fs); + + put_mnt_ns(ns); + return new_ns; +} + /** * create_mnt_ns - creates a private namespace and adds a root filesystem * @mnt: pointer to the new root filesystem mountpoint @@ -2600,7 +2593,7 @@ out_type: /* * Return true if path is reachable from root * - * namespace_sem or mount_lock is held + * namespace_sem or vfsmount_lock is held */ bool is_path_reachable(struct mount *mnt, struct dentry *dentry, const struct path *root) @@ -2615,9 +2608,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry, int path_is_under(struct path *path1, struct path *path2) { int res; - read_seqlock_excl(&mount_lock); + br_read_lock(&vfsmount_lock); res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); - read_sequnlock_excl(&mount_lock); + br_read_unlock(&vfsmount_lock); return res; } EXPORT_SYMBOL(path_is_under); @@ -2708,7 +2701,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, if (!is_path_reachable(old_mnt, old.dentry, &new)) goto out4; root_mp->m_count++; /* pin it so it won't go away */ - lock_mount_hash(); + br_write_lock(&vfsmount_lock); detach_mnt(new_mnt, &parent_path); detach_mnt(root_mnt, &root_parent); if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { @@ -2720,7 +2713,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* mount new_root on / */ attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp); touch_mnt_namespace(current->nsproxy->mnt_ns); - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); chroot_fs_refs(&root, &new); put_mountpoint(root_mp); error = 0; @@ -2774,6 +2767,8 @@ void __init mnt_init(void) unsigned u; int err; + init_rwsem(&namespace_sem); + mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); @@ -2790,6 +2785,8 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mountpoint_hashtable[u]); + br_lock_init(&vfsmount_lock); + err = sysfs_init(); if (err) printk(KERN_WARNING "%s: sysfs_init error: %d\n", @@ -2805,7 +2802,11 @@ void put_mnt_ns(struct mnt_namespace *ns) { if (!atomic_dec_and_test(&ns->count)) return; - drop_collected_mounts(&ns->root->mnt); + namespace_lock(); + br_write_lock(&vfsmount_lock); + umount_tree(ns->root, 0); + br_write_unlock(&vfsmount_lock); + namespace_unlock(); free_mnt_ns(ns); } @@ -2828,8 +2829,9 @@ void kern_unmount(struct vfsmount *mnt) { /* release long term mount so mount point can be released */ if (!IS_ERR_OR_NULL(mnt)) { + br_write_lock(&vfsmount_lock); real_mount(mnt)->mnt_ns = NULL; - synchronize_rcu(); /* yecchhh... */ + br_write_unlock(&vfsmount_lock); mntput(mnt); } } @@ -2873,7 +2875,7 @@ bool fs_fully_visible(struct file_system_type *type) if (unlikely(!ns)) return false; - down_read(&namespace_sem); + namespace_lock(); list_for_each_entry(mnt, &ns->list, mnt_list) { struct mount *child; if (mnt->mnt.mnt_sb->s_type != type) @@ -2894,7 +2896,7 @@ bool fs_fully_visible(struct file_system_type *type) next: ; } found: - up_read(&namespace_sem); + namespace_unlock(); return visible; } diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index c320ac5..3be0474 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -339,8 +339,9 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags) if (val) goto finished; - DDPRINTK("ncp_lookup_validate: %pd2 not valid, age=%ld, server lookup\n", - dentry, NCP_GET_AGE(dentry)); + DDPRINTK("ncp_lookup_validate: %s/%s not valid, age=%ld, server lookup\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + NCP_GET_AGE(dentry)); len = sizeof(__name); if (ncp_is_server_root(dir)) { @@ -358,8 +359,8 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags) res = ncp_obtain_info(server, dir, __name, &(finfo.i)); } finfo.volume = finfo.i.volNumber; - DDPRINTK("ncp_lookup_validate: looked for %pd/%s, res=%d\n", - dentry->d_parent, __name, res); + DDPRINTK("ncp_lookup_validate: looked for %s/%s, res=%d\n", + dentry->d_parent->d_name.name, __name, res); /* * If we didn't find it, or if it has a different dirEntNum to * what we remember, it's not valid any more. @@ -453,7 +454,8 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx) ctl.page = NULL; ctl.cache = NULL; - DDPRINTK("ncp_readdir: reading %pD2, pos=%d\n", file, + DDPRINTK("ncp_readdir: reading %s/%s, pos=%d\n", + dentry->d_parent->d_name.name, dentry->d_name.name, (int) ctx->pos); result = -EIO; @@ -738,10 +740,12 @@ ncp_do_readdir(struct file *file, struct dir_context *ctx, int more; size_t bufsize; - DPRINTK("ncp_do_readdir: %pD2, fpos=%ld\n", file, + DPRINTK("ncp_do_readdir: %s/%s, fpos=%ld\n", + dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) ctx->pos); - PPRINTK("ncp_do_readdir: init %pD, volnum=%d, dirent=%u\n", - file, NCP_FINFO(dir)->volNumber, NCP_FINFO(dir)->dirEntNum); + PPRINTK("ncp_do_readdir: init %s, volnum=%d, dirent=%u\n", + dentry->d_name.name, NCP_FINFO(dir)->volNumber, + NCP_FINFO(dir)->dirEntNum); err = ncp_initialize_search(server, dir, &seq); if (err) { @@ -846,7 +850,8 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig if (!ncp_conn_valid(server)) goto finished; - PPRINTK("ncp_lookup: server lookup for %pd2\n", dentry); + PPRINTK("ncp_lookup: server lookup for %s/%s\n", + dentry->d_parent->d_name.name, dentry->d_name.name); len = sizeof(__name); if (ncp_is_server_root(dir)) { @@ -862,7 +867,8 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsig if (!res) res = ncp_obtain_info(server, dir, __name, &(finfo.i)); } - PPRINTK("ncp_lookup: looked for %pd2, res=%d\n", dentry, res); + PPRINTK("ncp_lookup: looked for %s/%s, res=%d\n", + dentry->d_parent->d_name.name, __name, res); /* * If we didn't find an entry, make a negative dentry. */ @@ -909,7 +915,8 @@ out: return error; out_close: - PPRINTK("ncp_instantiate: %pd2 failed, closing file\n", dentry); + PPRINTK("ncp_instantiate: %s/%s failed, closing file\n", + dentry->d_parent->d_name.name, dentry->d_name.name); ncp_close_file(NCP_SERVER(dir), finfo->file_handle); goto out; } @@ -923,7 +930,8 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode, int opmode; __u8 __name[NCP_MAXPATHLEN + 1]; - PPRINTK("ncp_create_new: creating %pd2, mode=%hx\n", dentry, mode); + PPRINTK("ncp_create_new: creating %s/%s, mode=%hx\n", + dentry->d_parent->d_name.name, dentry->d_name.name, mode); ncp_age_dentry(server, dentry); len = sizeof(__name); @@ -952,7 +960,8 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode, error = -ENAMETOOLONG; else if (result < 0) error = result; - DPRINTK("ncp_create: %pd2 failed\n", dentry); + DPRINTK("ncp_create: %s/%s failed\n", + dentry->d_parent->d_name.name, dentry->d_name.name); goto out; } opmode = O_WRONLY; @@ -985,7 +994,8 @@ static int ncp_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) int error, len; __u8 __name[NCP_MAXPATHLEN + 1]; - DPRINTK("ncp_mkdir: making %pd2\n", dentry); + DPRINTK("ncp_mkdir: making %s/%s\n", + dentry->d_parent->d_name.name, dentry->d_name.name); ncp_age_dentry(server, dentry); len = sizeof(__name); @@ -1022,7 +1032,8 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry) int error, result, len; __u8 __name[NCP_MAXPATHLEN + 1]; - DPRINTK("ncp_rmdir: removing %pd2\n", dentry); + DPRINTK("ncp_rmdir: removing %s/%s\n", + dentry->d_parent->d_name.name, dentry->d_name.name); len = sizeof(__name); error = ncp_io2vol(server, __name, &len, dentry->d_name.name, @@ -1067,7 +1078,8 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry) int error; server = NCP_SERVER(dir); - DPRINTK("ncp_unlink: unlinking %pd2\n", dentry); + DPRINTK("ncp_unlink: unlinking %s/%s\n", + dentry->d_parent->d_name.name, dentry->d_name.name); /* * Check whether to close the file ... @@ -1087,7 +1099,8 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry) #endif switch (error) { case 0x00: - DPRINTK("ncp: removed %pd2\n", dentry); + DPRINTK("ncp: removed %s/%s\n", + dentry->d_parent->d_name.name, dentry->d_name.name); break; case 0x85: case 0x8A: @@ -1120,7 +1133,9 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, int old_len, new_len; __u8 __old_name[NCP_MAXPATHLEN + 1], __new_name[NCP_MAXPATHLEN + 1]; - DPRINTK("ncp_rename: %pd2 to %pd2\n", old_dentry, new_dentry); + DPRINTK("ncp_rename: %s/%s to %s/%s\n", + old_dentry->d_parent->d_name.name, old_dentry->d_name.name, + new_dentry->d_parent->d_name.name, new_dentry->d_name.name); ncp_age_dentry(server, old_dentry); ncp_age_dentry(server, new_dentry); @@ -1150,8 +1165,8 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, #endif switch (error) { case 0x00: - DPRINTK("ncp renamed %pd -> %pd.\n", - old_dentry, new_dentry); + DPRINTK("ncp renamed %s -> %s.\n", + old_dentry->d_name.name,new_dentry->d_name.name); break; case 0x9E: error = -ENAMETOOLONG; diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 8f5074e..122e260 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -107,7 +107,8 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) void* freepage; size_t freelen; - DPRINTK("ncp_file_read: enter %pd2\n", dentry); + DPRINTK("ncp_file_read: enter %s/%s\n", + dentry->d_parent->d_name.name, dentry->d_name.name); pos = *ppos; @@ -165,7 +166,8 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) file_accessed(file); - DPRINTK("ncp_file_read: exit %pd2\n", dentry); + DPRINTK("ncp_file_read: exit %s/%s\n", + dentry->d_parent->d_name.name, dentry->d_name.name); outrel: ncp_inode_close(inode); return already_read ? already_read : error; @@ -182,7 +184,8 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * int errno; void* bouncebuffer; - DPRINTK("ncp_file_write: enter %pd2\n", dentry); + DPRINTK("ncp_file_write: enter %s/%s\n", + dentry->d_parent->d_name.name, dentry->d_name.name); if ((ssize_t) count < 0) return -EINVAL; pos = *ppos; @@ -261,7 +264,8 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * i_size_write(inode, pos); mutex_unlock(&inode->i_mutex); } - DPRINTK("ncp_file_write: exit %pd2\n", dentry); + DPRINTK("ncp_file_write: exit %s/%s\n", + dentry->d_parent->d_name.name, dentry->d_name.name); outrel: ncp_inode_close(inode); return already_written ? already_written : errno; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 2cf2ebe..4659da6 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -782,17 +782,6 @@ out: return error; } -static void delayed_free(struct rcu_head *p) -{ - struct ncp_server *server = container_of(p, struct ncp_server, rcu); -#ifdef CONFIG_NCPFS_NLS - /* unload the NLS charsets */ - unload_nls(server->nls_vol); - unload_nls(server->nls_io); -#endif /* CONFIG_NCPFS_NLS */ - kfree(server); -} - static void ncp_put_super(struct super_block *sb) { struct ncp_server *server = NCP_SBP(sb); @@ -803,6 +792,11 @@ static void ncp_put_super(struct super_block *sb) ncp_stop_tasks(server); +#ifdef CONFIG_NCPFS_NLS + /* unload the NLS charsets */ + unload_nls(server->nls_vol); + unload_nls(server->nls_io); +#endif /* CONFIG_NCPFS_NLS */ mutex_destroy(&server->rcv.creq_mutex); mutex_destroy(&server->root_setup_lock); mutex_destroy(&server->mutex); @@ -819,7 +813,8 @@ static void ncp_put_super(struct super_block *sb) vfree(server->rxbuf); vfree(server->txbuf); vfree(server->packet); - call_rcu(&server->rcu, delayed_free); + sb->s_fs_info = NULL; + kfree(server); } static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf) diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h index b81e97a..c51b2c5 100644 --- a/fs/ncpfs/ncp_fs_sb.h +++ b/fs/ncpfs/ncp_fs_sb.h @@ -38,7 +38,7 @@ struct ncp_mount_data_kernel { }; struct ncp_server { - struct rcu_head rcu; + struct ncp_mount_data_kernel m; /* Nearly all of the mount data is of interest for us later, so we store it completely. */ diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 3dece03..b5e80b0 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -116,17 +116,17 @@ config NFS_V4_2 config PNFS_FILE_LAYOUT tristate depends on NFS_V4_1 - default NFS_V4 + default m config PNFS_BLOCK tristate depends on NFS_V4_1 && BLK_DEV_DM - default NFS_V4 + default m config PNFS_OBJLAYOUT tristate depends on NFS_V4_1 && SCSI_OSD_ULD - default NFS_V4 + default m config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN string "NFSv4.1 Implementation ID Domain" @@ -140,17 +140,6 @@ config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN If the NFS client is unchanged from the upstream kernel, this option should be set to the default "kernel.org". -config NFS_V4_1_MIGRATION - bool "NFSv4.1 client support for migration" - depends on NFS_V4_1 - default n - help - This option makes the NFS client advertise to NFSv4.1 servers that - it can support NFSv4 migration. - - The NFSv4.1 pieces of the Linux NFSv4 migration implementation are - still experimental. If you are not an NFSv4 developer, say N here. - config NFS_V4_SECURITY_LABEL bool depends on NFS_V4_2 && SECURITY diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 9838fb0..8485978 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -36,7 +36,6 @@ #include <linux/nfs_fs.h> #include <linux/sunrpc/rpc_pipe_fs.h> -#include "../nfs4_fs.h" #include "../pnfs.h" #include "../netns.h" diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 4d01614..9c3e117 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c @@ -44,7 +44,7 @@ static inline sector_t normalize(sector_t s, int base) { sector_t tmp = s; /* Since do_div modifies its argument */ - return s - sector_div(tmp, base); + return s - do_div(tmp, base); } static inline sector_t normalize_up(sector_t s, int base) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 073b4cf..67cd732 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -164,7 +164,8 @@ nfs41_callback_up(struct svc_serv *serv) svc_xprt_put(serv->sv_bc_xprt); serv->sv_bc_xprt = NULL; } - dprintk("--> %s return %d\n", __func__, PTR_ERR_OR_ZERO(rqstp)); + dprintk("--> %s return %ld\n", __func__, + IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0); return rqstp; } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 1d09289..2dceee4 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -590,8 +590,6 @@ int nfs_create_rpc_client(struct nfs_client *clp, if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_DISCRTRY; - if (test_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags)) - args.flags |= RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT; if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags)) @@ -786,10 +784,8 @@ static int nfs_init_server(struct nfs_server *server, goto error; server->port = data->nfs_server.port; - server->auth_info = data->auth_info; - error = nfs_init_server_rpcclient(server, &timeparms, - data->selected_flavor); + error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); if (error < 0) goto error; @@ -930,7 +926,6 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour target->acdirmax = source->acdirmax; target->caps = source->caps; target->options = source->options; - target->auth_info = source->auth_info; } EXPORT_SYMBOL_GPL(nfs_server_copy_userdata); @@ -948,7 +943,7 @@ void nfs_server_insert_lists(struct nfs_server *server) } EXPORT_SYMBOL_GPL(nfs_server_insert_lists); -void nfs_server_remove_lists(struct nfs_server *server) +static void nfs_server_remove_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; struct nfs_net *nn; @@ -965,7 +960,6 @@ void nfs_server_remove_lists(struct nfs_server *server) synchronize_rcu(); } -EXPORT_SYMBOL_GPL(nfs_server_remove_lists); /* * Allocate and initialise a server record diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 812154a..02b0df7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -98,7 +98,9 @@ nfs_opendir(struct inode *inode, struct file *filp) struct nfs_open_dir_context *ctx; struct rpc_cred *cred; - dfprintk(FILE, "NFS: open dir(%pD2)\n", filp); + dfprintk(FILE, "NFS: open dir(%s/%s)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name); nfs_inc_stats(inode, NFSIOS_VFSOPEN); @@ -295,10 +297,11 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des if (ctx->duped > 0 && ctx->dup_cookie == *desc->dir_cookie) { if (printk_ratelimit()) { - pr_notice("NFS: directory %pD2 contains a readdir loop." + pr_notice("NFS: directory %s/%s contains a readdir loop." "Please contact your server vendor. " "The file: %s has duplicate cookie %llu\n", - desc->file, + desc->file->f_dentry->d_parent->d_name.name, + desc->file->f_dentry->d_name.name, array->array[i].string.name, *desc->dir_cookie); } @@ -819,8 +822,9 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) struct nfs_open_dir_context *dir_ctx = file->private_data; int res = 0; - dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", - file, (long long)ctx->pos); + dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (long long)ctx->pos); nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); /* @@ -876,17 +880,22 @@ out: nfs_unblock_sillyrename(dentry); if (res > 0) res = 0; - dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res); + dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + res); return res; } static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) { - struct inode *inode = file_inode(filp); + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; struct nfs_open_dir_context *dir_ctx = filp->private_data; - dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n", - filp, offset, whence); + dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name, + offset, whence); mutex_lock(&inode->i_mutex); switch (whence) { @@ -916,12 +925,15 @@ out: static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end, int datasync) { - struct inode *inode = file_inode(filp); + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; - dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync); + dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + datasync); mutex_lock(&inode->i_mutex); - nfs_inc_stats(inode, NFSIOS_VFSFSYNC); + nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC); mutex_unlock(&inode->i_mutex); return 0; } @@ -1061,8 +1073,9 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) } if (is_bad_inode(inode)) { - dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", - __func__, dentry); + dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n", + __func__, dentry->d_parent->d_name.name, + dentry->d_name.name); goto out_bad; } @@ -1112,8 +1125,9 @@ out_set_verifier: nfs_advise_use_readdirplus(dir); out_valid_noent: dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", - __func__, dentry); + dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", + __func__, dentry->d_parent->d_name.name, + dentry->d_name.name); return 1; out_zap_parent: nfs_zap_caches(dir); @@ -1125,13 +1139,7 @@ out_zap_parent: if (inode && S_ISDIR(inode->i_mode)) { /* Purge readdir caches. */ nfs_zap_caches(inode); - /* - * We can't d_drop the root of a disconnected tree: - * its d_hash is on the s_anon list and d_drop() would hide - * it from shrink_dcache_for_unmount(), leading to busy - * inodes on unmount and further oopses. - */ - if (IS_ROOT(dentry)) + if (dentry->d_flags & DCACHE_DISCONNECTED) goto out_valid; } /* If we have submounts, don't unhash ! */ @@ -1139,16 +1147,18 @@ out_zap_parent: goto out_valid; dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", - __func__, dentry); + dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", + __func__, dentry->d_parent->d_name.name, + dentry->d_name.name); return 0; out_error: nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); nfs4_label_free(label); dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n", - __func__, dentry, error); + dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n", + __func__, dentry->d_parent->d_name.name, + dentry->d_name.name, error); return error; } @@ -1172,14 +1182,16 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) * eventually need to do something more here. */ if (!inode) { - dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n", - __func__, dentry); + dfprintk(LOOKUPCACHE, "%s: %s/%s has negative inode\n", + __func__, dentry->d_parent->d_name.name, + dentry->d_name.name); return 1; } if (is_bad_inode(inode)) { - dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", - __func__, dentry); + dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n", + __func__, dentry->d_parent->d_name.name, + dentry->d_name.name); return 0; } @@ -1194,8 +1206,9 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) */ static int nfs_dentry_delete(const struct dentry *dentry) { - dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n", - dentry, dentry->d_flags); + dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + dentry->d_flags); /* Unhash any dentry with a stale inode */ if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode)) @@ -1273,7 +1286,8 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in struct nfs4_label *label = NULL; int error; - dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry); + dfprintk(VFS, "NFS: lookup(%s/%s)\n", + dentry->d_parent->d_name.name, dentry->d_name.name); nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); res = ERR_PTR(-ENAMETOOLONG); @@ -1367,7 +1381,7 @@ static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, i static int do_open(struct inode *inode, struct file *filp) { - nfs_fscache_open_file(inode, filp); + nfs_fscache_set_inode_cookie(inode, filp); return 0; } @@ -1404,8 +1418,8 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, /* Expect a negative dentry */ BUG_ON(dentry->d_inode); - dfprintk(VFS, "NFS: atomic_open(%s/%ld), %pd\n", - dir->i_sb->s_id, dir->i_ino, dentry); + dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); err = nfs_check_flags(open_flags); if (err) @@ -1594,8 +1608,8 @@ int nfs_create(struct inode *dir, struct dentry *dentry, int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT; int error; - dfprintk(VFS, "NFS: create(%s/%ld), %pd\n", - dir->i_sb->s_id, dir->i_ino, dentry); + dfprintk(VFS, "NFS: create(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; @@ -1621,8 +1635,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) struct iattr attr; int status; - dfprintk(VFS, "NFS: mknod(%s/%ld), %pd\n", - dir->i_sb->s_id, dir->i_ino, dentry); + dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); if (!new_valid_dev(rdev)) return -EINVAL; @@ -1650,8 +1664,8 @@ int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct iattr attr; int error; - dfprintk(VFS, "NFS: mkdir(%s/%ld), %pd\n", - dir->i_sb->s_id, dir->i_ino, dentry); + dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); attr.ia_valid = ATTR_MODE; attr.ia_mode = mode | S_IFDIR; @@ -1678,8 +1692,8 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) { int error; - dfprintk(VFS, "NFS: rmdir(%s/%ld), %pd\n", - dir->i_sb->s_id, dir->i_ino, dentry); + dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n", + dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); trace_nfs_rmdir_enter(dir, dentry); if (dentry->d_inode) { @@ -1714,7 +1728,8 @@ static int nfs_safe_remove(struct dentry *dentry) struct inode *inode = dentry->d_inode; int error = -EBUSY; - dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry); + dfprintk(VFS, "NFS: safe_remove(%s/%s)\n", + dentry->d_parent->d_name.name, dentry->d_name.name); /* If the dentry was sillyrenamed, we simply call d_delete() */ if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { @@ -1747,8 +1762,8 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) int error; int need_rehash = 0; - dfprintk(VFS, "NFS: unlink(%s/%ld, %pd)\n", dir->i_sb->s_id, - dir->i_ino, dentry); + dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, + dir->i_ino, dentry->d_name.name); trace_nfs_unlink_enter(dir, dentry); spin_lock(&dentry->d_lock); @@ -1798,8 +1813,8 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) unsigned int pathlen = strlen(symname); int error; - dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s)\n", dir->i_sb->s_id, - dir->i_ino, dentry, symname); + dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id, + dir->i_ino, dentry->d_name.name, symname); if (pathlen > PAGE_SIZE) return -ENAMETOOLONG; @@ -1821,9 +1836,9 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); trace_nfs_symlink_exit(dir, dentry, error); if (error != 0) { - dfprintk(VFS, "NFS: symlink(%s/%ld, %pd, %s) error %d\n", + dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", dir->i_sb->s_id, dir->i_ino, - dentry, symname, error); + dentry->d_name.name, symname, error); d_drop(dentry); __free_page(page); return error; @@ -1850,8 +1865,9 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) struct inode *inode = old_dentry->d_inode; int error; - dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n", - old_dentry, dentry); + dfprintk(VFS, "NFS: link(%s/%s -> %s/%s)\n", + old_dentry->d_parent->d_name.name, old_dentry->d_name.name, + dentry->d_parent->d_name.name, dentry->d_name.name); trace_nfs_link_enter(inode, dir, dentry); NFS_PROTO(inode)->return_delegation(inode); @@ -1899,8 +1915,9 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *dentry = NULL, *rehash = NULL; int error = -EBUSY; - dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n", - old_dentry, new_dentry, + dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", + old_dentry->d_parent->d_name.name, old_dentry->d_name.name, + new_dentry->d_parent->d_name.name, new_dentry->d_name.name, d_count(new_dentry)); trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index d71d66c..91ff089 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -124,8 +124,9 @@ static inline int put_dreq(struct nfs_direct_req *dreq) ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) { #ifndef CONFIG_NFS_SWAP - dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n", - iocb->ki_filp, (long long) pos, nr_segs); + dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", + iocb->ki_filp->f_path.dentry->d_name.name, + (long long) pos, nr_segs); return -EINVAL; #else @@ -908,8 +909,10 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, count = iov_length(iov, nr_segs); nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); - dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", - file, count, (long long) pos); + dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, + count, (long long) pos); retval = 0; if (!count) @@ -962,8 +965,10 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, count = iov_length(iov, nr_segs); nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); - dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", - file, count, (long long) pos); + dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, + count, (long long) pos); retval = generic_write_checks(file, &pos, &count, 0); if (retval) diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index d25f10f..fc0f95e 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -46,9 +46,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, #include <linux/sunrpc/cache.h> #include <linux/sunrpc/svcauth.h> #include <linux/sunrpc/rpc_pipe_fs.h> -#include <linux/nfs_fs.h> -#include "nfs4_fs.h" #include "dns_resolve.h" #include "cache_lib.h" #include "netns.h" diff --git a/fs/nfs/file.c b/fs/nfs/file.c index e2fcacf..1e6bfdb 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -65,7 +65,9 @@ nfs_file_open(struct inode *inode, struct file *filp) { int res; - dprintk("NFS: open file(%pD2)\n", filp); + dprintk("NFS: open file(%s/%s)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name); nfs_inc_stats(inode, NFSIOS_VFSOPEN); res = nfs_check_flags(filp->f_flags); @@ -79,7 +81,9 @@ nfs_file_open(struct inode *inode, struct file *filp) int nfs_file_release(struct inode *inode, struct file *filp) { - dprintk("NFS: release(%pD2)\n", filp); + dprintk("NFS: release(%s/%s)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return nfs_release(inode, filp); @@ -119,8 +123,10 @@ force_reval: loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence) { - dprintk("NFS: llseek file(%pD2, %lld, %d)\n", - filp, offset, whence); + dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name, + offset, whence); /* * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate @@ -144,9 +150,12 @@ EXPORT_SYMBOL_GPL(nfs_file_llseek); int nfs_file_flush(struct file *file, fl_owner_t id) { - struct inode *inode = file_inode(file); + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; - dprintk("NFS: flush(%pD2)\n", file); + dprintk("NFS: flush(%s/%s)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name); nfs_inc_stats(inode, NFSIOS_VFSFLUSH); if ((file->f_mode & FMODE_WRITE) == 0) @@ -168,14 +177,15 @@ ssize_t nfs_file_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct inode *inode = file_inode(iocb->ki_filp); + struct dentry * dentry = iocb->ki_filp->f_path.dentry; + struct inode * inode = dentry->d_inode; ssize_t result; if (iocb->ki_filp->f_flags & O_DIRECT) return nfs_file_direct_read(iocb, iov, nr_segs, pos, true); - dprintk("NFS: read(%pD2, %lu@%lu)\n", - iocb->ki_filp, + dprintk("NFS: read(%s/%s, %lu@%lu)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos); result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); @@ -193,11 +203,13 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, struct pipe_inode_info *pipe, size_t count, unsigned int flags) { - struct inode *inode = file_inode(filp); + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; ssize_t res; - dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n", - filp, (unsigned long) count, (unsigned long long) *ppos); + dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (unsigned long) count, (unsigned long long) *ppos); res = nfs_revalidate_mapping(inode, filp->f_mapping); if (!res) { @@ -212,10 +224,12 @@ EXPORT_SYMBOL_GPL(nfs_file_splice_read); int nfs_file_mmap(struct file * file, struct vm_area_struct * vma) { - struct inode *inode = file_inode(file); + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; int status; - dprintk("NFS: mmap(%pD2)\n", file); + dprintk("NFS: mmap(%s/%s)\n", + dentry->d_parent->d_name.name, dentry->d_name.name); /* Note: generic_file_mmap() returns ENOSYS on nommu systems * so we call that before revalidating the mapping @@ -244,12 +258,15 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap); int nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) { + struct dentry *dentry = file->f_path.dentry; struct nfs_open_context *ctx = nfs_file_open_context(file); - struct inode *inode = file_inode(file); + struct inode *inode = dentry->d_inode; int have_error, do_resend, status; int ret = 0; - dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync); + dprintk("NFS: fsync file(%s/%s) datasync %d\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + datasync); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); @@ -354,8 +371,10 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, struct page *page; int once_thru = 0; - dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%ld), %u@%lld)\n", - file, mapping->host->i_ino, len, (long long) pos); + dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, + mapping->host->i_ino, len, (long long) pos); start: /* @@ -395,8 +414,10 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, struct nfs_open_context *ctx = nfs_file_open_context(file); int status; - dfprintk(PAGECACHE, "NFS: write_end(%pD2(%ld), %u@%lld)\n", - file, mapping->host->i_ino, len, (long long) pos); + dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, + mapping->host->i_ino, len, (long long) pos); /* * Zero any uninitialised parts of the page, and then mark the page @@ -580,21 +601,22 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; struct file *filp = vma->vm_file; - struct inode *inode = file_inode(filp); + struct dentry *dentry = filp->f_path.dentry; unsigned pagelen; int ret = VM_FAULT_NOPAGE; struct address_space *mapping; - dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%ld), offset %lld)\n", - filp, filp->f_mapping->host->i_ino, + dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + filp->f_mapping->host->i_ino, (long long)page_offset(page)); /* make sure the cache has finished storing the page */ - nfs_fscache_wait_on_page_write(NFS_I(inode), page); + nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page); lock_page(page); mapping = page_file_mapping(page); - if (mapping != inode->i_mapping) + if (mapping != dentry->d_inode->i_mapping) goto out_unlock; wait_on_page_writeback(page); @@ -637,21 +659,22 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode) ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct file *file = iocb->ki_filp; - struct inode *inode = file_inode(file); + struct dentry * dentry = iocb->ki_filp->f_path.dentry; + struct inode * inode = dentry->d_inode; unsigned long written = 0; ssize_t result; size_t count = iov_length(iov, nr_segs); - result = nfs_key_timeout_notify(file, inode); + result = nfs_key_timeout_notify(iocb->ki_filp, inode); if (result) return result; - if (file->f_flags & O_DIRECT) + if (iocb->ki_filp->f_flags & O_DIRECT) return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); - dprintk("NFS: write(%pD2, %lu@%Ld)\n", - file, (unsigned long) count, (long long) pos); + dprintk("NFS: write(%s/%s, %lu@%Ld)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (unsigned long) count, (long long) pos); result = -EBUSY; if (IS_SWAPFILE(inode)) @@ -659,8 +682,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, /* * O_APPEND implies that we must revalidate the file length. */ - if (file->f_flags & O_APPEND) { - result = nfs_revalidate_file_size(inode, file); + if (iocb->ki_filp->f_flags & O_APPEND) { + result = nfs_revalidate_file_size(inode, iocb->ki_filp); if (result) goto out; } @@ -674,8 +697,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, written = result; /* Return error values for O_DSYNC and IS_SYNC() */ - if (result >= 0 && nfs_need_sync_write(file, inode)) { - int err = vfs_fsync(file, 0); + if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { + int err = vfs_fsync(iocb->ki_filp, 0); if (err < 0) result = err; } @@ -694,12 +717,14 @@ ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, struct file *filp, loff_t *ppos, size_t count, unsigned int flags) { - struct inode *inode = file_inode(filp); + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; unsigned long written = 0; ssize_t ret; - dprintk("NFS splice_write(%pD2, %lu@%llu)\n", - filp, (unsigned long) count, (unsigned long long) *ppos); + dprintk("NFS splice_write(%s/%s, %lu@%llu)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (unsigned long) count, (unsigned long long) *ppos); /* * The combination of splice and an O_APPEND destination is disallowed. @@ -858,8 +883,10 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) int ret = -ENOLCK; int is_local = 0; - dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n", - filp, fl->fl_type, fl->fl_flags, + dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name, + fl->fl_type, fl->fl_flags, (long long)fl->fl_start, (long long)fl->fl_end); nfs_inc_stats(inode, NFSIOS_VFSLOCK); @@ -896,8 +923,10 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) struct inode *inode = filp->f_mapping->host; int is_local = 0; - dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n", - filp, fl->fl_type, fl->fl_flags); + dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", + filp->f_path.dentry->d_parent->d_name.name, + filp->f_path.dentry->d_name.name, + fl->fl_type, fl->fl_flags); if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; @@ -931,7 +960,9 @@ EXPORT_SYMBOL_GPL(nfs_flock); */ int nfs_setlease(struct file *file, long arg, struct file_lock **fl) { - dprintk("NFS: setlease(%pD2, arg=%ld)\n", file, arg); + dprintk("NFS: setlease(%s/%s, arg=%ld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, arg); return -EINVAL; } EXPORT_SYMBOL_GPL(nfs_setlease); diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 3ef01f0..24d1d1c 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -39,7 +39,7 @@ void nfs_fscache_get_client_cookie(struct nfs_client *clp) /* create a cache index for looking up filehandles */ clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index, &nfs_fscache_server_index_def, - clp, true); + clp); dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n", clp, clp->fscache); } @@ -139,7 +139,7 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int /* create a cache index for looking up filehandles */ nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache, &nfs_fscache_super_index_def, - nfss, true); + nfss); dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n", nfss, nfss->fscache); return; @@ -178,79 +178,163 @@ void nfs_fscache_release_super_cookie(struct super_block *sb) /* * Initialise the per-inode cache cookie pointer for an NFS inode. */ -void nfs_fscache_init_inode(struct inode *inode) +void nfs_fscache_init_inode_cookie(struct inode *inode) { + NFS_I(inode)->fscache = NULL; + if (S_ISREG(inode->i_mode)) + set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); +} + +/* + * Get the per-inode cache cookie for an NFS inode. + */ +static void nfs_fscache_enable_inode_cookie(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; struct nfs_inode *nfsi = NFS_I(inode); - nfsi->fscache = NULL; - if (!S_ISREG(inode->i_mode)) + if (nfsi->fscache || !NFS_FSCACHE(inode)) return; - nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache, - &nfs_fscache_inode_object_def, - nfsi, false); + + if ((NFS_SB(sb)->options & NFS_OPTION_FSCACHE)) { + nfsi->fscache = fscache_acquire_cookie( + NFS_SB(sb)->fscache, + &nfs_fscache_inode_object_def, + nfsi); + + dfprintk(FSCACHE, "NFS: get FH cookie (0x%p/0x%p/0x%p)\n", + sb, nfsi, nfsi->fscache); + } } /* * Release a per-inode cookie. */ -void nfs_fscache_clear_inode(struct inode *inode) +void nfs_fscache_release_inode_cookie(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); - struct fscache_cookie *cookie = nfs_i_fscache(inode); - dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie); + dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", + nfsi, nfsi->fscache); - fscache_relinquish_cookie(cookie, false); + fscache_relinquish_cookie(nfsi->fscache, 0); nfsi->fscache = NULL; } -static bool nfs_fscache_can_enable(void *data) +/* + * Retire a per-inode cookie, destroying the data attached to it. + */ +void nfs_fscache_zap_inode_cookie(struct inode *inode) { - struct inode *inode = data; + struct nfs_inode *nfsi = NFS_I(inode); - return !inode_is_open_for_write(inode); + dfprintk(FSCACHE, "NFS: zapping cookie (0x%p/0x%p)\n", + nfsi, nfsi->fscache); + + fscache_relinquish_cookie(nfsi->fscache, 1); + nfsi->fscache = NULL; } /* - * Enable or disable caching for a file that is being opened as appropriate. - * The cookie is allocated when the inode is initialised, but is not enabled at - * that time. Enablement is deferred to file-open time to avoid stat() and - * access() thrashing the cache. - * - * For now, with NFS, only regular files that are open read-only will be able - * to use the cache. - * - * We enable the cache for an inode if we open it read-only and it isn't - * currently open for writing. We disable the cache if the inode is open - * write-only. - * - * The caller uses the file struct to pin i_writecount on the inode before - * calling us when a file is opened for writing, so we can make use of that. - * - * Note that this may be invoked multiple times in parallel by parallel - * nfs_open() functions. + * Turn off the cache with regard to a per-inode cookie if opened for writing, + * invalidating all the pages in the page cache relating to the associated + * inode to clear the per-page caching. + */ +static void nfs_fscache_disable_inode_cookie(struct inode *inode) +{ + clear_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); + + if (NFS_I(inode)->fscache) { + dfprintk(FSCACHE, + "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode)); + + /* Need to uncache any pages attached to this inode that + * fscache knows about before turning off the cache. + */ + fscache_uncache_all_inode_pages(NFS_I(inode)->fscache, inode); + nfs_fscache_zap_inode_cookie(inode); + } +} + +/* + * wait_on_bit() sleep function for uninterruptible waiting */ -void nfs_fscache_open_file(struct inode *inode, struct file *filp) +static int nfs_fscache_wait_bit(void *flags) +{ + schedule(); + return 0; +} + +/* + * Lock against someone else trying to also acquire or relinquish a cookie + */ +static inline void nfs_fscache_inode_lock(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); - struct fscache_cookie *cookie = nfs_i_fscache(inode); - if (!fscache_cookie_valid(cookie)) - return; + while (test_and_set_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags)) + wait_on_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK, + nfs_fscache_wait_bit, TASK_UNINTERRUPTIBLE); +} - if (inode_is_open_for_write(inode)) { - dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi); - clear_bit(NFS_INO_FSCACHE, &nfsi->flags); - fscache_disable_cookie(cookie, true); - fscache_uncache_all_inode_pages(cookie, inode); - } else { - dfprintk(FSCACHE, "NFS: nfsi 0x%p enabling cache\n", nfsi); - fscache_enable_cookie(cookie, nfs_fscache_can_enable, inode); - if (fscache_cookie_enabled(cookie)) - set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); +/* + * Unlock cookie management lock + */ +static inline void nfs_fscache_inode_unlock(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + smp_mb__before_clear_bit(); + clear_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK); +} + +/* + * Decide if we should enable or disable local caching for this inode. + * - For now, with NFS, only regular files that are open read-only will be able + * to use the cache. + * - May be invoked multiple times in parallel by parallel nfs_open() functions. + */ +void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp) +{ + if (NFS_FSCACHE(inode)) { + nfs_fscache_inode_lock(inode); + if ((filp->f_flags & O_ACCMODE) != O_RDONLY) + nfs_fscache_disable_inode_cookie(inode); + else + nfs_fscache_enable_inode_cookie(inode); + nfs_fscache_inode_unlock(inode); + } +} +EXPORT_SYMBOL_GPL(nfs_fscache_set_inode_cookie); + +/* + * Replace a per-inode cookie due to revalidation detecting a file having + * changed on the server. + */ +void nfs_fscache_reset_inode_cookie(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_server *nfss = NFS_SERVER(inode); + NFS_IFDEBUG(struct fscache_cookie *old = nfsi->fscache); + + nfs_fscache_inode_lock(inode); + if (nfsi->fscache) { + /* retire the current fscache cache and get a new one */ + fscache_relinquish_cookie(nfsi->fscache, 1); + + nfsi->fscache = fscache_acquire_cookie( + nfss->nfs_client->fscache, + &nfs_fscache_inode_object_def, + nfsi); + + dfprintk(FSCACHE, + "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n", + nfss, nfsi, old, nfsi->fscache); } + nfs_fscache_inode_unlock(inode); } -EXPORT_SYMBOL_GPL(nfs_fscache_open_file); /* * Release the caching state associated with a page, if the page isn't busy @@ -260,11 +344,12 @@ EXPORT_SYMBOL_GPL(nfs_fscache_open_file); int nfs_fscache_release_page(struct page *page, gfp_t gfp) { if (PageFsCache(page)) { - struct fscache_cookie *cookie = nfs_i_fscache(page->mapping->host); + struct nfs_inode *nfsi = NFS_I(page->mapping->host); + struct fscache_cookie *cookie = nfsi->fscache; BUG_ON(!cookie); dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", - cookie, page, NFS_I(page->mapping->host)); + cookie, page, nfsi); if (!fscache_maybe_release_page(cookie, page, gfp)) return 0; @@ -282,12 +367,13 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp) */ void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode) { - struct fscache_cookie *cookie = nfs_i_fscache(inode); + struct nfs_inode *nfsi = NFS_I(inode); + struct fscache_cookie *cookie = nfsi->fscache; BUG_ON(!cookie); dfprintk(FSCACHE, "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n", - cookie, page, NFS_I(inode)); + cookie, page, nfsi); fscache_wait_on_page_write(cookie, page); @@ -331,9 +417,9 @@ int __nfs_readpage_from_fscache(struct nfs_open_context *ctx, dfprintk(FSCACHE, "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n", - nfs_i_fscache(inode), page, page->index, page->flags, inode); + NFS_I(inode)->fscache, page, page->index, page->flags, inode); - ret = fscache_read_or_alloc_page(nfs_i_fscache(inode), + ret = fscache_read_or_alloc_page(NFS_I(inode)->fscache, page, nfs_readpage_from_fscache_complete, ctx, @@ -373,9 +459,9 @@ int __nfs_readpages_from_fscache(struct nfs_open_context *ctx, int ret; dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n", - nfs_i_fscache(inode), npages, inode); + NFS_I(inode)->fscache, npages, inode); - ret = fscache_read_or_alloc_pages(nfs_i_fscache(inode), + ret = fscache_read_or_alloc_pages(NFS_I(inode)->fscache, mapping, pages, nr_pages, nfs_readpage_from_fscache_complete, ctx, @@ -420,15 +506,15 @@ void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) dfprintk(FSCACHE, "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n", - nfs_i_fscache(inode), page, page->index, page->flags, sync); + NFS_I(inode)->fscache, page, page->index, page->flags, sync); - ret = fscache_write_page(nfs_i_fscache(inode), page, GFP_KERNEL); + ret = fscache_write_page(NFS_I(inode)->fscache, page, GFP_KERNEL); dfprintk(FSCACHE, "NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n", page, page->index, page->flags, ret); if (ret != 0) { - fscache_uncache_page(nfs_i_fscache(inode), page); + fscache_uncache_page(NFS_I(inode)->fscache, page); nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1); nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1); diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index d7fe3e7..4ecb766 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -76,9 +76,11 @@ extern void nfs_fscache_release_client_cookie(struct nfs_client *); extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int); extern void nfs_fscache_release_super_cookie(struct super_block *); -extern void nfs_fscache_init_inode(struct inode *); -extern void nfs_fscache_clear_inode(struct inode *); -extern void nfs_fscache_open_file(struct inode *, struct file *); +extern void nfs_fscache_init_inode_cookie(struct inode *); +extern void nfs_fscache_release_inode_cookie(struct inode *); +extern void nfs_fscache_zap_inode_cookie(struct inode *); +extern void nfs_fscache_set_inode_cookie(struct inode *, struct file *); +extern void nfs_fscache_reset_inode_cookie(struct inode *); extern void __nfs_fscache_invalidate_page(struct page *, struct inode *); extern int nfs_fscache_release_page(struct page *, gfp_t); @@ -185,10 +187,12 @@ static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {} static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {} -static inline void nfs_fscache_init_inode(struct inode *inode) {} -static inline void nfs_fscache_clear_inode(struct inode *inode) {} -static inline void nfs_fscache_open_file(struct inode *inode, - struct file *filp) {} +static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {} +static inline void nfs_fscache_release_inode_cookie(struct inode *inode) {} +static inline void nfs_fscache_zap_inode_cookie(struct inode *inode) {} +static inline void nfs_fscache_set_inode_cookie(struct inode *inode, + struct file *filp) {} +static inline void nfs_fscache_reset_inode_cookie(struct inode *inode) {} static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 00ad1c2..eda8879 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -122,7 +122,7 @@ void nfs_clear_inode(struct inode *inode) WARN_ON_ONCE(!list_empty(&NFS_I(inode)->open_files)); nfs_zap_acl_cache(inode); nfs_access_zap_cache(inode); - nfs_fscache_clear_inode(inode); + nfs_fscache_release_inode_cookie(inode); } EXPORT_SYMBOL_GPL(nfs_clear_inode); @@ -274,6 +274,12 @@ void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, if (label == NULL) return; + if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL) == 0) + return; + + if (NFS_SERVER(inode)->nfs_client->cl_minorversion < 2) + return; + if ((fattr->valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL) && inode->i_security) { error = security_inode_notifysecctx(inode, label->label, label->len); @@ -312,7 +318,7 @@ struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) } EXPORT_SYMBOL_GPL(nfs4_label_alloc); #else -void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, +void inline nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, struct nfs4_label *label) { } @@ -453,7 +459,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st nfsi->attrtimeo_timestamp = now; nfsi->access_cache = RB_ROOT; - nfs_fscache_init_inode(inode); + nfs_fscache_init_inode_cookie(inode); unlock_new_inode(inode); } else @@ -848,7 +854,7 @@ int nfs_open(struct inode *inode, struct file *filp) return PTR_ERR(ctx); nfs_file_set_open_context(filp, ctx); put_nfs_open_context(ctx); - nfs_fscache_open_file(inode, filp); + nfs_fscache_set_inode_cookie(inode, filp); return 0; } @@ -917,8 +923,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) if (nfsi->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); - nfs_setsecurity(inode, fattr, label); - dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1205,7 +1209,6 @@ u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh) * not on the result */ return nfs_fhandle_hash(fh); } -EXPORT_SYMBOL_GPL(_nfs_display_fhandle_hash); /* * _nfs_display_fhandle - display an NFS file handle on the console @@ -1250,7 +1253,6 @@ void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption) } } } -EXPORT_SYMBOL_GPL(_nfs_display_fhandle); #endif /** diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8b5cc04..38da8c2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -88,8 +88,8 @@ struct nfs_parsed_mount_data { unsigned int namlen; unsigned int options; unsigned int bsize; - struct nfs_auth_info auth_info; - rpc_authflavor_t selected_flavor; + unsigned int auth_flavor_len; + rpc_authflavor_t auth_flavors[1]; char *client_address; unsigned int version; unsigned int minorversion; @@ -154,7 +154,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *, rpc_authflavor_t); int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *); void nfs_server_insert_lists(struct nfs_server *); -void nfs_server_remove_lists(struct nfs_server *); void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int); int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t, rpc_authflavor_t); @@ -175,8 +174,6 @@ extern struct nfs_server *nfs4_create_server( struct nfs_subversion *); extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, struct nfs_fh *); -extern int nfs4_update_server(struct nfs_server *server, const char *hostname, - struct sockaddr *sap, size_t salen); extern void nfs_free_server(struct nfs_server *server); extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, @@ -269,21 +266,6 @@ extern const u32 nfs41_maxgetdevinfo_overhead; extern struct rpc_procinfo nfs4_procedures[]; #endif -#ifdef CONFIG_NFS_V4_SECURITY_LABEL -extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags); -static inline void nfs4_label_free(struct nfs4_label *label) -{ - if (label) { - kfree(label->label); - kfree(label); - } - return; -} -#else -static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; } -static inline void nfs4_label_free(void *label) {} -#endif /* CONFIG_NFS_V4_SECURITY_LABEL */ - /* proc.c */ void nfs_close_context(struct nfs_open_context *ctx, int is_sync); extern struct nfs_client *nfs_init_client(struct nfs_client *clp, @@ -341,7 +323,6 @@ extern struct file_system_type nfs_xdev_fs_type; extern struct file_system_type nfs4_xdev_fs_type; extern struct file_system_type nfs4_referral_fs_type; #endif -bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); void nfs_initialise_sb(struct super_block *); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index b5a0afc..348b535 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -253,8 +253,9 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, dprintk("--> nfs_do_submount()\n"); - dprintk("%s: submounting on %pd2\n", __func__, - dentry); + dprintk("%s: submounting on %s/%s\n", __func__, + dentry->d_parent->d_name.name, + dentry->d_name.name); if (page == NULL) goto out; devname = nfs_devname(dentry, page, PAGE_SIZE); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 01b6f6a..90cb10d 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -321,7 +321,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, umode_t mode = sattr->ia_mode; int status = -ENOMEM; - dprintk("NFS call create %pd\n", dentry); + dprintk("NFS call create %s\n", dentry->d_name.name); data = nfs3_alloc_createdata(); if (data == NULL) @@ -548,7 +548,7 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, if (len > NFS3_MAXPATHLEN) return -ENAMETOOLONG; - dprintk("NFS call symlink %pd\n", dentry); + dprintk("NFS call symlink %s\n", dentry->d_name.name); data = nfs3_alloc_createdata(); if (data == NULL) @@ -576,7 +576,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) umode_t mode = sattr->ia_mode; int status = -ENOMEM; - dprintk("NFS call mkdir %pd\n", dentry); + dprintk("NFS call mkdir %s\n", dentry->d_name.name); sattr->ia_mode &= ~current_umask(); @@ -695,7 +695,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, umode_t mode = sattr->ia_mode; int status = -ENOMEM; - dprintk("NFS call mknod %pd %u:%u\n", dentry, + dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, MAJOR(rdev), MINOR(rdev)); sattr->ia_mode &= ~current_umask(); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 5609edc..28842ab 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -9,14 +9,6 @@ #ifndef __LINUX_FS_NFS_NFS4_FS_H #define __LINUX_FS_NFS_NFS4_FS_H -#if defined(CONFIG_NFS_V4_2) -#define NFS4_MAX_MINOR_VERSION 2 -#elif defined(CONFIG_NFS_V4_1) -#define NFS4_MAX_MINOR_VERSION 1 -#else -#define NFS4_MAX_MINOR_VERSION 0 -#endif - #if IS_ENABLED(CONFIG_NFS_V4) #define NFS4_MAX_LOOP_ON_RECOVER (10) @@ -37,8 +29,6 @@ enum nfs4_client_state { NFS4CLNT_SERVER_SCOPE_MISMATCH, NFS4CLNT_PURGE_STATE, NFS4CLNT_BIND_CONN_TO_SESSION, - NFS4CLNT_MOVED, - NFS4CLNT_LEASE_MOVED, }; #define NFS4_RENEW_TIMEOUT 0x01 @@ -60,7 +50,6 @@ struct nfs4_minor_version_ops { const struct nfs4_state_recovery_ops *reboot_recovery_ops; const struct nfs4_state_recovery_ops *nograce_recovery_ops; const struct nfs4_state_maintenance_ops *state_renewal_ops; - const struct nfs4_mig_recovery_ops *mig_recovery_ops; }; #define NFS_SEQID_CONFIRMED 1 @@ -214,12 +203,6 @@ struct nfs4_state_maintenance_ops { int (*renew_lease)(struct nfs_client *, struct rpc_cred *); }; -struct nfs4_mig_recovery_ops { - int (*get_locations)(struct inode *, struct nfs4_fs_locations *, - struct page *, struct rpc_cred *); - int (*fsid_present)(struct inode *, struct rpc_cred *); -}; - extern const struct dentry_operations nfs4_dentry_operations; /* dir.c */ @@ -230,11 +213,10 @@ int nfs_atomic_open(struct inode *, struct dentry *, struct file *, extern struct file_system_type nfs4_fs_type; /* nfs4namespace.c */ +rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, struct nfs_fh *, struct nfs_fattr *); -int nfs4_replace_transport(struct nfs_server *server, - const struct nfs4_fs_locations *locations); /* nfs4proc.c */ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); @@ -249,9 +231,6 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *, struct nfs4_fs_locations *, struct page *); -extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *, - struct page *page, struct rpc_cred *); -extern int nfs4_proc_fsid_present(struct inode *, struct rpc_cred *); extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr *, struct nfs_fh *, struct nfs_fattr *); extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); @@ -432,8 +411,6 @@ extern int nfs4_client_recover_expired_lease(struct nfs_client *clp); extern void nfs4_schedule_state_manager(struct nfs_client *); extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); -extern int nfs4_schedule_migration_recovery(const struct nfs_server *); -extern void nfs4_schedule_lease_moved_recovery(struct nfs_client *); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs41_handle_server_scope(struct nfs_client *, struct nfs41_server_scope **); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index b4a160a..a860ab5 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -197,7 +197,6 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; clp->cl_minorversion = cl_init->minorversion; clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; - clp->cl_mig_gen = 1; return clp; error: @@ -369,7 +368,6 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, if (clp->cl_minorversion != 0) __set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags); __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); - __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags); error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_GSS_KRB5I); if (error == -EINVAL) error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX); @@ -926,7 +924,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, dprintk("Server FSID: %llx:%llx\n", (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); - nfs_display_fhandle(mntfh, "Pseudo-fs root FH"); + dprintk("Mount FH: %d\n", mntfh->size); nfs4_session_set_rwsize(server); @@ -949,8 +947,9 @@ out: * Create a version 4 volume record */ static int nfs4_init_server(struct nfs_server *server, - struct nfs_parsed_mount_data *data) + const struct nfs_parsed_mount_data *data) { + rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; struct rpc_timeout timeparms; int error; @@ -962,15 +961,9 @@ static int nfs4_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; server->options = data->options; - server->auth_info = data->auth_info; - /* Use the first specified auth flavor. If this flavor isn't - * allowed by the server, use the SECINFO path to try the - * other specified flavors */ - if (data->auth_info.flavor_len >= 1) - data->selected_flavor = data->auth_info.flavors[0]; - else - data->selected_flavor = RPC_AUTH_UNIX; + if (data->auth_flavor_len >= 1) + pseudoflavor = data->auth_flavors[0]; /* Get a client record */ error = nfs4_set_client(server, @@ -978,7 +971,7 @@ static int nfs4_init_server(struct nfs_server *server, (const struct sockaddr *)&data->nfs_server.address, data->nfs_server.addrlen, data->client_address, - data->selected_flavor, + pseudoflavor, data->nfs_server.protocol, &timeparms, data->minorversion, @@ -998,8 +991,7 @@ static int nfs4_init_server(struct nfs_server *server, server->port = data->nfs_server.port; - error = nfs_init_server_rpcclient(server, &timeparms, - data->selected_flavor); + error = nfs_init_server_rpcclient(server, &timeparms, pseudoflavor); error: /* Done */ @@ -1026,7 +1018,7 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, if (!server) return ERR_PTR(-ENOMEM); - auth_probe = mount_info->parsed->auth_info.flavor_len < 1; + auth_probe = mount_info->parsed->auth_flavor_len < 1; /* set up the general RPC client */ error = nfs4_init_server(server, mount_info->parsed); @@ -1054,7 +1046,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, { struct nfs_client *parent_client; struct nfs_server *server, *parent_server; - bool auth_probe; int error; dprintk("--> nfs4_create_referral_server()\n"); @@ -1087,9 +1078,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, if (error < 0) goto error; - auth_probe = parent_server->auth_info.flavor_len < 1; - - error = nfs4_server_common_setup(server, mntfh, auth_probe); + error = nfs4_server_common_setup(server, mntfh, + !(parent_server->flags & NFS_MOUNT_SECFLAVOUR)); if (error < 0) goto error; @@ -1101,111 +1091,3 @@ error: dprintk("<-- nfs4_create_referral_server() = error %d\n", error); return ERR_PTR(error); } - -/* - * Grab the destination's particulars, including lease expiry time. - * - * Returns zero if probe succeeded and retrieved FSID matches the FSID - * we have cached. - */ -static int nfs_probe_destination(struct nfs_server *server) -{ - struct inode *inode = server->super->s_root->d_inode; - struct nfs_fattr *fattr; - int error; - - fattr = nfs_alloc_fattr(); - if (fattr == NULL) - return -ENOMEM; - - /* Sanity: the probe won't work if the destination server - * does not recognize the migrated FH. */ - error = nfs_probe_fsinfo(server, NFS_FH(inode), fattr); - - nfs_free_fattr(fattr); - return error; -} - -/** - * nfs4_update_server - Move an nfs_server to a different nfs_client - * - * @server: represents FSID to be moved - * @hostname: new end-point's hostname - * @sap: new end-point's socket address - * @salen: size of "sap" - * - * The nfs_server must be quiescent before this function is invoked. - * Either its session is drained (NFSv4.1+), or its transport is - * plugged and drained (NFSv4.0). - * - * Returns zero on success, or a negative errno value. - */ -int nfs4_update_server(struct nfs_server *server, const char *hostname, - struct sockaddr *sap, size_t salen) -{ - struct nfs_client *clp = server->nfs_client; - struct rpc_clnt *clnt = server->client; - struct xprt_create xargs = { - .ident = clp->cl_proto, - .net = &init_net, - .dstaddr = sap, - .addrlen = salen, - .servername = hostname, - }; - char buf[INET6_ADDRSTRLEN + 1]; - struct sockaddr_storage address; - struct sockaddr *localaddr = (struct sockaddr *)&address; - int error; - - dprintk("--> %s: move FSID %llx:%llx to \"%s\")\n", __func__, - (unsigned long long)server->fsid.major, - (unsigned long long)server->fsid.minor, - hostname); - - error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout); - if (error != 0) { - dprintk("<-- %s(): rpc_switch_client_transport returned %d\n", - __func__, error); - goto out; - } - - error = rpc_localaddr(clnt, localaddr, sizeof(address)); - if (error != 0) { - dprintk("<-- %s(): rpc_localaddr returned %d\n", - __func__, error); - goto out; - } - - error = -EAFNOSUPPORT; - if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0) { - dprintk("<-- %s(): rpc_ntop returned %d\n", - __func__, error); - goto out; - } - - nfs_server_remove_lists(server); - error = nfs4_set_client(server, hostname, sap, salen, buf, - clp->cl_rpcclient->cl_auth->au_flavor, - clp->cl_proto, clnt->cl_timeout, - clp->cl_minorversion, clp->cl_net); - nfs_put_client(clp); - if (error != 0) { - nfs_server_insert_lists(server); - dprintk("<-- %s(): nfs4_set_client returned %d\n", - __func__, error); - goto out; - } - - if (server->nfs_client->cl_hostname == NULL) - server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL); - nfs_server_insert_lists(server); - - error = nfs_probe_destination(server); - if (error < 0) - goto out; - - dprintk("<-- %s() succeeded\n", __func__); - -out: - return error; -} diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 8de3407..77efaf1 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -31,7 +31,9 @@ nfs4_file_open(struct inode *inode, struct file *filp) * -EOPENSTALE. The VFS will retry the lookup/create/open. */ - dprintk("NFS: open file(%pd2)\n", dentry); + dprintk("NFS: open file(%s/%s)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name); if ((openflags & O_ACCMODE) == 3) openflags--; @@ -73,7 +75,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); nfs_file_set_open_context(filp, ctx); - nfs_fscache_open_file(inode, filp); + nfs_fscache_set_inode_cookie(inode, filp); err = 0; out_put_ctx: diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 4e7f05d..2288cd3 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -137,7 +137,6 @@ static size_t nfs_parse_server_name(char *string, size_t len, /** * nfs_find_best_sec - Find a security mechanism supported locally - * @server: NFS server struct * @flavors: List of security tuples returned by SECINFO procedure * * Return the pseudoflavor of the first security mechanism in @@ -146,8 +145,7 @@ static size_t nfs_parse_server_name(char *string, size_t len, * is searched in the order returned from the server, per RFC 3530 * recommendation. */ -static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server, - struct nfs4_secinfo_flavors *flavors) +rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) { rpc_authflavor_t pseudoflavor; struct nfs4_secinfo4 *secinfo; @@ -162,19 +160,12 @@ static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server, case RPC_AUTH_GSS: pseudoflavor = rpcauth_get_pseudoflavor(secinfo->flavor, &secinfo->flavor_info); - /* make sure pseudoflavor matches sec= mount opt */ - if (pseudoflavor != RPC_AUTH_MAXFLAVOR && - nfs_auth_info_match(&server->auth_info, - pseudoflavor)) + if (pseudoflavor != RPC_AUTH_MAXFLAVOR) return pseudoflavor; break; } } - /* if there were any sec= options then nothing matched */ - if (server->auth_info.flavor_len > 0) - return -EPERM; - return RPC_AUTH_UNIX; } @@ -196,7 +187,7 @@ static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr goto out; } - flavor = nfs_find_best_sec(NFS_SERVER(inode), flavors); + flavor = nfs_find_best_sec(flavors); out: put_page(page); @@ -292,7 +283,8 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry, if (locations == NULL || locations->nlocations <= 0) goto out; - dprintk("%s: referral at %pd2\n", __func__, dentry); + dprintk("%s: referral at %s/%s\n", __func__, + dentry->d_parent->d_name.name, dentry->d_name.name); page = (char *) __get_free_page(GFP_USER); if (!page) @@ -356,8 +348,8 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry * mnt = ERR_PTR(-ENOENT); parent = dget_parent(dentry); - dprintk("%s: getting locations for %pd2\n", - __func__, dentry); + dprintk("%s: getting locations for %s/%s\n", + __func__, parent->d_name.name, dentry->d_name.name); err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page); dput(parent); @@ -398,7 +390,7 @@ struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry, if (client->cl_auth->au_flavor != flavor) flavor = client->cl_auth->au_flavor; - else { + else if (!(server->flags & NFS_MOUNT_SECFLAVOUR)) { rpc_authflavor_t new = nfs4_negotiate_security(dir, name); if ((int)new >= 0) flavor = new; @@ -408,104 +400,3 @@ out: rpc_shutdown_client(client); return mnt; } - -/* - * Try one location from the fs_locations array. - * - * Returns zero on success, or a negative errno value. - */ -static int nfs4_try_replacing_one_location(struct nfs_server *server, - char *page, char *page2, - const struct nfs4_fs_location *location) -{ - const size_t addr_bufsize = sizeof(struct sockaddr_storage); - struct sockaddr *sap; - unsigned int s; - size_t salen; - int error; - - sap = kmalloc(addr_bufsize, GFP_KERNEL); - if (sap == NULL) - return -ENOMEM; - - error = -ENOENT; - for (s = 0; s < location->nservers; s++) { - const struct nfs4_string *buf = &location->servers[s]; - char *hostname; - - if (buf->len <= 0 || buf->len > PAGE_SIZE) - continue; - - if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len) != NULL) - continue; - - salen = nfs_parse_server_name(buf->data, buf->len, - sap, addr_bufsize, server); - if (salen == 0) - continue; - rpc_set_port(sap, NFS_PORT); - - error = -ENOMEM; - hostname = kstrndup(buf->data, buf->len, GFP_KERNEL); - if (hostname == NULL) - break; - - error = nfs4_update_server(server, hostname, sap, salen); - kfree(hostname); - if (error == 0) - break; - } - - kfree(sap); - return error; -} - -/** - * nfs4_replace_transport - set up transport to destination server - * - * @server: export being migrated - * @locations: fs_locations array - * - * Returns zero on success, or a negative errno value. - * - * The client tries all the entries in the "locations" array, in the - * order returned by the server, until one works or the end of the - * array is reached. - */ -int nfs4_replace_transport(struct nfs_server *server, - const struct nfs4_fs_locations *locations) -{ - char *page = NULL, *page2 = NULL; - int loc, error; - - error = -ENOENT; - if (locations == NULL || locations->nlocations <= 0) - goto out; - - error = -ENOMEM; - page = (char *) __get_free_page(GFP_USER); - if (!page) - goto out; - page2 = (char *) __get_free_page(GFP_USER); - if (!page2) - goto out; - - for (loc = 0; loc < locations->nlocations; loc++) { - const struct nfs4_fs_location *location = - &locations->locations[loc]; - - if (location == NULL || location->nservers <= 0 || - location->rootpath.ncomponents == 0) - continue; - - error = nfs4_try_replacing_one_location(server, page, - page2, location); - if (error == 0) - break; - } - -out: - free_page((unsigned long)page); - free_page((unsigned long)page2); - return error; -} diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 15052b8..d53d678 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -105,6 +105,9 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry, if (nfs_server_capable(dir, NFS_CAP_SECURITY_LABEL) == 0) return NULL; + if (NFS_SERVER(dir)->nfs_client->cl_minorversion < 2) + return NULL; + err = security_dentry_init_security(dentry, sattr->ia_mode, &dentry->d_name, (void **)&label->label, &label->len); if (err == 0) @@ -381,14 +384,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc case -NFS4ERR_STALE_CLIENTID: nfs4_schedule_lease_recovery(clp); goto wait_on_recovery; - case -NFS4ERR_MOVED: - ret = nfs4_schedule_migration_recovery(server); - if (ret < 0) - break; - goto wait_on_recovery; - case -NFS4ERR_LEASE_MOVED: - nfs4_schedule_lease_moved_recovery(clp); - goto wait_on_recovery; #if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: @@ -436,8 +431,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc return nfs4_map_errors(ret); wait_on_recovery: ret = nfs4_wait_clnt_recover(clp); - if (test_bit(NFS_MIG_FAILED, &server->mig_status)) - return -EIO; if (ret == 0) exception->retry = 1; return ret; @@ -1325,24 +1318,31 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) int ret; if (!data->rpc_done) { - if (data->rpc_status) { - ret = data->rpc_status; - goto err; - } - /* cached opens have already been processed */ - goto update; + ret = data->rpc_status; + goto err; } + ret = -ESTALE; + if (!(data->f_attr.valid & NFS_ATTR_FATTR_TYPE) || + !(data->f_attr.valid & NFS_ATTR_FATTR_FILEID) || + !(data->f_attr.valid & NFS_ATTR_FATTR_CHANGE)) + goto err; + + ret = -ENOMEM; + state = nfs4_get_open_state(inode, data->owner); + if (state == NULL) + goto err; + ret = nfs_refresh_inode(inode, &data->f_attr); if (ret) goto err; + nfs_setsecurity(inode, &data->f_attr, data->f_label); + if (data->o_res.delegation_type != 0) nfs4_opendata_check_deleg(data, state); -update: update_open_stateid(state, &data->o_res.stateid, NULL, data->o_arg.fmode); - atomic_inc(&state->count); return state; err: @@ -1575,12 +1575,6 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct /* Don't recall a delegation if it was lost */ nfs4_schedule_lease_recovery(server->nfs_client); return -EAGAIN; - case -NFS4ERR_MOVED: - nfs4_schedule_migration_recovery(server); - return -EAGAIN; - case -NFS4ERR_LEASE_MOVED: - nfs4_schedule_lease_moved_recovery(server->nfs_client); - return -EAGAIN; case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: @@ -2518,8 +2512,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data) calldata->roc_barrier); nfs_set_open_stateid(state, &calldata->res.stateid, 0); renew_lease(server, calldata->timestamp); + nfs4_close_clear_stateid_flags(state, + calldata->arg.fmode); break; - case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_BAD_STATEID: @@ -2527,13 +2522,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data) if (calldata->arg.fmode == 0) break; default: - if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { + if (nfs4_async_handle_error(task, server, state) == -EAGAIN) rpc_restart_call_prepare(task); - goto out_release; - } } - nfs4_close_clear_stateid_flags(state, calldata->arg.fmode); -out_release: nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, calldata->res.fattr); dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); @@ -2706,10 +2697,6 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) nfs4_close_state(ctx->state, ctx->mode); } -#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) -#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) -#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_CHANGE_SECURITY_LABEL - 1UL) - static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { struct nfs4_server_caps_arg args = { @@ -2725,25 +2712,12 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (status == 0) { - /* Sanity check the server answers */ - switch (server->nfs_client->cl_minorversion) { - case 0: - res.attr_bitmask[1] &= FATTR4_WORD1_NFS40_MASK; - res.attr_bitmask[2] = 0; - break; - case 1: - res.attr_bitmask[2] &= FATTR4_WORD2_NFS41_MASK; - break; - case 2: - res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK; - } memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| NFS_CAP_SYMLINKS|NFS_CAP_FILEID| NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER| NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME| - NFS_CAP_CTIME|NFS_CAP_MTIME| - NFS_CAP_SECURITY_LABEL); + NFS_CAP_CTIME|NFS_CAP_MTIME); if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) server->caps |= NFS_CAP_ACLS; if (res.has_links != 0) @@ -2772,12 +2746,14 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f #endif memcpy(server->attr_bitmask_nl, res.attr_bitmask, sizeof(server->attr_bitmask)); - server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + if (server->caps & NFS_CAP_SECURITY_LABEL) { + server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + res.attr_bitmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + } memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; - server->cache_consistency_bitmask[2] = 0; server->acl_bitmask = res.acl_bitmask; server->fh_expire_type = res.fh_expire_type; } @@ -2888,24 +2864,11 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, int status = -EPERM; size_t i; - if (server->auth_info.flavor_len > 0) { - /* try each flavor specified by user */ - for (i = 0; i < server->auth_info.flavor_len; i++) { - status = nfs4_lookup_root_sec(server, fhandle, info, - server->auth_info.flavors[i]); - if (status == -NFS4ERR_WRONGSEC || status == -EACCES) - continue; - break; - } - } else { - /* no flavors specified by user, try default list */ - for (i = 0; i < ARRAY_SIZE(flav_array); i++) { - status = nfs4_lookup_root_sec(server, fhandle, info, - flav_array[i]); - if (status == -NFS4ERR_WRONGSEC || status == -EACCES) - continue; - break; - } + for (i = 0; i < ARRAY_SIZE(flav_array); i++) { + status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]); + if (status == -NFS4ERR_WRONGSEC || status == -EACCES) + continue; + break; } /* @@ -2947,6 +2910,9 @@ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle, status = nfs4_lookup_root(server, fhandle, info); if (status != -NFS4ERR_WRONGSEC) break; + /* Did user force a 'sec=' mount option? */ + if (server->flags & NFS_MOUNT_SECFLAVOUR) + break; default: status = nfs4_do_find_root_sec(server, fhandle, info); } @@ -3015,16 +2981,11 @@ static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir, status = nfs4_proc_fs_locations(client, dir, name, locations, page); if (status != 0) goto out; - - /* - * If the fsid didn't change, this is a migration event, not a - * referral. Cause us to drop into the exception handler, which - * will kick off migration recovery. - */ + /* Make sure server returned a different fsid for the referral */ if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) { dprintk("%s: server did not return a different fsid for" " a referral at %s\n", __func__, name->name); - status = -NFS4ERR_MOVED; + status = -EIO; goto out; } /* Fixup attributes for the nfs_lookup() call to nfs_fhget() */ @@ -3204,6 +3165,9 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, err = -EPERM; if (client != *clnt) goto out; + /* No security negotiation if the user specified 'sec=' */ + if (NFS_SERVER(dir)->flags & NFS_MOUNT_SECFLAVOUR) + goto out; client = nfs4_create_sec_client(client, dir, name); if (IS_ERR(client)) return PTR_ERR(client); @@ -3774,8 +3738,9 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, }; int status; - dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__, - dentry, + dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __func__, + dentry->d_parent->d_name.name, + dentry->d_name.name, (unsigned long long)cookie); nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); res.pgbase = args.pgbase; @@ -4256,13 +4221,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata) unsigned long timestamp = data->timestamp; trace_nfs4_renew_async(clp, task->tk_status); - switch (task->tk_status) { - case 0: - break; - case -NFS4ERR_LEASE_MOVED: - nfs4_schedule_lease_moved_recovery(clp); - break; - default: + if (task->tk_status < 0) { /* Unless we're shutting down, schedule state recovery! */ if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0) return; @@ -4616,7 +4575,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf, struct nfs4_label label = {0, 0, buflen, buf}; u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; - struct nfs4_getattr_arg arg = { + struct nfs4_getattr_arg args = { .fh = NFS_FH(inode), .bitmask = bitmask, }; @@ -4627,14 +4586,14 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR], - .rpc_argp = &arg, + .rpc_argp = &args, .rpc_resp = &res, }; int ret; nfs_fattr_init(&fattr); - ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 0); + ret = rpc_call_sync(server->client, &msg, 0); if (ret) return ret; if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL)) @@ -4671,7 +4630,7 @@ static int _nfs4_do_set_security_label(struct inode *inode, struct iattr sattr = {0}; struct nfs_server *server = NFS_SERVER(inode); const u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; - struct nfs_setattrargs arg = { + struct nfs_setattrargs args = { .fh = NFS_FH(inode), .iap = &sattr, .server = server, @@ -4685,14 +4644,14 @@ static int _nfs4_do_set_security_label(struct inode *inode, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], - .rpc_argp = &arg, + .rpc_argp = &args, .rpc_resp = &res, }; int status; - nfs4_stateid_copy(&arg.stateid, &zero_stateid); + nfs4_stateid_copy(&args.stateid, &zero_stateid); - status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); + status = rpc_call_sync(server->client, &msg, 0); if (status) dprintk("%s failed: %d\n", __func__, status); @@ -4776,24 +4735,17 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, if (state == NULL) break; if (nfs4_schedule_stateid_recovery(server, state) < 0) - goto recovery_failed; + goto stateid_invalid; goto wait_on_recovery; case -NFS4ERR_EXPIRED: if (state != NULL) { if (nfs4_schedule_stateid_recovery(server, state) < 0) - goto recovery_failed; + goto stateid_invalid; } case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: nfs4_schedule_lease_recovery(clp); goto wait_on_recovery; - case -NFS4ERR_MOVED: - if (nfs4_schedule_migration_recovery(server) < 0) - goto recovery_failed; - goto wait_on_recovery; - case -NFS4ERR_LEASE_MOVED: - nfs4_schedule_lease_moved_recovery(clp); - goto wait_on_recovery; #if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: @@ -4805,28 +4757,29 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, dprintk("%s ERROR %d, Reset session\n", __func__, task->tk_status); nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - goto wait_on_recovery; + task->tk_status = 0; + return -EAGAIN; #endif /* CONFIG_NFS_V4_1 */ case -NFS4ERR_DELAY: nfs_inc_server_stats(server, NFSIOS_DELAY); case -NFS4ERR_GRACE: rpc_delay(task, NFS4_POLL_RETRY_MAX); + task->tk_status = 0; + return -EAGAIN; case -NFS4ERR_RETRY_UNCACHED_REP: case -NFS4ERR_OLD_STATEID: - goto restart_call; + task->tk_status = 0; + return -EAGAIN; } task->tk_status = nfs4_map_errors(task->tk_status); return 0; -recovery_failed: +stateid_invalid: task->tk_status = -EIO; return 0; wait_on_recovery: rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); - if (test_bit(NFS_MIG_FAILED, &server->mig_status)) - goto recovery_failed; -restart_call: task->tk_status = 0; return -EAGAIN; } @@ -4990,16 +4943,10 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); switch (task->tk_status) { - case 0: - renew_lease(data->res.server, data->timestamp); - break; - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - task->tk_status = 0; + case 0: + renew_lease(data->res.server, data->timestamp); break; default: if (nfs4_async_handle_error(task, data->res.server, NULL) == @@ -5159,7 +5106,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock status = 0; } request->fl_ops->fl_release_private(request); - request->fl_ops = NULL; out: return status; } @@ -5833,7 +5779,6 @@ struct nfs_release_lockowner_data { struct nfs_release_lockowner_args args; struct nfs4_sequence_args seq_args; struct nfs4_sequence_res seq_res; - unsigned long timestamp; }; static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata) @@ -5841,27 +5786,12 @@ static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata struct nfs_release_lockowner_data *data = calldata; nfs40_setup_sequence(data->server, &data->seq_args, &data->seq_res, task); - data->timestamp = jiffies; } static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata) { struct nfs_release_lockowner_data *data = calldata; - struct nfs_server *server = data->server; - nfs40_sequence_done(task, &data->seq_res); - - switch (task->tk_status) { - case 0: - renew_lease(server, data->timestamp); - break; - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_EXPIRED: - case -NFS4ERR_LEASE_MOVED: - case -NFS4ERR_DELAY: - if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) - rpc_restart_call_prepare(task); - } } static void nfs4_release_lockowner_release(void *calldata) @@ -6060,283 +5990,6 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, return err; } -/* - * This operation also signals the server that this client is - * performing migration recovery. The server can stop returning - * NFS4ERR_LEASE_MOVED to this client. A RENEW operation is - * appended to this compound to identify the client ID which is - * performing recovery. - */ -static int _nfs40_proc_get_locations(struct inode *inode, - struct nfs4_fs_locations *locations, - struct page *page, struct rpc_cred *cred) -{ - struct nfs_server *server = NFS_SERVER(inode); - struct rpc_clnt *clnt = server->client; - u32 bitmask[2] = { - [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, - }; - struct nfs4_fs_locations_arg args = { - .clientid = server->nfs_client->cl_clientid, - .fh = NFS_FH(inode), - .page = page, - .bitmask = bitmask, - .migration = 1, /* skip LOOKUP */ - .renew = 1, /* append RENEW */ - }; - struct nfs4_fs_locations_res res = { - .fs_locations = locations, - .migration = 1, - .renew = 1, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], - .rpc_argp = &args, - .rpc_resp = &res, - .rpc_cred = cred, - }; - unsigned long now = jiffies; - int status; - - nfs_fattr_init(&locations->fattr); - locations->server = server; - locations->nlocations = 0; - - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); - nfs4_set_sequence_privileged(&args.seq_args); - status = nfs4_call_sync_sequence(clnt, server, &msg, - &args.seq_args, &res.seq_res); - if (status) - return status; - - renew_lease(server, now); - return 0; -} - -#ifdef CONFIG_NFS_V4_1 - -/* - * This operation also signals the server that this client is - * performing migration recovery. The server can stop asserting - * SEQ4_STATUS_LEASE_MOVED for this client. The client ID - * performing this operation is identified in the SEQUENCE - * operation in this compound. - * - * When the client supports GETATTR(fs_locations_info), it can - * be plumbed in here. - */ -static int _nfs41_proc_get_locations(struct inode *inode, - struct nfs4_fs_locations *locations, - struct page *page, struct rpc_cred *cred) -{ - struct nfs_server *server = NFS_SERVER(inode); - struct rpc_clnt *clnt = server->client; - u32 bitmask[2] = { - [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, - }; - struct nfs4_fs_locations_arg args = { - .fh = NFS_FH(inode), - .page = page, - .bitmask = bitmask, - .migration = 1, /* skip LOOKUP */ - }; - struct nfs4_fs_locations_res res = { - .fs_locations = locations, - .migration = 1, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], - .rpc_argp = &args, - .rpc_resp = &res, - .rpc_cred = cred, - }; - int status; - - nfs_fattr_init(&locations->fattr); - locations->server = server; - locations->nlocations = 0; - - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); - nfs4_set_sequence_privileged(&args.seq_args); - status = nfs4_call_sync_sequence(clnt, server, &msg, - &args.seq_args, &res.seq_res); - if (status == NFS4_OK && - res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED) - status = -NFS4ERR_LEASE_MOVED; - return status; -} - -#endif /* CONFIG_NFS_V4_1 */ - -/** - * nfs4_proc_get_locations - discover locations for a migrated FSID - * @inode: inode on FSID that is migrating - * @locations: result of query - * @page: buffer - * @cred: credential to use for this operation - * - * Returns NFS4_OK on success, a negative NFS4ERR status code if the - * operation failed, or a negative errno if a local error occurred. - * - * On success, "locations" is filled in, but if the server has - * no locations information, NFS_ATTR_FATTR_V4_LOCATIONS is not - * asserted. - * - * -NFS4ERR_LEASE_MOVED is returned if the server still has leases - * from this client that require migration recovery. - */ -int nfs4_proc_get_locations(struct inode *inode, - struct nfs4_fs_locations *locations, - struct page *page, struct rpc_cred *cred) -{ - struct nfs_server *server = NFS_SERVER(inode); - struct nfs_client *clp = server->nfs_client; - const struct nfs4_mig_recovery_ops *ops = - clp->cl_mvops->mig_recovery_ops; - struct nfs4_exception exception = { }; - int status; - - dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__, - (unsigned long long)server->fsid.major, - (unsigned long long)server->fsid.minor, - clp->cl_hostname); - nfs_display_fhandle(NFS_FH(inode), __func__); - - do { - status = ops->get_locations(inode, locations, page, cred); - if (status != -NFS4ERR_DELAY) - break; - nfs4_handle_exception(server, status, &exception); - } while (exception.retry); - return status; -} - -/* - * This operation also signals the server that this client is - * performing "lease moved" recovery. The server can stop - * returning NFS4ERR_LEASE_MOVED to this client. A RENEW operation - * is appended to this compound to identify the client ID which is - * performing recovery. - */ -static int _nfs40_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) -{ - struct nfs_server *server = NFS_SERVER(inode); - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; - struct rpc_clnt *clnt = server->client; - struct nfs4_fsid_present_arg args = { - .fh = NFS_FH(inode), - .clientid = clp->cl_clientid, - .renew = 1, /* append RENEW */ - }; - struct nfs4_fsid_present_res res = { - .renew = 1, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSID_PRESENT], - .rpc_argp = &args, - .rpc_resp = &res, - .rpc_cred = cred, - }; - unsigned long now = jiffies; - int status; - - res.fh = nfs_alloc_fhandle(); - if (res.fh == NULL) - return -ENOMEM; - - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); - nfs4_set_sequence_privileged(&args.seq_args); - status = nfs4_call_sync_sequence(clnt, server, &msg, - &args.seq_args, &res.seq_res); - nfs_free_fhandle(res.fh); - if (status) - return status; - - do_renew_lease(clp, now); - return 0; -} - -#ifdef CONFIG_NFS_V4_1 - -/* - * This operation also signals the server that this client is - * performing "lease moved" recovery. The server can stop asserting - * SEQ4_STATUS_LEASE_MOVED for this client. The client ID performing - * this operation is identified in the SEQUENCE operation in this - * compound. - */ -static int _nfs41_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) -{ - struct nfs_server *server = NFS_SERVER(inode); - struct rpc_clnt *clnt = server->client; - struct nfs4_fsid_present_arg args = { - .fh = NFS_FH(inode), - }; - struct nfs4_fsid_present_res res = { - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSID_PRESENT], - .rpc_argp = &args, - .rpc_resp = &res, - .rpc_cred = cred, - }; - int status; - - res.fh = nfs_alloc_fhandle(); - if (res.fh == NULL) - return -ENOMEM; - - nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); - nfs4_set_sequence_privileged(&args.seq_args); - status = nfs4_call_sync_sequence(clnt, server, &msg, - &args.seq_args, &res.seq_res); - nfs_free_fhandle(res.fh); - if (status == NFS4_OK && - res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED) - status = -NFS4ERR_LEASE_MOVED; - return status; -} - -#endif /* CONFIG_NFS_V4_1 */ - -/** - * nfs4_proc_fsid_present - Is this FSID present or absent on server? - * @inode: inode on FSID to check - * @cred: credential to use for this operation - * - * Server indicates whether the FSID is present, moved, or not - * recognized. This operation is necessary to clear a LEASE_MOVED - * condition for this client ID. - * - * Returns NFS4_OK if the FSID is present on this server, - * -NFS4ERR_MOVED if the FSID is no longer present, a negative - * NFS4ERR code if some error occurred on the server, or a - * negative errno if a local failure occurred. - */ -int nfs4_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) -{ - struct nfs_server *server = NFS_SERVER(inode); - struct nfs_client *clp = server->nfs_client; - const struct nfs4_mig_recovery_ops *ops = - clp->cl_mvops->mig_recovery_ops; - struct nfs4_exception exception = { }; - int status; - - dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__, - (unsigned long long)server->fsid.major, - (unsigned long long)server->fsid.minor, - clp->cl_hostname); - nfs_display_fhandle(NFS_FH(inode), __func__); - - do { - status = ops->fsid_present(inode, cred); - if (status != -NFS4ERR_DELAY) - break; - nfs4_handle_exception(server, status, &exception); - } while (exception.retry); - return status; -} - /** * If 'use_integrity' is true and the state managment nfs_client * cl_rpcclient is using krb5i/p, use the integrity protected cl_rpcclient @@ -6623,14 +6276,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, struct nfs41_exchange_id_args args = { .verifier = &verifier, .client = clp, -#ifdef CONFIG_NFS_V4_1_MIGRATION - .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | - EXCHGID4_FLAG_BIND_PRINC_STATEID | - EXCHGID4_FLAG_SUPP_MOVED_MIGR, -#else .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | - EXCHGID4_FLAG_BIND_PRINC_STATEID, -#endif + EXCHGID4_FLAG_BIND_PRINC_STATEID, }; struct nfs41_exchange_id_res res = { 0 @@ -7598,14 +7245,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) return; server = NFS_SERVER(lrp->args.inode); - switch (task->tk_status) { - default: - task->tk_status = 0; - case 0: - break; - case -NFS4ERR_DELAY: - if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN) - break; + if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { rpc_restart_call_prepare(task); return; } @@ -7976,9 +7616,6 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, break; } - if (!nfs_auth_info_match(&server->auth_info, flavor)) - flavor = RPC_AUTH_MAXFLAVOR; - if (flavor != RPC_AUTH_MAXFLAVOR) { err = nfs4_lookup_root_sec(server, fhandle, info, flavor); @@ -8250,18 +7887,6 @@ static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { }; #endif -static const struct nfs4_mig_recovery_ops nfs40_mig_recovery_ops = { - .get_locations = _nfs40_proc_get_locations, - .fsid_present = _nfs40_proc_fsid_present, -}; - -#if defined(CONFIG_NFS_V4_1) -static const struct nfs4_mig_recovery_ops nfs41_mig_recovery_ops = { - .get_locations = _nfs41_proc_get_locations, - .fsid_present = _nfs41_proc_fsid_present, -}; -#endif /* CONFIG_NFS_V4_1 */ - static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, .init_caps = NFS_CAP_READDIRPLUS @@ -8277,7 +7902,6 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .reboot_recovery_ops = &nfs40_reboot_recovery_ops, .nograce_recovery_ops = &nfs40_nograce_recovery_ops, .state_renewal_ops = &nfs40_state_renewal_ops, - .mig_recovery_ops = &nfs40_mig_recovery_ops, }; #if defined(CONFIG_NFS_V4_1) @@ -8298,7 +7922,6 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, - .mig_recovery_ops = &nfs41_mig_recovery_ops, }; #endif diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 059c01b..cc14cbb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -239,12 +239,14 @@ static void nfs4_end_drain_session(struct nfs_client *clp) } } +#if defined(CONFIG_NFS_V4_1) + static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl) { set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state); spin_lock(&tbl->slot_tbl_lock); if (tbl->highest_used_slotid != NFS4_NO_SLOT) { - reinit_completion(&tbl->complete); + INIT_COMPLETION(tbl->complete); spin_unlock(&tbl->slot_tbl_lock); return wait_for_completion_interruptible(&tbl->complete); } @@ -268,8 +270,6 @@ static int nfs4_begin_drain_session(struct nfs_client *clp) return nfs4_drain_slot_tbl(&ses->fc_slot_table); } -#if defined(CONFIG_NFS_V4_1) - static int nfs41_setup_state_renewal(struct nfs_client *clp) { int status; @@ -1197,74 +1197,20 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) } EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery); -/** - * nfs4_schedule_migration_recovery - trigger migration recovery - * - * @server: FSID that is migrating - * - * Returns zero if recovery has started, otherwise a negative NFS4ERR - * value is returned. - */ -int nfs4_schedule_migration_recovery(const struct nfs_server *server) -{ - struct nfs_client *clp = server->nfs_client; - - if (server->fh_expire_type != NFS4_FH_PERSISTENT) { - pr_err("NFS: volatile file handles not supported (server %s)\n", - clp->cl_hostname); - return -NFS4ERR_IO; - } - - if (test_bit(NFS_MIG_FAILED, &server->mig_status)) - return -NFS4ERR_IO; - - dprintk("%s: scheduling migration recovery for (%llx:%llx) on %s\n", - __func__, - (unsigned long long)server->fsid.major, - (unsigned long long)server->fsid.minor, - clp->cl_hostname); - - set_bit(NFS_MIG_IN_TRANSITION, - &((struct nfs_server *)server)->mig_status); - set_bit(NFS4CLNT_MOVED, &clp->cl_state); - - nfs4_schedule_state_manager(clp); - return 0; -} -EXPORT_SYMBOL_GPL(nfs4_schedule_migration_recovery); - -/** - * nfs4_schedule_lease_moved_recovery - start lease-moved recovery - * - * @clp: server to check for moved leases - * - */ -void nfs4_schedule_lease_moved_recovery(struct nfs_client *clp) -{ - dprintk("%s: scheduling lease-moved recovery for client ID %llx on %s\n", - __func__, clp->cl_clientid, clp->cl_hostname); - - set_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state); - nfs4_schedule_state_manager(clp); -} -EXPORT_SYMBOL_GPL(nfs4_schedule_lease_moved_recovery); - int nfs4_wait_clnt_recover(struct nfs_client *clp) { int res; might_sleep(); - atomic_inc(&clp->cl_count); res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, nfs_wait_bit_killable, TASK_KILLABLE); if (res) - goto out; + return res; + if (clp->cl_cons_state < 0) - res = clp->cl_cons_state; -out: - nfs_put_client(clp); - return res; + return clp->cl_cons_state; + return 0; } int nfs4_client_recover_expired_lease(struct nfs_client *clp) @@ -1429,8 +1375,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: goto out; default: - printk(KERN_ERR "NFS: %s: unhandled error %d\n", - __func__, status); + printk(KERN_ERR "NFS: %s: unhandled error %d. " + "Zeroing state\n", __func__, status); case -ENOMEM: case -NFS4ERR_DENIED: case -NFS4ERR_RECLAIM_BAD: @@ -1476,7 +1422,7 @@ restart: if (status >= 0) { status = nfs4_reclaim_locks(state, ops); if (status >= 0) { - if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) { + if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) { spin_lock(&state->state_lock); list_for_each_entry(lock, &state->lock_states, ls_locks) { if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags)) @@ -1493,12 +1439,15 @@ restart: } switch (status) { default: - printk(KERN_ERR "NFS: %s: unhandled error %d\n", - __func__, status); + printk(KERN_ERR "NFS: %s: unhandled error %d. " + "Zeroing state\n", __func__, status); case -ENOENT: case -ENOMEM: case -ESTALE: - /* Open state on this file cannot be recovered */ + /* + * Open state on this file cannot be recovered + * All we can do is revert to using the zero stateid. + */ nfs4_state_mark_recovery_failed(state, status); break; case -EAGAIN: @@ -1679,6 +1628,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) nfs4_state_end_reclaim_reboot(clp); break; case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_LEASE_MOVED: set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); nfs4_state_clear_reclaim_reboot(clp); nfs4_state_start_reclaim_reboot(clp); @@ -1879,168 +1829,6 @@ static int nfs4_purge_lease(struct nfs_client *clp) return 0; } -/* - * Try remote migration of one FSID from a source server to a - * destination server. The source server provides a list of - * potential destinations. - * - * Returns zero or a negative NFS4ERR status code. - */ -static int nfs4_try_migration(struct nfs_server *server, struct rpc_cred *cred) -{ - struct nfs_client *clp = server->nfs_client; - struct nfs4_fs_locations *locations = NULL; - struct inode *inode; - struct page *page; - int status, result; - - dprintk("--> %s: FSID %llx:%llx on \"%s\"\n", __func__, - (unsigned long long)server->fsid.major, - (unsigned long long)server->fsid.minor, - clp->cl_hostname); - - result = 0; - page = alloc_page(GFP_KERNEL); - locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); - if (page == NULL || locations == NULL) { - dprintk("<-- %s: no memory\n", __func__); - goto out; - } - - inode = server->super->s_root->d_inode; - result = nfs4_proc_get_locations(inode, locations, page, cred); - if (result) { - dprintk("<-- %s: failed to retrieve fs_locations: %d\n", - __func__, result); - goto out; - } - - result = -NFS4ERR_NXIO; - if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) { - dprintk("<-- %s: No fs_locations data, migration skipped\n", - __func__); - goto out; - } - - nfs4_begin_drain_session(clp); - - status = nfs4_replace_transport(server, locations); - if (status != 0) { - dprintk("<-- %s: failed to replace transport: %d\n", - __func__, status); - goto out; - } - - result = 0; - dprintk("<-- %s: migration succeeded\n", __func__); - -out: - if (page != NULL) - __free_page(page); - kfree(locations); - if (result) { - pr_err("NFS: migration recovery failed (server %s)\n", - clp->cl_hostname); - set_bit(NFS_MIG_FAILED, &server->mig_status); - } - return result; -} - -/* - * Returns zero or a negative NFS4ERR status code. - */ -static int nfs4_handle_migration(struct nfs_client *clp) -{ - const struct nfs4_state_maintenance_ops *ops = - clp->cl_mvops->state_renewal_ops; - struct nfs_server *server; - struct rpc_cred *cred; - - dprintk("%s: migration reported on \"%s\"\n", __func__, - clp->cl_hostname); - - spin_lock(&clp->cl_lock); - cred = ops->get_state_renewal_cred_locked(clp); - spin_unlock(&clp->cl_lock); - if (cred == NULL) - return -NFS4ERR_NOENT; - - clp->cl_mig_gen++; -restart: - rcu_read_lock(); - list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - int status; - - if (server->mig_gen == clp->cl_mig_gen) - continue; - server->mig_gen = clp->cl_mig_gen; - - if (!test_and_clear_bit(NFS_MIG_IN_TRANSITION, - &server->mig_status)) - continue; - - rcu_read_unlock(); - status = nfs4_try_migration(server, cred); - if (status < 0) { - put_rpccred(cred); - return status; - } - goto restart; - } - rcu_read_unlock(); - put_rpccred(cred); - return 0; -} - -/* - * Test each nfs_server on the clp's cl_superblocks list to see - * if it's moved to another server. Stop when the server no longer - * returns NFS4ERR_LEASE_MOVED. - */ -static int nfs4_handle_lease_moved(struct nfs_client *clp) -{ - const struct nfs4_state_maintenance_ops *ops = - clp->cl_mvops->state_renewal_ops; - struct nfs_server *server; - struct rpc_cred *cred; - - dprintk("%s: lease moved reported on \"%s\"\n", __func__, - clp->cl_hostname); - - spin_lock(&clp->cl_lock); - cred = ops->get_state_renewal_cred_locked(clp); - spin_unlock(&clp->cl_lock); - if (cred == NULL) - return -NFS4ERR_NOENT; - - clp->cl_mig_gen++; -restart: - rcu_read_lock(); - list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { - struct inode *inode; - int status; - - if (server->mig_gen == clp->cl_mig_gen) - continue; - server->mig_gen = clp->cl_mig_gen; - - rcu_read_unlock(); - - inode = server->super->s_root->d_inode; - status = nfs4_proc_fsid_present(inode, cred); - if (status != -NFS4ERR_MOVED) - goto restart; /* wasn't this one */ - if (nfs4_try_migration(server, cred) == -NFS4ERR_LEASE_MOVED) - goto restart; /* there are more */ - goto out; - } - rcu_read_unlock(); - -out: - put_rpccred(cred); - return 0; -} - /** * nfs4_discover_server_trunking - Detect server IP address trunking * @@ -2093,15 +1881,10 @@ again: nfs4_root_machine_cred(clp); goto again; } - if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX) + if (i > 2) break; case -NFS4ERR_CLID_INUSE: case -NFS4ERR_WRONGSEC: - /* No point in retrying if we already used RPC_AUTH_UNIX */ - if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX) { - status = -EPERM; - break; - } clnt = rpc_clone_client_set_auth(clnt, RPC_AUTH_UNIX); if (IS_ERR(clnt)) { status = PTR_ERR(clnt); @@ -2234,10 +2017,9 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) nfs41_handle_server_reboot(clp); if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | - SEQ4_STATUS_ADMIN_STATE_REVOKED)) + SEQ4_STATUS_ADMIN_STATE_REVOKED | + SEQ4_STATUS_LEASE_MOVED)) nfs41_handle_state_revoked(clp); - if (flags & SEQ4_STATUS_LEASE_MOVED) - nfs4_schedule_lease_moved_recovery(clp); if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) nfs41_handle_recallable_state_revoked(clp); if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT) @@ -2375,20 +2157,7 @@ static void nfs4_state_manager(struct nfs_client *clp) status = nfs4_check_lease(clp); if (status < 0) goto out_error; - } - - if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) { - section = "migration"; - status = nfs4_handle_migration(clp); - if (status < 0) - goto out_error; - } - - if (test_and_clear_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state)) { - section = "lease moved"; - status = nfs4_handle_lease_moved(clp); - if (status < 0) - goto out_error; + continue; } /* First recover reboot state... */ diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 65ab0a0..e26acdd 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -261,9 +261,9 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, res = nfs_follow_remote_path(root_mnt, export_path); - dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", - PTR_ERR_OR_ZERO(res), - IS_ERR(res) ? " [error]" : ""); + dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n", + IS_ERR(res) ? PTR_ERR(res) : 0, + IS_ERR(res) ? " [error]" : ""); return res; } @@ -319,9 +319,9 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, data->mnt_path = export_path; res = nfs_follow_remote_path(root_mnt, export_path); - dprintk("<-- nfs4_referral_mount() = %d%s\n", - PTR_ERR_OR_ZERO(res), - IS_ERR(res) ? " [error]" : ""); + dprintk("<-- nfs4_referral_mount() = %ld%s\n", + IS_ERR(res) ? PTR_ERR(res) : 0, + IS_ERR(res) ? " [error]" : ""); return res; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5be2868..79210d2 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -105,8 +105,12 @@ static int nfs4_stat_to_errno(int); #ifdef CONFIG_NFS_V4_SECURITY_LABEL /* PI(4 bytes) + LFS(4 bytes) + 1(for null terminator?) + MAXLABELLEN */ #define nfs4_label_maxsz (4 + 4 + 1 + XDR_QUADLEN(NFS4_MAXLABELLEN)) +#define encode_readdir_space 24 +#define encode_readdir_bitmask_sz 3 #else #define nfs4_label_maxsz 0 +#define encode_readdir_space 20 +#define encode_readdir_bitmask_sz 2 #endif /* We support only one layout type per file system */ #define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8) @@ -591,13 +595,11 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ - encode_getattr_maxsz + \ - encode_renew_maxsz) + encode_getattr_maxsz) #define NFS4_dec_getattr_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ - decode_getattr_maxsz + \ - decode_renew_maxsz) + decode_getattr_maxsz) #define NFS4_enc_lookup_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -734,15 +736,13 @@ static int nfs4_stat_to_errno(int); encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_lookup_maxsz + \ - encode_fs_locations_maxsz + \ - encode_renew_maxsz) + encode_fs_locations_maxsz) #define NFS4_dec_fs_locations_sz \ (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_lookup_maxsz + \ - decode_fs_locations_maxsz + \ - decode_renew_maxsz) + decode_fs_locations_maxsz) #define NFS4_enc_secinfo_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -751,18 +751,6 @@ static int nfs4_stat_to_errno(int); decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_secinfo_maxsz) -#define NFS4_enc_fsid_present_sz \ - (compound_encode_hdr_maxsz + \ - encode_sequence_maxsz + \ - encode_putfh_maxsz + \ - encode_getfh_maxsz + \ - encode_renew_maxsz) -#define NFS4_dec_fsid_present_sz \ - (compound_decode_hdr_maxsz + \ - decode_sequence_maxsz + \ - decode_putfh_maxsz + \ - decode_getfh_maxsz + \ - decode_renew_maxsz) #if defined(CONFIG_NFS_V4_1) #define NFS4_enc_bind_conn_to_session_sz \ (compound_encode_hdr_maxsz + \ @@ -1577,8 +1565,6 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg }; uint32_t dircount = readdir->count >> 1; __be32 *p, verf[2]; - uint32_t attrlen = 0; - unsigned int i; if (readdir->plus) { attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| @@ -1587,27 +1573,26 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV| FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS| FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; - attrs[2] |= FATTR4_WORD2_SECURITY_LABEL; dircount >>= 1; } /* Use mounted_on_fileid only if the server supports it */ if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) attrs[0] |= FATTR4_WORD0_FILEID; - for (i = 0; i < ARRAY_SIZE(attrs); i++) { - attrs[i] &= readdir->bitmask[i]; - if (attrs[i] != 0) - attrlen = i+1; - } encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr); encode_uint64(xdr, readdir->cookie); encode_nfs4_verifier(xdr, &readdir->verifier); - p = reserve_space(xdr, 12 + (attrlen << 2)); + p = reserve_space(xdr, encode_readdir_space); *p++ = cpu_to_be32(dircount); *p++ = cpu_to_be32(readdir->count); - *p++ = cpu_to_be32(attrlen); - for (i = 0; i < attrlen; i++) - *p++ = cpu_to_be32(attrs[i]); + *p++ = cpu_to_be32(encode_readdir_bitmask_sz); + *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); + *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); + if (encode_readdir_bitmask_sz > 2) { + if (hdr->minorversion > 1) + attrs[2] |= FATTR4_WORD2_SECURITY_LABEL; + p++, *p++ = cpu_to_be32(attrs[2] & readdir->bitmask[2]); + } memcpy(verf, readdir->verifier.data, sizeof(verf)); dprintk("%s: cookie = %llu, verifier = %08x:%08x, bitmap = %08x:%08x:%08x\n", @@ -2702,20 +2687,11 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); - if (args->migration) { - encode_putfh(xdr, args->fh, &hdr); - replen = hdr.replen; - encode_fs_locations(xdr, args->bitmask, &hdr); - if (args->renew) - encode_renew(xdr, args->clientid, &hdr); - } else { - encode_putfh(xdr, args->dir_fh, &hdr); - encode_lookup(xdr, args->name, &hdr); - replen = hdr.replen; - encode_fs_locations(xdr, args->bitmask, &hdr); - } + encode_putfh(xdr, args->dir_fh, &hdr); + encode_lookup(xdr, args->name, &hdr); + replen = hdr.replen; /* get the attribute into args->page */ + encode_fs_locations(xdr, args->bitmask, &hdr); - /* Set up reply kvec to capture returned fs_locations array. */ xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page, 0, PAGE_SIZE); encode_nops(&hdr); @@ -2739,26 +2715,6 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req, encode_nops(&hdr); } -/* - * Encode FSID_PRESENT request - */ -static void nfs4_xdr_enc_fsid_present(struct rpc_rqst *req, - struct xdr_stream *xdr, - struct nfs4_fsid_present_arg *args) -{ - struct compound_hdr hdr = { - .minorversion = nfs4_xdr_minorversion(&args->seq_args), - }; - - encode_compound_hdr(xdr, req, &hdr); - encode_sequence(xdr, &args->seq_args, &hdr); - encode_putfh(xdr, args->fh, &hdr); - encode_getfh(xdr, &hdr); - if (args->renew) - encode_renew(xdr, args->clientid, &hdr); - encode_nops(&hdr); -} - #if defined(CONFIG_NFS_V4_1) /* * BIND_CONN_TO_SESSION request @@ -6868,26 +6824,13 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, status = decode_putfh(xdr); if (status) goto out; - if (res->migration) { - xdr_enter_page(xdr, PAGE_SIZE); - status = decode_getfattr_generic(xdr, - &res->fs_locations->fattr, - NULL, res->fs_locations, - NULL, res->fs_locations->server); - if (status) - goto out; - if (res->renew) - status = decode_renew(xdr); - } else { - status = decode_lookup(xdr); - if (status) - goto out; - xdr_enter_page(xdr, PAGE_SIZE); - status = decode_getfattr_generic(xdr, - &res->fs_locations->fattr, + status = decode_lookup(xdr); + if (status) + goto out; + xdr_enter_page(xdr, PAGE_SIZE); + status = decode_getfattr_generic(xdr, &res->fs_locations->fattr, NULL, res->fs_locations, NULL, res->fs_locations->server); - } out: return status; } @@ -6916,34 +6859,6 @@ out: return status; } -/* - * Decode FSID_PRESENT response - */ -static int nfs4_xdr_dec_fsid_present(struct rpc_rqst *rqstp, - struct xdr_stream *xdr, - struct nfs4_fsid_present_res *res) -{ - struct compound_hdr hdr; - int status; - - status = decode_compound_hdr(xdr, &hdr); - if (status) - goto out; - status = decode_sequence(xdr, &res->seq_res, rqstp); - if (status) - goto out; - status = decode_putfh(xdr); - if (status) - goto out; - status = decode_getfh(xdr, res->fh); - if (status) - goto out; - if (res->renew) - status = decode_renew(xdr); -out: - return status; -} - #if defined(CONFIG_NFS_V4_1) /* * Decode BIND_CONN_TO_SESSION response @@ -7458,7 +7373,6 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), PROC(SECINFO, enc_secinfo, dec_secinfo), - PROC(FSID_PRESENT, enc_fsid_present, dec_fsid_present), #if defined(CONFIG_NFS_V4_1) PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), PROC(CREATE_SESSION, enc_create_session, dec_create_session), diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index fddbba2..a8f57c7 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -235,7 +235,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, }; int status = -ENOMEM; - dprintk("NFS call create %pd\n", dentry); + dprintk("NFS call create %s\n", dentry->d_name.name); data = nfs_alloc_createdata(dir, dentry, sattr); if (data == NULL) goto out; @@ -265,7 +265,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, umode_t mode; int status = -ENOMEM; - dprintk("NFS call mknod %pd\n", dentry); + dprintk("NFS call mknod %s\n", dentry->d_name.name); mode = sattr->ia_mode; if (S_ISFIFO(mode)) { @@ -423,7 +423,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, }; int status = -ENAMETOOLONG; - dprintk("NFS call symlink %pd\n", dentry); + dprintk("NFS call symlink %s\n", dentry->d_name.name); if (len > NFS2_MAXPATHLEN) goto out; @@ -462,7 +462,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) }; int status = -ENOMEM; - dprintk("NFS call mkdir %pd\n", dentry); + dprintk("NFS call mkdir %s\n", dentry->d_name.name); data = nfs_alloc_createdata(dir, dentry, sattr); if (data == NULL) goto out; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 910ed90..a03b9c6 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -497,8 +497,7 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) static const struct { rpc_authflavor_t flavour; const char *str; - } sec_flavours[NFS_AUTH_INFO_MAX_FLAVORS] = { - /* update NFS_AUTH_INFO_MAX_FLAVORS when this list changes! */ + } sec_flavours[] = { { RPC_AUTH_NULL, "null" }, { RPC_AUTH_UNIX, "sys" }, { RPC_AUTH_GSS_KRB5, "krb5" }, @@ -924,7 +923,8 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) data->mount_server.port = NFS_UNSPEC_PORT; data->nfs_server.port = NFS_UNSPEC_PORT; data->nfs_server.protocol = XPRT_TRANSPORT_TCP; - data->selected_flavor = RPC_AUTH_MAXFLAVOR; + data->auth_flavors[0] = RPC_AUTH_MAXFLAVOR; + data->auth_flavor_len = 0; data->minorversion = 0; data->need_mount = true; data->net = current->nsproxy->net_ns; @@ -1019,51 +1019,12 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) } } -/* - * Add 'flavor' to 'auth_info' if not already present. - * Returns true if 'flavor' ends up in the list, false otherwise - */ -static bool nfs_auth_info_add(struct nfs_auth_info *auth_info, - rpc_authflavor_t flavor) -{ - unsigned int i; - unsigned int max_flavor_len = (sizeof(auth_info->flavors) / - sizeof(auth_info->flavors[0])); - - /* make sure this flavor isn't already in the list */ - for (i = 0; i < auth_info->flavor_len; i++) { - if (flavor == auth_info->flavors[i]) - return true; - } - - if (auth_info->flavor_len + 1 >= max_flavor_len) { - dfprintk(MOUNT, "NFS: too many sec= flavors\n"); - return false; - } - - auth_info->flavors[auth_info->flavor_len++] = flavor; - return true; -} - -/* - * Return true if 'match' is in auth_info or auth_info is empty. - * Return false otherwise. - */ -bool nfs_auth_info_match(const struct nfs_auth_info *auth_info, - rpc_authflavor_t match) +static void nfs_set_auth_parsed_mount_data(struct nfs_parsed_mount_data *data, + rpc_authflavor_t pseudoflavor) { - int i; - - if (!auth_info->flavor_len) - return true; - - for (i = 0; i < auth_info->flavor_len; i++) { - if (auth_info->flavors[i] == match) - return true; - } - return false; + data->auth_flavors[0] = pseudoflavor; + data->auth_flavor_len = 1; } -EXPORT_SYMBOL_GPL(nfs_auth_info_match); /* * Parse the value of the 'sec=' option. @@ -1073,55 +1034,49 @@ static int nfs_parse_security_flavors(char *value, { substring_t args[MAX_OPT_ARGS]; rpc_authflavor_t pseudoflavor; - char *p; dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value); - while ((p = strsep(&value, ":")) != NULL) { - switch (match_token(p, nfs_secflavor_tokens, args)) { - case Opt_sec_none: - pseudoflavor = RPC_AUTH_NULL; - break; - case Opt_sec_sys: - pseudoflavor = RPC_AUTH_UNIX; - break; - case Opt_sec_krb5: - pseudoflavor = RPC_AUTH_GSS_KRB5; - break; - case Opt_sec_krb5i: - pseudoflavor = RPC_AUTH_GSS_KRB5I; - break; - case Opt_sec_krb5p: - pseudoflavor = RPC_AUTH_GSS_KRB5P; - break; - case Opt_sec_lkey: - pseudoflavor = RPC_AUTH_GSS_LKEY; - break; - case Opt_sec_lkeyi: - pseudoflavor = RPC_AUTH_GSS_LKEYI; - break; - case Opt_sec_lkeyp: - pseudoflavor = RPC_AUTH_GSS_LKEYP; - break; - case Opt_sec_spkm: - pseudoflavor = RPC_AUTH_GSS_SPKM; - break; - case Opt_sec_spkmi: - pseudoflavor = RPC_AUTH_GSS_SPKMI; - break; - case Opt_sec_spkmp: - pseudoflavor = RPC_AUTH_GSS_SPKMP; - break; - default: - dfprintk(MOUNT, - "NFS: sec= option '%s' not recognized\n", p); - return 0; - } - - if (!nfs_auth_info_add(&mnt->auth_info, pseudoflavor)) - return 0; + switch (match_token(value, nfs_secflavor_tokens, args)) { + case Opt_sec_none: + pseudoflavor = RPC_AUTH_NULL; + break; + case Opt_sec_sys: + pseudoflavor = RPC_AUTH_UNIX; + break; + case Opt_sec_krb5: + pseudoflavor = RPC_AUTH_GSS_KRB5; + break; + case Opt_sec_krb5i: + pseudoflavor = RPC_AUTH_GSS_KRB5I; + break; + case Opt_sec_krb5p: + pseudoflavor = RPC_AUTH_GSS_KRB5P; + break; + case Opt_sec_lkey: + pseudoflavor = RPC_AUTH_GSS_LKEY; + break; + case Opt_sec_lkeyi: + pseudoflavor = RPC_AUTH_GSS_LKEYI; + break; + case Opt_sec_lkeyp: + pseudoflavor = RPC_AUTH_GSS_LKEYP; + break; + case Opt_sec_spkm: + pseudoflavor = RPC_AUTH_GSS_SPKM; + break; + case Opt_sec_spkmi: + pseudoflavor = RPC_AUTH_GSS_SPKMI; + break; + case Opt_sec_spkmp: + pseudoflavor = RPC_AUTH_GSS_SPKMP; + break; + default: + return 0; } + mnt->flags |= NFS_MOUNT_SECFLAVOUR; + nfs_set_auth_parsed_mount_data(mnt, pseudoflavor); return 1; } @@ -1614,7 +1569,7 @@ static int nfs_parse_mount_options(char *raw, goto out_minorversion_mismatch; if (mnt->options & NFS_OPTION_MIGRATION && - (mnt->version != 4 || mnt->minorversion != 0)) + mnt->version != 4 && mnt->minorversion != 0) goto out_migration_misuse; /* @@ -1668,14 +1623,12 @@ out_security_failure: } /* - * Ensure that a specified authtype in args->auth_info is supported by - * the server. Returns 0 and sets args->selected_flavor if it's ok, and - * -EACCES if not. + * Ensure that the specified authtype in args->auth_flavors[0] is supported by + * the server. Returns 0 if it's ok, and -EACCES if not. */ -static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, +static int nfs_verify_authflavor(struct nfs_parsed_mount_data *args, rpc_authflavor_t *server_authlist, unsigned int count) { - rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR; unsigned int i; /* @@ -1687,20 +1640,17 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, * can be used. */ for (i = 0; i < count; i++) { - flavor = server_authlist[i]; - - if (nfs_auth_info_match(&args->auth_info, flavor) || - flavor == RPC_AUTH_NULL) + if (args->auth_flavors[0] == server_authlist[i] || + server_authlist[i] == RPC_AUTH_NULL) goto out; } - dfprintk(MOUNT, - "NFS: specified auth flavors not supported by server\n"); + dfprintk(MOUNT, "NFS: auth flavor %u not supported by server\n", + args->auth_flavors[0]); return -EACCES; out: - args->selected_flavor = flavor; - dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->selected_flavor); + dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->auth_flavors[0]); return 0; } @@ -1788,10 +1738,9 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf * Was a sec= authflavor specified in the options? First, verify * whether the server supports it, and then just try to use it if so. */ - if (args->auth_info.flavor_len > 0) { - status = nfs_verify_authflavors(args, authlist, authlist_len); - dfprintk(MOUNT, "NFS: using auth flavor %u\n", - args->selected_flavor); + if (args->auth_flavor_len > 0) { + status = nfs_verify_authflavor(args, authlist, authlist_len); + dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->auth_flavors[0]); if (status) return ERR_PTR(status); return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); @@ -1820,7 +1769,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf /* Fallthrough */ } dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor); - args->selected_flavor = flavor; + nfs_set_auth_parsed_mount_data(args, flavor); server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); if (!IS_ERR(server)) return server; @@ -1836,7 +1785,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf /* Last chance! Try AUTH_UNIX */ dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX); - args->selected_flavor = RPC_AUTH_UNIX; + nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX); return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); } @@ -2023,9 +1972,9 @@ static int nfs23_validate_mount_data(void *options, args->bsize = data->bsize; if (data->flags & NFS_MOUNT_SECFLAVOUR) - args->selected_flavor = data->pseudoflavor; + nfs_set_auth_parsed_mount_data(args, data->pseudoflavor); else - args->selected_flavor = RPC_AUTH_UNIX; + nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX); if (!args->nfs_server.hostname) goto out_nomem; @@ -2159,6 +2108,9 @@ static int nfs_validate_text_mount_data(void *options, nfs_set_port(sap, &args->nfs_server.port, port); + if (args->auth_flavor_len > 1) + goto out_bad_auth; + return nfs_parse_devname(dev_name, &args->nfs_server.hostname, max_namelen, @@ -2178,6 +2130,10 @@ out_invalid_transport_udp: out_no_address: dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); return -EINVAL; + +out_bad_auth: + dfprintk(MOUNT, "NFS: Too many RPC auth flavours specified\n"); + return -EINVAL; } static int @@ -2187,10 +2143,8 @@ nfs_compare_remount_data(struct nfs_server *nfss, if (data->flags != nfss->flags || data->rsize != nfss->rsize || data->wsize != nfss->wsize || - data->version != nfss->nfs_client->rpc_ops->version || - data->minorversion != nfss->nfs_client->cl_minorversion || data->retrans != nfss->client->cl_timeout->to_retries || - data->selected_flavor != nfss->client->cl_auth->au_flavor || + data->auth_flavors[0] != nfss->client->cl_auth->au_flavor || data->acregmin != nfss->acregmin / HZ || data->acregmax != nfss->acregmax / HZ || data->acdirmin != nfss->acdirmin / HZ || @@ -2235,8 +2189,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) data->rsize = nfss->rsize; data->wsize = nfss->wsize; data->retrans = nfss->client->cl_timeout->to_retries; - data->selected_flavor = nfss->client->cl_auth->au_flavor; - data->auth_info = nfss->auth_info; + nfs_set_auth_parsed_mount_data(data, nfss->client->cl_auth->au_flavor); data->acregmin = nfss->acregmin / HZ; data->acregmax = nfss->acregmax / HZ; data->acdirmin = nfss->acdirmin / HZ; @@ -2244,14 +2197,12 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; data->nfs_server.port = nfss->port; data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; - data->version = nfsvers; - data->minorversion = nfss->nfs_client->cl_minorversion; memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, data->nfs_server.addrlen); /* overwrite those values with any that were specified */ - error = -EINVAL; - if (!nfs_parse_mount_options((char *)options, data)) + error = nfs_parse_mount_options((char *)options, data); + if (error < 0) goto out; /* @@ -2381,7 +2332,7 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n goto Ebusy; if (a->acdirmax != b->acdirmax) goto Ebusy; - if (b->auth_info.flavor_len > 0 && + if (b->flags & NFS_MOUNT_SECFLAVOUR && clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) goto Ebusy; return 1; @@ -2579,7 +2530,6 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, mntroot = ERR_PTR(error); goto error_splat_bdi; } - server->super = s; } if (!s->s_root) { @@ -2763,9 +2713,9 @@ static int nfs4_validate_mount_data(void *options, data->auth_flavours, sizeof(pseudoflavor))) return -EFAULT; - args->selected_flavor = pseudoflavor; + nfs_set_auth_parsed_mount_data(args, pseudoflavor); } else - args->selected_flavor = RPC_AUTH_UNIX; + nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX); c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); if (IS_ERR(c)) diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 11d7894..bb939ed 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -493,15 +493,17 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) unsigned long long fileid; struct dentry *sdentry; struct rpc_task *task; - int error = -EBUSY; + int error = -EIO; - dfprintk(VFS, "NFS: silly-rename(%pd2, ct=%d)\n", - dentry, d_count(dentry)); + dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + d_count(dentry)); nfs_inc_stats(dir, NFSIOS_SILLYRENAME); /* * We don't allow a dentry to be silly-renamed twice. */ + error = -EBUSY; if (dentry->d_flags & DCACHE_NFSFS_RENAMED) goto out; @@ -520,8 +522,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) SILLYNAME_FILEID_LEN, fileid, SILLYNAME_COUNTER_LEN, sillycounter); - dfprintk(VFS, "NFS: trying to rename %pd to %s\n", - dentry, silly); + dfprintk(VFS, "NFS: trying to rename %s to %s\n", + dentry->d_name.name, silly); sdentry = lookup_one_len(silly, dentry->d_parent, slen); /* diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c1d5482..ac1dc33 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -954,8 +954,10 @@ int nfs_updatepage(struct file *file, struct page *page, nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); - dprintk("NFS: nfs_updatepage(%pD2 %d@%lld)\n", - file, count, (long long)(page_file_offset(page) + offset)); + dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n", + file->f_path.dentry->d_parent->d_name.name, + file->f_path.dentry->d_name.name, count, + (long long)(page_file_offset(page) + offset)); if (nfs_can_extend_write(file, page, inode)) { count = max(count + offset, nfs_page_length(page)); diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index f994e75..dc8f1ef 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -95,7 +95,7 @@ config NFSD_V4_SECURITY_LABEL Smack policies on NFSv4 files, say N. WARNING: there is still a chance of backwards-incompatible protocol changes. - For now we recommend "Y" only for developers and testers. + For now we recommend "Y" only for developers and testers." config NFSD_FAULT_INJECTION bool "NFS server manual fault injection" diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8513c59..5f38ea3 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -536,12 +536,16 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) if (err) goto out3; exp.ex_anon_uid= make_kuid(&init_user_ns, an_int); + if (!uid_valid(exp.ex_anon_uid)) + goto out3; /* anon gid */ err = get_int(&mesg, &an_int); if (err) goto out3; exp.ex_anon_gid= make_kgid(&init_user_ns, an_int); + if (!gid_valid(exp.ex_anon_gid)) + goto out3; /* fsid */ err = get_int(&mesg, &an_int); @@ -579,26 +583,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) exp.ex_uuid); if (err) goto out4; - /* - * No point caching this if it would immediately expire. - * Also, this protects exportfs's dummy export from the - * anon_uid/anon_gid checks: - */ - if (exp.h.expiry_time < seconds_since_boot()) - goto out4; - /* - * For some reason exportfs has been passing down an - * invalid (-1) uid & gid on the "dummy" export which it - * uses to test export support. To make sure exportfs - * sees errors from check_export we therefore need to - * delay these checks till after check_export: - */ - err = -EINVAL; - if (!uid_valid(exp.ex_anon_uid)) - goto out4; - if (!gid_valid(exp.ex_anon_gid)) - goto out4; - err = 0; } expp = svc_export_lookup(&exp); diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 9c271f4..e0a65a9 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -385,8 +385,8 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) status = vfs_rmdir(parent->d_inode, child); if (status) - printk("failed to remove client recovery directory %pd\n", - child); + printk("failed to remove client recovery directory %s\n", + child->d_name.name); /* Keep trying, success or failure: */ return 0; } @@ -410,15 +410,15 @@ out: nfs4_release_reclaim(nn); if (status) printk("nfsd4: failed to purge old clients from recovery" - " directory %pD\n", nn->rec_file); + " directory %s\n", nn->rec_file->f_path.dentry->d_name.name); } static int load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) { if (child->d_name.len != HEXDIR_LEN - 1) { - printk("nfsd4: illegal name %pd in recovery directory\n", - child); + printk("nfsd4: illegal name %s in recovery directory\n", + child->d_name.name); /* Keep trying; maybe the others are OK: */ return 0; } @@ -437,7 +437,7 @@ nfsd4_recdir_load(struct net *net) { status = nfsd4_list_rec_dir(load_recdir, nn); if (status) printk("nfsd4: failed loading clients from recovery" - " directory %pD\n", nn->rec_file); + " directory %s\n", nn->rec_file->f_path.dentry->d_name.name); return status; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 105d6fa..0874998 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -402,16 +402,11 @@ static void remove_stid(struct nfs4_stid *s) idr_remove(stateids, s->sc_stateid.si_opaque.so_id); } -static void nfs4_free_stid(struct kmem_cache *slab, struct nfs4_stid *s) -{ - kmem_cache_free(slab, s); -} - void nfs4_put_delegation(struct nfs4_delegation *dp) { if (atomic_dec_and_test(&dp->dl_count)) { - nfs4_free_stid(deleg_slab, &dp->dl_stid); + kmem_cache_free(deleg_slab, dp); num_delegations--; } } @@ -615,7 +610,7 @@ static void close_generic_stateid(struct nfs4_ol_stateid *stp) static void free_generic_stateid(struct nfs4_ol_stateid *stp) { remove_stid(&stp->st_stid); - nfs4_free_stid(stateid_slab, &stp->st_stid); + kmem_cache_free(stateid_slab, stp); } static void release_lock_stateid(struct nfs4_ol_stateid *stp) @@ -673,6 +668,7 @@ static void unhash_open_stateid(struct nfs4_ol_stateid *stp) static void release_open_stateid(struct nfs4_ol_stateid *stp) { unhash_open_stateid(stp); + unhash_stid(&stp->st_stid); free_generic_stateid(stp); } @@ -694,6 +690,7 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo) struct nfs4_ol_stateid *s = oo->oo_last_closed_stid; if (s) { + unhash_stid(&s->st_stid); free_generic_stateid(s); oo->oo_last_closed_stid = NULL; } @@ -1130,11 +1127,6 @@ destroy_client(struct nfs4_client *clp) dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); destroy_delegation(dp); } - list_splice_init(&clp->cl_revoked, &reaplist); - while (!list_empty(&reaplist)) { - dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); - destroy_revoked_delegation(dp); - } while (!list_empty(&clp->cl_openowners)) { oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); release_openowner(oo); @@ -3016,7 +3008,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f return NULL; locks_init_lock(fl); fl->fl_lmops = &nfsd_lease_mng_ops; - fl->fl_flags = FL_DELEG; + fl->fl_flags = FL_LEASE; fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; fl->fl_end = OFFSET_MAX; fl->fl_owner = (fl_owner_t)(dp->dl_file); @@ -3162,7 +3154,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; return; out_free: - remove_stid(&dp->dl_stid); + unhash_stid(&dp->dl_stid); nfs4_put_delegation(dp); out_no_deleg: open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; @@ -3851,8 +3843,9 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *stp; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - dprintk("NFSD: nfsd4_open_confirm on file %pd\n", - cstate->current_fh.fh_dentry); + dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", + (int)cstate->current_fh.fh_dentry->d_name.len, + cstate->current_fh.fh_dentry->d_name.name); status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); if (status) @@ -3929,8 +3922,9 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, struct nfs4_ol_stateid *stp; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - dprintk("NFSD: nfsd4_open_downgrade on file %pd\n", - cstate->current_fh.fh_dentry); + dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", + (int)cstate->current_fh.fh_dentry->d_name.len, + cstate->current_fh.fh_dentry->d_name.name); /* We don't yet support WANT bits: */ if (od->od_deleg_want) @@ -3986,8 +3980,9 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); - dprintk("NFSD: nfsd4_close on file %pd\n", - cstate->current_fh.fh_dentry); + dprintk("NFSD: nfsd4_close on file %.*s\n", + (int)cstate->current_fh.fh_dentry->d_name.len, + cstate->current_fh.fh_dentry->d_name.name); nfs4_lock_state(); status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, @@ -4003,9 +3998,10 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_close_open_stateid(stp); - if (cstate->minorversion) + if (cstate->minorversion) { + unhash_stid(&stp->st_stid); free_generic_stateid(stp); - else + } else oo->oo_last_closed_stid = stp; if (list_empty(&oo->oo_owner.so_stateids)) { @@ -5126,6 +5122,7 @@ out_recovery: return ret; } +/* should be called with the state lock held */ void nfs4_state_shutdown_net(struct net *net) { @@ -5136,7 +5133,6 @@ nfs4_state_shutdown_net(struct net *net) cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); - nfs4_lock_state(); INIT_LIST_HEAD(&reaplist); spin_lock(&recall_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { @@ -5151,7 +5147,6 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); - nfs4_unlock_state(); } void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index ee7237f..d9454fe 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -141,8 +141,8 @@ xdr_error: \ static void next_decode_page(struct nfsd4_compoundargs *argp) { - argp->p = page_address(argp->pagelist[0]); argp->pagelist++; + argp->p = page_address(argp->pagelist[0]); if (argp->pagelen < PAGE_SIZE) { argp->end = argp->p + (argp->pagelen>>2); argp->pagelen = 0; @@ -411,7 +411,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, label->data = kzalloc(dummy32 + 1, GFP_KERNEL); if (!label->data) return nfserr_jukebox; - label->len = dummy32; defer_free(argp, kfree, label->data); memcpy(label->data, buf, dummy32); } @@ -946,16 +945,13 @@ static __be32 nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) { DECODE_HEAD; - - if (argp->minorversion >= 1) - return nfserr_notsupp; - + status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); if (status) return status; READ_BUF(4); READ32(open_conf->oc_seqid); - + DECODE_TAIL; } @@ -995,14 +991,6 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) } static __be32 -nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) -{ - if (argp->minorversion == 0) - return nfs_ok; - return nfserr_notsupp; -} - -static __be32 nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) { DECODE_HEAD; @@ -1073,9 +1061,6 @@ nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) { DECODE_HEAD; - if (argp->minorversion >= 1) - return nfserr_notsupp; - READ_BUF(sizeof(clientid_t)); COPYMEM(clientid, sizeof(clientid_t)); @@ -1126,9 +1111,6 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient { DECODE_HEAD; - if (argp->minorversion >= 1) - return nfserr_notsupp; - READ_BUF(NFS4_VERIFIER_SIZE); COPYMEM(setclientid->se_verf.data, NFS4_VERIFIER_SIZE); @@ -1155,9 +1137,6 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s { DECODE_HEAD; - if (argp->minorversion >= 1) - return nfserr_notsupp; - READ_BUF(8 + NFS4_VERIFIER_SIZE); COPYMEM(&scd_c->sc_clientid, 8); COPYMEM(&scd_c->sc_confirm, NFS4_VERIFIER_SIZE); @@ -1229,7 +1208,6 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) len -= pages * PAGE_SIZE; argp->p = (__be32 *)page_address(argp->pagelist[0]); - argp->pagelist++; argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); } argp->p += XDR_QUADLEN(len); @@ -1242,9 +1220,6 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel { DECODE_HEAD; - if (argp->minorversion >= 1) - return nfserr_notsupp; - READ_BUF(12); COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t)); READ32(rlockowner->rl_owner.len); @@ -1544,7 +1519,7 @@ static nfsd4_dec nfsd4_dec_ops[] = { [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm, [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, - [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh, + [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_noop, [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, [OP_READ] = (nfsd4_dec)nfsd4_decode_read, [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, @@ -1561,6 +1536,46 @@ static nfsd4_dec nfsd4_dec_ops[] = { [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner, +}; + +static nfsd4_dec nfsd41_dec_ops[] = { + [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, + [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, + [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, + [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, + [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, + [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, + [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, + [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, + [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, + [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku, + [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, + [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, + [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, + [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, + [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, + [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, + [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_READ] = (nfsd4_dec)nfsd4_decode_read, + [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, + [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, + [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, + [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, + [OP_RENEW] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, + [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, + [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_SETCLIENTID_CONFIRM]= (nfsd4_dec)nfsd4_decode_notsupp, + [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, + [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, + [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_notsupp, /* new operations for NFSv4.1 */ [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl, @@ -1584,53 +1599,24 @@ static nfsd4_dec nfsd4_dec_ops[] = { [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, }; -static inline bool -nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op) -{ - if (op->opnum < FIRST_NFS4_OP) - return false; - else if (argp->minorversion == 0 && op->opnum > LAST_NFS40_OP) - return false; - else if (argp->minorversion == 1 && op->opnum > LAST_NFS41_OP) - return false; - else if (argp->minorversion == 2 && op->opnum > LAST_NFS42_OP) - return false; - return true; -} +struct nfsd4_minorversion_ops { + nfsd4_dec *decoders; + int nops; +}; -/* - * Return a rough estimate of the maximum possible reply size. Note the - * estimate includes rpc headers so is meant to be passed to - * svc_reserve, not svc_reserve_auth. - * - * Also note the current compound encoding permits only one operation to - * use pages beyond the first one, so the maximum possible length is the - * maximum over these values, not the sum. - */ -static int nfsd4_max_reply(u32 opnum) -{ - switch (opnum) { - case OP_READLINK: - case OP_READDIR: - /* - * Both of these ops take a single page for data and put - * the head and tail in another page: - */ - return 2 * PAGE_SIZE; - case OP_READ: - return INT_MAX; - default: - return PAGE_SIZE; - } -} +static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { + [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, + [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, + [2] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, +}; static __be32 nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { DECODE_HEAD; struct nfsd4_op *op; + struct nfsd4_minorversion_ops *ops; bool cachethis = false; - int max_reply = PAGE_SIZE; int i; READ_BUF(4); @@ -1654,9 +1640,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) } } - if (argp->minorversion > NFSD_SUPPORTED_MINOR_VERSION) + if (argp->minorversion >= ARRAY_SIZE(nfsd4_minorversion)) argp->opcnt = 0; + ops = &nfsd4_minorversion[argp->minorversion]; for (i = 0; i < argp->opcnt; i++) { op = &argp->ops[i]; op->replay = NULL; @@ -1664,8 +1651,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) READ_BUF(4); READ32(op->opnum); - if (nfsd4_opnum_in_range(argp, op)) - op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); + if (op->opnum >= FIRST_NFS4_OP && op->opnum <= LAST_NFS4_OP) + op->status = ops->decoders[op->opnum](argp, &op->u); else { op->opnum = OP_ILLEGAL; op->status = nfserr_op_illegal; @@ -1680,14 +1667,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) * op in the compound wants to be cached: */ cachethis |= nfsd4_cache_this_op(op); - - max_reply = max(max_reply, nfsd4_max_reply(op->opnum)); } /* Sessions make the DRC unnecessary: */ if (argp->minorversion) cachethis = false; - if (max_reply != INT_MAX) - svc_reserve(argp->rqstp, max_reply); argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; DECODE_TAIL; @@ -2392,7 +2375,7 @@ out_acl: if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { if ((buflen -= 8) < 0) goto out_resource; - WRITE64(exp->ex_path.mnt->mnt_sb->s_maxbytes); + WRITE64(~(u64)0); } if (bmval0 & FATTR4_WORD0_MAXLINK) { if ((buflen -= 4) < 0) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 3c37b16..814afaa 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -47,7 +47,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) tdentry = parent; } if (tdentry != exp->ex_path.dentry) - dprintk("nfsd_acceptable failed at %p %pd\n", tdentry, tdentry); + dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name); rv = (tdentry == exp->ex_path.dentry); dput(tdentry); return rv; @@ -253,8 +253,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (S_ISDIR(dentry->d_inode->i_mode) && (dentry->d_flags & DCACHE_DISCONNECTED)) { - printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %pd2\n", - dentry); + printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", + dentry->d_parent->d_name.name, dentry->d_name.name); } fhp->fh_dentry = dentry; @@ -361,9 +361,10 @@ skip_pseudoflavor_check: error = nfsd_permission(rqstp, exp, dentry, access); if (error) { - dprintk("fh_verify: %pd2 permission failure, " + dprintk("fh_verify: %s/%s permission failure, " "acc=%x, error=%d\n", - dentry, + dentry->d_parent->d_name.name, + dentry->d_name.name, access, ntohl(error)); } out: @@ -513,13 +514,14 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, */ struct inode * inode = dentry->d_inode; + struct dentry *parent = dentry->d_parent; __u32 *datap; dev_t ex_dev = exp_sb(exp)->s_dev; - dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n", + dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", MAJOR(ex_dev), MINOR(ex_dev), (long) exp->ex_path.dentry->d_inode->i_ino, - dentry, + parent->d_name.name, dentry->d_name.name, (inode ? inode->i_ino : 0)); /* Choose filehandle version and fsid type based on @@ -532,13 +534,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, fh_put(ref_fh); if (fhp->fh_locked || fhp->fh_dentry) { - printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n", - dentry); + printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n", + parent->d_name.name, dentry->d_name.name); } if (fhp->fh_maxsize < NFS_FHSIZE) - printk(KERN_ERR "fh_compose: called with maxsize %d! %pd2\n", + printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n", fhp->fh_maxsize, - dentry); + parent->d_name.name, dentry->d_name.name); fhp->fh_dentry = dget(dentry); /* our internal copy */ fhp->fh_export = exp; @@ -598,20 +600,22 @@ fh_update(struct svc_fh *fhp) _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); } else { if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) - return 0; + goto out; _fh_update(fhp, fhp->fh_export, dentry); if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) return nfserr_opnotsupp; } +out: return 0; + out_bad: printk(KERN_ERR "fh_update: fh not verified!\n"); - return nfserr_serverfault; + goto out; out_negative: - printk(KERN_ERR "fh_update: %pd2 still negative!\n", - dentry); - return nfserr_serverfault; + printk(KERN_ERR "fh_update: %s/%s still negative!\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + goto out; } /* diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 4775bc4..e5e6707 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -173,8 +173,8 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) BUG_ON(!dentry); if (fhp->fh_locked) { - printk(KERN_WARNING "fh_lock: %pd2 already locked!\n", - dentry); + printk(KERN_WARNING "fh_lock: %s/%s already locked!\n", + dentry->d_parent->d_name.name, dentry->d_name.name); return; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7eea63c..c827acb 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -298,12 +298,41 @@ commit_metadata(struct svc_fh *fhp) } /* - * Go over the attributes and take care of the small differences between - * NFS semantics and what Linux expects. + * Set various file attributes. + * N.B. After this call fhp needs an fh_put */ -static void -nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) +__be32 +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, + int check_guard, time_t guardtime) { + struct dentry *dentry; + struct inode *inode; + int accmode = NFSD_MAY_SATTR; + umode_t ftype = 0; + __be32 err; + int host_err; + int size_change = 0; + + if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) + accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; + if (iap->ia_valid & ATTR_SIZE) + ftype = S_IFREG; + + /* Get inode */ + err = fh_verify(rqstp, fhp, ftype, accmode); + if (err) + goto out; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + /* Ignore any mode updates on symlinks */ + if (S_ISLNK(inode->i_mode)) + iap->ia_valid &= ~ATTR_MODE; + + if (!iap->ia_valid) + goto out; + /* * NFSv2 does not differentiate between "set-[ac]time-to-now" * which only requires access, and "set-[ac]time-to-X" which @@ -313,7 +342,8 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) * convert to "set to now" instead of "set to explicit time" * * We only call inode_change_ok as the last test as technically - * it is not an interface that we should be using. + * it is not an interface that we should be using. It is only + * valid if the filesystem does not define it's own i_op->setattr. */ #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) #define MAX_TOUCH_TIME_ERROR (30*60) @@ -339,6 +369,30 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) iap->ia_valid &= ~BOTH_TIME_SET; } } + + /* + * The size case is special. + * It changes the file as well as the attributes. + */ + if (iap->ia_valid & ATTR_SIZE) { + if (iap->ia_size < inode->i_size) { + err = nfsd_permission(rqstp, fhp->fh_export, dentry, + NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE); + if (err) + goto out; + } + + host_err = get_write_access(inode); + if (host_err) + goto out_nfserr; + + size_change = 1; + host_err = locks_verify_truncate(inode, NULL, iap->ia_size); + if (host_err) { + put_write_access(inode); + goto out_nfserr; + } + } /* sanitize the mode change */ if (iap->ia_valid & ATTR_MODE) { @@ -361,111 +415,32 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); } } -} -static __be32 -nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct iattr *iap) -{ - struct inode *inode = fhp->fh_dentry->d_inode; - int host_err; - - if (iap->ia_size < inode->i_size) { - __be32 err; - - err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, - NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); - if (err) - return err; - } - - host_err = get_write_access(inode); - if (host_err) - goto out_nfserrno; - - host_err = locks_verify_truncate(inode, NULL, iap->ia_size); - if (host_err) - goto out_put_write_access; - return 0; - -out_put_write_access: - put_write_access(inode); -out_nfserrno: - return nfserrno(host_err); -} - -/* - * Set various file attributes. After this call fhp needs an fh_put. - */ -__be32 -nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, - int check_guard, time_t guardtime) -{ - struct dentry *dentry; - struct inode *inode; - int accmode = NFSD_MAY_SATTR; - umode_t ftype = 0; - __be32 err; - int host_err; - int size_change = 0; - - if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) - accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; - if (iap->ia_valid & ATTR_SIZE) - ftype = S_IFREG; - - /* Get inode */ - err = fh_verify(rqstp, fhp, ftype, accmode); - if (err) - goto out; - - dentry = fhp->fh_dentry; - inode = dentry->d_inode; - - /* Ignore any mode updates on symlinks */ - if (S_ISLNK(inode->i_mode)) - iap->ia_valid &= ~ATTR_MODE; - - if (!iap->ia_valid) - goto out; - - nfsd_sanitize_attrs(inode, iap); - - /* - * The size case is special, it changes the file in addition to the - * attributes. - */ - if (iap->ia_valid & ATTR_SIZE) { - err = nfsd_get_write_access(rqstp, fhp, iap); - if (err) - goto out; - size_change = 1; - } + /* Change the attributes. */ iap->ia_valid |= ATTR_CTIME; - if (check_guard && guardtime != inode->i_ctime.tv_sec) { - err = nfserr_notsync; - goto out_put_write_access; - } - - host_err = nfsd_break_lease(inode); - if (host_err) - goto out_put_write_access_nfserror; - - fh_lock(fhp); - host_err = notify_change(dentry, iap, NULL); - fh_unlock(fhp); + err = nfserr_notsync; + if (!check_guard || guardtime == inode->i_ctime.tv_sec) { + host_err = nfsd_break_lease(inode); + if (host_err) + goto out_nfserr; + fh_lock(fhp); -out_put_write_access_nfserror: - err = nfserrno(host_err); -out_put_write_access: + host_err = notify_change(dentry, iap); + err = nfserrno(host_err); + fh_unlock(fhp); + } if (size_change) put_write_access(inode); if (!err) commit_metadata(fhp); out: return err; + +out_nfserr: + err = nfserrno(host_err); + goto out; } #if defined(CONFIG_NFSD_V2_ACL) || \ @@ -1013,11 +988,7 @@ static void kill_suid(struct dentry *dentry) ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; mutex_lock(&dentry->d_inode->i_mutex); - /* - * Note we call this on write, so notify_change will not - * encounter any conflicting delegations: - */ - notify_change(dentry, &ia, NULL); + notify_change(dentry, &ia); mutex_unlock(&dentry->d_inode->i_mutex); } @@ -1346,8 +1317,9 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (!fhp->fh_locked) { /* not actually possible */ printk(KERN_ERR - "nfsd_create: parent %pd2 not locked!\n", - dentry); + "nfsd_create: parent %s/%s not locked!\n", + dentry->d_parent->d_name.name, + dentry->d_name.name); err = nfserr_io; goto out; } @@ -1357,8 +1329,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ err = nfserr_exist; if (dchild->d_inode) { - dprintk("nfsd_create: dentry %pd/%pd not negative!\n", - dentry, dchild); + dprintk("nfsd_create: dentry %s/%s not negative!\n", + dentry->d_name.name, dchild->d_name.name); goto out; } @@ -1765,7 +1737,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, err = nfserrno(host_err); goto out_dput; } - host_err = vfs_link(dold, dirp, dnew, NULL); + host_err = vfs_link(dold, dirp, dnew); if (!host_err) { err = nfserrno(commit_metadata(ffhp)); if (!err) @@ -1866,7 +1838,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (host_err) goto out_dput_new; } - host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL); + host_err = vfs_rename(fdir, odentry, tdir, ndentry); if (!host_err) { host_err = commit_metadata(tfhp); if (!host_err) @@ -1939,7 +1911,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (host_err) goto out_put; if (type != S_IFDIR) - host_err = vfs_unlink(dirp, rdentry, NULL); + host_err = vfs_unlink(dirp, rdentry); else host_err = vfs_rmdir(dirp, rdentry); if (!host_err) diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index ffb9b36..2778b02 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -55,7 +55,7 @@ * * Return 1 if the attributes match and 0 if not. * - * NOTE: This function runs with the inode_hash_lock spin lock held so it is not + * NOTE: This function runs with the inode->i_lock spin lock held so it is not * allowed to sleep. */ int ntfs_test_inode(struct inode *vi, ntfs_attr *na) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index dc7411f..17e6bdd 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1025,7 +1025,7 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle, for(i = count; i < (num_got + count); i++) { bhs[i] = sb_getblk(osb->sb, first_blkno); if (bhs[i] == NULL) { - status = -ENOMEM; + status = -EIO; mlog_errno(status); goto bail; } diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index aeb44e8..f37d3c0 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -80,7 +80,6 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb, le32_to_cpu(fe->i_clusters))) { - err = -ENOMEM; mlog(ML_ERROR, "block offset is outside the allocated size: " "%llu\n", (unsigned long long)iblock); goto bail; @@ -93,7 +92,6 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, iblock; buffer_cache_bh = sb_getblk(osb->sb, blkno); if (!buffer_cache_bh) { - err = -ENOMEM; mlog(ML_ERROR, "couldn't getblock for symlink!\n"); goto bail; } @@ -594,11 +592,26 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, ocfs2_rw_unlock(inode, level); } +/* + * ocfs2_invalidatepage() and ocfs2_releasepage() are shamelessly stolen + * from ext3. PageChecked() bits have been removed as OCFS2 does not + * do journalled data. + */ +static void ocfs2_invalidatepage(struct page *page, unsigned int offset, + unsigned int length) +{ + journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; + + jbd2_journal_invalidatepage(journal, page, offset, length); +} + static int ocfs2_releasepage(struct page *page, gfp_t wait) { + journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; + if (!page_has_buffers(page)) return 0; - return try_to_free_buffers(page); + return jbd2_journal_try_to_free_buffers(journal, page, wait); } static ssize_t ocfs2_direct_IO(int rw, @@ -1789,7 +1802,8 @@ try_again: data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv; credits = ocfs2_calc_extend_credits(inode->i_sb, - &di->id2.i_list); + &di->id2.i_list, + clusters_to_alloc); } @@ -1883,14 +1897,10 @@ out_commit: out: ocfs2_free_write_ctxt(wc); - if (data_ac) { + if (data_ac) ocfs2_free_alloc_context(data_ac); - data_ac = NULL; - } - if (meta_ac) { + if (meta_ac) ocfs2_free_alloc_context(meta_ac); - meta_ac = NULL; - } if (ret == -ENOSPC && try_free) { /* @@ -2077,7 +2087,7 @@ const struct address_space_operations ocfs2_aops = { .write_end = ocfs2_write_end, .bmap = ocfs2_bmap, .direct_IO = ocfs2_direct_IO, - .invalidatepage = block_invalidatepage, + .invalidatepage = ocfs2_invalidatepage, .releasepage = ocfs2_releasepage, .migratepage = buffer_migrate_page, .is_partially_uptodate = block_is_partially_uptodate, diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 5b704c6..5d18ad1 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -115,7 +115,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, if (bhs[i] == NULL) { bhs[i] = sb_getblk(osb->sb, block++); if (bhs[i] == NULL) { - status = -ENOMEM; + status = -EIO; mlog_errno(status); goto bail; } @@ -214,7 +214,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, bhs[i] = sb_getblk(sb, block++); if (bhs[i] == NULL) { ocfs2_metadata_cache_io_unlock(ci); - status = -ENOMEM; + status = -EIO; mlog_errno(status); goto bail; } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 73920ff..363f0dc 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -35,7 +35,6 @@ #include <linux/time.h> #include <linux/debugfs.h> #include <linux/slab.h> -#include <linux/bitmap.h> #include "heartbeat.h" #include "tcp.h" @@ -283,6 +282,15 @@ struct o2hb_bio_wait_ctxt { int wc_error; }; +static int o2hb_pop_count(void *map, int count) +{ + int i = -1, pop = 0; + + while ((i = find_next_bit(map, count, i + 1)) < count) + pop++; + return pop; +} + static void o2hb_write_timeout(struct work_struct *work) { int failed, quorum; @@ -299,9 +307,9 @@ static void o2hb_write_timeout(struct work_struct *work) spin_lock_irqsave(&o2hb_live_lock, flags); if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) set_bit(reg->hr_region_num, o2hb_failed_region_bitmap); - failed = bitmap_weight(o2hb_failed_region_bitmap, + failed = o2hb_pop_count(&o2hb_failed_region_bitmap, O2NM_MAX_REGIONS); - quorum = bitmap_weight(o2hb_quorum_region_bitmap, + quorum = o2hb_pop_count(&o2hb_quorum_region_bitmap, O2NM_MAX_REGIONS); spin_unlock_irqrestore(&o2hb_live_lock, flags); @@ -757,7 +765,7 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg) * If global heartbeat active, unpin all regions if the * region count > CUT_OFF */ - if (bitmap_weight(o2hb_quorum_region_bitmap, + if (o2hb_pop_count(&o2hb_quorum_region_bitmap, O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF) o2hb_region_unpin(NULL); unlock: @@ -946,9 +954,23 @@ out: return changed; } -static int o2hb_highest_node(unsigned long *nodes, int numbits) +/* This could be faster if we just implmented a find_last_bit, but I + * don't think the circumstances warrant it. */ +static int o2hb_highest_node(unsigned long *nodes, + int numbits) { - return find_last_bit(nodes, numbits); + int highest, node; + + highest = numbits; + node = -1; + while ((node = find_next_bit(nodes, numbits, node + 1)) != -1) { + if (node >= numbits) + break; + + highest = node; + } + + return highest; } static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) @@ -1807,7 +1829,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, live_threshold = O2HB_LIVE_THRESHOLD; if (o2hb_global_heartbeat_active()) { spin_lock(&o2hb_live_lock); - if (bitmap_weight(o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1) + if (o2hb_pop_count(&o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1) live_threshold <<= 1; spin_unlock(&o2hb_live_lock); } @@ -2158,7 +2180,7 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, if (!o2hb_dependent_users) goto unlock; - if (bitmap_weight(o2hb_quorum_region_bitmap, + if (o2hb_pop_count(&o2hb_quorum_region_bitmap, O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF) o2hb_region_pin(NULL); @@ -2458,7 +2480,7 @@ static int o2hb_region_inc_user(const char *region_uuid) if (o2hb_dependent_users > 1) goto unlock; - if (bitmap_weight(o2hb_quorum_region_bitmap, + if (o2hb_pop_count(&o2hb_quorum_region_bitmap, O2NM_MAX_REGIONS) <= O2HB_PIN_CUT_OFF) ret = o2hb_region_pin(NULL); diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 2260fb9..baa2b9e 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -199,8 +199,7 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; #define mlog_errno(st) do { \ int _st = (st); \ if (_st != -ERESTARTSYS && _st != -EINTR && \ - _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \ - _st != -EDQUOT) \ + _st != AOP_TRUNCATED_PAGE && _st != -ENOSPC) \ mlog(ML_ERROR, "status = %lld\n", (long long)_st); \ } while (0) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 91a7e85..30544ce 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2349,7 +2349,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb, dx_root_bh = sb_getblk(osb->sb, dr_blkno); if (dx_root_bh == NULL) { - ret = -ENOMEM; + ret = -EIO; goto out; } ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dx_root_bh); @@ -2422,7 +2422,7 @@ static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb, for (i = 0; i < num_dx_leaves; i++) { bh = sb_getblk(osb->sb, start_blk + i); if (bh == NULL) { - ret = -ENOMEM; + ret = -EIO; goto out; } dx_leaves[i] = bh; @@ -2929,7 +2929,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off); dirdata_bh = sb_getblk(sb, blkno); if (!dirdata_bh) { - ret = -ENOMEM; + ret = -EIO; mlog_errno(ret); goto out_commit; } @@ -3159,7 +3159,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb, *new_bh = sb_getblk(sb, p_blkno); if (!*new_bh) { - status = -ENOMEM; + status = -EIO; mlog_errno(status); goto bail; } @@ -3284,7 +3284,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, if (ocfs2_dir_resv_allowed(osb)) data_ac->ac_resv = &OCFS2_I(dir)->ip_la_data_resv; - credits = ocfs2_calc_extend_credits(sb, el); + credits = ocfs2_calc_extend_credits(sb, el, 1); } else { spin_unlock(&OCFS2_I(dir)->ip_lock); credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; @@ -3716,7 +3716,7 @@ static int ocfs2_dx_dir_rebalance_credits(struct ocfs2_super *osb, { int credits = ocfs2_clusters_to_blocks(osb->sb, 2); - credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list); + credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list, 1); credits += ocfs2_quota_trans_credits(osb->sb); return credits; } diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index af3f7aa..cf0f103 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -1885,10 +1885,8 @@ ok: * up nodes that this node contacted */ while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, nn+1)) < O2NM_MAX_NODES) { - if (nn != dlm->node_num && nn != assert->node_idx) { + if (nn != dlm->node_num && nn != assert->node_idx) master_request = 1; - break; - } } } mle->master = assert->node_idx; @@ -2356,10 +2354,6 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, assert_spin_locked(&res->spinlock); - /* delay migration when the lockres is in MIGRATING state */ - if (res->state & DLM_LOCK_RES_MIGRATING) - return 0; - if (res->owner != dlm->node_num) return 0; diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 7035af0..0b5adca 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1886,13 +1886,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, if (ml->type == LKM_NLMODE) goto skip_lvb; - /* - * If the lock is in the blocked list it can't have a valid lvb, - * so skip it - */ - if (ml->list == DLM_BLOCKED_LIST) - goto skip_lvb; - if (!dlm_lvb_is_empty(mres->lvb)) { if (lksb->flags & DLM_LKSB_PUT_LVB) { /* other node was trying to update diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 3407b2c..3a44a64 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1304,7 +1304,7 @@ static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) { wait_for_completion(&mw->mw_complete); /* Re-arm the completion in case we want to wait on it again */ - reinit_completion(&mw->mw_complete); + INIT_COMPLETION(mw->mw_complete); return mw->mw_status; } @@ -1355,7 +1355,7 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, else ret = mw->mw_status; /* Re-arm the completion in case we want to wait on it again */ - reinit_completion(&mw->mw_complete); + INIT_COMPLETION(mw->mw_complete); return ret; } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 6fff128..d71903c 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -580,7 +580,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, int did_quota = 0; /* - * Unwritten extent only exists for file systems which + * This function only exists for file systems which don't * support holes. */ BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); @@ -603,7 +603,8 @@ restart_all: goto leave; } - credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list); + credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list, + clusters_to_add); handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { status = PTR_ERR(handle); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index f29a90f..f87f9bd 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -386,9 +386,19 @@ static int ocfs2_read_locked_inode(struct inode *inode, u32 generation = 0; status = -EINVAL; + if (inode == NULL || inode->i_sb == NULL) { + mlog(ML_ERROR, "bad inode\n"); + return status; + } sb = inode->i_sb; osb = OCFS2_SB(sb); + if (!args) { + mlog(ML_ERROR, "bad inode args\n"); + make_bad_inode(inode); + return status; + } + /* * To improve performance of cold-cache inode stats, we take * the cluster lock here if possible. diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 9ff4e8c..0b479ba 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -524,7 +524,8 @@ static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb) * the result may be wrong. */ static inline int ocfs2_calc_extend_credits(struct super_block *sb, - struct ocfs2_extent_list *root_el) + struct ocfs2_extent_list *root_el, + u32 bits_wanted) { int bitmap_blocks, sysfile_bitmap_blocks, extent_blocks; diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 631a982..3d3f3c8 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -201,7 +201,8 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode, } } - *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el); + *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el, + clusters_to_move + 2); mlog(0, "reserve metadata_blocks: %d, data_clusters: %u, credits: %d\n", extra_blocks, clusters_to_move, *credits); @@ -1066,10 +1067,8 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) if (status) return status; - if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) { - status = -EPERM; + if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) goto out_drop; - } if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { status = -EPERM; @@ -1091,10 +1090,8 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) goto out_free; } - if (range.me_start > i_size_read(inode)) { - status = -EINVAL; + if (range.me_start > i_size_read(inode)) goto out_free; - } if (range.me_start + range.me_len > i_size_read(inode)) range.me_len = i_size_read(inode) - range.me_start; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 4f791f6..be3f867 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -489,7 +489,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, *new_fe_bh = sb_getblk(osb->sb, fe_blkno); if (!*new_fe_bh) { - status = -ENOMEM; + status = -EIO; mlog_errno(status); goto leave; } diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 55767e1..bf4dfc1 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -612,11 +612,6 @@ static int ocfs2_create_refcount_tree(struct inode *inode, } new_bh = sb_getblk(inode->i_sb, first_blkno); - if (!new_bh) { - ret = -ENOMEM; - mlog_errno(ret); - goto out_commit; - } ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh); ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh, @@ -1315,7 +1310,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle, new_bh = sb_getblk(sb, blkno); if (new_bh == NULL) { - ret = -ENOMEM; + ret = -EIO; mlog_errno(ret); goto out; } @@ -1566,7 +1561,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle, new_bh = sb_getblk(sb, blkno); if (new_bh == NULL) { - ret = -ENOMEM; + ret = -EIO; mlog_errno(ret); goto out; } @@ -2507,7 +2502,8 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh); *meta_add += ocfs2_extend_meta_needed(et.et_root_el); *credits += ocfs2_calc_extend_credits(sb, - et.et_root_el); + et.et_root_el, + ref_blocks); } else { *credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS; *meta_add += 1; @@ -2878,7 +2874,8 @@ static int ocfs2_lock_refcount_allocators(struct super_block *sb, meta_add = ocfs2_extend_meta_needed(et->et_root_el); - *credits += ocfs2_calc_extend_credits(sb, et->et_root_el); + *credits += ocfs2_calc_extend_credits(sb, et->et_root_el, + num_clusters + 2); ret = ocfs2_calc_refcount_meta_credits(sb, ref_ci, ref_root_bh, p_cluster, num_clusters, @@ -3034,7 +3031,7 @@ int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, for (i = 0; i < blocks; i++, old_block++, new_block++) { new_bh = sb_getblk(osb->sb, new_block); if (new_bh == NULL) { - ret = -ENOMEM; + ret = -EIO; mlog_errno(ret); break; } @@ -3628,7 +3625,8 @@ int ocfs2_refcounted_xattr_delete_need(struct inode *inode, ocfs2_init_refcount_extent_tree(&et, ref_ci, ref_root_bh); *credits += ocfs2_calc_extend_credits(inode->i_sb, - et.et_root_el); + et.et_root_el, + ref_blocks); } out: diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index 822ebc1..ec55add 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -469,7 +469,6 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) struct ocfs2_chain_list *cl; struct ocfs2_chain_rec *cr; u16 cl_bpc; - u64 bg_ptr; if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) return -EROFS; @@ -514,7 +513,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh); if (ret) { mlog_errno(ret); - goto out_free_group_bh; + goto out_unlock; } trace_ocfs2_group_add((unsigned long long)input->group, @@ -524,7 +523,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) if (IS_ERR(handle)) { mlog_errno(PTR_ERR(handle)); ret = -EINVAL; - goto out_free_group_bh; + goto out_unlock; } cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); @@ -539,14 +538,12 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) } group = (struct ocfs2_group_desc *)group_bh->b_data; - bg_ptr = le64_to_cpu(group->bg_next_group); group->bg_next_group = cr->c_blkno; ocfs2_journal_dirty(handle, group_bh); ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode), main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { - group->bg_next_group = cpu_to_le64(bg_ptr); mlog_errno(ret); goto out_commit; } @@ -577,11 +574,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) out_commit: ocfs2_commit_trans(osb, handle); - -out_free_group_bh: - brelse(group_bh); - out_unlock: + brelse(group_bh); brelse(main_bm_bh); ocfs2_inode_unlock(main_bm_inode, 1); diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index cb7ec0b..39abf89 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -643,7 +643,7 @@ error: #define FS_OCFS2_NM 1 -static struct ctl_table ocfs2_nm_table[] = { +static ctl_table ocfs2_nm_table[] = { { .procname = "hb_ctl_path", .data = ocfs2_hb_ctl_path, @@ -654,7 +654,7 @@ static struct ctl_table ocfs2_nm_table[] = { { } }; -static struct ctl_table ocfs2_mod_table[] = { +static ctl_table ocfs2_mod_table[] = { { .procname = "nm", .data = NULL, @@ -665,7 +665,7 @@ static struct ctl_table ocfs2_mod_table[] = { { } }; -static struct ctl_table ocfs2_kern_table[] = { +static ctl_table ocfs2_kern_table[] = { { .procname = "ocfs2", .data = NULL, @@ -676,7 +676,7 @@ static struct ctl_table ocfs2_kern_table[] = { { } }; -static struct ctl_table ocfs2_root_table[] = { +static ctl_table ocfs2_root_table[] = { { .procname = "fs", .data = NULL, diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 2c91452..5397c07 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -481,7 +481,7 @@ ocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle, bg_bh = sb_getblk(osb->sb, bg_blkno); if (!bg_bh) { - status = -ENOMEM; + status = -EIO; mlog_errno(status); goto bail; } @@ -661,7 +661,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle, bg_bh = sb_getblk(osb->sb, bg_blkno); if (!bg_bh) { - status = -ENOMEM; + status = -EIO; mlog_errno(status); goto bail; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index c414929..d4e81e4 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1848,8 +1848,8 @@ static int ocfs2_get_sector(struct super_block *sb, *bh = sb_getblk(sb, block); if (!*bh) { - mlog_errno(-ENOMEM); - return -ENOMEM; + mlog_errno(-EIO); + return -EIO; } lock_buffer(*bh); if (!buffer_dirty(*bh)) diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index f0a1326..6ce0686 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -377,7 +377,7 @@ static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, xb_blkno + i); if (!bucket->bu_bhs[i]) { - rc = -ENOMEM; + rc = -EIO; mlog_errno(rc); break; } @@ -754,7 +754,8 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, BUG_ON(why == RESTART_META); credits = ocfs2_calc_extend_credits(inode->i_sb, - &vb->vb_xv->xr_list); + &vb->vb_xv->xr_list, + clusters_to_add); status = ocfs2_extend_trans(handle, credits); if (status < 0) { status = -ENOMEM; @@ -2864,12 +2865,6 @@ static int ocfs2_create_xattr_block(struct inode *inode, } new_bh = sb_getblk(inode->i_sb, first_blkno); - if (!new_bh) { - ret = -ENOMEM; - mlog_errno(ret); - goto end; - } - ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh); ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode), @@ -3045,7 +3040,8 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { clusters_add += new_clusters; credits += ocfs2_calc_extend_credits(inode->i_sb, - &def_xv.xv.xr_list); + &def_xv.xv.xr_list, + new_clusters); } goto meta_guess; @@ -3110,7 +3106,8 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, if (!ocfs2_xattr_is_local(xe)) credits += ocfs2_calc_extend_credits( inode->i_sb, - &def_xv.xv.xr_list); + &def_xv.xv.xr_list, + new_clusters); goto out; } } @@ -3135,7 +3132,9 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode, meta_add += ocfs2_extend_meta_needed(&xv->xr_list); clusters_add += new_clusters - old_clusters; credits += ocfs2_calc_extend_credits(inode->i_sb, - &xv->xr_list); + &xv->xr_list, + new_clusters - + old_clusters); if (value_size >= OCFS2_XATTR_ROOT_SIZE) goto out; } @@ -3181,7 +3180,7 @@ meta_guess: &xb->xb_attrs.xb_root.xt_list; meta_add += ocfs2_extend_meta_needed(el); credits += ocfs2_calc_extend_credits(inode->i_sb, - el); + el, 1); } else credits += OCFS2_SUBALLOC_ALLOC + 1; @@ -6217,7 +6216,8 @@ static int ocfs2_value_metas_in_xattr_header(struct super_block *sb, le16_to_cpu(xv->xr_list.l_next_free_rec); *credits += ocfs2_calc_extend_credits(sb, - &def_xv.xv.xr_list); + &def_xv.xv.xr_list, + le32_to_cpu(xv->xr_clusters)); /* * If the value is a tree with depth > 1, We don't go deep @@ -6782,7 +6782,7 @@ static int ocfs2_lock_reflink_xattr_rec_allocators( metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el); *credits += ocfs2_calc_extend_credits(osb->sb, - xt_et->et_root_el); + xt_et->et_root_el, len); if (metas.num_metas) { ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas, @@ -57,8 +57,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, newattrs.ia_valid |= ret | ATTR_FORCE; mutex_lock(&dentry->d_inode->i_mutex); - /* Note any delegations or leases have already been broken: */ - ret = notify_change(dentry, &newattrs, NULL); + ret = notify_change(dentry, &newattrs); mutex_unlock(&dentry->d_inode->i_mutex); return ret; } @@ -465,28 +464,21 @@ out: static int chmod_common(struct path *path, umode_t mode) { struct inode *inode = path->dentry->d_inode; - struct inode *delegated_inode = NULL; struct iattr newattrs; int error; error = mnt_want_write(path->mnt); if (error) return error; -retry_deleg: mutex_lock(&inode->i_mutex); error = security_path_chmod(path, mode); if (error) goto out_unlock; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - error = notify_change(path->dentry, &newattrs, &delegated_inode); + error = notify_change(path->dentry, &newattrs); out_unlock: mutex_unlock(&inode->i_mutex); - if (delegated_inode) { - error = break_deleg_wait(&delegated_inode); - if (!error) - goto retry_deleg; - } mnt_drop_write(path->mnt); return error; } @@ -530,7 +522,6 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) static int chown_common(struct path *path, uid_t user, gid_t group) { struct inode *inode = path->dentry->d_inode; - struct inode *delegated_inode = NULL; int error; struct iattr newattrs; kuid_t uid; @@ -555,17 +546,12 @@ static int chown_common(struct path *path, uid_t user, gid_t group) if (!S_ISDIR(inode->i_mode)) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; -retry_deleg: mutex_lock(&inode->i_mutex); error = security_path_chown(path, uid, gid); if (!error) - error = notify_change(path->dentry, &newattrs, &delegated_inode); + error = notify_change(path->dentry, &newattrs); mutex_unlock(&inode->i_mutex); - if (delegated_inode) { - error = break_deleg_wait(&delegated_inode); - if (!error) - goto retry_deleg; - } + return error; } @@ -699,6 +685,7 @@ static int do_dentry_open(struct file *f, } f->f_mapping = inode->i_mapping; + file_sb_list_add(f, inode->i_sb); if (unlikely(f->f_mode & FMODE_PATH)) { f->f_op = &empty_fops; @@ -706,10 +693,6 @@ static int do_dentry_open(struct file *f, } f->f_op = fops_get(inode->i_fop); - if (unlikely(WARN_ON(!f->f_op))) { - error = -ENODEV; - goto cleanup_all; - } error = security_file_open(f, cred); if (error) @@ -719,7 +702,7 @@ static int do_dentry_open(struct file *f, if (error) goto cleanup_all; - if (!open) + if (!open && f->f_op) open = f->f_op->open; if (open) { error = open(inode, f); @@ -737,6 +720,7 @@ static int do_dentry_open(struct file *f, cleanup_all: fops_put(f->f_op); + file_sb_list_del(f); if (f->f_mode & FMODE_WRITE) { put_write_access(inode); if (!special_file(inode->i_mode)) { @@ -1039,7 +1023,7 @@ int filp_close(struct file *filp, fl_owner_t id) return 0; } - if (filp->f_op->flush) + if (filp->f_op && filp->f_op->flush) retval = filp->f_op->flush(filp, id); if (likely(!(filp->f_mode & FMODE_PATH))) { @@ -726,25 +726,11 @@ pipe_poll(struct file *filp, poll_table *wait) return mask; } -static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe) -{ - int kill = 0; - - spin_lock(&inode->i_lock); - if (!--pipe->files) { - inode->i_pipe = NULL; - kill = 1; - } - spin_unlock(&inode->i_lock); - - if (kill) - free_pipe_info(pipe); -} - static int pipe_release(struct inode *inode, struct file *file) { - struct pipe_inode_info *pipe = file->private_data; + struct pipe_inode_info *pipe = inode->i_pipe; + int kill = 0; __pipe_lock(pipe); if (file->f_mode & FMODE_READ) @@ -757,9 +743,17 @@ pipe_release(struct inode *inode, struct file *file) kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } + spin_lock(&inode->i_lock); + if (!--pipe->files) { + inode->i_pipe = NULL; + kill = 1; + } + spin_unlock(&inode->i_lock); __pipe_unlock(pipe); - put_pipe_info(inode, pipe); + if (kill) + free_pipe_info(pipe); + return 0; } @@ -1020,6 +1014,7 @@ static int fifo_open(struct inode *inode, struct file *filp) { struct pipe_inode_info *pipe; bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC; + int kill = 0; int ret; filp->f_version = 0; @@ -1135,9 +1130,15 @@ err_wr: goto err; err: + spin_lock(&inode->i_lock); + if (!--pipe->files) { + inode->i_pipe = NULL; + kill = 1; + } + spin_unlock(&inode->i_lock); __pipe_unlock(pipe); - - put_pipe_info(inode, pipe); + if (kill) + free_pipe_info(pipe); return ret; } @@ -264,12 +264,12 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, prev_src_mnt = child; } out: - lock_mount_hash(); + br_write_lock(&vfsmount_lock); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct mount, mnt_hash); umount_tree(child, 0); } - unlock_mount_hash(); + br_write_unlock(&vfsmount_lock); return ret; } @@ -278,7 +278,8 @@ out: */ static inline int do_refcount_check(struct mount *mnt, int count) { - return mnt_get_count(mnt) > count; + int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts; + return (mycount > count); } /* @@ -310,7 +311,7 @@ int propagate_mount_busy(struct mount *mnt, int refcnt) for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint); + child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint, 0); if (child && list_empty(&child->mnt_mounts) && (ret = do_refcount_check(child, 1))) break; @@ -332,8 +333,8 @@ static void __propagate_umount(struct mount *mnt) for (m = propagation_next(parent, parent); m; m = propagation_next(m, parent)) { - struct mount *child = __lookup_mnt_last(&m->mnt, - mnt->mnt_mountpoint); + struct mount *child = __lookup_mnt(&m->mnt, + mnt->mnt_mountpoint, 0); /* * umount the child only if the child has no * other children diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 2183fcf..15af622 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -31,10 +31,6 @@ config PROC_FS config PROC_KCORE bool "/proc/kcore support" if !ARM depends on PROC_FS && MMU - help - Provides a virtual ELF core file of the live kernel. This can - be read with gdb and other ELF tools. No modifications can be - made using this mechanism. config PROC_VMCORE bool "/proc/vmcore support" diff --git a/fs/proc/array.c b/fs/proc/array.c index 1bd2077..cbd0f1b 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -183,7 +183,6 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" - "Ngid:\t%d\n" "Pid:\t%d\n" "PPid:\t%d\n" "TracerPid:\t%d\n" @@ -191,7 +190,6 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), task_tgid_nr_ns(p, ns), - task_numa_group_id(p), pid_nr_ns(pid, ns), ppid, tpid, from_kuid_munged(user_ns, cred->uid), diff --git a/fs/proc/base.c b/fs/proc/base.c index 03c8d74..1485e38 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1151,16 +1151,10 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, goto out_free_page; } - - /* is userspace tring to explicitly UNSET the loginuid? */ - if (loginuid == AUDIT_UID_UNSET) { - kloginuid = INVALID_UID; - } else { - kloginuid = make_kuid(file->f_cred->user_ns, loginuid); - if (!uid_valid(kloginuid)) { - length = -EINVAL; - goto out_free_page; - } + kloginuid = make_kuid(file->f_cred->user_ns, loginuid); + if (!uid_valid(kloginuid)) { + length = -EINVAL; + goto out_free_page; } length = audit_set_loginuid(kloginuid); diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index 51942d5..b701eaa 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -29,6 +29,7 @@ static int show_console_dev(struct seq_file *m, void *v) char flags[ARRAY_SIZE(con_flags) + 1]; struct console *con = v; unsigned int a; + int len; dev_t dev = 0; if (con->device) { @@ -46,10 +47,11 @@ static int show_console_dev(struct seq_file *m, void *v) con_flags[a].name : ' '; flags[a] = 0; - seq_setwidth(m, 21 - 1); - seq_printf(m, "%s%d", con->name, con->index); - seq_pad(m, ' '); - seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', + seq_printf(m, "%s%d%n", con->name, con->index, &len); + len = 21 - len; + if (len < 1) + len = 1; + seq_printf(m, "%*c%c%c%c (%s)", len, ' ', con->read ? 'R' : '-', con->write ? 'W' : '-', con->unblank ? 'U' : '-', flags); if (dev) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index cca93b6..737e156 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -175,6 +175,22 @@ static const struct inode_operations proc_link_inode_operations = { }; /* + * As some entries in /proc are volatile, we want to + * get rid of unused dentries. This could be made + * smarter: we could keep a "volatile" flag in the + * inode to indicate which ones to keep. + */ +static int proc_delete_dentry(const struct dentry * dentry) +{ + return 1; +} + +static const struct dentry_operations proc_dentry_operations = +{ + .d_delete = proc_delete_dentry, +}; + +/* * Don't create negative dentries here, return -ENOENT by hand * instead. */ @@ -193,7 +209,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, inode = proc_get_inode(dir->i_sb, de); if (!inode) return ERR_PTR(-ENOMEM); - d_set_d_op(dentry, &simple_dentry_operations); + d_set_d_op(dentry, &proc_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 28955d4..8eaa1ba 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -285,23 +285,19 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) return rv; } -static unsigned long -proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, - unsigned long len, unsigned long pgoff, - unsigned long flags) +static unsigned long proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct proc_dir_entry *pde = PDE(file_inode(file)); unsigned long rv = -EIO; - unsigned long (*get_area)(struct file *, unsigned long, unsigned long, - unsigned long, unsigned long) = NULL; + unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long) = NULL; if (use_pde(pde)) { #ifdef CONFIG_MMU - get_area = current->mm->get_unmapped_area; + get_unmapped_area = current->mm->get_unmapped_area; #endif if (pde->proc_fops->get_unmapped_area) - get_area = pde->proc_fops->get_unmapped_area; - if (get_area) - rv = get_area(file, orig_addr, len, pgoff, flags); + get_unmapped_area = pde->proc_fops->get_unmapped_area; + if (get_unmapped_area) + rv = get_unmapped_area(file, orig_addr, len, pgoff, flags); unuse_pde(pde); } return rv; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 5ed0e52..06ea155 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -255,7 +255,8 @@ static int kcore_update_ram(void) end_pfn = 0; for_each_node_state(nid, N_MEMORY) { unsigned long node_end; - node_end = node_end_pfn(nid); + node_end = NODE_DATA(nid)->node_start_pfn + + NODE_DATA(nid)->node_spanned_pages; if (end_pfn < node_end) end_pfn = node_end; } diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index a77d2b2..59d85d6 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -1,8 +1,8 @@ #include <linux/fs.h> +#include <linux/hugetlb.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/mm.h> -#include <linux/hugetlb.h> #include <linux/mman.h> #include <linux/mmzone.h> #include <linux/proc_fs.h> @@ -24,6 +24,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) { struct sysinfo i; unsigned long committed; + unsigned long allowed; struct vmalloc_info vmi; long cached; unsigned long pages[NR_LRU_LISTS]; @@ -36,6 +37,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v) si_meminfo(&i); si_swapinfo(&i); committed = percpu_counter_read_positive(&vm_committed_as); + allowed = ((totalram_pages - hugetlb_total_pages()) + * sysctl_overcommit_ratio / 100) + total_swap_pages; cached = global_page_state(NR_FILE_PAGES) - total_swapcache_pages() - i.bufferram; @@ -144,7 +147,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(global_page_state(NR_UNSTABLE_NFS)), K(global_page_state(NR_BOUNCE)), K(global_page_state(NR_WRITEBACK_TEMP)), - K(vm_commit_limit()), + K(allowed), K(committed), (unsigned long)VMALLOC_TOTAL >> 10, vmi.used >> 10, diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 9ae46b8..49a7fff 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -42,6 +42,12 @@ static const struct inode_operations ns_inode_operations = { .setattr = proc_setattr, }; +static int ns_delete_dentry(const struct dentry *dentry) +{ + /* Don't cache namespace inodes when not in use */ + return 1; +} + static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) { struct inode *inode = dentry->d_inode; @@ -53,7 +59,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) const struct dentry_operations ns_dentry_operations = { - .d_delete = always_delete_dentry, + .d_delete = ns_delete_dentry, .d_dname = ns_dname, }; diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 5f9bc8a..ccfd99b 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -39,7 +39,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) unsigned long ino = 0; struct file *file; dev_t dev = 0; - int flags; + int flags, len; flags = region->vm_flags; file = region->vm_file; @@ -50,9 +50,8 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) ino = inode->i_ino; } - seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_printf(m, - "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", + "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", region->vm_start, region->vm_end, flags & VM_READ ? 'r' : '-', @@ -60,10 +59,13 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', ((loff_t)region->vm_pgoff) << PAGE_SHIFT, - MAJOR(dev), MINOR(dev), ino); + MAJOR(dev), MINOR(dev), ino, &len); if (file) { - seq_pad(m, ' '); + len = 25 + sizeof(void *) * 6 - len; + if (len < 1) + len = 1; + seq_printf(m, "%*c", len, ' '); seq_path(m, &file->f_path, ""); } diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index 70779b2..106a835 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -14,13 +14,16 @@ #include <linux/of.h> #include <linux/export.h> #include <linux/slab.h> +#include <asm/prom.h> #include <asm/uaccess.h> #include "internal.h" static inline void set_node_proc_entry(struct device_node *np, struct proc_dir_entry *de) { +#ifdef HAVE_ARCH_DEVTREE_FIXUPS np->pde = de; +#endif } static struct proc_dir_entry *proc_device_tree; diff --git a/fs/proc/self.c b/fs/proc/self.c index ffeb202..6b6a993 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -36,10 +36,18 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) return NULL; } +static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, + void *cookie) +{ + char *s = nd_get_link(nd); + if (!IS_ERR(s)) + kfree(s); +} + static const struct inode_operations proc_self_inode_operations = { .readlink = proc_self_readlink, .follow_link = proc_self_follow_link, - .put_link = kfree_put_link, + .put_link = proc_self_put_link, }; static unsigned self_inum; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index fb52b54..390bdab 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -62,8 +62,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) total_rss << (PAGE_SHIFT-10), data << (PAGE_SHIFT-10), mm->stack_vm << (PAGE_SHIFT-10), text, lib, - (PTRS_PER_PTE * sizeof(pte_t) * - atomic_long_read(&mm->nr_ptes)) >> 10, + (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10, swap << (PAGE_SHIFT-10)); } @@ -84,6 +83,14 @@ unsigned long task_statm(struct mm_struct *mm, return mm->total_vm; } +static void pad_len_spaces(struct seq_file *m, int len) +{ + len = 25 + sizeof(void*) * 6 - len; + if (len < 1) + len = 1; + seq_printf(m, "%*c", len, ' '); +} + #ifdef CONFIG_NUMA /* * These functions are for numa_maps but called in generic **maps seq_file @@ -261,6 +268,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) unsigned long long pgoff = 0; unsigned long start, end; dev_t dev = 0; + int len; const char *name = NULL; if (file) { @@ -278,8 +286,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) if (stack_guard_page_end(vma, end)) end -= PAGE_SIZE; - seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); - seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", + seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", start, end, flags & VM_READ ? 'r' : '-', @@ -287,14 +294,14 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? 's' : 'p', pgoff, - MAJOR(dev), MINOR(dev), ino); + MAJOR(dev), MINOR(dev), ino, &len); /* * Print the dentry name for named mappings, and a * special [heap] marker for the heap: */ if (file) { - seq_pad(m, ' '); + pad_len_spaces(m, len); seq_path(m, &file->f_path, "\n"); goto done; } @@ -326,7 +333,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) name = "[stack]"; } else { /* Thread stack in /proc/PID/maps */ - seq_pad(m, ' '); + pad_len_spaces(m, len); seq_printf(m, "[stack:%d]", tid); } } @@ -334,7 +341,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) done: if (name) { - seq_pad(m, ' '); + pad_len_spaces(m, len); seq_puts(m, name); } seq_putc(m, '\n'); @@ -498,9 +505,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t *pte; spinlock_t *ptl; - if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { + if (pmd_trans_huge_lock(pmd, vma) == 1) { smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk); - spin_unlock(ptl); + spin_unlock(&walk->mm->page_table_lock); mss->anonymous_thp += HPAGE_PMD_SIZE; return 0; } @@ -554,9 +561,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_NONLINEAR)] = "nl", [ilog2(VM_ARCH_1)] = "ar", [ilog2(VM_DONTDUMP)] = "dd", -#ifdef CONFIG_MEM_SOFT_DIRTY - [ilog2(VM_SOFTDIRTY)] = "sd", -#endif [ilog2(VM_MIXEDMAP)] = "mm", [ilog2(VM_HUGEPAGE)] = "hg", [ilog2(VM_NOHUGEPAGE)] = "nh", @@ -991,14 +995,13 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, { struct vm_area_struct *vma; struct pagemapread *pm = walk->private; - spinlock_t *ptl; pte_t *pte; int err = 0; pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); - if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { + if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { int pmd_flags2; if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd)) @@ -1016,7 +1019,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (err) break; } - spin_unlock(ptl); + spin_unlock(&walk->mm->page_table_lock); return err; } @@ -1318,7 +1321,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, md = walk->private; - if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) { + if (pmd_trans_huge_lock(pmd, md->vma) == 1) { pte_t huge_pte = *(pte_t *)pmd; struct page *page; @@ -1326,7 +1329,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, if (page) gather_stats(page, md, pte_dirty(huge_pte), HPAGE_PMD_SIZE/PAGE_SIZE); - spin_unlock(ptl); + spin_unlock(&walk->mm->page_table_lock); return 0; } @@ -1384,8 +1387,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) struct mm_struct *mm = vma->vm_mm; struct mm_walk walk = {}; struct mempolicy *pol; - char buffer[64]; - int nid; + int n; + char buffer[50]; if (!mm) return 0; @@ -1401,8 +1404,10 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) walk.mm = mm; pol = get_vma_policy(task, vma, vma->vm_start); - mpol_to_str(buffer, sizeof(buffer), pol); + n = mpol_to_str(buffer, sizeof(buffer), pol); mpol_cond_put(pol); + if (n < 0) + return n; seq_printf(m, "%08lx %s", vma->vm_start, buffer); @@ -1455,9 +1460,9 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) if (md->writeback) seq_printf(m, " writeback=%lu", md->writeback); - for_each_node_state(nid, N_MEMORY) - if (md->node[nid]) - seq_printf(m, " N%d=%lu", nid, md->node[nid]); + for_each_node_state(n, N_MEMORY) + if (md->node[n]) + seq_printf(m, " N%d=%lu", n, md->node[n]); out: seq_putc(m, '\n'); diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 678455d..56123a6 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -123,6 +123,14 @@ unsigned long task_statm(struct mm_struct *mm, return size; } +static void pad_len_spaces(struct seq_file *m, int len) +{ + len = 25 + sizeof(void*) * 6 - len; + if (len < 1) + len = 1; + seq_printf(m, "%*c", len, ' '); +} + /* * display a single VMA to a sequenced file */ @@ -134,7 +142,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, unsigned long ino = 0; struct file *file; dev_t dev = 0; - int flags; + int flags, len; unsigned long long pgoff = 0; flags = vma->vm_flags; @@ -147,9 +155,8 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; } - seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_printf(m, - "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", + "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", vma->vm_start, vma->vm_end, flags & VM_READ ? 'r' : '-', @@ -157,16 +164,16 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', pgoff, - MAJOR(dev), MINOR(dev), ino); + MAJOR(dev), MINOR(dev), ino, &len); if (file) { - seq_pad(m, ' '); + pad_len_spaces(m, len); seq_path(m, &file->f_path, ""); } else if (mm) { pid_t tid = vm_is_stack(priv->task, vma, is_pid); if (tid != 0) { - seq_pad(m, ' '); + pad_len_spaces(m, len); /* * Thread stack in /proc/PID/task/TID/maps or * the main process stack. diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 439406e..5fe34c3 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -20,15 +20,15 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) struct proc_mounts *p = proc_mounts(file->private_data); struct mnt_namespace *ns = p->ns; unsigned res = POLLIN | POLLRDNORM; - int event; poll_wait(file, &p->ns->poll, wait); - event = ACCESS_ONCE(ns->event); - if (p->m.poll_event != event) { - p->m.poll_event = event; + br_read_lock(&vfsmount_lock); + if (p->m.poll_event != ns->event) { + p->m.poll_event = ns->event; res |= POLLERR | POLLPRI; } + br_read_unlock(&vfsmount_lock); return res; } diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index e62c818..d024505 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -60,6 +60,10 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir, struct buffer_head *bh; *res_dir = NULL; + if (!dir->i_sb) { + printk(KERN_WARNING "qnx4: no superblock on dir.\n"); + return NULL; + } bh = NULL; block = offset = blkofs = 0; while (blkofs * QNX4_BLOCK_SIZE + offset < dir->i_size) { diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index 72d2917..16e8abb 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c @@ -9,25 +9,13 @@ #include <net/netlink.h> #include <net/genetlink.h> -static const struct genl_multicast_group quota_mcgrps[] = { - { .name = "events", }, -}; - /* Netlink family structure for quota */ static struct genl_family quota_genl_family = { - /* - * Needed due to multicast group ID abuse - old code assumed - * the family ID was also a valid multicast group ID (which - * isn't true) and userspace might thus rely on it. Assign a - * static ID for this group to make dealing with that easier. - */ - .id = GENL_ID_VFS_DQUOT, + .id = GENL_ID_GENERATE, .hdrsize = 0, .name = "VFS_DQUOT", .version = 1, .maxattr = QUOTA_NL_A_MAX, - .mcgrps = quota_mcgrps, - .n_mcgrps = ARRAY_SIZE(quota_mcgrps), }; /** @@ -90,7 +78,7 @@ void quota_send_warning(struct kqid qid, dev_t dev, goto attr_err_out; genlmsg_end(skb, msg_head); - genlmsg_multicast("a_genl_family, skb, 0, 0, GFP_NOFS); + genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); return; attr_err_out: printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 2b363e2..dea86e8 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -117,7 +117,6 @@ static int quota_setinfo(struct super_block *sb, int type, void __user *addr) static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src) { - memset(dst, 0, sizeof(*dst)); dst->dqb_bhardlimit = src->d_blk_hardlimit; dst->dqb_bsoftlimit = src->d_blk_softlimit; dst->dqb_curspace = src->d_bcount; diff --git a/fs/read_write.c b/fs/read_write.c index 58e440d..e3cd280 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -257,7 +257,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence) fn = no_llseek; if (file->f_mode & FMODE_LSEEK) { - if (file->f_op->llseek) + if (file->f_op && file->f_op->llseek) fn = file->f_op->llseek; } return fn(file, offset, whence); @@ -384,7 +384,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) if (!(file->f_mode & FMODE_READ)) return -EBADF; - if (!file->f_op->read && !file->f_op->aio_read) + if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) return -EINVAL; if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) return -EFAULT; @@ -433,7 +433,7 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t const char __user *p; ssize_t ret; - if (!file->f_op->write && !file->f_op->aio_write) + if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) return -EINVAL; old_fs = get_fs(); @@ -460,7 +460,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ if (!(file->f_mode & FMODE_WRITE)) return -EBADF; - if (!file->f_op->write && !file->f_op->aio_write) + if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) return -EINVAL; if (unlikely(!access_ok(VERIFY_READ, buf, count))) return -EFAULT; @@ -727,6 +727,11 @@ static ssize_t do_readv_writev(int type, struct file *file, io_fn_t fn; iov_fn_t fnv; + if (!file->f_op) { + ret = -EINVAL; + goto out; + } + ret = rw_copy_check_uvector(type, uvector, nr_segs, ARRAY_SIZE(iovstack), iovstack, &iov); if (ret <= 0) @@ -773,7 +778,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, { if (!(file->f_mode & FMODE_READ)) return -EBADF; - if (!file->f_op->aio_read && !file->f_op->read) + if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) return -EINVAL; return do_readv_writev(READ, file, vec, vlen, pos); @@ -786,7 +791,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, { if (!(file->f_mode & FMODE_WRITE)) return -EBADF; - if (!file->f_op->aio_write && !file->f_op->write) + if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) return -EINVAL; return do_readv_writev(WRITE, file, vec, vlen, pos); @@ -901,6 +906,10 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, io_fn_t fn; iov_fn_t fnv; + ret = -EINVAL; + if (!file->f_op) + goto out; + ret = -EFAULT; if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) goto out; @@ -956,7 +965,7 @@ static size_t compat_readv(struct file *file, goto out; ret = -EINVAL; - if (!file->f_op->aio_read && !file->f_op->read) + if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) goto out; ret = compat_do_readv_writev(READ, file, vec, vlen, pos); @@ -1023,7 +1032,7 @@ static size_t compat_writev(struct file *file, goto out; ret = -EINVAL; - if (!file->f_op->aio_write && !file->f_op->write) + if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) goto out; ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); diff --git a/fs/readdir.c b/fs/readdir.c index 5b53d99..93d71e5 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -24,7 +24,7 @@ int iterate_dir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); int res = -ENOTDIR; - if (!file->f_op->iterate) + if (!file->f_op || !file->f_op->iterate) goto out; res = security_file_permission(file, MAY_READ); diff --git a/fs/select.c b/fs/select.c index 467bb1c..dfd5cb1 100644 --- a/fs/select.c +++ b/fs/select.c @@ -454,7 +454,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) const struct file_operations *f_op; f_op = f.file->f_op; mask = DEFAULT_POLLMASK; - if (f_op->poll) { + if (f_op && f_op->poll) { wait_key_set(wait, in, out, bit, busy_flag); mask = (*f_op->poll)(f.file, wait); @@ -761,7 +761,7 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait, mask = POLLNVAL; if (f.file) { mask = DEFAULT_POLLMASK; - if (f.file->f_op->poll) { + if (f.file->f_op && f.file->f_op->poll) { pwait->_key = pollfd->events|POLLERR|POLLHUP; pwait->_key |= busy_flag; mask = f.file->f_op->poll(f.file, pwait); diff --git a/fs/seq_file.c b/fs/seq_file.c index 1d641bb..a290157 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -136,7 +136,6 @@ static int traverse(struct seq_file *m, loff_t offset) Eoverflow: m->op->stop(m, p); kfree(m->buf); - m->count = 0; m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); return !m->buf ? -ENOMEM : -EAGAIN; } @@ -233,10 +232,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) goto Fill; m->op->stop(m, p); kfree(m->buf); - m->count = 0; m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); if (!m->buf) goto Enomem; + m->count = 0; m->version = 0; pos = m->index; p = m->op->start(m, &pos); @@ -767,21 +766,6 @@ int seq_write(struct seq_file *seq, const void *data, size_t len) } EXPORT_SYMBOL(seq_write); -/** - * seq_pad - write padding spaces to buffer - * @m: seq_file identifying the buffer to which data should be written - * @c: the byte to append after padding if non-zero - */ -void seq_pad(struct seq_file *m, char c) -{ - int size = m->pad_until - m->count; - if (size > 0) - seq_printf(m, "%*s", size, ""); - if (c) - seq_putc(m, c); -} -EXPORT_SYMBOL(seq_pad); - struct list_head *seq_list_start(struct list_head *head, loff_t pos) { struct list_head *lh; diff --git a/fs/splice.c b/fs/splice.c index 46a08f7..3b7ee65 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -695,7 +695,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, loff_t pos = sd->pos; int more; - if (!likely(file->f_op->sendpage)) + if (!likely(file->f_op && file->f_op->sendpage)) return -EINVAL; more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; @@ -1099,7 +1099,7 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); - if (out->f_op->splice_write) + if (out->f_op && out->f_op->splice_write) splice_write = out->f_op->splice_write; else splice_write = default_file_splice_write; @@ -1125,7 +1125,7 @@ static long do_splice_to(struct file *in, loff_t *ppos, if (unlikely(ret < 0)) return ret; - if (in->f_op->splice_read) + if (in->f_op && in->f_op->splice_read) splice_read = in->f_op->splice_read; else splice_read = default_file_splice_read; diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index b6fa865..c70111e 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -25,78 +25,6 @@ config SQUASHFS If unsure, say N. -choice - prompt "File decompression options" - depends on SQUASHFS - help - Squashfs now supports two options for decompressing file - data. Traditionally Squashfs has decompressed into an - intermediate buffer and then memcopied it into the page cache. - Squashfs now supports the ability to decompress directly into - the page cache. - - If unsure, select "Decompress file data into an intermediate buffer" - -config SQUASHFS_FILE_CACHE - bool "Decompress file data into an intermediate buffer" - help - Decompress file data into an intermediate buffer and then - memcopy it into the page cache. - -config SQUASHFS_FILE_DIRECT - bool "Decompress files directly into the page cache" - help - Directly decompress file data into the page cache. - Doing so can significantly improve performance because - it eliminates a memcpy and it also removes the lock contention - on the single buffer. - -endchoice - -choice - prompt "Decompressor parallelisation options" - depends on SQUASHFS - help - Squashfs now supports three parallelisation options for - decompression. Each one exhibits various trade-offs between - decompression performance and CPU and memory usage. - - If in doubt, select "Single threaded compression" - -config SQUASHFS_DECOMP_SINGLE - bool "Single threaded compression" - help - Traditionally Squashfs has used single-threaded decompression. - Only one block (data or metadata) can be decompressed at any - one time. This limits CPU and memory usage to a minimum. - -config SQUASHFS_DECOMP_MULTI - bool "Use multiple decompressors for parallel I/O" - help - By default Squashfs uses a single decompressor but it gives - poor performance on parallel I/O workloads when using multiple CPU - machines due to waiting on decompressor availability. - - If you have a parallel I/O workload and your system has enough memory, - using this option may improve overall I/O performance. - - This decompressor implementation uses up to two parallel - decompressors per core. It dynamically allocates decompressors - on a demand basis. - -config SQUASHFS_DECOMP_MULTI_PERCPU - bool "Use percpu multiple decompressors for parallel I/O" - help - By default Squashfs uses a single decompressor but it gives - poor performance on parallel I/O workloads when using multiple CPU - machines due to waiting on decompressor availability. - - This decompressor implementation uses a maximum of one - decompressor per core. It uses percpu variables to ensure - decompression is load-balanced across the cores. - -endchoice - config SQUASHFS_XATTR bool "Squashfs XATTR support" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 4132520..110b047 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -5,11 +5,6 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o squashfs-y += namei.o super.o symlink.o decompressor.o -squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o -squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o -squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o -squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o -squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 0cea9b9..41d108e 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -36,7 +36,6 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" -#include "page_actor.h" /* * Read the metadata block length, this is stored in the first two @@ -87,16 +86,16 @@ static struct buffer_head *get_block_length(struct super_block *sb, * generated a larger block - this does occasionally happen with compression * algorithms). */ -int squashfs_read_data(struct super_block *sb, u64 index, int length, - u64 *next_index, struct squashfs_page_actor *output) +int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, + int length, u64 *next_index, int srclength, int pages) { struct squashfs_sb_info *msblk = sb->s_fs_info; struct buffer_head **bh; int offset = index & ((1 << msblk->devblksize_log2) - 1); u64 cur_index = index >> msblk->devblksize_log2; - int bytes, compressed, b = 0, k = 0, avail, i; + int bytes, compressed, b = 0, k = 0, page = 0, avail; - bh = kcalloc(((output->length + msblk->devblksize - 1) + bh = kcalloc(((srclength + msblk->devblksize - 1) >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); if (bh == NULL) return -ENOMEM; @@ -112,9 +111,9 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, *next_index = index + length; TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", - index, compressed ? "" : "un", length, output->length); + index, compressed ? "" : "un", length, srclength); - if (length < 0 || length > output->length || + if (length < 0 || length > srclength || (index + length) > msblk->bytes_used) goto read_failure; @@ -146,7 +145,7 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, TRACE("Block @ 0x%llx, %scompressed size %d\n", index, compressed ? "" : "un", length); - if (length < 0 || length > output->length || + if (length < 0 || length > srclength || (index + length) > msblk->bytes_used) goto block_release; @@ -159,15 +158,9 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, ll_rw_block(READ, b - 1, bh + 1); } - for (i = 0; i < b; i++) { - wait_on_buffer(bh[i]); - if (!buffer_uptodate(bh[i])) - goto block_release; - } - if (compressed) { - length = squashfs_decompress(msblk, bh, b, offset, length, - output); + length = squashfs_decompress(msblk, buffer, bh, b, offset, + length, srclength, pages); if (length < 0) goto read_failure; } else { @@ -175,20 +168,22 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, * Block is uncompressed. */ int in, pg_offset = 0; - void *data = squashfs_first_page(output); for (bytes = length; k < b; k++) { in = min(bytes, msblk->devblksize - offset); bytes -= in; + wait_on_buffer(bh[k]); + if (!buffer_uptodate(bh[k])) + goto block_release; while (in) { if (pg_offset == PAGE_CACHE_SIZE) { - data = squashfs_next_page(output); + page++; pg_offset = 0; } avail = min_t(int, in, PAGE_CACHE_SIZE - pg_offset); - memcpy(data + pg_offset, bh[k]->b_data + offset, - avail); + memcpy(buffer[page] + pg_offset, + bh[k]->b_data + offset, avail); in -= avail; pg_offset += avail; offset += avail; @@ -196,7 +191,6 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length, offset = 0; put_bh(bh[k]); } - squashfs_finish_page(output); } kfree(bh); diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 1cb70a0..af0b738 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -56,7 +56,6 @@ #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs.h" -#include "page_actor.h" /* * Look-up block in cache, and increment usage count. If not in cache, read @@ -120,8 +119,9 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb, entry->error = 0; spin_unlock(&cache->lock); - entry->length = squashfs_read_data(sb, block, length, - &entry->next_index, entry->actor); + entry->length = squashfs_read_data(sb, entry->data, + block, length, &entry->next_index, + cache->block_size, cache->pages); spin_lock(&cache->lock); @@ -220,7 +220,6 @@ void squashfs_cache_delete(struct squashfs_cache *cache) kfree(cache->entry[i].data[j]); kfree(cache->entry[i].data); } - kfree(cache->entry[i].actor); } kfree(cache->entry); @@ -281,13 +280,6 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, goto cleanup; } } - - entry->actor = squashfs_page_actor_init(entry->data, - cache->pages, 0); - if (entry->actor == NULL) { - ERROR("Failed to allocate %s cache entry\n", name); - goto cleanup; - } } return cache; @@ -418,7 +410,6 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; int i, res; void *table, *buffer, **data; - struct squashfs_page_actor *actor; table = buffer = kmalloc(length, GFP_KERNEL); if (table == NULL) @@ -430,28 +421,19 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) goto failed; } - actor = squashfs_page_actor_init(data, pages, length); - if (actor == NULL) { - res = -ENOMEM; - goto failed2; - } - for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) data[i] = buffer; - res = squashfs_read_data(sb, block, length | - SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor); + res = squashfs_read_data(sb, data, block, length | + SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages); kfree(data); - kfree(actor); if (res < 0) goto failed; return table; -failed2: - kfree(data); failed: kfree(table); return ERR_PTR(res); diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index ac22fe7..3f6271d 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -30,7 +30,6 @@ #include "squashfs_fs_sb.h" #include "decompressor.h" #include "squashfs.h" -#include "page_actor.h" /* * This file (and decompressor.h) implements a decompressor framework for @@ -38,29 +37,29 @@ */ static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { - NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 + NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 }; #ifndef CONFIG_SQUASHFS_LZO static const struct squashfs_decompressor squashfs_lzo_comp_ops = { - NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 + NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 }; #endif #ifndef CONFIG_SQUASHFS_XZ static const struct squashfs_decompressor squashfs_xz_comp_ops = { - NULL, NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 + NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 }; #endif #ifndef CONFIG_SQUASHFS_ZLIB static const struct squashfs_decompressor squashfs_zlib_comp_ops = { - NULL, NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 + NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 }; #endif static const struct squashfs_decompressor squashfs_unknown_comp_ops = { - NULL, NULL, NULL, NULL, 0, "unknown", 0 + NULL, NULL, NULL, 0, "unknown", 0 }; static const struct squashfs_decompressor *decompressor[] = { @@ -84,11 +83,10 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id) } -static void *get_comp_opts(struct super_block *sb, unsigned short flags) +void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags) { struct squashfs_sb_info *msblk = sb->s_fs_info; - void *buffer = NULL, *comp_opts; - struct squashfs_page_actor *actor = NULL; + void *strm, *buffer = NULL; int length = 0; /* @@ -96,46 +94,23 @@ static void *get_comp_opts(struct super_block *sb, unsigned short flags) */ if (SQUASHFS_COMP_OPTS(flags)) { buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); - if (buffer == NULL) { - comp_opts = ERR_PTR(-ENOMEM); - goto out; - } - - actor = squashfs_page_actor_init(&buffer, 1, 0); - if (actor == NULL) { - comp_opts = ERR_PTR(-ENOMEM); - goto out; - } + if (buffer == NULL) + return ERR_PTR(-ENOMEM); - length = squashfs_read_data(sb, - sizeof(struct squashfs_super_block), 0, NULL, actor); + length = squashfs_read_data(sb, &buffer, + sizeof(struct squashfs_super_block), 0, NULL, + PAGE_CACHE_SIZE, 1); if (length < 0) { - comp_opts = ERR_PTR(length); - goto out; + strm = ERR_PTR(length); + goto finished; } } - comp_opts = squashfs_comp_opts(msblk, buffer, length); + strm = msblk->decompressor->init(msblk, buffer, length); -out: - kfree(actor); +finished: kfree(buffer); - return comp_opts; -} - - -void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags) -{ - struct squashfs_sb_info *msblk = sb->s_fs_info; - void *stream, *comp_opts = get_comp_opts(sb, flags); - - if (IS_ERR(comp_opts)) - return comp_opts; - - stream = squashfs_decompressor_create(msblk, comp_opts); - if (IS_ERR(stream)) - kfree(comp_opts); - return stream; + return strm; } diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index af09853..330073e 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -24,22 +24,28 @@ */ struct squashfs_decompressor { - void *(*init)(struct squashfs_sb_info *, void *); - void *(*comp_opts)(struct squashfs_sb_info *, void *, int); + void *(*init)(struct squashfs_sb_info *, void *, int); void (*free)(void *); - int (*decompress)(struct squashfs_sb_info *, void *, - struct buffer_head **, int, int, int, - struct squashfs_page_actor *); + int (*decompress)(struct squashfs_sb_info *, void **, + struct buffer_head **, int, int, int, int, int); int id; char *name; int supported; }; -static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk, - void *buff, int length) +static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk, + void *s) { - return msblk->decompressor->comp_opts ? - msblk->decompressor->comp_opts(msblk, buff, length) : NULL; + if (msblk->decompressor) + msblk->decompressor->free(s); +} + +static inline int squashfs_decompress(struct squashfs_sb_info *msblk, + void **buffer, struct buffer_head **bh, int b, int offset, int length, + int srclength, int pages) +{ + return msblk->decompressor->decompress(msblk, buffer, bh, b, offset, + length, srclength, pages); } #ifdef CONFIG_SQUASHFS_XZ diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c deleted file mode 100644 index d6008a6..0000000 --- a/fs/squashfs/decompressor_multi.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright (c) 2013 - * Minchan Kim <minchan@kernel.org> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - */ -#include <linux/types.h> -#include <linux/mutex.h> -#include <linux/slab.h> -#include <linux/buffer_head.h> -#include <linux/sched.h> -#include <linux/wait.h> -#include <linux/cpumask.h> - -#include "squashfs_fs.h" -#include "squashfs_fs_sb.h" -#include "decompressor.h" -#include "squashfs.h" - -/* - * This file implements multi-threaded decompression in the - * decompressor framework - */ - - -/* - * The reason that multiply two is that a CPU can request new I/O - * while it is waiting previous request. - */ -#define MAX_DECOMPRESSOR (num_online_cpus() * 2) - - -int squashfs_max_decompressors(void) -{ - return MAX_DECOMPRESSOR; -} - - -struct squashfs_stream { - void *comp_opts; - struct list_head strm_list; - struct mutex mutex; - int avail_decomp; - wait_queue_head_t wait; -}; - - -struct decomp_stream { - void *stream; - struct list_head list; -}; - - -static void put_decomp_stream(struct decomp_stream *decomp_strm, - struct squashfs_stream *stream) -{ - mutex_lock(&stream->mutex); - list_add(&decomp_strm->list, &stream->strm_list); - mutex_unlock(&stream->mutex); - wake_up(&stream->wait); -} - -void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, - void *comp_opts) -{ - struct squashfs_stream *stream; - struct decomp_stream *decomp_strm = NULL; - int err = -ENOMEM; - - stream = kzalloc(sizeof(*stream), GFP_KERNEL); - if (!stream) - goto out; - - stream->comp_opts = comp_opts; - mutex_init(&stream->mutex); - INIT_LIST_HEAD(&stream->strm_list); - init_waitqueue_head(&stream->wait); - - /* - * We should have a decompressor at least as default - * so if we fail to allocate new decompressor dynamically, - * we could always fall back to default decompressor and - * file system works. - */ - decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL); - if (!decomp_strm) - goto out; - - decomp_strm->stream = msblk->decompressor->init(msblk, - stream->comp_opts); - if (IS_ERR(decomp_strm->stream)) { - err = PTR_ERR(decomp_strm->stream); - goto out; - } - - list_add(&decomp_strm->list, &stream->strm_list); - stream->avail_decomp = 1; - return stream; - -out: - kfree(decomp_strm); - kfree(stream); - return ERR_PTR(err); -} - - -void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) -{ - struct squashfs_stream *stream = msblk->stream; - if (stream) { - struct decomp_stream *decomp_strm; - - while (!list_empty(&stream->strm_list)) { - decomp_strm = list_entry(stream->strm_list.prev, - struct decomp_stream, list); - list_del(&decomp_strm->list); - msblk->decompressor->free(decomp_strm->stream); - kfree(decomp_strm); - stream->avail_decomp--; - } - WARN_ON(stream->avail_decomp); - kfree(stream->comp_opts); - kfree(stream); - } -} - - -static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk, - struct squashfs_stream *stream) -{ - struct decomp_stream *decomp_strm; - - while (1) { - mutex_lock(&stream->mutex); - - /* There is available decomp_stream */ - if (!list_empty(&stream->strm_list)) { - decomp_strm = list_entry(stream->strm_list.prev, - struct decomp_stream, list); - list_del(&decomp_strm->list); - mutex_unlock(&stream->mutex); - break; - } - - /* - * If there is no available decomp and already full, - * let's wait for releasing decomp from other users. - */ - if (stream->avail_decomp >= MAX_DECOMPRESSOR) - goto wait; - - /* Let's allocate new decomp */ - decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL); - if (!decomp_strm) - goto wait; - - decomp_strm->stream = msblk->decompressor->init(msblk, - stream->comp_opts); - if (IS_ERR(decomp_strm->stream)) { - kfree(decomp_strm); - goto wait; - } - - stream->avail_decomp++; - WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR); - - mutex_unlock(&stream->mutex); - break; -wait: - /* - * If system memory is tough, let's for other's - * releasing instead of hurting VM because it could - * make page cache thrashing. - */ - mutex_unlock(&stream->mutex); - wait_event(stream->wait, - !list_empty(&stream->strm_list)); - } - - return decomp_strm; -} - - -int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, - int b, int offset, int length, struct squashfs_page_actor *output) -{ - int res; - struct squashfs_stream *stream = msblk->stream; - struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream); - res = msblk->decompressor->decompress(msblk, decomp_stream->stream, - bh, b, offset, length, output); - put_decomp_stream(decomp_stream, stream); - if (res < 0) - ERROR("%s decompression failed, data probably corrupt\n", - msblk->decompressor->name); - return res; -} diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c deleted file mode 100644 index 23a9c28..0000000 --- a/fs/squashfs/decompressor_multi_percpu.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2013 - * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - */ - -#include <linux/types.h> -#include <linux/slab.h> -#include <linux/percpu.h> -#include <linux/buffer_head.h> - -#include "squashfs_fs.h" -#include "squashfs_fs_sb.h" -#include "decompressor.h" -#include "squashfs.h" - -/* - * This file implements multi-threaded decompression using percpu - * variables, one thread per cpu core. - */ - -struct squashfs_stream { - void *stream; -}; - -void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, - void *comp_opts) -{ - struct squashfs_stream *stream; - struct squashfs_stream __percpu *percpu; - int err, cpu; - - percpu = alloc_percpu(struct squashfs_stream); - if (percpu == NULL) - return ERR_PTR(-ENOMEM); - - for_each_possible_cpu(cpu) { - stream = per_cpu_ptr(percpu, cpu); - stream->stream = msblk->decompressor->init(msblk, comp_opts); - if (IS_ERR(stream->stream)) { - err = PTR_ERR(stream->stream); - goto out; - } - } - - kfree(comp_opts); - return (__force void *) percpu; - -out: - for_each_possible_cpu(cpu) { - stream = per_cpu_ptr(percpu, cpu); - if (!IS_ERR_OR_NULL(stream->stream)) - msblk->decompressor->free(stream->stream); - } - free_percpu(percpu); - return ERR_PTR(err); -} - -void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) -{ - struct squashfs_stream __percpu *percpu = - (struct squashfs_stream __percpu *) msblk->stream; - struct squashfs_stream *stream; - int cpu; - - if (msblk->stream) { - for_each_possible_cpu(cpu) { - stream = per_cpu_ptr(percpu, cpu); - msblk->decompressor->free(stream->stream); - } - free_percpu(percpu); - } -} - -int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, - int b, int offset, int length, struct squashfs_page_actor *output) -{ - struct squashfs_stream __percpu *percpu = - (struct squashfs_stream __percpu *) msblk->stream; - struct squashfs_stream *stream = get_cpu_ptr(percpu); - int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, - offset, length, output); - put_cpu_ptr(stream); - - if (res < 0) - ERROR("%s decompression failed, data probably corrupt\n", - msblk->decompressor->name); - - return res; -} - -int squashfs_max_decompressors(void) -{ - return num_possible_cpus(); -} diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c deleted file mode 100644 index a6c7592..0000000 --- a/fs/squashfs/decompressor_single.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2013 - * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - */ - -#include <linux/types.h> -#include <linux/mutex.h> -#include <linux/slab.h> -#include <linux/buffer_head.h> - -#include "squashfs_fs.h" -#include "squashfs_fs_sb.h" -#include "decompressor.h" -#include "squashfs.h" - -/* - * This file implements single-threaded decompression in the - * decompressor framework - */ - -struct squashfs_stream { - void *stream; - struct mutex mutex; -}; - -void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, - void *comp_opts) -{ - struct squashfs_stream *stream; - int err = -ENOMEM; - - stream = kmalloc(sizeof(*stream), GFP_KERNEL); - if (stream == NULL) - goto out; - - stream->stream = msblk->decompressor->init(msblk, comp_opts); - if (IS_ERR(stream->stream)) { - err = PTR_ERR(stream->stream); - goto out; - } - - kfree(comp_opts); - mutex_init(&stream->mutex); - return stream; - -out: - kfree(stream); - return ERR_PTR(err); -} - -void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) -{ - struct squashfs_stream *stream = msblk->stream; - - if (stream) { - msblk->decompressor->free(stream->stream); - kfree(stream); - } -} - -int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, - int b, int offset, int length, struct squashfs_page_actor *output) -{ - int res; - struct squashfs_stream *stream = msblk->stream; - - mutex_lock(&stream->mutex); - res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, - offset, length, output); - mutex_unlock(&stream->mutex); - - if (res < 0) - ERROR("%s decompression failed, data probably corrupt\n", - msblk->decompressor->name); - - return res; -} - -int squashfs_max_decompressors(void) -{ - return 1; -} diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index e5c9689..8ca62c2 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -370,15 +370,77 @@ static int read_blocklist(struct inode *inode, int index, u64 *block) return le32_to_cpu(size); } -/* Copy data into page cache */ -void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, - int bytes, int offset) + +static int squashfs_readpage(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int bytes, i, offset = 0, sparse = 0; + struct squashfs_cache_entry *buffer = NULL; void *pageaddr; - int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; - int start_index = page->index & ~mask, end_index = start_index | mask; + + int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; + int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); + int start_index = page->index & ~mask; + int end_index = start_index | mask; + int file_end = i_size_read(inode) >> msblk->block_log; + + TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", + page->index, squashfs_i(inode)->start); + + if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT)) + goto out; + + if (index < file_end || squashfs_i(inode)->fragment_block == + SQUASHFS_INVALID_BLK) { + /* + * Reading a datablock from disk. Need to read block list + * to get location and block size. + */ + u64 block = 0; + int bsize = read_blocklist(inode, index, &block); + if (bsize < 0) + goto error_out; + + if (bsize == 0) { /* hole */ + bytes = index == file_end ? + (i_size_read(inode) & (msblk->block_size - 1)) : + msblk->block_size; + sparse = 1; + } else { + /* + * Read and decompress datablock. + */ + buffer = squashfs_get_datablock(inode->i_sb, + block, bsize); + if (buffer->error) { + ERROR("Unable to read page, block %llx, size %x" + "\n", block, bsize); + squashfs_cache_put(buffer); + goto error_out; + } + bytes = buffer->length; + } + } else { + /* + * Datablock is stored inside a fragment (tail-end packed + * block). + */ + buffer = squashfs_get_fragment(inode->i_sb, + squashfs_i(inode)->fragment_block, + squashfs_i(inode)->fragment_size); + + if (buffer->error) { + ERROR("Unable to read page, block %llx, size %x\n", + squashfs_i(inode)->fragment_block, + squashfs_i(inode)->fragment_size); + squashfs_cache_put(buffer); + goto error_out; + } + bytes = i_size_read(inode) & (msblk->block_size - 1); + offset = squashfs_i(inode)->fragment_offset; + } /* * Loop copying datablock into pages. As the datablock likely covers @@ -389,7 +451,7 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, for (i = start_index; i <= end_index && bytes > 0; i++, bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { struct page *push_page; - int avail = buffer ? min_t(int, bytes, PAGE_CACHE_SIZE) : 0; + int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE); TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); @@ -413,75 +475,11 @@ skip_page: if (i != page->index) page_cache_release(push_page); } -} - -/* Read datablock stored packed inside a fragment (tail-end packed block) */ -static int squashfs_readpage_fragment(struct page *page) -{ - struct inode *inode = page->mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, - squashfs_i(inode)->fragment_block, - squashfs_i(inode)->fragment_size); - int res = buffer->error; - - if (res) - ERROR("Unable to read page, block %llx, size %x\n", - squashfs_i(inode)->fragment_block, - squashfs_i(inode)->fragment_size); - else - squashfs_copy_cache(page, buffer, i_size_read(inode) & - (msblk->block_size - 1), - squashfs_i(inode)->fragment_offset); - - squashfs_cache_put(buffer); - return res; -} -static int squashfs_readpage_sparse(struct page *page, int index, int file_end) -{ - struct inode *inode = page->mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int bytes = index == file_end ? - (i_size_read(inode) & (msblk->block_size - 1)) : - msblk->block_size; + if (!sparse) + squashfs_cache_put(buffer); - squashfs_copy_cache(page, NULL, bytes, 0); return 0; -} - -static int squashfs_readpage(struct file *file, struct page *page) -{ - struct inode *inode = page->mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); - int file_end = i_size_read(inode) >> msblk->block_log; - int res; - void *pageaddr; - - TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", - page->index, squashfs_i(inode)->start); - - if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT)) - goto out; - - if (index < file_end || squashfs_i(inode)->fragment_block == - SQUASHFS_INVALID_BLK) { - u64 block = 0; - int bsize = read_blocklist(inode, index, &block); - if (bsize < 0) - goto error_out; - - if (bsize == 0) - res = squashfs_readpage_sparse(page, index, file_end); - else - res = squashfs_readpage_block(page, block, bsize); - } else - res = squashfs_readpage_fragment(page); - - if (!res) - return 0; error_out: SetPageError(page); diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c deleted file mode 100644 index f2310d2..0000000 --- a/fs/squashfs/file_cache.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2013 - * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - */ - -#include <linux/fs.h> -#include <linux/vfs.h> -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/pagemap.h> -#include <linux/mutex.h> - -#include "squashfs_fs.h" -#include "squashfs_fs_sb.h" -#include "squashfs_fs_i.h" -#include "squashfs.h" - -/* Read separately compressed datablock and memcopy into page cache */ -int squashfs_readpage_block(struct page *page, u64 block, int bsize) -{ - struct inode *i = page->mapping->host; - struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, - block, bsize); - int res = buffer->error; - - if (res) - ERROR("Unable to read page, block %llx, size %x\n", block, - bsize); - else - squashfs_copy_cache(page, buffer, buffer->length, 0); - - squashfs_cache_put(buffer); - return res; -} diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c deleted file mode 100644 index 62a0de6..0000000 --- a/fs/squashfs/file_direct.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (c) 2013 - * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - */ - -#include <linux/fs.h> -#include <linux/vfs.h> -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/pagemap.h> -#include <linux/mutex.h> - -#include "squashfs_fs.h" -#include "squashfs_fs_sb.h" -#include "squashfs_fs_i.h" -#include "squashfs.h" -#include "page_actor.h" - -static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page); - -/* Read separately compressed datablock directly into page cache */ -int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) - -{ - struct inode *inode = target_page->mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - - int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; - int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; - int start_index = target_page->index & ~mask; - int end_index = start_index | mask; - int i, n, pages, missing_pages, bytes, res = -ENOMEM; - struct page **page; - struct squashfs_page_actor *actor; - void *pageaddr; - - if (end_index > file_end) - end_index = file_end; - - pages = end_index - start_index + 1; - - page = kmalloc(sizeof(void *) * pages, GFP_KERNEL); - if (page == NULL) - return res; - - /* - * Create a "page actor" which will kmap and kunmap the - * page cache pages appropriately within the decompressor - */ - actor = squashfs_page_actor_init_special(page, pages, 0); - if (actor == NULL) - goto out; - - /* Try to grab all the pages covered by the Squashfs block */ - for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) { - page[i] = (n == target_page->index) ? target_page : - grab_cache_page_nowait(target_page->mapping, n); - - if (page[i] == NULL) { - missing_pages++; - continue; - } - - if (PageUptodate(page[i])) { - unlock_page(page[i]); - page_cache_release(page[i]); - page[i] = NULL; - missing_pages++; - } - } - - if (missing_pages) { - /* - * Couldn't get one or more pages, this page has either - * been VM reclaimed, but others are still in the page cache - * and uptodate, or we're racing with another thread in - * squashfs_readpage also trying to grab them. Fall back to - * using an intermediate buffer. - */ - res = squashfs_read_cache(target_page, block, bsize, pages, - page); - if (res < 0) - goto mark_errored; - - goto out; - } - - /* Decompress directly into the page cache buffers */ - res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); - if (res < 0) - goto mark_errored; - - /* Last page may have trailing bytes not filled */ - bytes = res % PAGE_CACHE_SIZE; - if (bytes) { - pageaddr = kmap_atomic(page[pages - 1]); - memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); - kunmap_atomic(pageaddr); - } - - /* Mark pages as uptodate, unlock and release */ - for (i = 0; i < pages; i++) { - flush_dcache_page(page[i]); - SetPageUptodate(page[i]); - unlock_page(page[i]); - if (page[i] != target_page) - page_cache_release(page[i]); - } - - kfree(actor); - kfree(page); - - return 0; - -mark_errored: - /* Decompression failed, mark pages as errored. Target_page is - * dealt with by the caller - */ - for (i = 0; i < pages; i++) { - if (page[i] == NULL || page[i] == target_page) - continue; - flush_dcache_page(page[i]); - SetPageError(page[i]); - unlock_page(page[i]); - page_cache_release(page[i]); - } - -out: - kfree(actor); - kfree(page); - return res; -} - - -static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page) -{ - struct inode *i = target_page->mapping->host; - struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, - block, bsize); - int bytes = buffer->length, res = buffer->error, n, offset = 0; - void *pageaddr; - - if (res) { - ERROR("Unable to read page, block %llx, size %x\n", block, - bsize); - goto out; - } - - for (n = 0; n < pages && bytes > 0; n++, - bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { - int avail = min_t(int, bytes, PAGE_CACHE_SIZE); - - if (page[n] == NULL) - continue; - - pageaddr = kmap_atomic(page[n]); - squashfs_copy_data(pageaddr, buffer, offset, avail); - memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); - kunmap_atomic(pageaddr); - flush_dcache_page(page[n]); - SetPageUptodate(page[n]); - unlock_page(page[n]); - if (page[n] != target_page) - page_cache_release(page[n]); - } - -out: - squashfs_cache_put(buffer); - return res; -} diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 244b9fb..00f4dfc 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -31,14 +31,13 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" -#include "page_actor.h" struct squashfs_lzo { void *input; void *output; }; -static void *lzo_init(struct squashfs_sb_info *msblk, void *buff) +static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len) { int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); @@ -75,16 +74,22 @@ static void lzo_free(void *strm) } -static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, - struct buffer_head **bh, int b, int offset, int length, - struct squashfs_page_actor *output) +static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, + struct buffer_head **bh, int b, int offset, int length, int srclength, + int pages) { - struct squashfs_lzo *stream = strm; - void *buff = stream->input, *data; + struct squashfs_lzo *stream = msblk->stream; + void *buff = stream->input; int avail, i, bytes = length, res; - size_t out_len = output->length; + size_t out_len = srclength; + + mutex_lock(&msblk->read_data_mutex); for (i = 0; i < b; i++) { + wait_on_buffer(bh[i]); + if (!buffer_uptodate(bh[i])) + goto block_release; + avail = min(bytes, msblk->devblksize - offset); memcpy(buff, bh[i]->b_data + offset, avail); buff += avail; @@ -99,24 +104,24 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, goto failed; res = bytes = (int)out_len; - data = squashfs_first_page(output); - buff = stream->output; - while (data) { - if (bytes <= PAGE_CACHE_SIZE) { - memcpy(data, buff, bytes); - break; - } else { - memcpy(data, buff, PAGE_CACHE_SIZE); - buff += PAGE_CACHE_SIZE; - bytes -= PAGE_CACHE_SIZE; - data = squashfs_next_page(output); - } + for (i = 0, buff = stream->output; bytes && i < pages; i++) { + avail = min_t(int, bytes, PAGE_CACHE_SIZE); + memcpy(buffer[i], buff, avail); + buff += avail; + bytes -= avail; } - squashfs_finish_page(output); + mutex_unlock(&msblk->read_data_mutex); return res; +block_release: + for (; i < b; i++) + put_bh(bh[i]); + failed: + mutex_unlock(&msblk->read_data_mutex); + + ERROR("lzo decompression failed, data probably corrupt\n"); return -EIO; } diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c deleted file mode 100644 index 5a1c11f..0000000 --- a/fs/squashfs/page_actor.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2013 - * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - */ - -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/pagemap.h> -#include "page_actor.h" - -/* - * This file contains implementations of page_actor for decompressing into - * an intermediate buffer, and for decompressing directly into the - * page cache. - * - * Calling code should avoid sleeping between calls to squashfs_first_page() - * and squashfs_finish_page(). - */ - -/* Implementation of page_actor for decompressing into intermediate buffer */ -static void *cache_first_page(struct squashfs_page_actor *actor) -{ - actor->next_page = 1; - return actor->buffer[0]; -} - -static void *cache_next_page(struct squashfs_page_actor *actor) -{ - if (actor->next_page == actor->pages) - return NULL; - - return actor->buffer[actor->next_page++]; -} - -static void cache_finish_page(struct squashfs_page_actor *actor) -{ - /* empty */ -} - -struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, - int pages, int length) -{ - struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); - - if (actor == NULL) - return NULL; - - actor->length = length ? : pages * PAGE_CACHE_SIZE; - actor->buffer = buffer; - actor->pages = pages; - actor->next_page = 0; - actor->squashfs_first_page = cache_first_page; - actor->squashfs_next_page = cache_next_page; - actor->squashfs_finish_page = cache_finish_page; - return actor; -} - -/* Implementation of page_actor for decompressing directly into page cache. */ -static void *direct_first_page(struct squashfs_page_actor *actor) -{ - actor->next_page = 1; - return actor->pageaddr = kmap_atomic(actor->page[0]); -} - -static void *direct_next_page(struct squashfs_page_actor *actor) -{ - if (actor->pageaddr) - kunmap_atomic(actor->pageaddr); - - return actor->pageaddr = actor->next_page == actor->pages ? NULL : - kmap_atomic(actor->page[actor->next_page++]); -} - -static void direct_finish_page(struct squashfs_page_actor *actor) -{ - if (actor->pageaddr) - kunmap_atomic(actor->pageaddr); -} - -struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, - int pages, int length) -{ - struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); - - if (actor == NULL) - return NULL; - - actor->length = length ? : pages * PAGE_CACHE_SIZE; - actor->page = page; - actor->pages = pages; - actor->next_page = 0; - actor->pageaddr = NULL; - actor->squashfs_first_page = direct_first_page; - actor->squashfs_next_page = direct_next_page; - actor->squashfs_finish_page = direct_finish_page; - return actor; -} diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h deleted file mode 100644 index 26dd820..0000000 --- a/fs/squashfs/page_actor.h +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef PAGE_ACTOR_H -#define PAGE_ACTOR_H -/* - * Copyright (c) 2013 - * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - */ - -#ifndef CONFIG_SQUASHFS_FILE_DIRECT -struct squashfs_page_actor { - void **page; - int pages; - int length; - int next_page; -}; - -static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page, - int pages, int length) -{ - struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); - - if (actor == NULL) - return NULL; - - actor->length = length ? : pages * PAGE_CACHE_SIZE; - actor->page = page; - actor->pages = pages; - actor->next_page = 0; - return actor; -} - -static inline void *squashfs_first_page(struct squashfs_page_actor *actor) -{ - actor->next_page = 1; - return actor->page[0]; -} - -static inline void *squashfs_next_page(struct squashfs_page_actor *actor) -{ - return actor->next_page == actor->pages ? NULL : - actor->page[actor->next_page++]; -} - -static inline void squashfs_finish_page(struct squashfs_page_actor *actor) -{ - /* empty */ -} -#else -struct squashfs_page_actor { - union { - void **buffer; - struct page **page; - }; - void *pageaddr; - void *(*squashfs_first_page)(struct squashfs_page_actor *); - void *(*squashfs_next_page)(struct squashfs_page_actor *); - void (*squashfs_finish_page)(struct squashfs_page_actor *); - int pages; - int length; - int next_page; -}; - -extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int); -extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page - **, int, int); -static inline void *squashfs_first_page(struct squashfs_page_actor *actor) -{ - return actor->squashfs_first_page(actor); -} -static inline void *squashfs_next_page(struct squashfs_page_actor *actor) -{ - return actor->squashfs_next_page(actor); -} -static inline void squashfs_finish_page(struct squashfs_page_actor *actor) -{ - actor->squashfs_finish_page(actor); -} -#endif -#endif diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 9e1bb79..d126651 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -28,8 +28,8 @@ #define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args) /* block.c */ -extern int squashfs_read_data(struct super_block *, u64, int, u64 *, - struct squashfs_page_actor *); +extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *, + int, int); /* cache.c */ extern struct squashfs_cache *squashfs_cache_init(char *, int, int); @@ -48,14 +48,7 @@ extern void *squashfs_read_table(struct super_block *, u64, int); /* decompressor.c */ extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); -extern void *squashfs_decompressor_setup(struct super_block *, unsigned short); - -/* decompressor_xxx.c */ -extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *); -extern void squashfs_decompressor_destroy(struct squashfs_sb_info *); -extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **, - int, int, int, struct squashfs_page_actor *); -extern int squashfs_max_decompressors(void); +extern void *squashfs_decompressor_init(struct super_block *, unsigned short); /* export.c */ extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64, @@ -66,13 +59,6 @@ extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *); extern __le64 *squashfs_read_fragment_index_table(struct super_block *, u64, u64, unsigned int); -/* file.c */ -void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, - int); - -/* file_xxx.c */ -extern int squashfs_readpage_block(struct page *, u64, int); - /* id.c */ extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64, diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 1da565c..52934a2 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -50,7 +50,6 @@ struct squashfs_cache_entry { wait_queue_head_t wait_queue; struct squashfs_cache *cache; void **data; - struct squashfs_page_actor *actor; }; struct squashfs_sb_info { @@ -64,9 +63,10 @@ struct squashfs_sb_info { __le64 *id_table; __le64 *fragment_index; __le64 *xattr_id_table; + struct mutex read_data_mutex; struct mutex meta_index_mutex; struct meta_index *meta_index; - struct squashfs_stream *stream; + void *stream; __le64 *inode_lookup_table; u64 inode_table; u64 directory_table; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 202df63..60553a9 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -98,6 +98,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); msblk->devblksize_log2 = ffz(~msblk->devblksize); + mutex_init(&msblk->read_data_mutex); mutex_init(&msblk->meta_index_mutex); /* @@ -205,14 +206,13 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; /* Allocate read_page block */ - msblk->read_page = squashfs_cache_init("data", - squashfs_max_decompressors(), msblk->block_size); + msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size); if (msblk->read_page == NULL) { ERROR("Failed to allocate read_page block\n"); goto failed_mount; } - msblk->stream = squashfs_decompressor_setup(sb, flags); + msblk->stream = squashfs_decompressor_init(sb, flags); if (IS_ERR(msblk->stream)) { err = PTR_ERR(msblk->stream); msblk->stream = NULL; @@ -336,7 +336,7 @@ failed_mount: squashfs_cache_delete(msblk->block_cache); squashfs_cache_delete(msblk->fragment_cache); squashfs_cache_delete(msblk->read_page); - squashfs_decompressor_destroy(msblk); + squashfs_decompressor_free(msblk, msblk->stream); kfree(msblk->inode_lookup_table); kfree(msblk->fragment_index); kfree(msblk->id_table); @@ -383,7 +383,7 @@ static void squashfs_put_super(struct super_block *sb) squashfs_cache_delete(sbi->block_cache); squashfs_cache_delete(sbi->fragment_cache); squashfs_cache_delete(sbi->read_page); - squashfs_decompressor_destroy(sbi); + squashfs_decompressor_free(sbi, sbi->stream); kfree(sbi->id_table); kfree(sbi->fragment_index); kfree(sbi->meta_index); diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index c609624..1760b7d1 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -32,70 +32,44 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" -#include "page_actor.h" struct squashfs_xz { struct xz_dec *state; struct xz_buf buf; }; -struct disk_comp_opts { +struct comp_opts { __le32 dictionary_size; __le32 flags; }; -struct comp_opts { - int dict_size; -}; - -static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk, - void *buff, int len) +static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff, + int len) { - struct disk_comp_opts *comp_opts = buff; - struct comp_opts *opts; - int err = 0, n; - - opts = kmalloc(sizeof(*opts), GFP_KERNEL); - if (opts == NULL) { - err = -ENOMEM; - goto out2; - } + struct comp_opts *comp_opts = buff; + struct squashfs_xz *stream; + int dict_size = msblk->block_size; + int err, n; if (comp_opts) { /* check compressor options are the expected length */ if (len < sizeof(*comp_opts)) { err = -EIO; - goto out; + goto failed; } - opts->dict_size = le32_to_cpu(comp_opts->dictionary_size); + dict_size = le32_to_cpu(comp_opts->dictionary_size); /* the dictionary size should be 2^n or 2^n+2^(n+1) */ - n = ffs(opts->dict_size) - 1; - if (opts->dict_size != (1 << n) && opts->dict_size != (1 << n) + + n = ffs(dict_size) - 1; + if (dict_size != (1 << n) && dict_size != (1 << n) + (1 << (n + 1))) { err = -EIO; - goto out; + goto failed; } - } else - /* use defaults */ - opts->dict_size = max_t(int, msblk->block_size, - SQUASHFS_METADATA_SIZE); - - return opts; - -out: - kfree(opts); -out2: - return ERR_PTR(err); -} - + } -static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff) -{ - struct comp_opts *comp_opts = buff; - struct squashfs_xz *stream; - int err; + dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE); stream = kmalloc(sizeof(*stream), GFP_KERNEL); if (stream == NULL) { @@ -103,7 +77,7 @@ static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff) goto failed; } - stream->state = xz_dec_init(XZ_PREALLOC, comp_opts->dict_size); + stream->state = xz_dec_init(XZ_PREALLOC, dict_size); if (stream->state == NULL) { kfree(stream); err = -ENOMEM; @@ -129,37 +103,42 @@ static void squashfs_xz_free(void *strm) } -static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, - struct buffer_head **bh, int b, int offset, int length, - struct squashfs_page_actor *output) +static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, + struct buffer_head **bh, int b, int offset, int length, int srclength, + int pages) { enum xz_ret xz_err; - int avail, total = 0, k = 0; - struct squashfs_xz *stream = strm; + int avail, total = 0, k = 0, page = 0; + struct squashfs_xz *stream = msblk->stream; + + mutex_lock(&msblk->read_data_mutex); xz_dec_reset(stream->state); stream->buf.in_pos = 0; stream->buf.in_size = 0; stream->buf.out_pos = 0; stream->buf.out_size = PAGE_CACHE_SIZE; - stream->buf.out = squashfs_first_page(output); + stream->buf.out = buffer[page++]; do { if (stream->buf.in_pos == stream->buf.in_size && k < b) { avail = min(length, msblk->devblksize - offset); length -= avail; + wait_on_buffer(bh[k]); + if (!buffer_uptodate(bh[k])) + goto release_mutex; + stream->buf.in = bh[k]->b_data + offset; stream->buf.in_size = avail; stream->buf.in_pos = 0; offset = 0; } - if (stream->buf.out_pos == stream->buf.out_size) { - stream->buf.out = squashfs_next_page(output); - if (stream->buf.out != NULL) { - stream->buf.out_pos = 0; - total += PAGE_CACHE_SIZE; - } + if (stream->buf.out_pos == stream->buf.out_size + && page < pages) { + stream->buf.out = buffer[page++]; + stream->buf.out_pos = 0; + total += PAGE_CACHE_SIZE; } xz_err = xz_dec_run(stream->state, &stream->buf); @@ -168,14 +147,23 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, put_bh(bh[k++]); } while (xz_err == XZ_OK); - squashfs_finish_page(output); + if (xz_err != XZ_STREAM_END) { + ERROR("xz_dec_run error, data probably corrupt\n"); + goto release_mutex; + } + + if (k < b) { + ERROR("xz_uncompress error, input remaining\n"); + goto release_mutex; + } - if (xz_err != XZ_STREAM_END || k < b) - goto out; + total += stream->buf.out_pos; + mutex_unlock(&msblk->read_data_mutex); + return total; - return total + stream->buf.out_pos; +release_mutex: + mutex_unlock(&msblk->read_data_mutex); -out: for (; k < b; k++) put_bh(bh[k]); @@ -184,7 +172,6 @@ out: const struct squashfs_decompressor squashfs_xz_comp_ops = { .init = squashfs_xz_init, - .comp_opts = squashfs_xz_comp_opts, .free = squashfs_xz_free, .decompress = squashfs_xz_uncompress, .id = XZ_COMPRESSION, diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 8727cab..55d918f 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -32,9 +32,8 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" -#include "page_actor.h" -static void *zlib_init(struct squashfs_sb_info *dummy, void *buff) +static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len) { z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL); if (stream == NULL) @@ -62,37 +61,44 @@ static void zlib_free(void *strm) } -static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, - struct buffer_head **bh, int b, int offset, int length, - struct squashfs_page_actor *output) +static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, + struct buffer_head **bh, int b, int offset, int length, int srclength, + int pages) { - int zlib_err, zlib_init = 0, k = 0; - z_stream *stream = strm; + int zlib_err, zlib_init = 0; + int k = 0, page = 0; + z_stream *stream = msblk->stream; + + mutex_lock(&msblk->read_data_mutex); - stream->avail_out = PAGE_CACHE_SIZE; - stream->next_out = squashfs_first_page(output); + stream->avail_out = 0; stream->avail_in = 0; do { if (stream->avail_in == 0 && k < b) { int avail = min(length, msblk->devblksize - offset); length -= avail; + wait_on_buffer(bh[k]); + if (!buffer_uptodate(bh[k])) + goto release_mutex; + stream->next_in = bh[k]->b_data + offset; stream->avail_in = avail; offset = 0; } - if (stream->avail_out == 0) { - stream->next_out = squashfs_next_page(output); - if (stream->next_out != NULL) - stream->avail_out = PAGE_CACHE_SIZE; + if (stream->avail_out == 0 && page < pages) { + stream->next_out = buffer[page++]; + stream->avail_out = PAGE_CACHE_SIZE; } if (!zlib_init) { zlib_err = zlib_inflateInit(stream); if (zlib_err != Z_OK) { - squashfs_finish_page(output); - goto out; + ERROR("zlib_inflateInit returned unexpected " + "result 0x%x, srclength %d\n", + zlib_err, srclength); + goto release_mutex; } zlib_init = 1; } @@ -103,21 +109,29 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, put_bh(bh[k++]); } while (zlib_err == Z_OK); - squashfs_finish_page(output); - - if (zlib_err != Z_STREAM_END) - goto out; + if (zlib_err != Z_STREAM_END) { + ERROR("zlib_inflate error, data probably corrupt\n"); + goto release_mutex; + } zlib_err = zlib_inflateEnd(stream); - if (zlib_err != Z_OK) - goto out; + if (zlib_err != Z_OK) { + ERROR("zlib_inflate error, data probably corrupt\n"); + goto release_mutex; + } + + if (k < b) { + ERROR("zlib_uncompress error, data remaining\n"); + goto release_mutex; + } - if (k < b) - goto out; + length = stream->total_out; + mutex_unlock(&msblk->read_data_mutex); + return length; - return stream->total_out; +release_mutex: + mutex_unlock(&msblk->read_data_mutex); -out: for (; k < b; k++) put_bh(bh[k]); @@ -37,21 +37,14 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) EXPORT_SYMBOL(generic_fillattr); -/** - * vfs_getattr_nosec - getattr without security checks - * @path: file to get attributes from - * @stat: structure to return attributes in - * - * Get attributes without calling security_inode_getattr. - * - * Currently the only caller other than vfs_getattr is internal to the - * filehandle lookup code, which uses only the inode number and returns - * no attributes to any user. Any other code probably wants - * vfs_getattr. - */ -int vfs_getattr_nosec(struct path *path, struct kstat *stat) +int vfs_getattr(struct path *path, struct kstat *stat) { struct inode *inode = path->dentry->d_inode; + int retval; + + retval = security_inode_getattr(path->mnt, path->dentry); + if (retval) + return retval; if (inode->i_op->getattr) return inode->i_op->getattr(path->mnt, path->dentry, stat); @@ -60,18 +53,6 @@ int vfs_getattr_nosec(struct path *path, struct kstat *stat) return 0; } -EXPORT_SYMBOL(vfs_getattr_nosec); - -int vfs_getattr(struct path *path, struct kstat *stat) -{ - int retval; - - retval = security_inode_getattr(path->mnt, path->dentry); - if (retval) - return retval; - return vfs_getattr_nosec(path, stat); -} - EXPORT_SYMBOL(vfs_getattr); int vfs_fstat(unsigned int fd, struct kstat *stat) @@ -129,24 +129,33 @@ static unsigned long super_cache_count(struct shrinker *shrink, return total_objects; } -/** - * destroy_super - frees a superblock - * @s: superblock to free - * - * Frees a superblock. - */ -static void destroy_super(struct super_block *s) +static int init_sb_writers(struct super_block *s, struct file_system_type *type) { + int err; int i; - list_lru_destroy(&s->s_dentry_lru); - list_lru_destroy(&s->s_inode_lru); + + for (i = 0; i < SB_FREEZE_LEVELS; i++) { + err = percpu_counter_init(&s->s_writers.counter[i], 0); + if (err < 0) + goto err_out; + lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], + &type->s_writers_key[i], 0); + } + init_waitqueue_head(&s->s_writers.wait); + init_waitqueue_head(&s->s_writers.wait_unfrozen); + return 0; +err_out: + while (--i >= 0) + percpu_counter_destroy(&s->s_writers.counter[i]); + return err; +} + +static void destroy_sb_writers(struct super_block *s) +{ + int i; + for (i = 0; i < SB_FREEZE_LEVELS; i++) percpu_counter_destroy(&s->s_writers.counter[i]); - security_sb_free(s); - WARN_ON(!list_empty(&s->s_mounts)); - kfree(s->s_subtype); - kfree(s->s_options); - kfree_rcu(s, rcu); } /** @@ -161,74 +170,111 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) { struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); static const struct super_operations default_op; - int i; - if (!s) - return NULL; + if (s) { + if (security_sb_alloc(s)) + goto out_free_sb; - if (security_sb_alloc(s)) - goto fail; +#ifdef CONFIG_SMP + s->s_files = alloc_percpu(struct list_head); + if (!s->s_files) + goto err_out; + else { + int i; - for (i = 0; i < SB_FREEZE_LEVELS; i++) { - if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0) - goto fail; - lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], - &type->s_writers_key[i], 0); + for_each_possible_cpu(i) + INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); + } +#else + INIT_LIST_HEAD(&s->s_files); +#endif + if (init_sb_writers(s, type)) + goto err_out; + s->s_flags = flags; + s->s_bdi = &default_backing_dev_info; + INIT_HLIST_NODE(&s->s_instances); + INIT_HLIST_BL_HEAD(&s->s_anon); + INIT_LIST_HEAD(&s->s_inodes); + + if (list_lru_init(&s->s_dentry_lru)) + goto err_out; + if (list_lru_init(&s->s_inode_lru)) + goto err_out_dentry_lru; + + INIT_LIST_HEAD(&s->s_mounts); + init_rwsem(&s->s_umount); + lockdep_set_class(&s->s_umount, &type->s_umount_key); + /* + * sget() can have s_umount recursion. + * + * When it cannot find a suitable sb, it allocates a new + * one (this one), and tries again to find a suitable old + * one. + * + * In case that succeeds, it will acquire the s_umount + * lock of the old one. Since these are clearly distrinct + * locks, and this object isn't exposed yet, there's no + * risk of deadlocks. + * + * Annotate this by putting this lock in a different + * subclass. + */ + down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); + s->s_count = 1; + atomic_set(&s->s_active, 1); + mutex_init(&s->s_vfs_rename_mutex); + lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); + mutex_init(&s->s_dquot.dqio_mutex); + mutex_init(&s->s_dquot.dqonoff_mutex); + init_rwsem(&s->s_dquot.dqptr_sem); + s->s_maxbytes = MAX_NON_LFS; + s->s_op = &default_op; + s->s_time_gran = 1000000000; + s->cleancache_poolid = -1; + + s->s_shrink.seeks = DEFAULT_SEEKS; + s->s_shrink.scan_objects = super_cache_scan; + s->s_shrink.count_objects = super_cache_count; + s->s_shrink.batch = 1024; + s->s_shrink.flags = SHRINKER_NUMA_AWARE; } - init_waitqueue_head(&s->s_writers.wait); - init_waitqueue_head(&s->s_writers.wait_unfrozen); - s->s_flags = flags; - s->s_bdi = &default_backing_dev_info; - INIT_HLIST_NODE(&s->s_instances); - INIT_HLIST_BL_HEAD(&s->s_anon); - INIT_LIST_HEAD(&s->s_inodes); - - if (list_lru_init(&s->s_dentry_lru)) - goto fail; - if (list_lru_init(&s->s_inode_lru)) - goto fail; - - INIT_LIST_HEAD(&s->s_mounts); - init_rwsem(&s->s_umount); - lockdep_set_class(&s->s_umount, &type->s_umount_key); - /* - * sget() can have s_umount recursion. - * - * When it cannot find a suitable sb, it allocates a new - * one (this one), and tries again to find a suitable old - * one. - * - * In case that succeeds, it will acquire the s_umount - * lock of the old one. Since these are clearly distrinct - * locks, and this object isn't exposed yet, there's no - * risk of deadlocks. - * - * Annotate this by putting this lock in a different - * subclass. - */ - down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING); - s->s_count = 1; - atomic_set(&s->s_active, 1); - mutex_init(&s->s_vfs_rename_mutex); - lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key); - mutex_init(&s->s_dquot.dqio_mutex); - mutex_init(&s->s_dquot.dqonoff_mutex); - init_rwsem(&s->s_dquot.dqptr_sem); - s->s_maxbytes = MAX_NON_LFS; - s->s_op = &default_op; - s->s_time_gran = 1000000000; - s->cleancache_poolid = -1; - - s->s_shrink.seeks = DEFAULT_SEEKS; - s->s_shrink.scan_objects = super_cache_scan; - s->s_shrink.count_objects = super_cache_count; - s->s_shrink.batch = 1024; - s->s_shrink.flags = SHRINKER_NUMA_AWARE; +out: return s; -fail: - destroy_super(s); - return NULL; +err_out_dentry_lru: + list_lru_destroy(&s->s_dentry_lru); +err_out: + security_sb_free(s); +#ifdef CONFIG_SMP + if (s->s_files) + free_percpu(s->s_files); +#endif + destroy_sb_writers(s); +out_free_sb: + kfree(s); + s = NULL; + goto out; +} + +/** + * destroy_super - frees a superblock + * @s: superblock to free + * + * Frees a superblock. + */ +static inline void destroy_super(struct super_block *s) +{ + list_lru_destroy(&s->s_dentry_lru); + list_lru_destroy(&s->s_inode_lru); +#ifdef CONFIG_SMP + free_percpu(s->s_files); +#endif + destroy_sb_writers(s); + security_sb_free(s); + WARN_ON(!list_empty(&s->s_mounts)); + kfree(s->s_subtype); + kfree(s->s_options); + kfree(s); } /* Superblock refcounting */ @@ -710,8 +756,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) make sure there are no rw files opened */ if (remount_ro) { if (force) { - sb->s_readonly_remount = 1; - smp_wmb(); + mark_files_ro(sb); } else { retval = sb_prepare_remount_readonly(sb); if (retval) @@ -27,11 +27,10 @@ * wait == 1 case since in that case write_inode() functions do * sync_dirty_buffer() and thus effectively write one block at a time. */ -static int __sync_filesystem(struct super_block *sb, int wait, - unsigned long start) +static int __sync_filesystem(struct super_block *sb, int wait) { if (wait) - sync_inodes_sb(sb, start); + sync_inodes_sb(sb); else writeback_inodes_sb(sb, WB_REASON_SYNC); @@ -48,7 +47,6 @@ static int __sync_filesystem(struct super_block *sb, int wait, int sync_filesystem(struct super_block *sb) { int ret; - unsigned long start = jiffies; /* * We need to be protected against the filesystem going from @@ -62,17 +60,17 @@ int sync_filesystem(struct super_block *sb) if (sb->s_flags & MS_RDONLY) return 0; - ret = __sync_filesystem(sb, 0, start); + ret = __sync_filesystem(sb, 0); if (ret < 0) return ret; - return __sync_filesystem(sb, 1, start); + return __sync_filesystem(sb, 1); } EXPORT_SYMBOL_GPL(sync_filesystem); static void sync_inodes_one_sb(struct super_block *sb, void *arg) { if (!(sb->s_flags & MS_RDONLY)) - sync_inodes_sb(sb, *((unsigned long *)arg)); + sync_inodes_sb(sb); } static void sync_fs_one_sb(struct super_block *sb, void *arg) @@ -104,10 +102,9 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg) SYSCALL_DEFINE0(sync) { int nowait = 0, wait = 1; - unsigned long start = jiffies; wakeup_flusher_threads(0, WB_REASON_SYNC); - iterate_supers(sync_inodes_one_sb, &start); + iterate_supers(sync_inodes_one_sb, NULL); iterate_supers(sync_fs_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &wait); iterate_bdevs(fdatawrite_one_bdev, NULL); @@ -180,7 +177,7 @@ SYSCALL_DEFINE1(syncfs, int, fd) */ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) { - if (!file->f_op->fsync) + if (!file->f_op || !file->f_op->fsync) return -EINVAL; return file->f_op->fsync(file, start, end, datasync); } diff --git a/fs/sysfs/Makefile b/fs/sysfs/Makefile index 8876ac1..7a1ceb9 100644 --- a/fs/sysfs/Makefile +++ b/fs/sysfs/Makefile @@ -2,4 +2,5 @@ # Makefile for the sysfs virtual filesystem # -obj-y := inode.o file.o dir.o symlink.o mount.o group.o +obj-y := inode.o file.o dir.o symlink.o mount.o bin.o \ + group.o diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c new file mode 100644 index 0000000..c590cab --- /dev/null +++ b/fs/sysfs/bin.c @@ -0,0 +1,502 @@ +/* + * fs/sysfs/bin.c - sysfs binary file implementation + * + * Copyright (c) 2003 Patrick Mochel + * Copyright (c) 2003 Matthew Wilcox + * Copyright (c) 2004 Silicon Graphics, Inc. + * Copyright (c) 2007 SUSE Linux Products GmbH + * Copyright (c) 2007 Tejun Heo <teheo@suse.de> + * + * This file is released under the GPLv2. + * + * Please see Documentation/filesystems/sysfs.txt for more information. + */ + +#undef DEBUG + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/kernel.h> +#include <linux/kobject.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/mutex.h> +#include <linux/mm.h> +#include <linux/uaccess.h> + +#include "sysfs.h" + +/* + * There's one bin_buffer for each open file. + * + * filp->private_data points to bin_buffer and + * sysfs_dirent->s_bin_attr.buffers points to a the bin_buffer s + * sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock + */ +static DEFINE_MUTEX(sysfs_bin_lock); + +struct bin_buffer { + struct mutex mutex; + void *buffer; + int mmapped; + const struct vm_operations_struct *vm_ops; + struct file *file; + struct hlist_node list; +}; + +static int +fill_read(struct file *file, char *buffer, loff_t off, size_t count) +{ + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; + struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; + int rc; + + /* need attr_sd for attr, its parent for kobj */ + if (!sysfs_get_active(attr_sd)) + return -ENODEV; + + rc = -EIO; + if (attr->read) + rc = attr->read(file, kobj, attr, buffer, off, count); + + sysfs_put_active(attr_sd); + + return rc; +} + +static ssize_t +read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) +{ + struct bin_buffer *bb = file->private_data; + int size = file_inode(file)->i_size; + loff_t offs = *off; + int count = min_t(size_t, bytes, PAGE_SIZE); + char *temp; + + if (!bytes) + return 0; + + if (size) { + if (offs > size) + return 0; + if (offs + count > size) + count = size - offs; + } + + temp = kmalloc(count, GFP_KERNEL); + if (!temp) + return -ENOMEM; + + mutex_lock(&bb->mutex); + + count = fill_read(file, bb->buffer, offs, count); + if (count < 0) { + mutex_unlock(&bb->mutex); + goto out_free; + } + + memcpy(temp, bb->buffer, count); + + mutex_unlock(&bb->mutex); + + if (copy_to_user(userbuf, temp, count)) { + count = -EFAULT; + goto out_free; + } + + pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); + + *off = offs + count; + + out_free: + kfree(temp); + return count; +} + +static int +flush_write(struct file *file, char *buffer, loff_t offset, size_t count) +{ + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; + struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; + int rc; + + /* need attr_sd for attr, its parent for kobj */ + if (!sysfs_get_active(attr_sd)) + return -ENODEV; + + rc = -EIO; + if (attr->write) + rc = attr->write(file, kobj, attr, buffer, offset, count); + + sysfs_put_active(attr_sd); + + return rc; +} + +static ssize_t write(struct file *file, const char __user *userbuf, + size_t bytes, loff_t *off) +{ + struct bin_buffer *bb = file->private_data; + int size = file_inode(file)->i_size; + loff_t offs = *off; + int count = min_t(size_t, bytes, PAGE_SIZE); + char *temp; + + if (!bytes) + return 0; + + if (size) { + if (offs > size) + return 0; + if (offs + count > size) + count = size - offs; + } + + temp = memdup_user(userbuf, count); + if (IS_ERR(temp)) + return PTR_ERR(temp); + + mutex_lock(&bb->mutex); + + memcpy(bb->buffer, temp, count); + + count = flush_write(file, bb->buffer, offs, count); + mutex_unlock(&bb->mutex); + + if (count > 0) + *off = offs + count; + + kfree(temp); + return count; +} + +static void bin_vma_open(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + + if (!bb->vm_ops) + return; + + if (!sysfs_get_active(attr_sd)) + return; + + if (bb->vm_ops->open) + bb->vm_ops->open(vma); + + sysfs_put_active(attr_sd); +} + +static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + int ret; + + if (!bb->vm_ops) + return VM_FAULT_SIGBUS; + + if (!sysfs_get_active(attr_sd)) + return VM_FAULT_SIGBUS; + + ret = VM_FAULT_SIGBUS; + if (bb->vm_ops->fault) + ret = bb->vm_ops->fault(vma, vmf); + + sysfs_put_active(attr_sd); + return ret; +} + +static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + int ret; + + if (!bb->vm_ops) + return VM_FAULT_SIGBUS; + + if (!sysfs_get_active(attr_sd)) + return VM_FAULT_SIGBUS; + + ret = 0; + if (bb->vm_ops->page_mkwrite) + ret = bb->vm_ops->page_mkwrite(vma, vmf); + else + file_update_time(file); + + sysfs_put_active(attr_sd); + return ret; +} + +static int bin_access(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + int ret; + + if (!bb->vm_ops) + return -EINVAL; + + if (!sysfs_get_active(attr_sd)) + return -EINVAL; + + ret = -EINVAL; + if (bb->vm_ops->access) + ret = bb->vm_ops->access(vma, addr, buf, len, write); + + sysfs_put_active(attr_sd); + return ret; +} + +#ifdef CONFIG_NUMA +static int bin_set_policy(struct vm_area_struct *vma, struct mempolicy *new) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + int ret; + + if (!bb->vm_ops) + return 0; + + if (!sysfs_get_active(attr_sd)) + return -EINVAL; + + ret = 0; + if (bb->vm_ops->set_policy) + ret = bb->vm_ops->set_policy(vma, new); + + sysfs_put_active(attr_sd); + return ret; +} + +static struct mempolicy *bin_get_policy(struct vm_area_struct *vma, + unsigned long addr) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + struct mempolicy *pol; + + if (!bb->vm_ops) + return vma->vm_policy; + + if (!sysfs_get_active(attr_sd)) + return vma->vm_policy; + + pol = vma->vm_policy; + if (bb->vm_ops->get_policy) + pol = bb->vm_ops->get_policy(vma, addr); + + sysfs_put_active(attr_sd); + return pol; +} + +static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from, + const nodemask_t *to, unsigned long flags) +{ + struct file *file = vma->vm_file; + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + int ret; + + if (!bb->vm_ops) + return 0; + + if (!sysfs_get_active(attr_sd)) + return 0; + + ret = 0; + if (bb->vm_ops->migrate) + ret = bb->vm_ops->migrate(vma, from, to, flags); + + sysfs_put_active(attr_sd); + return ret; +} +#endif + +static const struct vm_operations_struct bin_vm_ops = { + .open = bin_vma_open, + .fault = bin_fault, + .page_mkwrite = bin_page_mkwrite, + .access = bin_access, +#ifdef CONFIG_NUMA + .set_policy = bin_set_policy, + .get_policy = bin_get_policy, + .migrate = bin_migrate, +#endif +}; + +static int mmap(struct file *file, struct vm_area_struct *vma) +{ + struct bin_buffer *bb = file->private_data; + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; + struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; + int rc; + + mutex_lock(&bb->mutex); + + /* need attr_sd for attr, its parent for kobj */ + rc = -ENODEV; + if (!sysfs_get_active(attr_sd)) + goto out_unlock; + + rc = -EINVAL; + if (!attr->mmap) + goto out_put; + + rc = attr->mmap(file, kobj, attr, vma); + if (rc) + goto out_put; + + /* + * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() + * to satisfy versions of X which crash if the mmap fails: that + * substitutes a new vm_file, and we don't then want bin_vm_ops. + */ + if (vma->vm_file != file) + goto out_put; + + rc = -EINVAL; + if (bb->mmapped && bb->vm_ops != vma->vm_ops) + goto out_put; + + /* + * It is not possible to successfully wrap close. + * So error if someone is trying to use close. + */ + rc = -EINVAL; + if (vma->vm_ops && vma->vm_ops->close) + goto out_put; + + rc = 0; + bb->mmapped = 1; + bb->vm_ops = vma->vm_ops; + vma->vm_ops = &bin_vm_ops; +out_put: + sysfs_put_active(attr_sd); +out_unlock: + mutex_unlock(&bb->mutex); + + return rc; +} + +static int open(struct inode *inode, struct file *file) +{ + struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; + struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; + struct bin_buffer *bb = NULL; + int error; + + /* binary file operations requires both @sd and its parent */ + if (!sysfs_get_active(attr_sd)) + return -ENODEV; + + error = -EACCES; + if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap)) + goto err_out; + if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap)) + goto err_out; + + error = -ENOMEM; + bb = kzalloc(sizeof(*bb), GFP_KERNEL); + if (!bb) + goto err_out; + + bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!bb->buffer) + goto err_out; + + mutex_init(&bb->mutex); + bb->file = file; + file->private_data = bb; + + mutex_lock(&sysfs_bin_lock); + hlist_add_head(&bb->list, &attr_sd->s_bin_attr.buffers); + mutex_unlock(&sysfs_bin_lock); + + /* open succeeded, put active references */ + sysfs_put_active(attr_sd); + return 0; + + err_out: + sysfs_put_active(attr_sd); + kfree(bb); + return error; +} + +static int release(struct inode *inode, struct file *file) +{ + struct bin_buffer *bb = file->private_data; + + mutex_lock(&sysfs_bin_lock); + hlist_del(&bb->list); + mutex_unlock(&sysfs_bin_lock); + + kfree(bb->buffer); + kfree(bb); + return 0; +} + +const struct file_operations bin_fops = { + .read = read, + .write = write, + .mmap = mmap, + .llseek = generic_file_llseek, + .open = open, + .release = release, +}; + + +void unmap_bin_file(struct sysfs_dirent *attr_sd) +{ + struct bin_buffer *bb; + + if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR) + return; + + mutex_lock(&sysfs_bin_lock); + + hlist_for_each_entry(bb, &attr_sd->s_bin_attr.buffers, list) { + struct inode *inode = file_inode(bb->file); + + unmap_mapping_range(inode->i_mapping, 0, 0, 1); + } + + mutex_unlock(&sysfs_bin_lock); +} + +/** + * sysfs_create_bin_file - create binary file for object. + * @kobj: object. + * @attr: attribute descriptor. + */ +int sysfs_create_bin_file(struct kobject *kobj, + const struct bin_attribute *attr) +{ + BUG_ON(!kobj || !kobj->sd || !attr); + + return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR); +} +EXPORT_SYMBOL_GPL(sysfs_create_bin_file); + +/** + * sysfs_remove_bin_file - remove binary file for object. + * @kobj: object. + * @attr: attribute descriptor. + */ +void sysfs_remove_bin_file(struct kobject *kobj, + const struct bin_attribute *attr) +{ + sysfs_hash_and_remove(kobj->sd, NULL, attr->attr.name); +} +EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 5e73d66..4d83ced 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -26,21 +26,21 @@ #include "sysfs.h" DEFINE_MUTEX(sysfs_mutex); -DEFINE_SPINLOCK(sysfs_symlink_target_lock); +DEFINE_SPINLOCK(sysfs_assoc_lock); -#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb) +#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb); static DEFINE_SPINLOCK(sysfs_ino_lock); static DEFINE_IDA(sysfs_ino_ida); /** * sysfs_name_hash - * @name: Null terminated string to hash * @ns: Namespace tag to hash + * @name: Null terminated string to hash * * Returns 31 bit hash of ns + name (so it fits in an off_t ) */ -static unsigned int sysfs_name_hash(const char *name, const void *ns) +static unsigned int sysfs_name_hash(const void *ns, const char *name) { unsigned long hash = init_name_hash(); unsigned int len = strlen(name); @@ -56,8 +56,8 @@ static unsigned int sysfs_name_hash(const char *name, const void *ns) return hash; } -static int sysfs_name_compare(unsigned int hash, const char *name, - const void *ns, const struct sysfs_dirent *sd) +static int sysfs_name_compare(unsigned int hash, const void *ns, + const char *name, const struct sysfs_dirent *sd) { if (hash != sd->s_hash) return hash - sd->s_hash; @@ -69,7 +69,7 @@ static int sysfs_name_compare(unsigned int hash, const char *name, static int sysfs_sd_compare(const struct sysfs_dirent *left, const struct sysfs_dirent *right) { - return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns, + return sysfs_name_compare(left->s_hash, left->s_ns, left->s_name, right); } @@ -132,6 +132,24 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd) rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children); } +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +/* Test for attributes that want to ignore lockdep for read-locking */ +static bool ignore_lockdep(struct sysfs_dirent *sd) +{ + return sysfs_type(sd) == SYSFS_KOBJ_ATTR && + sd->s_attr.attr->ignore_lockdep; +} + +#else + +static inline bool ignore_lockdep(struct sysfs_dirent *sd) +{ + return true; +} + +#endif + /** * sysfs_get_active - get an active reference to sysfs_dirent * @sd: sysfs_dirent to get an active reference to @@ -150,7 +168,7 @@ struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd) if (!atomic_inc_unless_negative(&sd->s_active)) return NULL; - if (likely(!sysfs_ignore_lockdep(sd))) + if (likely(!ignore_lockdep(sd))) rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_); return sd; } @@ -169,7 +187,7 @@ void sysfs_put_active(struct sysfs_dirent *sd) if (unlikely(!sd)) return; - if (likely(!sysfs_ignore_lockdep(sd))) + if (likely(!ignore_lockdep(sd))) rwsem_release(&sd->dep_map, 1, _RET_IP_); v = atomic_dec_return(&sd->s_active); if (likely(v != SD_DEACTIVATED_BIAS)) @@ -382,19 +400,22 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) /** * sysfs_addrm_start - prepare for sysfs_dirent add/remove * @acxt: pointer to sysfs_addrm_cxt to be used + * @parent_sd: parent sysfs_dirent * - * This function is called when the caller is about to add or remove - * sysfs_dirent. This function acquires sysfs_mutex. @acxt is used - * to keep and pass context to other addrm functions. + * This function is called when the caller is about to add or + * remove sysfs_dirent under @parent_sd. This function acquires + * sysfs_mutex. @acxt is used to keep and pass context to + * other addrm functions. * * LOCKING: * Kernel thread context (may sleep). sysfs_mutex is locked on * return. */ -void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt) - __acquires(sysfs_mutex) +void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *parent_sd) { memset(acxt, 0, sizeof(*acxt)); + acxt->parent_sd = parent_sd; mutex_lock(&sysfs_mutex); } @@ -403,11 +424,10 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt) * __sysfs_add_one - add sysfs_dirent to parent without warning * @acxt: addrm context to use * @sd: sysfs_dirent to be added - * @parent_sd: the parent sysfs_dirent to add @sd to * - * Get @parent_sd and set @sd->s_parent to it and increment nlink of - * the parent inode if @sd is a directory and link into the children - * list of the parent. + * Get @acxt->parent_sd and set sd->s_parent to it and increment + * nlink of parent inode if @sd is a directory and link into the + * children list of the parent. * * This function should be called between calls to * sysfs_addrm_start() and sysfs_addrm_finish() and should be @@ -420,28 +440,27 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt) * 0 on success, -EEXIST if entry with the given name already * exists. */ -int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, - struct sysfs_dirent *parent_sd) +int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) { struct sysfs_inode_attrs *ps_iattr; int ret; - if (!!sysfs_ns_type(parent_sd) != !!sd->s_ns) { + if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) { WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", - sysfs_ns_type(parent_sd) ? "required" : "invalid", - parent_sd->s_name, sd->s_name); + sysfs_ns_type(acxt->parent_sd) ? "required" : "invalid", + acxt->parent_sd->s_name, sd->s_name); return -EINVAL; } - sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); - sd->s_parent = sysfs_get(parent_sd); + sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name); + sd->s_parent = sysfs_get(acxt->parent_sd); ret = sysfs_link_sibling(sd); if (ret) return ret; /* Update timestamps on the parent */ - ps_iattr = parent_sd->s_iattr; + ps_iattr = acxt->parent_sd->s_iattr; if (ps_iattr) { struct iattr *ps_iattrs = &ps_iattr->ia_iattr; ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; @@ -471,32 +490,14 @@ static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) return path; } -void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name) -{ - char *path; - - path = kzalloc(PATH_MAX, GFP_KERNEL); - if (path) { - sysfs_pathname(parent, path); - strlcat(path, "/", PATH_MAX); - strlcat(path, name, PATH_MAX); - } - - WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s'\n", - path ? path : name); - - kfree(path); -} - /** * sysfs_add_one - add sysfs_dirent to parent * @acxt: addrm context to use * @sd: sysfs_dirent to be added - * @parent_sd: the parent sysfs_dirent to add @sd to * - * Get @parent_sd and set @sd->s_parent to it and increment nlink of - * the parent inode if @sd is a directory and link into the children - * list of the parent. + * Get @acxt->parent_sd and set sd->s_parent to it and increment + * nlink of parent inode if @sd is a directory and link into the + * children list of the parent. * * This function should be called between calls to * sysfs_addrm_start() and sysfs_addrm_finish() and should be @@ -509,15 +510,23 @@ void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name) * 0 on success, -EEXIST if entry with the given name already * exists. */ -int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, - struct sysfs_dirent *parent_sd) +int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) { int ret; - ret = __sysfs_add_one(acxt, sd, parent_sd); + ret = __sysfs_add_one(acxt, sd); + if (ret == -EEXIST) { + char *path = kzalloc(PATH_MAX, GFP_KERNEL); + WARN(1, KERN_WARNING + "sysfs: cannot create duplicate filename '%s'\n", + (path == NULL) ? sd->s_name + : (sysfs_pathname(acxt->parent_sd, path), + strlcat(path, "/", PATH_MAX), + strlcat(path, sd->s_name, PATH_MAX), + path)); + kfree(path); + } - if (ret == -EEXIST) - sysfs_warn_dup(parent_sd, sd->s_name); return ret; } @@ -536,22 +545,16 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, * LOCKING: * Determined by sysfs_addrm_start(). */ -static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, - struct sysfs_dirent *sd) +void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) { struct sysfs_inode_attrs *ps_iattr; - /* - * Removal can be called multiple times on the same node. Only the - * first invocation is effective and puts the base ref. - */ - if (sd->s_flags & SYSFS_FLAG_REMOVED) - return; + BUG_ON(sd->s_flags & SYSFS_FLAG_REMOVED); sysfs_unlink_sibling(sd); /* Update timestamps on the parent */ - ps_iattr = sd->s_parent->s_iattr; + ps_iattr = acxt->parent_sd->s_iattr; if (ps_iattr) { struct iattr *ps_iattrs = &ps_iattr->ia_iattr; ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; @@ -574,7 +577,6 @@ static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, * sysfs_mutex is released. */ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) - __releases(sysfs_mutex) { /* release resources acquired by sysfs_addrm_start() */ mutex_unlock(&sysfs_mutex); @@ -586,7 +588,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) acxt->removed = sd->u.removed_list; sysfs_deactivate(sd); - sysfs_unmap_bin_file(sd); + unmap_bin_file(sd); sysfs_put(sd); } } @@ -595,7 +597,6 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) * sysfs_find_dirent - find sysfs_dirent with the given name * @parent_sd: sysfs_dirent to search under * @name: name to look for - * @ns: the namespace tag to use * * Look for sysfs_dirent with name @name under @parent_sd. * @@ -606,8 +607,8 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) * Pointer to sysfs_dirent if found, NULL if not. */ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, - const unsigned char *name, - const void *ns) + const void *ns, + const unsigned char *name) { struct rb_node *node = parent_sd->s_dir.children.rb_node; unsigned int hash; @@ -619,13 +620,13 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, return NULL; } - hash = sysfs_name_hash(name, ns); + hash = sysfs_name_hash(ns, name); while (node) { struct sysfs_dirent *sd; int result; sd = to_sysfs_dirent(node); - result = sysfs_name_compare(hash, name, ns, sd); + result = sysfs_name_compare(hash, ns, name, sd); if (result < 0) node = node->rb_left; else if (result > 0) @@ -637,10 +638,9 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, } /** - * sysfs_get_dirent_ns - find and get sysfs_dirent with the given name + * sysfs_get_dirent - find and get sysfs_dirent with the given name * @parent_sd: sysfs_dirent to search under * @name: name to look for - * @ns: the namespace tag to use * * Look for sysfs_dirent with name @name under @parent_sd and get * it if found. @@ -651,25 +651,24 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, * RETURNS: * Pointer to sysfs_dirent if found, NULL if not. */ -struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd, - const unsigned char *name, - const void *ns) +struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, + const void *ns, + const unsigned char *name) { struct sysfs_dirent *sd; mutex_lock(&sysfs_mutex); - sd = sysfs_find_dirent(parent_sd, name, ns); + sd = sysfs_find_dirent(parent_sd, ns, name); sysfs_get(sd); mutex_unlock(&sysfs_mutex); return sd; } -EXPORT_SYMBOL_GPL(sysfs_get_dirent_ns); +EXPORT_SYMBOL_GPL(sysfs_get_dirent); static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, - enum kobj_ns_type type, - const char *name, const void *ns, - struct sysfs_dirent **p_sd) + enum kobj_ns_type type, const void *ns, const char *name, + struct sysfs_dirent **p_sd) { umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; struct sysfs_addrm_cxt acxt; @@ -686,8 +685,8 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, sd->s_dir.kobj = kobj; /* link in */ - sysfs_addrm_start(&acxt); - rc = sysfs_add_one(&acxt, sd, parent_sd); + sysfs_addrm_start(&acxt, parent_sd); + rc = sysfs_add_one(&acxt, sd); sysfs_addrm_finish(&acxt); if (rc == 0) @@ -702,7 +701,7 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name, struct sysfs_dirent **p_sd) { return create_dir(kobj, kobj->sd, - KOBJ_NS_TYPE_NONE, name, NULL, p_sd); + KOBJ_NS_TYPE_NONE, NULL, name, p_sd); } /** @@ -731,14 +730,14 @@ static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj) } /** - * sysfs_create_dir_ns - create a directory for an object with a namespace tag - * @kobj: object we're creating directory for - * @ns: the namespace tag to use + * sysfs_create_dir - create a directory for an object. + * @kobj: object we're creating directory for. */ -int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) +int sysfs_create_dir(struct kobject *kobj) { enum kobj_ns_type type; struct sysfs_dirent *parent_sd, *sd; + const void *ns = NULL; int error = 0; BUG_ON(!kobj); @@ -751,9 +750,11 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) if (!parent_sd) return -ENOENT; + if (sysfs_ns_type(parent_sd)) + ns = kobj->ktype->namespace(kobj); type = sysfs_read_ns_type(kobj); - error = create_dir(kobj, parent_sd, type, kobject_name(kobj), ns, &sd); + error = create_dir(kobj, parent_sd, type, ns, kobject_name(kobj), &sd); if (!error) kobj->sd = sd; return error; @@ -775,7 +776,7 @@ static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry, type = sysfs_ns_type(parent_sd); ns = sysfs_info(dir->i_sb)->ns[type]; - sd = sysfs_find_dirent(parent_sd, dentry->d_name.name, ns); + sd = sysfs_find_dirent(parent_sd, ns, dentry->d_name.name); /* no such entry */ if (!sd) { @@ -806,128 +807,41 @@ const struct inode_operations sysfs_dir_inode_operations = { .setxattr = sysfs_setxattr, }; -static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos) +static void remove_dir(struct sysfs_dirent *sd) { - struct sysfs_dirent *last; - - while (true) { - struct rb_node *rbn; - - last = pos; - - if (sysfs_type(pos) != SYSFS_DIR) - break; - - rbn = rb_first(&pos->s_dir.children); - if (!rbn) - break; - - pos = to_sysfs_dirent(rbn); - } + struct sysfs_addrm_cxt acxt; - return last; + sysfs_addrm_start(&acxt, sd->s_parent); + sysfs_remove_one(&acxt, sd); + sysfs_addrm_finish(&acxt); } -/** - * sysfs_next_descendant_post - find the next descendant for post-order walk - * @pos: the current position (%NULL to initiate traversal) - * @root: sysfs_dirent whose descendants to walk - * - * Find the next descendant to visit for post-order traversal of @root's - * descendants. @root is included in the iteration and the last node to be - * visited. - */ -static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos, - struct sysfs_dirent *root) +void sysfs_remove_subdir(struct sysfs_dirent *sd) { - struct rb_node *rbn; - - lockdep_assert_held(&sysfs_mutex); - - /* if first iteration, visit leftmost descendant which may be root */ - if (!pos) - return sysfs_leftmost_descendant(root); - - /* if we visited @root, we're done */ - if (pos == root) - return NULL; - - /* if there's an unvisited sibling, visit its leftmost descendant */ - rbn = rb_next(&pos->s_rb); - if (rbn) - return sysfs_leftmost_descendant(to_sysfs_dirent(rbn)); - - /* no sibling left, visit parent */ - return pos->s_parent; + remove_dir(sd); } -static void __sysfs_remove(struct sysfs_addrm_cxt *acxt, - struct sysfs_dirent *sd) -{ - struct sysfs_dirent *pos, *next; - - if (!sd) - return; - pr_debug("sysfs %s: removing\n", sd->s_name); - - next = NULL; - do { - pos = next; - next = sysfs_next_descendant_post(pos, sd); - if (pos) - sysfs_remove_one(acxt, pos); - } while (next); -} - -/** - * sysfs_remove - remove a sysfs_dirent recursively - * @sd: the sysfs_dirent to remove - * - * Remove @sd along with all its subdirectories and files. - */ -void sysfs_remove(struct sysfs_dirent *sd) +static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) { struct sysfs_addrm_cxt acxt; + struct rb_node *pos; - sysfs_addrm_start(&acxt); - __sysfs_remove(&acxt, sd); - sysfs_addrm_finish(&acxt); -} - -/** - * sysfs_hash_and_remove - find a sysfs_dirent by name and remove it - * @dir_sd: parent of the target - * @name: name of the sysfs_dirent to remove - * @ns: namespace tag of the sysfs_dirent to remove - * - * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove - * it. Returns 0 on success, -ENOENT if such entry doesn't exist. - */ -int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name, - const void *ns) -{ - struct sysfs_addrm_cxt acxt; - struct sysfs_dirent *sd; + if (!dir_sd) + return; - if (!dir_sd) { - WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n", - name); - return -ENOENT; + pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); + sysfs_addrm_start(&acxt, dir_sd); + pos = rb_first(&dir_sd->s_dir.children); + while (pos) { + struct sysfs_dirent *sd = to_sysfs_dirent(pos); + pos = rb_next(pos); + if (sysfs_type(sd) != SYSFS_DIR) + sysfs_remove_one(&acxt, sd); } - - sysfs_addrm_start(&acxt); - - sd = sysfs_find_dirent(dir_sd, name, ns); - if (sd) - __sysfs_remove(&acxt, sd); - sysfs_addrm_finish(&acxt); - if (sd) - return 0; - else - return -ENOENT; + remove_dir(dir_sd); } /** @@ -938,34 +852,21 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name, * the directory before we remove the directory, and we've inlined * what used to be sysfs_rmdir() below, instead of calling separately. */ + void sysfs_remove_dir(struct kobject *kobj) { struct sysfs_dirent *sd = kobj->sd; - /* - * In general, kboject owner is responsible for ensuring removal - * doesn't race with other operations and sysfs doesn't provide any - * protection; however, when @kobj is used as a symlink target, the - * symlinking entity usually doesn't own @kobj and thus has no - * control over removal. @kobj->sd may be removed anytime and - * symlink code may end up dereferencing an already freed sd. - * - * sysfs_symlink_target_lock synchronizes @kobj->sd disassociation - * against symlink operations so that symlink code can safely - * dereference @kobj->sd. - */ - spin_lock(&sysfs_symlink_target_lock); + spin_lock(&sysfs_assoc_lock); kobj->sd = NULL; - spin_unlock(&sysfs_symlink_target_lock); + spin_unlock(&sysfs_assoc_lock); - if (sd) { - WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR); - sysfs_remove(sd); - } + __sysfs_remove_dir(sd); } -int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd, - const char *new_name, const void *new_ns) +int sysfs_rename(struct sysfs_dirent *sd, + struct sysfs_dirent *new_parent_sd, const void *new_ns, + const char *new_name) { int error; @@ -977,7 +878,7 @@ int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd, goto out; /* nothing to rename */ error = -EEXIST; - if (sysfs_find_dirent(new_parent_sd, new_name, new_ns)) + if (sysfs_find_dirent(new_parent_sd, new_ns, new_name)) goto out; /* rename sysfs_dirent */ @@ -998,7 +899,7 @@ int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd, sysfs_get(new_parent_sd); sysfs_put(sd->s_parent); sd->s_ns = new_ns; - sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); + sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name); sd->s_parent = new_parent_sd; sysfs_link_sibling(sd); @@ -1008,25 +909,30 @@ int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd, return error; } -int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, - const void *new_ns) +int sysfs_rename_dir(struct kobject *kobj, const char *new_name) { struct sysfs_dirent *parent_sd = kobj->sd->s_parent; + const void *new_ns = NULL; + + if (sysfs_ns_type(parent_sd)) + new_ns = kobj->ktype->namespace(kobj); - return sysfs_rename(kobj->sd, parent_sd, new_name, new_ns); + return sysfs_rename(kobj->sd, parent_sd, new_ns, new_name); } -int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, - const void *new_ns) +int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) { struct sysfs_dirent *sd = kobj->sd; struct sysfs_dirent *new_parent_sd; + const void *new_ns = NULL; BUG_ON(!sd->s_parent); + if (sysfs_ns_type(sd->s_parent)) + new_ns = kobj->ktype->namespace(kobj); new_parent_sd = new_parent_kobj && new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root; - return sysfs_rename(sd, new_parent_sd, sd->s_name, new_ns); + return sysfs_rename(sd, new_parent_sd, new_ns, sd->s_name); } /* Relationship between s_mode and the DT_xxx types */ diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index b94f936..15ef5eb 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -21,114 +21,70 @@ #include <linux/mutex.h> #include <linux/limits.h> #include <linux/uaccess.h> -#include <linux/seq_file.h> -#include <linux/mm.h> #include "sysfs.h" /* - * There's one sysfs_open_file for each open file and one sysfs_open_dirent - * for each sysfs_dirent with one or more open files. + * There's one sysfs_buffer for each open file and one + * sysfs_open_dirent for each sysfs_dirent with one or more open + * files. * - * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open is - * protected by sysfs_open_dirent_lock. - * - * filp->private_data points to seq_file whose ->private points to - * sysfs_open_file. sysfs_open_files are chained at - * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex. + * filp->private_data points to sysfs_buffer and + * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open + * is protected by sysfs_open_dirent_lock. */ static DEFINE_SPINLOCK(sysfs_open_dirent_lock); -static DEFINE_MUTEX(sysfs_open_file_mutex); struct sysfs_open_dirent { atomic_t refcnt; atomic_t event; wait_queue_head_t poll; - struct list_head files; /* goes through sysfs_open_file.list */ + struct list_head buffers; /* goes through sysfs_buffer.list */ }; -struct sysfs_open_file { - struct sysfs_dirent *sd; - struct file *file; +struct sysfs_buffer { + size_t count; + loff_t pos; + char *page; + const struct sysfs_ops *ops; struct mutex mutex; + int needs_read_fill; int event; struct list_head list; - - bool mmapped; - const struct vm_operations_struct *vm_ops; }; -static bool sysfs_is_bin(struct sysfs_dirent *sd) -{ - return sysfs_type(sd) == SYSFS_KOBJ_BIN_ATTR; -} - -static struct sysfs_open_file *sysfs_of(struct file *file) -{ - return ((struct seq_file *)file->private_data)->private; -} - -/* - * Determine ktype->sysfs_ops for the given sysfs_dirent. This function - * must be called while holding an active reference. - */ -static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd) -{ - struct kobject *kobj = sd->s_parent->s_dir.kobj; - - if (!sysfs_ignore_lockdep(sd)) - lockdep_assert_held(sd); - return kobj->ktype ? kobj->ktype->sysfs_ops : NULL; -} - -/* - * Reads on sysfs are handled through seq_file, which takes care of hairy - * details like buffering and seeking. The following function pipes - * sysfs_ops->show() result through seq_file. +/** + * fill_read_buffer - allocate and fill buffer from object. + * @dentry: dentry pointer. + * @buffer: data buffer for file. + * + * Allocate @buffer->page, if it hasn't been already, then call the + * kobject's show() method to fill the buffer with this attribute's + * data. + * This is called only once, on the file's first read unless an error + * is returned. */ -static int sysfs_seq_show(struct seq_file *sf, void *v) +static int fill_read_buffer(struct dentry *dentry, struct sysfs_buffer *buffer) { - struct sysfs_open_file *of = sf->private; - struct kobject *kobj = of->sd->s_parent->s_dir.kobj; - const struct sysfs_ops *ops; - char *buf; + struct sysfs_dirent *attr_sd = dentry->d_fsdata; + struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; + const struct sysfs_ops *ops = buffer->ops; + int ret = 0; ssize_t count; - /* acquire buffer and ensure that it's >= PAGE_SIZE */ - count = seq_get_buf(sf, &buf); - if (count < PAGE_SIZE) { - seq_commit(sf, -1); - return 0; - } + if (!buffer->page) + buffer->page = (char *) get_zeroed_page(GFP_KERNEL); + if (!buffer->page) + return -ENOMEM; - /* - * Need @of->sd for attr and ops, its parent for kobj. @of->mutex - * nests outside active ref and is just to ensure that the ops - * aren't called concurrently for the same open file. - */ - mutex_lock(&of->mutex); - if (!sysfs_get_active(of->sd)) { - mutex_unlock(&of->mutex); + /* need attr_sd for attr and ops, its parent for kobj */ + if (!sysfs_get_active(attr_sd)) return -ENODEV; - } - - of->event = atomic_read(&of->sd->s_attr.open->event); - - /* - * Lookup @ops and invoke show(). Control may reach here via seq - * file lseek even if @ops->show() isn't implemented. - */ - ops = sysfs_file_ops(of->sd); - if (ops->show) - count = ops->show(kobj, of->sd->s_attr.attr, buf); - else - count = 0; - sysfs_put_active(of->sd); - mutex_unlock(&of->mutex); + buffer->event = atomic_read(&attr_sd->s_attr.open->event); + count = ops->show(kobj, attr_sd->s_attr.attr, buffer->page); - if (count < 0) - return count; + sysfs_put_active(attr_sd); /* * The code works fine with PAGE_SIZE return but it's likely to @@ -140,389 +96,155 @@ static int sysfs_seq_show(struct seq_file *sf, void *v) /* Try to struggle along */ count = PAGE_SIZE - 1; } - seq_commit(sf, count); - return 0; + if (count >= 0) { + buffer->needs_read_fill = 0; + buffer->count = count; + } else { + ret = count; + } + return ret; } -/* - * Read method for bin files. As reading a bin file can have side-effects, - * the exact offset and bytes specified in read(2) call should be passed to - * the read callback making it difficult to use seq_file. Implement - * simplistic custom buffering for bin files. +/** + * sysfs_read_file - read an attribute. + * @file: file pointer. + * @buf: buffer to fill. + * @count: number of bytes to read. + * @ppos: starting offset in file. + * + * Userspace wants to read an attribute file. The attribute descriptor + * is in the file's ->d_fsdata. The target object is in the directory's + * ->d_fsdata. + * + * We call fill_read_buffer() to allocate and fill the buffer from the + * object's show() method exactly once (if the read is happening from + * the beginning of the file). That should fill the entire buffer with + * all the data the object has to offer for that attribute. + * We then call flush_read_buffer() to copy the buffer to userspace + * in the increments specified. */ -static ssize_t sysfs_bin_read(struct file *file, char __user *userbuf, - size_t bytes, loff_t *off) -{ - struct sysfs_open_file *of = sysfs_of(file); - struct bin_attribute *battr = of->sd->s_attr.bin_attr; - struct kobject *kobj = of->sd->s_parent->s_dir.kobj; - loff_t size = file_inode(file)->i_size; - int count = min_t(size_t, bytes, PAGE_SIZE); - loff_t offs = *off; - char *buf; - - if (!bytes) - return 0; - - if (size) { - if (offs > size) - return 0; - if (offs + count > size) - count = size - offs; - } - - buf = kmalloc(count, GFP_KERNEL); - if (!buf) - return -ENOMEM; - /* need of->sd for battr, its parent for kobj */ - mutex_lock(&of->mutex); - if (!sysfs_get_active(of->sd)) { - count = -ENODEV; - mutex_unlock(&of->mutex); - goto out_free; - } - - if (battr->read) - count = battr->read(file, kobj, battr, buf, offs, count); - else - count = -EIO; - - sysfs_put_active(of->sd); - mutex_unlock(&of->mutex); - - if (count < 0) - goto out_free; +static ssize_t +sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + struct sysfs_buffer *buffer = file->private_data; + ssize_t retval = 0; - if (copy_to_user(userbuf, buf, count)) { - count = -EFAULT; - goto out_free; + mutex_lock(&buffer->mutex); + if (buffer->needs_read_fill || *ppos == 0) { + retval = fill_read_buffer(file->f_path.dentry, buffer); + if (retval) + goto out; } + pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", + __func__, count, *ppos, buffer->page); + retval = simple_read_from_buffer(buf, count, ppos, buffer->page, + buffer->count); +out: + mutex_unlock(&buffer->mutex); + return retval; +} - pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); +/** + * fill_write_buffer - copy buffer from userspace. + * @buffer: data buffer for file. + * @buf: data from user. + * @count: number of bytes in @userbuf. + * + * Allocate @buffer->page if it hasn't been already, then + * copy the user-supplied buffer into it. + */ +static int fill_write_buffer(struct sysfs_buffer *buffer, + const char __user *buf, size_t count) +{ + int error; - *off = offs + count; + if (!buffer->page) + buffer->page = (char *)get_zeroed_page(GFP_KERNEL); + if (!buffer->page) + return -ENOMEM; - out_free: - kfree(buf); - return count; + if (count >= PAGE_SIZE) + count = PAGE_SIZE - 1; + error = copy_from_user(buffer->page, buf, count); + buffer->needs_read_fill = 1; + /* if buf is assumed to contain a string, terminate it by \0, + so e.g. sscanf() can scan the string easily */ + buffer->page[count] = 0; + return error ? -EFAULT : count; } + /** - * flush_write_buffer - push buffer to kobject - * @of: open file - * @buf: data buffer for file - * @off: file offset to write to - * @count: number of bytes + * flush_write_buffer - push buffer to kobject. + * @dentry: dentry to the attribute + * @buffer: data buffer for file. + * @count: number of bytes * - * Get the correct pointers for the kobject and the attribute we're dealing - * with, then call the store() method for it with @buf. + * Get the correct pointers for the kobject and the attribute we're + * dealing with, then call the store() method for the attribute, + * passing the buffer that we acquired in fill_write_buffer(). */ -static int flush_write_buffer(struct sysfs_open_file *of, char *buf, loff_t off, - size_t count) +static int flush_write_buffer(struct dentry *dentry, + struct sysfs_buffer *buffer, size_t count) { - struct kobject *kobj = of->sd->s_parent->s_dir.kobj; - int rc = 0; + struct sysfs_dirent *attr_sd = dentry->d_fsdata; + struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; + const struct sysfs_ops *ops = buffer->ops; + int rc; - /* - * Need @of->sd for attr and ops, its parent for kobj. @of->mutex - * nests outside active ref and is just to ensure that the ops - * aren't called concurrently for the same open file. - */ - mutex_lock(&of->mutex); - if (!sysfs_get_active(of->sd)) { - mutex_unlock(&of->mutex); + /* need attr_sd for attr and ops, its parent for kobj */ + if (!sysfs_get_active(attr_sd)) return -ENODEV; - } - - if (sysfs_is_bin(of->sd)) { - struct bin_attribute *battr = of->sd->s_attr.bin_attr; - - rc = -EIO; - if (battr->write) - rc = battr->write(of->file, kobj, battr, buf, off, - count); - } else { - const struct sysfs_ops *ops = sysfs_file_ops(of->sd); - rc = ops->store(kobj, of->sd->s_attr.attr, buf, count); - } + rc = ops->store(kobj, attr_sd->s_attr.attr, buffer->page, count); - sysfs_put_active(of->sd); - mutex_unlock(&of->mutex); + sysfs_put_active(attr_sd); return rc; } + /** - * sysfs_write_file - write an attribute - * @file: file pointer - * @user_buf: data to write - * @count: number of bytes - * @ppos: starting offset + * sysfs_write_file - write an attribute. + * @file: file pointer + * @buf: data to write + * @count: number of bytes + * @ppos: starting offset * - * Copy data in from userland and pass it to the matching - * sysfs_ops->store() by invoking flush_write_buffer(). - * - * There is no easy way for us to know if userspace is only doing a partial - * write, so we don't support them. We expect the entire buffer to come on - * the first write. Hint: if you're writing a value, first read the file, - * modify only the the value you're changing, then write entire buffer - * back. + * Similar to sysfs_read_file(), though working in the opposite direction. + * We allocate and fill the data from the user in fill_write_buffer(), + * then push it to the kobject in flush_write_buffer(). + * There is no easy way for us to know if userspace is only doing a partial + * write, so we don't support them. We expect the entire buffer to come + * on the first write. + * Hint: if you're writing a value, first read the file, modify only the + * the value you're changing, then write entire buffer back. */ -static ssize_t sysfs_write_file(struct file *file, const char __user *user_buf, +static ssize_t sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct sysfs_open_file *of = sysfs_of(file); - ssize_t len = min_t(size_t, count, PAGE_SIZE); - loff_t size = file_inode(file)->i_size; - char *buf; - - if (sysfs_is_bin(of->sd) && size) { - if (size <= *ppos) - return 0; - len = min_t(ssize_t, len, size - *ppos); - } - - if (!len) - return 0; + struct sysfs_buffer *buffer = file->private_data; + ssize_t len; - buf = kmalloc(len + 1, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - if (copy_from_user(buf, user_buf, len)) { - len = -EFAULT; - goto out_free; - } - buf[len] = '\0'; /* guarantee string termination */ - - len = flush_write_buffer(of, buf, *ppos, len); + mutex_lock(&buffer->mutex); + len = fill_write_buffer(buffer, buf, count); + if (len > 0) + len = flush_write_buffer(file->f_path.dentry, buffer, len); if (len > 0) *ppos += len; -out_free: - kfree(buf); + mutex_unlock(&buffer->mutex); return len; } -static void sysfs_bin_vma_open(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct sysfs_open_file *of = sysfs_of(file); - - if (!of->vm_ops) - return; - - if (!sysfs_get_active(of->sd)) - return; - - if (of->vm_ops->open) - of->vm_ops->open(vma); - - sysfs_put_active(of->sd); -} - -static int sysfs_bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct file *file = vma->vm_file; - struct sysfs_open_file *of = sysfs_of(file); - int ret; - - if (!of->vm_ops) - return VM_FAULT_SIGBUS; - - if (!sysfs_get_active(of->sd)) - return VM_FAULT_SIGBUS; - - ret = VM_FAULT_SIGBUS; - if (of->vm_ops->fault) - ret = of->vm_ops->fault(vma, vmf); - - sysfs_put_active(of->sd); - return ret; -} - -static int sysfs_bin_page_mkwrite(struct vm_area_struct *vma, - struct vm_fault *vmf) -{ - struct file *file = vma->vm_file; - struct sysfs_open_file *of = sysfs_of(file); - int ret; - - if (!of->vm_ops) - return VM_FAULT_SIGBUS; - - if (!sysfs_get_active(of->sd)) - return VM_FAULT_SIGBUS; - - ret = 0; - if (of->vm_ops->page_mkwrite) - ret = of->vm_ops->page_mkwrite(vma, vmf); - else - file_update_time(file); - - sysfs_put_active(of->sd); - return ret; -} - -static int sysfs_bin_access(struct vm_area_struct *vma, unsigned long addr, - void *buf, int len, int write) -{ - struct file *file = vma->vm_file; - struct sysfs_open_file *of = sysfs_of(file); - int ret; - - if (!of->vm_ops) - return -EINVAL; - - if (!sysfs_get_active(of->sd)) - return -EINVAL; - - ret = -EINVAL; - if (of->vm_ops->access) - ret = of->vm_ops->access(vma, addr, buf, len, write); - - sysfs_put_active(of->sd); - return ret; -} - -#ifdef CONFIG_NUMA -static int sysfs_bin_set_policy(struct vm_area_struct *vma, - struct mempolicy *new) -{ - struct file *file = vma->vm_file; - struct sysfs_open_file *of = sysfs_of(file); - int ret; - - if (!of->vm_ops) - return 0; - - if (!sysfs_get_active(of->sd)) - return -EINVAL; - - ret = 0; - if (of->vm_ops->set_policy) - ret = of->vm_ops->set_policy(vma, new); - - sysfs_put_active(of->sd); - return ret; -} - -static struct mempolicy *sysfs_bin_get_policy(struct vm_area_struct *vma, - unsigned long addr) -{ - struct file *file = vma->vm_file; - struct sysfs_open_file *of = sysfs_of(file); - struct mempolicy *pol; - - if (!of->vm_ops) - return vma->vm_policy; - - if (!sysfs_get_active(of->sd)) - return vma->vm_policy; - - pol = vma->vm_policy; - if (of->vm_ops->get_policy) - pol = of->vm_ops->get_policy(vma, addr); - - sysfs_put_active(of->sd); - return pol; -} - -static int sysfs_bin_migrate(struct vm_area_struct *vma, const nodemask_t *from, - const nodemask_t *to, unsigned long flags) -{ - struct file *file = vma->vm_file; - struct sysfs_open_file *of = sysfs_of(file); - int ret; - - if (!of->vm_ops) - return 0; - - if (!sysfs_get_active(of->sd)) - return 0; - - ret = 0; - if (of->vm_ops->migrate) - ret = of->vm_ops->migrate(vma, from, to, flags); - - sysfs_put_active(of->sd); - return ret; -} -#endif - -static const struct vm_operations_struct sysfs_bin_vm_ops = { - .open = sysfs_bin_vma_open, - .fault = sysfs_bin_fault, - .page_mkwrite = sysfs_bin_page_mkwrite, - .access = sysfs_bin_access, -#ifdef CONFIG_NUMA - .set_policy = sysfs_bin_set_policy, - .get_policy = sysfs_bin_get_policy, - .migrate = sysfs_bin_migrate, -#endif -}; - -static int sysfs_bin_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct sysfs_open_file *of = sysfs_of(file); - struct bin_attribute *battr = of->sd->s_attr.bin_attr; - struct kobject *kobj = of->sd->s_parent->s_dir.kobj; - int rc; - - mutex_lock(&of->mutex); - - /* need of->sd for battr, its parent for kobj */ - rc = -ENODEV; - if (!sysfs_get_active(of->sd)) - goto out_unlock; - - if (!battr->mmap) - goto out_put; - - rc = battr->mmap(file, kobj, battr, vma); - if (rc) - goto out_put; - - /* - * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() - * to satisfy versions of X which crash if the mmap fails: that - * substitutes a new vm_file, and we don't then want bin_vm_ops. - */ - if (vma->vm_file != file) - goto out_put; - - rc = -EINVAL; - if (of->mmapped && of->vm_ops != vma->vm_ops) - goto out_put; - - /* - * It is not possible to successfully wrap close. - * So error if someone is trying to use close. - */ - rc = -EINVAL; - if (vma->vm_ops && vma->vm_ops->close) - goto out_put; - - rc = 0; - of->mmapped = 1; - of->vm_ops = vma->vm_ops; - vma->vm_ops = &sysfs_bin_vm_ops; -out_put: - sysfs_put_active(of->sd); -out_unlock: - mutex_unlock(&of->mutex); - - return rc; -} - /** * sysfs_get_open_dirent - get or create sysfs_open_dirent * @sd: target sysfs_dirent - * @of: sysfs_open_file for this instance of open + * @buffer: sysfs_buffer for this instance of open * * If @sd->s_attr.open exists, increment its reference count; - * otherwise, create one. @of is chained to the files list. + * otherwise, create one. @buffer is chained to the buffers + * list. * * LOCKING: * Kernel thread context (may sleep). @@ -531,12 +253,11 @@ out_unlock: * 0 on success, -errno on failure. */ static int sysfs_get_open_dirent(struct sysfs_dirent *sd, - struct sysfs_open_file *of) + struct sysfs_buffer *buffer) { struct sysfs_open_dirent *od, *new_od = NULL; retry: - mutex_lock(&sysfs_open_file_mutex); spin_lock_irq(&sysfs_open_dirent_lock); if (!sd->s_attr.open && new_od) { @@ -547,11 +268,10 @@ static int sysfs_get_open_dirent(struct sysfs_dirent *sd, od = sd->s_attr.open; if (od) { atomic_inc(&od->refcnt); - list_add_tail(&of->list, &od->files); + list_add_tail(&buffer->list, &od->buffers); } spin_unlock_irq(&sysfs_open_dirent_lock); - mutex_unlock(&sysfs_open_file_mutex); if (od) { kfree(new_od); @@ -566,40 +286,36 @@ static int sysfs_get_open_dirent(struct sysfs_dirent *sd, atomic_set(&new_od->refcnt, 0); atomic_set(&new_od->event, 1); init_waitqueue_head(&new_od->poll); - INIT_LIST_HEAD(&new_od->files); + INIT_LIST_HEAD(&new_od->buffers); goto retry; } /** * sysfs_put_open_dirent - put sysfs_open_dirent * @sd: target sysfs_dirent - * @of: associated sysfs_open_file + * @buffer: associated sysfs_buffer * - * Put @sd->s_attr.open and unlink @of from the files list. If - * reference count reaches zero, disassociate and free it. + * Put @sd->s_attr.open and unlink @buffer from the buffers list. + * If reference count reaches zero, disassociate and free it. * * LOCKING: * None. */ static void sysfs_put_open_dirent(struct sysfs_dirent *sd, - struct sysfs_open_file *of) + struct sysfs_buffer *buffer) { struct sysfs_open_dirent *od = sd->s_attr.open; unsigned long flags; - mutex_lock(&sysfs_open_file_mutex); spin_lock_irqsave(&sysfs_open_dirent_lock, flags); - if (of) - list_del(&of->list); - + list_del(&buffer->list); if (atomic_dec_and_test(&od->refcnt)) sd->s_attr.open = NULL; else od = NULL; spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); - mutex_unlock(&sysfs_open_file_mutex); kfree(od); } @@ -608,99 +324,67 @@ static int sysfs_open_file(struct inode *inode, struct file *file) { struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - struct sysfs_open_file *of; - bool has_read, has_write, has_mmap; + struct sysfs_buffer *buffer; + const struct sysfs_ops *ops; int error = -EACCES; /* need attr_sd for attr and ops, its parent for kobj */ if (!sysfs_get_active(attr_sd)) return -ENODEV; - if (sysfs_is_bin(attr_sd)) { - struct bin_attribute *battr = attr_sd->s_attr.bin_attr; - - has_read = battr->read || battr->mmap; - has_write = battr->write || battr->mmap; - has_mmap = battr->mmap; - } else { - const struct sysfs_ops *ops = sysfs_file_ops(attr_sd); + /* every kobject with an attribute needs a ktype assigned */ + if (kobj->ktype && kobj->ktype->sysfs_ops) + ops = kobj->ktype->sysfs_ops; + else { + WARN(1, KERN_ERR + "missing sysfs attribute operations for kobject: %s\n", + kobject_name(kobj)); + goto err_out; + } - /* every kobject with an attribute needs a ktype assigned */ - if (WARN(!ops, KERN_ERR - "missing sysfs attribute operations for kobject: %s\n", - kobject_name(kobj))) + /* File needs write support. + * The inode's perms must say it's ok, + * and we must have a store method. + */ + if (file->f_mode & FMODE_WRITE) { + if (!(inode->i_mode & S_IWUGO) || !ops->store) goto err_out; - - has_read = ops->show; - has_write = ops->store; - has_mmap = false; } - /* check perms and supported operations */ - if ((file->f_mode & FMODE_WRITE) && - (!(inode->i_mode & S_IWUGO) || !has_write)) - goto err_out; - - if ((file->f_mode & FMODE_READ) && - (!(inode->i_mode & S_IRUGO) || !has_read)) - goto err_out; - - /* allocate a sysfs_open_file for the file */ - error = -ENOMEM; - of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL); - if (!of) - goto err_out; - - /* - * The following is done to give a different lockdep key to - * @of->mutex for files which implement mmap. This is a rather - * crude way to avoid false positive lockdep warning around - * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and - * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under - * which mm->mmap_sem nests, while holding @of->mutex. As each - * open file has a separate mutex, it's okay as long as those don't - * happen on the same file. At this point, we can't easily give - * each file a separate locking class. Let's differentiate on - * whether the file has mmap or not for now. + /* File needs read support. + * The inode's perms must say it's ok, and we there + * must be a show method for it. */ - if (has_mmap) - mutex_init(&of->mutex); - else - mutex_init(&of->mutex); - - of->sd = attr_sd; - of->file = file; + if (file->f_mode & FMODE_READ) { + if (!(inode->i_mode & S_IRUGO) || !ops->show) + goto err_out; + } - /* - * Always instantiate seq_file even if read access doesn't use - * seq_file or is not requested. This unifies private data access - * and readable regular files are the vast majority anyway. + /* No error? Great, allocate a buffer for the file, and store it + * it in file->private_data for easy access. */ - if (sysfs_is_bin(attr_sd)) - error = single_open(file, NULL, of); - else - error = single_open(file, sysfs_seq_show, of); - if (error) - goto err_free; + error = -ENOMEM; + buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL); + if (!buffer) + goto err_out; - /* seq_file clears PWRITE unconditionally, restore it if WRITE */ - if (file->f_mode & FMODE_WRITE) - file->f_mode |= FMODE_PWRITE; + mutex_init(&buffer->mutex); + buffer->needs_read_fill = 1; + buffer->ops = ops; + file->private_data = buffer; /* make sure we have open dirent struct */ - error = sysfs_get_open_dirent(attr_sd, of); + error = sysfs_get_open_dirent(attr_sd, buffer); if (error) - goto err_close; + goto err_free; /* open succeeded, put active references */ sysfs_put_active(attr_sd); return 0; -err_close: - single_release(inode, file); -err_free: - kfree(of); -err_out: + err_free: + kfree(buffer); + err_out: sysfs_put_active(attr_sd); return error; } @@ -708,39 +392,15 @@ err_out: static int sysfs_release(struct inode *inode, struct file *filp) { struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata; - struct sysfs_open_file *of = sysfs_of(filp); - - sysfs_put_open_dirent(sd, of); - single_release(inode, filp); - kfree(of); - - return 0; -} - -void sysfs_unmap_bin_file(struct sysfs_dirent *sd) -{ - struct sysfs_open_dirent *od; - struct sysfs_open_file *of; + struct sysfs_buffer *buffer = filp->private_data; - if (!sysfs_is_bin(sd)) - return; - - spin_lock_irq(&sysfs_open_dirent_lock); - od = sd->s_attr.open; - if (od) - atomic_inc(&od->refcnt); - spin_unlock_irq(&sysfs_open_dirent_lock); - if (!od) - return; + sysfs_put_open_dirent(sd, buffer); - mutex_lock(&sysfs_open_file_mutex); - list_for_each_entry(of, &od->files, list) { - struct inode *inode = file_inode(of->file); - unmap_mapping_range(inode->i_mapping, 0, 0, 1); - } - mutex_unlock(&sysfs_open_file_mutex); + if (buffer->page) + free_page((unsigned long)buffer->page); + kfree(buffer); - sysfs_put_open_dirent(sd, NULL); + return 0; } /* Sysfs attribute files are pollable. The idea is that you read @@ -758,7 +418,7 @@ void sysfs_unmap_bin_file(struct sysfs_dirent *sd) */ static unsigned int sysfs_poll(struct file *filp, poll_table *wait) { - struct sysfs_open_file *of = sysfs_of(filp); + struct sysfs_buffer *buffer = filp->private_data; struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata; struct sysfs_open_dirent *od = attr_sd->s_attr.open; @@ -770,12 +430,13 @@ static unsigned int sysfs_poll(struct file *filp, poll_table *wait) sysfs_put_active(attr_sd); - if (of->event != atomic_read(&od->event)) + if (buffer->event != atomic_read(&od->event)) goto trigger; return DEFAULT_POLLMASK; trigger: + buffer->needs_read_fill = 1; return DEFAULT_POLLMASK|POLLERR|POLLPRI; } @@ -805,9 +466,9 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr) mutex_lock(&sysfs_mutex); if (sd && dir) - sd = sysfs_find_dirent(sd, dir, NULL); + sd = sysfs_find_dirent(sd, NULL, dir); if (sd && attr) - sd = sysfs_find_dirent(sd, attr, NULL); + sd = sysfs_find_dirent(sd, NULL, attr); if (sd) sysfs_notify_dirent(sd); @@ -816,7 +477,7 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr) EXPORT_SYMBOL_GPL(sysfs_notify); const struct file_operations sysfs_file_operations = { - .read = seq_read, + .read = sysfs_read_file, .write = sysfs_write_file, .llseek = generic_file_llseek, .open = sysfs_open_file, @@ -824,25 +485,58 @@ const struct file_operations sysfs_file_operations = { .poll = sysfs_poll, }; -const struct file_operations sysfs_bin_operations = { - .read = sysfs_bin_read, - .write = sysfs_write_file, - .llseek = generic_file_llseek, - .mmap = sysfs_bin_mmap, - .open = sysfs_open_file, - .release = sysfs_release, - .poll = sysfs_poll, -}; +static int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr, + const void **pns) +{ + struct sysfs_dirent *dir_sd = kobj->sd; + const struct sysfs_ops *ops; + const void *ns = NULL; + int err; + + if (!dir_sd) { + WARN(1, KERN_ERR "sysfs: kobject %s without dirent\n", + kobject_name(kobj)); + return -ENOENT; + } + + err = 0; + if (!sysfs_ns_type(dir_sd)) + goto out; -int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd, - const struct attribute *attr, int type, - umode_t amode, const void *ns) + err = -EINVAL; + if (!kobj->ktype) + goto out; + ops = kobj->ktype->sysfs_ops; + if (!ops) + goto out; + if (!ops->namespace) + goto out; + + err = 0; + ns = ops->namespace(kobj, attr); +out: + if (err) { + WARN(1, KERN_ERR + "missing sysfs namespace attribute operation for kobject: %s\n", + kobject_name(kobj)); + } + *pns = ns; + return err; +} + +int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, + const struct attribute *attr, int type, umode_t amode) { umode_t mode = (amode & S_IALLUGO) | S_IFREG; struct sysfs_addrm_cxt acxt; struct sysfs_dirent *sd; + const void *ns; int rc; + rc = sysfs_attr_ns(dir_sd->s_dir.kobj, attr, &ns); + if (rc) + return rc; + sd = sysfs_new_dirent(attr->name, mode, type); if (!sd) return -ENOMEM; @@ -851,8 +545,8 @@ int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd, sd->s_attr.attr = (void *)attr; sysfs_dirent_init_lockdep(sd); - sysfs_addrm_start(&acxt); - rc = sysfs_add_one(&acxt, sd, dir_sd); + sysfs_addrm_start(&acxt, dir_sd); + rc = sysfs_add_one(&acxt, sd); sysfs_addrm_finish(&acxt); if (rc) @@ -865,25 +559,23 @@ int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd, int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, int type) { - return sysfs_add_file_mode_ns(dir_sd, attr, type, attr->mode, NULL); + return sysfs_add_file_mode(dir_sd, attr, type, attr->mode); } + /** - * sysfs_create_file_ns - create an attribute file for an object with custom ns - * @kobj: object we're creating for - * @attr: attribute descriptor - * @ns: namespace the new file should belong to + * sysfs_create_file - create an attribute file for an object. + * @kobj: object we're creating for. + * @attr: attribute descriptor. */ -int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns) +int sysfs_create_file(struct kobject *kobj, const struct attribute *attr) { BUG_ON(!kobj || !kobj->sd || !attr); - return sysfs_add_file_mode_ns(kobj->sd, attr, SYSFS_KOBJ_ATTR, - attr->mode, ns); + return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR); } -EXPORT_SYMBOL_GPL(sysfs_create_file_ns); +EXPORT_SYMBOL_GPL(sysfs_create_file); int sysfs_create_files(struct kobject *kobj, const struct attribute **ptr) { @@ -912,7 +604,7 @@ int sysfs_add_file_to_group(struct kobject *kobj, int error; if (group) - dir_sd = sysfs_get_dirent(kobj->sd, group); + dir_sd = sysfs_get_dirent(kobj->sd, NULL, group); else dir_sd = sysfs_get(kobj->sd); @@ -938,12 +630,17 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, { struct sysfs_dirent *sd; struct iattr newattrs; + const void *ns; int rc; + rc = sysfs_attr_ns(kobj, attr, &ns); + if (rc) + return rc; + mutex_lock(&sysfs_mutex); rc = -ENOENT; - sd = sysfs_find_dirent(kobj->sd, attr->name, NULL); + sd = sysfs_find_dirent(kobj->sd, ns, attr->name); if (!sd) goto out; @@ -958,21 +655,22 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, EXPORT_SYMBOL_GPL(sysfs_chmod_file); /** - * sysfs_remove_file_ns - remove an object attribute with a custom ns tag - * @kobj: object we're acting for - * @attr: attribute descriptor - * @ns: namespace tag of the file to remove + * sysfs_remove_file - remove an object attribute. + * @kobj: object we're acting for. + * @attr: attribute descriptor. * - * Hash the attribute name and namespace tag and kill the victim. + * Hash the attribute name and kill the victim. */ -void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, - const void *ns) +void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr) { - struct sysfs_dirent *dir_sd = kobj->sd; + const void *ns; - sysfs_hash_and_remove(dir_sd, attr->name, ns); + if (sysfs_attr_ns(kobj, attr, &ns)) + return; + + sysfs_hash_and_remove(kobj->sd, ns, attr->name); } -EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); +EXPORT_SYMBOL_GPL(sysfs_remove_file); void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr) { @@ -994,42 +692,16 @@ void sysfs_remove_file_from_group(struct kobject *kobj, struct sysfs_dirent *dir_sd; if (group) - dir_sd = sysfs_get_dirent(kobj->sd, group); + dir_sd = sysfs_get_dirent(kobj->sd, NULL, group); else dir_sd = sysfs_get(kobj->sd); if (dir_sd) { - sysfs_hash_and_remove(dir_sd, attr->name, NULL); + sysfs_hash_and_remove(dir_sd, NULL, attr->name); sysfs_put(dir_sd); } } EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); -/** - * sysfs_create_bin_file - create binary file for object. - * @kobj: object. - * @attr: attribute descriptor. - */ -int sysfs_create_bin_file(struct kobject *kobj, - const struct bin_attribute *attr) -{ - BUG_ON(!kobj || !kobj->sd || !attr); - - return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR); -} -EXPORT_SYMBOL_GPL(sysfs_create_bin_file); - -/** - * sysfs_remove_bin_file - remove binary file for object. - * @kobj: object. - * @attr: attribute descriptor. - */ -void sysfs_remove_bin_file(struct kobject *kobj, - const struct bin_attribute *attr) -{ - sysfs_hash_and_remove(kobj->sd, attr->attr.name, NULL); -} -EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); - struct sysfs_schedule_callback_struct { struct list_head workq_list; struct kobject *kobj; diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 1898a10..5f92cd2 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -26,7 +26,7 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, if (grp->attrs) for (attr = grp->attrs; *attr; attr++) - sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL); + sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); if (grp->bin_attrs) for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) sysfs_remove_bin_file(kobj, *bin_attr); @@ -49,17 +49,16 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, * re-adding (if required) the file. */ if (update) - sysfs_hash_and_remove(dir_sd, (*attr)->name, - NULL); + sysfs_hash_and_remove(dir_sd, NULL, + (*attr)->name); if (grp->is_visible) { mode = grp->is_visible(kobj, *attr, i); if (!mode) continue; } - error = sysfs_add_file_mode_ns(dir_sd, *attr, - SYSFS_KOBJ_ATTR, - (*attr)->mode | mode, - NULL); + error = sysfs_add_file_mode(dir_sd, *attr, + SYSFS_KOBJ_ATTR, + (*attr)->mode | mode); if (unlikely(error)) break; } @@ -111,7 +110,7 @@ static int internal_create_group(struct kobject *kobj, int update, error = create_files(sd, kobj, grp, update); if (error) { if (grp->name) - sysfs_remove(sd); + sysfs_remove_subdir(sd); } sysfs_put(sd); return error; @@ -207,7 +206,7 @@ void sysfs_remove_group(struct kobject *kobj, struct sysfs_dirent *sd; if (grp->name) { - sd = sysfs_get_dirent(dir_sd, grp->name); + sd = sysfs_get_dirent(dir_sd, NULL, grp->name); if (!sd) { WARN(!sd, KERN_WARNING "sysfs group %p not found for kobject '%s'\n", @@ -219,7 +218,7 @@ void sysfs_remove_group(struct kobject *kobj, remove_files(sd, kobj, grp); if (grp->name) - sysfs_remove(sd); + sysfs_remove_subdir(sd); sysfs_put(sd); } @@ -262,7 +261,7 @@ int sysfs_merge_group(struct kobject *kobj, struct attribute *const *attr; int i; - dir_sd = sysfs_get_dirent(kobj->sd, grp->name); + dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name); if (!dir_sd) return -ENOENT; @@ -270,7 +269,7 @@ int sysfs_merge_group(struct kobject *kobj, error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); if (error) { while (--i >= 0) - sysfs_hash_and_remove(dir_sd, (*--attr)->name, NULL); + sysfs_hash_and_remove(dir_sd, NULL, (*--attr)->name); } sysfs_put(dir_sd); @@ -289,10 +288,10 @@ void sysfs_unmerge_group(struct kobject *kobj, struct sysfs_dirent *dir_sd; struct attribute *const *attr; - dir_sd = sysfs_get_dirent(kobj->sd, grp->name); + dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name); if (dir_sd) { for (attr = grp->attrs; *attr; ++attr) - sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL); + sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); sysfs_put(dir_sd); } } @@ -311,7 +310,7 @@ int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, struct sysfs_dirent *dir_sd; int error = 0; - dir_sd = sysfs_get_dirent(kobj->sd, group_name); + dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name); if (!dir_sd) return -ENOENT; @@ -333,9 +332,9 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, { struct sysfs_dirent *dir_sd; - dir_sd = sysfs_get_dirent(kobj->sd, group_name); + dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name); if (dir_sd) { - sysfs_hash_and_remove(dir_sd, link_name, NULL); + sysfs_hash_and_remove(dir_sd, NULL, link_name); sysfs_put(dir_sd); } } diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 1750f79..963f910 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -258,9 +258,9 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) inode->i_fop = &sysfs_file_operations; break; case SYSFS_KOBJ_BIN_ATTR: - bin_attr = sd->s_attr.bin_attr; + bin_attr = sd->s_bin_attr.bin_attr; inode->i_size = bin_attr->size; - inode->i_fop = &sysfs_bin_operations; + inode->i_fop = &bin_fops; break; case SYSFS_KOBJ_LINK: inode->i_op = &sysfs_symlink_inode_operations; @@ -314,6 +314,32 @@ void sysfs_evict_inode(struct inode *inode) sysfs_put(sd); } +int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, + const char *name) +{ + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent *sd; + + if (!dir_sd) { + WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n", + name); + return -ENOENT; + } + + sysfs_addrm_start(&acxt, dir_sd); + + sd = sysfs_find_dirent(dir_sd, ns, name); + if (sd) + sysfs_remove_one(&acxt, sd); + + sysfs_addrm_finish(&acxt); + + if (sd) + return 0; + else + return -ENOENT; +} + int sysfs_permission(struct inode *inode, int mask) { struct sysfs_dirent *sd; diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 3ae3f1b..2dd4507 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -33,15 +33,13 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd, BUG_ON(!name || !parent_sd); - /* - * We don't own @target and it may be removed at any time. - * Synchronize using sysfs_symlink_target_lock. See - * sysfs_remove_dir() for details. + /* target->sd can go away beneath us but is protected with + * sysfs_assoc_lock. Fetch target_sd from it. */ - spin_lock(&sysfs_symlink_target_lock); + spin_lock(&sysfs_assoc_lock); if (target->sd) target_sd = sysfs_get(target->sd); - spin_unlock(&sysfs_symlink_target_lock); + spin_unlock(&sysfs_assoc_lock); error = -ENOENT; if (!target_sd) @@ -54,18 +52,18 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd, ns_type = sysfs_ns_type(parent_sd); if (ns_type) - sd->s_ns = target_sd->s_ns; + sd->s_ns = target->ktype->namespace(target); sd->s_symlink.target_sd = target_sd; target_sd = NULL; /* reference is now owned by the symlink */ - sysfs_addrm_start(&acxt); + sysfs_addrm_start(&acxt, parent_sd); /* Symlinks must be between directories with the same ns_type */ if (!ns_type || (ns_type == sysfs_ns_type(sd->s_symlink.target_sd->s_parent))) { if (warn) - error = sysfs_add_one(&acxt, sd, parent_sd); + error = sysfs_add_one(&acxt, sd); else - error = __sysfs_add_one(&acxt, sd, parent_sd); + error = __sysfs_add_one(&acxt, sd); } else { error = -EINVAL; WARN(1, KERN_WARNING @@ -157,17 +155,11 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ, const char *name) { const void *ns = NULL; - - /* - * We don't own @target and it may be removed at any time. - * Synchronize using sysfs_symlink_target_lock. See - * sysfs_remove_dir() for details. - */ - spin_lock(&sysfs_symlink_target_lock); + spin_lock(&sysfs_assoc_lock); if (targ->sd && sysfs_ns_type(kobj->sd)) ns = targ->sd->s_ns; - spin_unlock(&sysfs_symlink_target_lock); - sysfs_hash_and_remove(kobj->sd, name, ns); + spin_unlock(&sysfs_assoc_lock); + sysfs_hash_and_remove(kobj->sd, ns, name); } /** @@ -184,25 +176,24 @@ void sysfs_remove_link(struct kobject *kobj, const char *name) else parent_sd = kobj->sd; - sysfs_hash_and_remove(parent_sd, name, NULL); + sysfs_hash_and_remove(parent_sd, NULL, name); } EXPORT_SYMBOL_GPL(sysfs_remove_link); /** - * sysfs_rename_link_ns - rename symlink in object's directory. + * sysfs_rename_link - rename symlink in object's directory. * @kobj: object we're acting for. * @targ: object we're pointing to. * @old: previous name of the symlink. * @new: new name of the symlink. - * @new_ns: new namespace of the symlink. * * A helper function for the common rename symlink idiom. */ -int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ, - const char *old, const char *new, const void *new_ns) +int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, + const char *old, const char *new) { struct sysfs_dirent *parent_sd, *sd = NULL; - const void *old_ns = NULL; + const void *old_ns = NULL, *new_ns = NULL; int result; if (!kobj) @@ -214,7 +205,7 @@ int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ, old_ns = targ->sd->s_ns; result = -ENOENT; - sd = sysfs_get_dirent_ns(parent_sd, old, old_ns); + sd = sysfs_get_dirent(parent_sd, old_ns, old); if (!sd) goto out; @@ -224,13 +215,16 @@ int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ, if (sd->s_symlink.target_sd->s_dir.kobj != targ) goto out; - result = sysfs_rename(sd, parent_sd, new, new_ns); + if (sysfs_ns_type(parent_sd)) + new_ns = targ->ktype->namespace(targ); + + result = sysfs_rename(sd, parent_sd, new_ns, new); out: sysfs_put(sd); return result; } -EXPORT_SYMBOL_GPL(sysfs_rename_link_ns); +EXPORT_SYMBOL_GPL(sysfs_rename_link); static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, struct sysfs_dirent *target_sd, char *path) diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 0af09fb..b6deca3 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -29,13 +29,15 @@ struct sysfs_elem_symlink { }; struct sysfs_elem_attr { - union { - struct attribute *attr; - struct bin_attribute *bin_attr; - }; + struct attribute *attr; struct sysfs_open_dirent *open; }; +struct sysfs_elem_bin_attr { + struct bin_attribute *bin_attr; + struct hlist_head buffers; +}; + struct sysfs_inode_attrs { struct iattr ia_iattr; void *ia_secdata; @@ -72,6 +74,7 @@ struct sysfs_dirent { struct sysfs_elem_dir s_dir; struct sysfs_elem_symlink s_symlink; struct sysfs_elem_attr s_attr; + struct sysfs_elem_bin_attr s_bin_attr; }; unsigned short s_flags; @@ -112,7 +115,6 @@ static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd) } #ifdef CONFIG_DEBUG_LOCK_ALLOC - #define sysfs_dirent_init_lockdep(sd) \ do { \ struct attribute *attr = sd->s_attr.attr; \ @@ -122,31 +124,15 @@ do { \ \ lockdep_init_map(&sd->dep_map, "s_active", key, 0); \ } while (0) - -/* Test for attributes that want to ignore lockdep for read-locking */ -static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd) -{ - int type = sysfs_type(sd); - - return (type == SYSFS_KOBJ_ATTR || type == SYSFS_KOBJ_BIN_ATTR) && - sd->s_attr.attr->ignore_lockdep; -} - #else - #define sysfs_dirent_init_lockdep(sd) do {} while (0) - -static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd) -{ - return true; -} - #endif /* * Context structure to be used while adding/removing nodes. */ struct sysfs_addrm_cxt { + struct sysfs_dirent *parent_sd; struct sysfs_dirent *removed; }; @@ -170,37 +156,38 @@ extern struct kmem_cache *sysfs_dir_cachep; * dir.c */ extern struct mutex sysfs_mutex; -extern spinlock_t sysfs_symlink_target_lock; +extern spinlock_t sysfs_assoc_lock; extern const struct dentry_operations sysfs_dentry_ops; extern const struct file_operations sysfs_dir_operations; extern const struct inode_operations sysfs_dir_inode_operations; +struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd); struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd); void sysfs_put_active(struct sysfs_dirent *sd); -void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt); -void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name); -int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, - struct sysfs_dirent *parent_sd); -int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, - struct sysfs_dirent *parent_sd); -void sysfs_remove(struct sysfs_dirent *sd); -int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name, - const void *ns); +void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *parent_sd); +int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); +int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); +void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, - const unsigned char *name, - const void *ns); + const void *ns, + const unsigned char *name); +struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, + const void *ns, + const unsigned char *name); struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type); void release_sysfs_dirent(struct sysfs_dirent *sd); int sysfs_create_subdir(struct kobject *kobj, const char *name, struct sysfs_dirent **p_sd); +void sysfs_remove_subdir(struct sysfs_dirent *sd); int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd, - const char *new_name, const void *new_ns); + const void *ns, const char *new_name); static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd) { @@ -231,21 +218,25 @@ int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); +int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, + const char *name); int sysfs_inode_init(void); /* * file.c */ extern const struct file_operations sysfs_file_operations; -extern const struct file_operations sysfs_bin_operations; int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, int type); -int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd, - const struct attribute *attr, int type, - umode_t amode, const void *ns); -void sysfs_unmap_bin_file(struct sysfs_dirent *sd); +int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, + const struct attribute *attr, int type, umode_t amode); +/* + * bin.c + */ +extern const struct file_operations bin_fops; +void unmap_bin_file(struct sysfs_dirent *attr_sd); /* * symlink.c diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index cc1febd..6e025e0 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2563,9 +2563,9 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf, unsigned int from, to, ffs = chance(1, 2); unsigned char *p = (void *)buf; - from = prandom_u32() % len; - /* Corruption span max to end of write unit */ - to = min(len, ALIGN(from + 1, c->max_write_size)); + from = prandom_u32() % (len + 1); + /* Corruption may only span one max. write unit */ + to = min(len, ALIGN(from, c->max_write_size)); ubifs_warn("filled bytes %u-%u with %s", from, to - 1, ffs ? "0xFFs" : "random data"); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index ea41649..6b4947f 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -192,7 +192,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, struct ubifs_dent_node *dent; struct ubifs_info *c = dir->i_sb->s_fs_info; - dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino); + dbg_gen("'%.*s' in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, dir->i_ino); if (dentry->d_name.len > UBIFS_MAX_NLEN) return ERR_PTR(-ENAMETOOLONG); @@ -224,8 +225,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, * checking. */ err = PTR_ERR(inode); - ubifs_err("dead directory entry '%pd', error %d", - dentry, err); + ubifs_err("dead directory entry '%.*s', error %d", + dentry->d_name.len, dentry->d_name.name, err); ubifs_ro_mode(c, err); goto out; } @@ -259,8 +260,8 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode, * parent directory inode. */ - dbg_gen("dent '%pd', mode %#hx in dir ino %lu", - dentry, mode, dir->i_ino); + dbg_gen("dent '%.*s', mode %#hx in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); err = ubifs_budget_space(c, &req); if (err) @@ -508,8 +509,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, * changing the parent inode. */ - dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu", - dentry, inode->i_ino, + dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, inode->i_ino, inode->i_nlink, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); @@ -565,8 +566,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) * deletions. */ - dbg_gen("dent '%pd' from ino %lu (nlink %d) in dir ino %lu", - dentry, inode->i_ino, + dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, inode->i_ino, inode->i_nlink, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); @@ -655,8 +656,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) * because we have extra space reserved for deletions. */ - dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry, - inode->i_ino, dir->i_ino); + dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len, + dentry->d_name.name, inode->i_ino, dir->i_ino); ubifs_assert(mutex_is_locked(&dir->i_mutex)); ubifs_assert(mutex_is_locked(&inode->i_mutex)); err = check_dir_empty(c, dentry->d_inode); @@ -715,8 +716,8 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) * directory inode. */ - dbg_gen("dent '%pd', mode %#hx in dir ino %lu", - dentry, mode, dir->i_ino); + dbg_gen("dent '%.*s', mode %#hx in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); err = ubifs_budget_space(c, &req); if (err) @@ -777,7 +778,8 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, * directory inode. */ - dbg_gen("dent '%pd' in dir ino %lu", dentry, dir->i_ino); + dbg_gen("dent '%.*s' in dir ino %lu", + dentry->d_name.len, dentry->d_name.name, dir->i_ino); if (!new_valid_dev(rdev)) return -EINVAL; @@ -851,8 +853,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, * directory inode. */ - dbg_gen("dent '%pd', target '%s' in dir ino %lu", dentry, - symname, dir->i_ino); + dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len, + dentry->d_name.name, symname, dir->i_ino); if (len > UBIFS_MAX_INO_DATA) return -ENAMETOOLONG; @@ -977,9 +979,10 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, * separately. */ - dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu", - old_dentry, old_inode->i_ino, old_dir->i_ino, - new_dentry, new_dir->i_ino); + dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in dir ino %lu", + old_dentry->d_name.len, old_dentry->d_name.name, + old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len, + new_dentry->d_name.name, new_dir->i_ino); ubifs_assert(mutex_is_locked(&old_dir->i_mutex)); ubifs_assert(mutex_is_locked(&new_dir->i_mutex)); if (unlink) diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 9718da8..76ca53c 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -668,7 +668,8 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) ubifs_assert(!wbuf->used); for (i = 0; ; i++) { - int space_before, space_after; + int space_before = c->leb_size - wbuf->offs - wbuf->used; + int space_after; cond_resched(); diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 0e045e7..afaad07 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -933,8 +933,10 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, int move = (old_dir != new_dir); struct ubifs_inode *uninitialized_var(new_ui); - dbg_jnl("dent '%pd' in dir ino %lu to dent '%pd' in dir ino %lu", - old_dentry, old_dir->i_ino, new_dentry, new_dir->i_ino); + dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu", + old_dentry->d_name.len, old_dentry->d_name.name, + old_dir->i_ino, new_dentry->d_name.len, + new_dentry->d_name.name, new_dir->i_ino); ubifs_assert(ubifs_inode(old_dir)->data_len == 0); ubifs_assert(ubifs_inode(new_dir)->data_len == 0); ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex)); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f69daa5..3e4aa72 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1630,10 +1630,8 @@ static int ubifs_remount_rw(struct ubifs_info *c) } c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL); - if (!c->write_reserve_buf) { - err = -ENOMEM; + if (!c->write_reserve_buf) goto out; - } err = ubifs_lpt_init(c, 0, 1); if (err) @@ -2066,10 +2064,8 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) } sb->s_root = d_make_root(root); - if (!sb->s_root) { - err = -ENOMEM; + if (!sb->s_root) goto out_umount; - } mutex_unlock(&c->umount_mutex); return 0; diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 5e0a63b..0f7139b 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -303,8 +303,8 @@ int ubifs_setxattr(struct dentry *dentry, const char *name, union ubifs_key key; int err, type; - dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", name, - host->i_ino, dentry, size); + dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name, + host->i_ino, dentry->d_name.len, dentry->d_name.name, size); ubifs_assert(mutex_is_locked(&host->i_mutex)); if (size > UBIFS_MAX_INO_DATA) @@ -367,8 +367,8 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, union ubifs_key key; int err; - dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name, - host->i_ino, dentry, size); + dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name, + host->i_ino, dentry->d_name.len, dentry->d_name.name, size); err = check_namespace(&nm); if (err < 0) @@ -426,8 +426,8 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) int err, len, written = 0; struct qstr nm = { .name = NULL }; - dbg_gen("ino %lu ('%pd'), buffer size %zd", host->i_ino, - dentry, size); + dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino, + dentry->d_name.len, dentry->d_name.name, size); len = host_ui->xattr_names + host_ui->xattr_cnt; if (!buffer) @@ -529,8 +529,8 @@ int ubifs_removexattr(struct dentry *dentry, const char *name) union ubifs_key key; int err; - dbg_gen("xattr '%s', ino %lu ('%pd')", name, - host->i_ino, dentry); + dbg_gen("xattr '%s', ino %lu ('%.*s')", name, + host->i_ino, dentry->d_name.len, dentry->d_name.name); ubifs_assert(mutex_is_locked(&host->i_mutex)); err = check_namespace(&nm); diff --git a/fs/udf/super.c b/fs/udf/super.c index 3306b9f..9121938 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -76,9 +76,6 @@ #define UDF_DEFAULT_BLOCKSIZE 2048 -#define VSD_FIRST_SECTOR_OFFSET 32768 -#define VSD_MAX_SECTOR_OFFSET 0x800000 - enum { UDF_MAX_LINKS = 0xffff }; /* These are the "meat" - everything else is stuffing */ @@ -688,7 +685,7 @@ out_unlock: static loff_t udf_check_vsd(struct super_block *sb) { struct volStructDesc *vsd = NULL; - loff_t sector = VSD_FIRST_SECTOR_OFFSET; + loff_t sector = 32768; int sectorsize; struct buffer_head *bh = NULL; int nsr02 = 0; @@ -706,18 +703,8 @@ static loff_t udf_check_vsd(struct super_block *sb) udf_debug("Starting at sector %u (%ld byte sectors)\n", (unsigned int)(sector >> sb->s_blocksize_bits), sb->s_blocksize); - /* Process the sequence (if applicable). The hard limit on the sector - * offset is arbitrary, hopefully large enough so that all valid UDF - * filesystems will be recognised. There is no mention of an upper - * bound to the size of the volume recognition area in the standard. - * The limit will prevent the code to read all the sectors of a - * specially crafted image (like a bluray disc full of CD001 sectors), - * potentially causing minutes or even hours of uninterruptible I/O - * activity. This actually happened with uninitialised SSD partitions - * (all 0xFF) before the check for the limit and all valid IDs were - * added */ - for (; !nsr02 && !nsr03 && sector < VSD_MAX_SECTOR_OFFSET; - sector += sectorsize) { + /* Process the sequence (if applicable) */ + for (; !nsr02 && !nsr03; sector += sectorsize) { /* Read a block */ bh = udf_tread(sb, sector >> sb->s_blocksize_bits); if (!bh) @@ -727,7 +714,10 @@ static loff_t udf_check_vsd(struct super_block *sb) vsd = (struct volStructDesc *)(bh->b_data + (sector & (sb->s_blocksize - 1))); - if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001, + if (vsd->stdIdent[0] == 0) { + brelse(bh); + break; + } else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CD001, VSD_STD_ID_LEN)) { switch (vsd->structType) { case 0: @@ -763,17 +753,6 @@ static loff_t udf_check_vsd(struct super_block *sb) else if (!strncmp(vsd->stdIdent, VSD_STD_ID_NSR03, VSD_STD_ID_LEN)) nsr03 = sector; - else if (!strncmp(vsd->stdIdent, VSD_STD_ID_BOOT2, - VSD_STD_ID_LEN)) - ; /* nothing */ - else if (!strncmp(vsd->stdIdent, VSD_STD_ID_CDW02, - VSD_STD_ID_LEN)) - ; /* nothing */ - else { - /* invalid id : end of volume recognition area */ - brelse(bh); - break; - } brelse(bh); } @@ -781,8 +760,7 @@ static loff_t udf_check_vsd(struct super_block *sb) return nsr03; else if (nsr02) return nsr02; - else if (!bh && sector - (sbi->s_session << sb->s_blocksize_bits) == - VSD_FIRST_SECTOR_OFFSET) + else if (sector - (sbi->s_session << sb->s_blocksize_bits) == 32768) return -1; else return 0; @@ -1292,9 +1270,6 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block) * PHYSICAL partitions are already set up */ type1_idx = i; -#ifdef UDFFS_DEBUG - map = NULL; /* supress 'maybe used uninitialized' warning */ -#endif for (i = 0; i < sbi->s_partitions; i++) { map = &sbi->s_partmaps[i]; @@ -1916,9 +1891,7 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt, return 0; } if (nsr_off == -1) - udf_debug("Failed to read sector at offset %d. " - "Assuming open disc. Skipping validity " - "check\n", VSD_FIRST_SECTOR_OFFSET); + udf_debug("Failed to read byte 32768. Assuming open disc. Skipping validity check\n"); if (!sbi->s_last_block) sbi->s_last_block = udf_get_last_block(sb); } else { diff --git a/fs/utimes.c b/fs/utimes.c index aa138d6..f4fb7ec 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -53,7 +53,6 @@ static int utimes_common(struct path *path, struct timespec *times) int error; struct iattr newattrs; struct inode *inode = path->dentry->d_inode; - struct inode *delegated_inode = NULL; error = mnt_want_write(path->mnt); if (error) @@ -102,15 +101,9 @@ static int utimes_common(struct path *path, struct timespec *times) goto mnt_drop_write_and_out; } } -retry_deleg: mutex_lock(&inode->i_mutex); - error = notify_change(path->dentry, &newattrs, &delegated_inode); + error = notify_change(path->dentry, &newattrs); mutex_unlock(&inode->i_mutex); - if (delegated_inode) { - error = break_deleg_wait(&delegated_inode); - if (!error) - goto retry_deleg; - } mnt_drop_write_and_out: mnt_drop_write(path->mnt); diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index c21f435..0719e4d 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -66,14 +66,12 @@ xfs-y += xfs_alloc.o \ xfs_bmap_btree.o \ xfs_btree.o \ xfs_da_btree.o \ - xfs_da_format.o \ xfs_dir2.o \ xfs_dir2_block.o \ xfs_dir2_data.o \ xfs_dir2_leaf.o \ xfs_dir2_node.o \ xfs_dir2_sf.o \ - xfs_dquot_buf.o \ xfs_ialloc.o \ xfs_ialloc_btree.o \ xfs_icreate_item.o \ @@ -105,11 +103,7 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ xfs_qm_bhv.o \ xfs_qm.o \ xfs_quotaops.o - -# xfs_rtbitmap is shared with libxfs -xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o \ - xfs_rtbitmap.o - +xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o xfs-$(CONFIG_PROC_FS) += xfs_stats.o xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 66a36be..a02cfb9 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -63,6 +63,17 @@ kmem_alloc(size_t size, xfs_km_flags_t flags) } void * +kmem_zalloc(size_t size, xfs_km_flags_t flags) +{ + void *ptr; + + ptr = kmem_alloc(size, flags); + if (ptr) + memset((char *)ptr, 0, (int)size); + return ptr; +} + +void * kmem_zalloc_large(size_t size, xfs_km_flags_t flags) { void *ptr; @@ -117,3 +128,14 @@ kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags) congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); } + +void * +kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags) +{ + void *ptr; + + ptr = kmem_zone_alloc(zone, flags); + if (ptr) + memset((char *)ptr, 0, kmem_cache_size(zone)); + return ptr; +} diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 64db0e5..3a7371c 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -32,7 +32,6 @@ typedef unsigned __bitwise xfs_km_flags_t; #define KM_NOSLEEP ((__force xfs_km_flags_t)0x0002u) #define KM_NOFS ((__force xfs_km_flags_t)0x0004u) #define KM_MAYFAIL ((__force xfs_km_flags_t)0x0008u) -#define KM_ZERO ((__force xfs_km_flags_t)0x0010u) /* * We use a special process flag to avoid recursive callbacks into @@ -44,7 +43,7 @@ kmem_flags_convert(xfs_km_flags_t flags) { gfp_t lflags; - BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_ZERO)); + BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL)); if (flags & KM_NOSLEEP) { lflags = GFP_ATOMIC | __GFP_NOWARN; @@ -53,14 +52,11 @@ kmem_flags_convert(xfs_km_flags_t flags) if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) lflags &= ~__GFP_FS; } - - if (flags & KM_ZERO) - lflags |= __GFP_ZERO; - return lflags; } extern void *kmem_alloc(size_t, xfs_km_flags_t); +extern void *kmem_zalloc(size_t, xfs_km_flags_t); extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t); extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t); extern void kmem_free(const void *); @@ -68,12 +64,6 @@ extern void kmem_free(const void *); extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); -static inline void * -kmem_zalloc(size_t size, xfs_km_flags_t flags) -{ - return kmem_alloc(size, flags | KM_ZERO); -} - /* * Zone interfaces */ @@ -112,11 +102,6 @@ kmem_zone_destroy(kmem_zone_t *zone) } extern void *kmem_zone_alloc(kmem_zone_t *, xfs_km_flags_t); - -static inline void * -kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags) -{ - return kmem_zone_alloc(zone, flags | KM_ZERO); -} +extern void *kmem_zone_zalloc(kmem_zone_t *, xfs_km_flags_t); #endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 370eb3e..0e2f37e 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -16,15 +16,15 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" -#include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" +#include "xfs_acl.h" +#include "xfs_attr.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" #include "xfs_ag.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_inode.h" -#include "xfs_acl.h" -#include "xfs_attr.h" #include "xfs_trace.h" #include <linux/slab.h> #include <linux/xattr.h> diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 3fc1098..1cb740a 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -128,6 +128,8 @@ typedef struct xfs_agf { extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); +extern const struct xfs_buf_ops xfs_agf_buf_ops; + /* * Size of the unlinked inode hash table in the agi. */ @@ -189,6 +191,8 @@ typedef struct xfs_agi { extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp); +extern const struct xfs_buf_ops xfs_agi_buf_ops; + /* * The third a.g. block contains the a.g. freelist, an array * of block pointers to blocks owned by the allocation btree code. diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 9eab2df..5a1393f 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -17,25 +17,25 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_shared.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_alloc_btree.h" #include "xfs_alloc.h" #include "xfs_extent_busy.h" #include "xfs_error.h" #include "xfs_cksum.h" #include "xfs_trace.h" -#include "xfs_trans.h" #include "xfs_buf_item.h" -#include "xfs_log.h" struct workqueue_struct *xfs_alloc_wq; @@ -2294,8 +2294,6 @@ xfs_read_agf( { int error; - trace_xfs_read_agf(mp, agno); - ASSERT(agno != NULLAGNUMBER); error = xfs_trans_read_buf( mp, tp, mp->m_ddev_targp, @@ -2326,9 +2324,8 @@ xfs_alloc_read_agf( struct xfs_perag *pag; /* per allocation group data */ int error; - trace_xfs_alloc_read_agf(mp, agno); - ASSERT(agno != NULLAGNUMBER); + error = xfs_read_agf(mp, tp, agno, (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, bpp); diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index feacb06..99d0a61 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -231,4 +231,7 @@ xfs_alloc_get_rec( xfs_extlen_t *len, /* output: length of extent */ int *stat); /* output: success/failure */ +extern const struct xfs_buf_ops xfs_agf_buf_ops; +extern const struct xfs_buf_ops xfs_agfl_buf_ops; + #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 1308542..cafc902 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -17,21 +17,23 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_btree.h" +#include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_btree.h" #include "xfs_alloc.h" #include "xfs_extent_busy.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_cksum.h" -#include "xfs_trans.h" STATIC struct xfs_btree_cur * diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 45e189e..e3a3f74 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -27,6 +27,39 @@ struct xfs_btree_cur; struct xfs_mount; /* + * There are two on-disk btrees, one sorted by blockno and one sorted + * by blockcount and blockno. All blocks look the same to make the code + * simpler; if we have time later, we'll make the optimizations. + */ +#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */ +#define XFS_ABTB_CRC_MAGIC 0x41423342 /* 'AB3B' */ +#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */ +#define XFS_ABTC_CRC_MAGIC 0x41423343 /* 'AB3C' */ + +/* + * Data record/key structure + */ +typedef struct xfs_alloc_rec { + __be32 ar_startblock; /* starting block number */ + __be32 ar_blockcount; /* count of free blocks */ +} xfs_alloc_rec_t, xfs_alloc_key_t; + +typedef struct xfs_alloc_rec_incore { + xfs_agblock_t ar_startblock; /* starting block number */ + xfs_extlen_t ar_blockcount; /* count of free blocks */ +} xfs_alloc_rec_incore_t; + +/* btree pointer type */ +typedef __be32 xfs_alloc_ptr_t; + +/* + * Block numbers in the AG: + * SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3. + */ +#define XFS_BNO_BLOCK(mp) ((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1)) +#define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1)) + +/* * Btree block header size depends on a superblock flag. */ #define XFS_ALLOC_BLOCK_LEN(mp) \ @@ -62,4 +95,6 @@ extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, xfs_agnumber_t, xfs_btnum_t); extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int); +extern const struct xfs_buf_ops xfs_allocbt_buf_ops; + #endif /* __XFS_ALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 71c8c9d..e51e581 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -16,15 +16,14 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_trans.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_alloc.h" #include "xfs_error.h" @@ -32,8 +31,6 @@ #include "xfs_trace.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" #include <linux/aio.h> #include <linux/gfp.h> #include <linux/mpage.h> @@ -336,7 +333,7 @@ xfs_map_blocks( if (type == XFS_IO_DELALLOC && (!nimaps || isnullstartblock(imap->br_startblock))) { - error = xfs_iomap_write_allocate(ip, offset, imap); + error = xfs_iomap_write_allocate(ip, offset, count, imap); if (!error) trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); return -XFS_ERROR(error); @@ -1572,7 +1569,8 @@ xfs_vm_write_begin( ASSERT(len <= PAGE_CACHE_SIZE); - page = grab_cache_page_write_begin(mapping, index, flags); + page = grab_cache_page_write_begin(mapping, index, + flags | AOP_FLAG_NOFS); if (!page) return -ENOMEM; diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index b861270..ddcf226 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -17,24 +17,23 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" #include "xfs_attr_sf.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_alloc.h" -#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_bmap_btree.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" #include "xfs_attr_remote.h" @@ -42,7 +41,6 @@ #include "xfs_quota.h" #include "xfs_trans_space.h" #include "xfs_trace.h" -#include "xfs_dinode.h" /* * xfs_attr.c diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 09480c57..bb24b07 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -18,20 +18,22 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_inode.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" #include "xfs_alloc.h" +#include "xfs_btree.h" #include "xfs_attr_remote.h" -#include "xfs_trans.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_attr.h" @@ -39,8 +41,7 @@ #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trace.h" -#include "xfs_dinode.h" -#include "xfs_dir2.h" +#include "xfs_trans_priv.h" /* * Look at all the extents for this logical region, @@ -231,13 +232,13 @@ xfs_attr3_node_inactive( } node = bp->b_addr; - dp->d_ops->node_hdr_from_disk(&ichdr, node); + xfs_da3_node_hdr_from_disk(&ichdr, node); parent_blkno = bp->b_bn; if (!ichdr.count) { xfs_trans_brelse(*trans, bp); return 0; } - btree = dp->d_ops->node_tree_p(node); + btree = xfs_da3_node_tree_p(node); child_fsb = be32_to_cpu(btree[0].before); xfs_trans_brelse(*trans, bp); /* no locks for later trans */ diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 7b126f4..86db20a 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -18,31 +18,32 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_inode.h" -#include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap_btree.h" -#include "xfs_bmap.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_alloc.h" +#include "xfs_btree.h" #include "xfs_attr_sf.h" #include "xfs_attr_remote.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_inode_item.h" +#include "xfs_bmap.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" -#include "xfs_dinode.h" -#include "xfs_dir2.h" /* @@ -917,8 +918,8 @@ xfs_attr3_leaf_to_node( if (error) goto out; node = bp1->b_addr; - dp->d_ops->node_hdr_from_disk(&icnodehdr, node); - btree = dp->d_ops->node_tree_p(node); + xfs_da3_node_hdr_from_disk(&icnodehdr, node); + btree = xfs_da3_node_tree_p(node); leaf = bp2->b_addr; xfs_attr3_leaf_hdr_from_disk(&icleafhdr, leaf); @@ -928,7 +929,7 @@ xfs_attr3_leaf_to_node( btree[0].hashval = entries[icleafhdr.count - 1].hashval; btree[0].before = cpu_to_be32(blkno); icnodehdr.count = 1; - dp->d_ops->node_hdr_to_disk(node, &icnodehdr); + xfs_da3_node_hdr_to_disk(node, &icnodehdr); xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(mp) - 1); error = 0; out: diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index 3ec5ec0..c102213 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -19,6 +19,16 @@ #ifndef __XFS_ATTR_LEAF_H__ #define __XFS_ATTR_LEAF_H__ +/* + * Attribute storage layout, internal structure, access macros, etc. + * + * Attribute lists are structured around Btrees where all the data + * elements are in the leaf nodes. Attribute names are hashed into an int, + * then that int is used as the index into the Btree. Since the hashval + * of an attribute name may not be unique, we may have duplicate keys. The + * internal links in the Btree are logical block offsets into the file. + */ + struct attrlist; struct attrlist_cursor_kern; struct xfs_attr_list_context; @@ -28,6 +38,226 @@ struct xfs_da_state_blk; struct xfs_inode; struct xfs_trans; +/*======================================================================== + * Attribute structure when equal to XFS_LBSIZE(mp) bytes. + *========================================================================*/ + +/* + * This is the structure of the leaf nodes in the Btree. + * + * Struct leaf_entry's are packed from the top. Name/values grow from the + * bottom but are not packed. The freemap contains run-length-encoded entries + * for the free bytes after the leaf_entry's, but only the N largest such, + * smaller runs are dropped. When the freemap doesn't show enough space + * for an allocation, we compact the name/value area and try again. If we + * still don't have enough space, then we have to split the block. The + * name/value structs (both local and remote versions) must be 32bit aligned. + * + * Since we have duplicate hash keys, for each key that matches, compare + * the actual name string. The root and intermediate node search always + * takes the first-in-the-block key match found, so we should only have + * to work "forw"ard. If none matches, continue with the "forw"ard leaf + * nodes until the hash key changes or the attribute name is found. + * + * We store the fact that an attribute is a ROOT/USER/SECURE attribute in + * the leaf_entry. The namespaces are independent only because we also look + * at the namespace bit when we are looking for a matching attribute name. + * + * We also store an "incomplete" bit in the leaf_entry. It shows that an + * attribute is in the middle of being created and should not be shown to + * the user if we crash during the time that the bit is set. We clear the + * bit when we have finished setting up the attribute. We do this because + * we cannot create some large attributes inside a single transaction, and we + * need some indication that we weren't finished if we crash in the middle. + */ +#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */ + +typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */ + __be16 base; /* base of free region */ + __be16 size; /* length of free region */ +} xfs_attr_leaf_map_t; + +typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */ + xfs_da_blkinfo_t info; /* block type, links, etc. */ + __be16 count; /* count of active leaf_entry's */ + __be16 usedbytes; /* num bytes of names/values stored */ + __be16 firstused; /* first used byte in name area */ + __u8 holes; /* != 0 if blk needs compaction */ + __u8 pad1; + xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE]; + /* N largest free regions */ +} xfs_attr_leaf_hdr_t; + +typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */ + __be32 hashval; /* hash value of name */ + __be16 nameidx; /* index into buffer of name/value */ + __u8 flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */ + __u8 pad2; /* unused pad byte */ +} xfs_attr_leaf_entry_t; + +typedef struct xfs_attr_leaf_name_local { + __be16 valuelen; /* number of bytes in value */ + __u8 namelen; /* length of name bytes */ + __u8 nameval[1]; /* name/value bytes */ +} xfs_attr_leaf_name_local_t; + +typedef struct xfs_attr_leaf_name_remote { + __be32 valueblk; /* block number of value bytes */ + __be32 valuelen; /* number of bytes in value */ + __u8 namelen; /* length of name bytes */ + __u8 name[1]; /* name bytes */ +} xfs_attr_leaf_name_remote_t; + +typedef struct xfs_attr_leafblock { + xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */ + xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */ + xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */ + xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */ +} xfs_attr_leafblock_t; + +/* + * CRC enabled leaf structures. Called "version 3" structures to match the + * version number of the directory and dablk structures for this feature, and + * attr2 is already taken by the variable inode attribute fork size feature. + */ +struct xfs_attr3_leaf_hdr { + struct xfs_da3_blkinfo info; + __be16 count; + __be16 usedbytes; + __be16 firstused; + __u8 holes; + __u8 pad1; + struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE]; + __be32 pad2; /* 64 bit alignment */ +}; + +#define XFS_ATTR3_LEAF_CRC_OFF (offsetof(struct xfs_attr3_leaf_hdr, info.crc)) + +struct xfs_attr3_leafblock { + struct xfs_attr3_leaf_hdr hdr; + struct xfs_attr_leaf_entry entries[1]; + + /* + * The rest of the block contains the following structures after the + * leaf entries, growing from the bottom up. The variables are never + * referenced, the locations accessed purely from helper functions. + * + * struct xfs_attr_leaf_name_local + * struct xfs_attr_leaf_name_remote + */ +}; + +/* + * incore, neutral version of the attribute leaf header + */ +struct xfs_attr3_icleaf_hdr { + __uint32_t forw; + __uint32_t back; + __uint16_t magic; + __uint16_t count; + __uint16_t usedbytes; + __uint16_t firstused; + __u8 holes; + struct { + __uint16_t base; + __uint16_t size; + } freemap[XFS_ATTR_LEAF_MAPSIZE]; +}; + +/* + * Flags used in the leaf_entry[i].flags field. + * NOTE: the INCOMPLETE bit must not collide with the flags bits specified + * on the system call, they are "or"ed together for various operations. + */ +#define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */ +#define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */ +#define XFS_ATTR_SECURE_BIT 2 /* limit access to secure attrs */ +#define XFS_ATTR_INCOMPLETE_BIT 7 /* attr in middle of create/delete */ +#define XFS_ATTR_LOCAL (1 << XFS_ATTR_LOCAL_BIT) +#define XFS_ATTR_ROOT (1 << XFS_ATTR_ROOT_BIT) +#define XFS_ATTR_SECURE (1 << XFS_ATTR_SECURE_BIT) +#define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT) + +/* + * Conversion macros for converting namespace bits from argument flags + * to ondisk flags. + */ +#define XFS_ATTR_NSP_ARGS_MASK (ATTR_ROOT | ATTR_SECURE) +#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE) +#define XFS_ATTR_NSP_ONDISK(flags) ((flags) & XFS_ATTR_NSP_ONDISK_MASK) +#define XFS_ATTR_NSP_ARGS(flags) ((flags) & XFS_ATTR_NSP_ARGS_MASK) +#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x) (((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\ + ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0)) +#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x) (((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\ + ((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0)) + +/* + * Alignment for namelist and valuelist entries (since they are mixed + * there can be only one alignment value) + */ +#define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t)) + +static inline int +xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp) +{ + if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) + return sizeof(struct xfs_attr3_leaf_hdr); + return sizeof(struct xfs_attr_leaf_hdr); +} + +static inline struct xfs_attr_leaf_entry * +xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp) +{ + if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) + return &((struct xfs_attr3_leafblock *)leafp)->entries[0]; + return &leafp->entries[0]; +} + +/* + * Cast typed pointers for "local" and "remote" name/value structs. + */ +static inline char * +xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx) +{ + struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp); + + return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)]; +} + +static inline xfs_attr_leaf_name_remote_t * +xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx) +{ + return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx); +} + +static inline xfs_attr_leaf_name_local_t * +xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx) +{ + return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx); +} + +/* + * Calculate total bytes used (including trailing pad for alignment) for + * a "local" name/value structure, a "remote" name/value structure, and + * a pointer which might be either. + */ +static inline int xfs_attr_leaf_entsize_remote(int nlen) +{ + return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \ + XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); +} + +static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen) +{ + return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) + + XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); +} + +static inline int xfs_attr_leaf_entsize_local_max(int bsize) +{ + return (((bsize) >> 1) + ((bsize) >> 2)); +} + /* * Used to keep a list of "remote value" extents when unlinking an inode. */ @@ -106,4 +336,6 @@ void xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to, void xfs_attr3_leaf_hdr_to_disk(struct xfs_attr_leafblock *to, struct xfs_attr3_icleaf_hdr *from); +extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops; + #endif /* __XFS_ATTR_LEAF_H__ */ diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 2d174b1..cbc80d4 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -18,29 +18,31 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_alloc.h" +#include "xfs_btree.h" +#include "xfs_attr_sf.h" +#include "xfs_attr_remote.h" +#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_attr.h" -#include "xfs_attr_sf.h" -#include "xfs_attr_remote.h" #include "xfs_attr_leaf.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" -#include "xfs_dinode.h" -#include "xfs_dir2.h" STATIC int xfs_attr_shortform_compare(const void *a, const void *b) @@ -227,7 +229,6 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) struct xfs_da_node_entry *btree; int error, i; struct xfs_buf *bp; - struct xfs_inode *dp = context->dp; trace_xfs_attr_node_list(context); @@ -241,7 +242,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) */ bp = NULL; if (cursor->blkno > 0) { - error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1, + error = xfs_da3_node_read(NULL, context->dp, cursor->blkno, -1, &bp, XFS_ATTR_FORK); if ((error != 0) && (error != EFSCORRUPTED)) return(error); @@ -291,7 +292,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) for (;;) { __uint16_t magic; - error = xfs_da3_node_read(NULL, dp, + error = xfs_da3_node_read(NULL, context->dp, cursor->blkno, -1, &bp, XFS_ATTR_FORK); if (error) @@ -311,8 +312,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) return XFS_ERROR(EFSCORRUPTED); } - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); + xfs_da3_node_hdr_from_disk(&nodehdr, node); + btree = xfs_da3_node_tree_p(node); for (i = 0; i < nodehdr.count; btree++, i++) { if (cursor->hashval <= be32_to_cpu(btree->hashval)) { @@ -348,7 +349,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context) break; cursor->blkno = leafhdr.forw; xfs_trans_brelse(NULL, bp); - error = xfs_attr3_leaf_read(NULL, dp, cursor->blkno, -1, &bp); + error = xfs_attr3_leaf_read(NULL, context->dp, cursor->blkno, -1, + &bp); if (error) return error; } diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c index 739e0a5..712a502 100644 --- a/fs/xfs/xfs_attr_remote.c +++ b/fs/xfs/xfs_attr_remote.c @@ -18,19 +18,20 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" +#include "xfs_error.h" #include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_alloc.h" -#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" @@ -41,7 +42,6 @@ #include "xfs_trace.h" #include "xfs_cksum.h" #include "xfs_buf_item.h" -#include "xfs_error.h" #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/xfs_attr_remote.h index 5a9acfa..92a8fd7 100644 --- a/fs/xfs/xfs_attr_remote.h +++ b/fs/xfs/xfs_attr_remote.h @@ -18,6 +18,35 @@ #ifndef __XFS_ATTR_REMOTE_H__ #define __XFS_ATTR_REMOTE_H__ +#define XFS_ATTR3_RMT_MAGIC 0x5841524d /* XARM */ + +/* + * There is one of these headers per filesystem block in a remote attribute. + * This is done to ensure there is a 1:1 mapping between the attribute value + * length and the number of blocks needed to store the attribute. This makes the + * verification of a buffer a little more complex, but greatly simplifies the + * allocation, reading and writing of these attributes as we don't have to guess + * the number of blocks needed to store the attribute data. + */ +struct xfs_attr3_rmt_hdr { + __be32 rm_magic; + __be32 rm_offset; + __be32 rm_bytes; + __be32 rm_crc; + uuid_t rm_uuid; + __be64 rm_owner; + __be64 rm_blkno; + __be64 rm_lsn; +}; + +#define XFS_ATTR3_RMT_CRC_OFF offsetof(struct xfs_attr3_rmt_hdr, rm_crc) + +#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize) \ + ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ + sizeof(struct xfs_attr3_rmt_hdr) : 0)) + +extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops; + int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen); int xfs_attr_rmtval_get(struct xfs_da_args *args); diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c index 0e8885a..4822884 100644 --- a/fs/xfs/xfs_bit.c +++ b/fs/xfs/xfs_bit.c @@ -16,8 +16,10 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" -#include "xfs_log_format.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_buf_item.h" /* * XFS bit manipulation routines, used in non-realtime code. diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3ef11b2..f47e65c 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -17,37 +17,39 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_log.h" #include "xfs_inum.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_da_btree.h" +#include "xfs_dir2_format.h" #include "xfs_dir2.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_trans.h" +#include "xfs_mount.h" +#include "xfs_itable.h" #include "xfs_inode_item.h" #include "xfs_extfree_item.h" #include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_bmap_btree.h" #include "xfs_rtalloc.h" #include "xfs_error.h" +#include "xfs_attr_leaf.h" #include "xfs_quota.h" #include "xfs_trans_space.h" #include "xfs_buf_item.h" +#include "xfs_filestream.h" #include "xfs_trace.h" #include "xfs_symlink.h" -#include "xfs_attr_leaf.h" -#include "xfs_dinode.h" -#include "xfs_filestream.h" kmem_zone_t *xfs_bmap_free_item_zone; @@ -1137,7 +1139,6 @@ xfs_bmap_add_attrfork( int committed; /* xaction was committed */ int logflags; /* logging flags */ int error; /* error return value */ - int cancel_flags = 0; ASSERT(XFS_IFORK_Q(ip) == 0); @@ -1148,20 +1149,19 @@ xfs_bmap_add_attrfork( if (rsvd) tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - cancel_flags = XFS_TRANS_RELEASE_LOG_RES; + if (error) + goto error0; xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); - if (error) - goto trans_cancel; - cancel_flags |= XFS_TRANS_ABORT; + if (error) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); + return error; + } if (XFS_IFORK_Q(ip)) - goto trans_cancel; + goto error1; if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { /* * For inodes coming from pre-6.2 filesystems. @@ -1171,7 +1171,7 @@ xfs_bmap_add_attrfork( } ASSERT(ip->i_d.di_anextents == 0); - xfs_trans_ijoin(tp, ip, 0); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); switch (ip->i_d.di_format) { @@ -1193,7 +1193,7 @@ xfs_bmap_add_attrfork( default: ASSERT(0); error = XFS_ERROR(EINVAL); - goto trans_cancel; + goto error1; } ASSERT(ip->i_afp == NULL); @@ -1221,7 +1221,7 @@ xfs_bmap_add_attrfork( if (logflags) xfs_trans_log_inode(tp, ip, logflags); if (error) - goto bmap_cancel; + goto error2; if (!xfs_sb_version_hasattr(&mp->m_sb) || (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { __int64_t sbfields = 0; @@ -1244,16 +1244,14 @@ xfs_bmap_add_attrfork( error = xfs_bmap_finish(&tp, &flist, &committed); if (error) - goto bmap_cancel; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return error; - -bmap_cancel: + goto error2; + return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); +error2: xfs_bmap_cancel(&flist); -trans_cancel: - xfs_trans_cancel(tp, cancel_flags); +error1: xfs_iunlock(ip, XFS_ILOCK_EXCL); +error0: + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); return error; } @@ -1484,7 +1482,7 @@ xfs_bmap_search_extents( xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, "Access to block zero in inode %llu " "start_block: %llx start_off: %llx " - "blkcnt: %llx extent-state: %x lastx: %x", + "blkcnt: %llx extent-state: %x lastx: %x\n", (unsigned long long)ip->i_ino, (unsigned long long)gotp->br_startblock, (unsigned long long)gotp->br_startoff, diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 706bc3f..bb8de8e 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -17,26 +17,27 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_alloc.h" #include "xfs_btree.h" -#include "xfs_bmap_btree.h" +#include "xfs_itable.h" #include "xfs_bmap.h" #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trace.h" #include "xfs_cksum.h" -#include "xfs_dinode.h" /* * Determine the extent state. diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 6e42e1e..e367461 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -18,6 +18,9 @@ #ifndef __XFS_BMAP_BTREE_H__ #define __XFS_BMAP_BTREE_H__ +#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */ +#define XFS_BMAP_CRC_MAGIC 0x424d4133 /* 'BMA3' */ + struct xfs_btree_cur; struct xfs_btree_block; struct xfs_mount; @@ -25,6 +28,85 @@ struct xfs_inode; struct xfs_trans; /* + * Bmap root header, on-disk form only. + */ +typedef struct xfs_bmdr_block { + __be16 bb_level; /* 0 is a leaf */ + __be16 bb_numrecs; /* current # of data records */ +} xfs_bmdr_block_t; + +/* + * Bmap btree record and extent descriptor. + * l0:63 is an extent flag (value 1 indicates non-normal). + * l0:9-62 are startoff. + * l0:0-8 and l1:21-63 are startblock. + * l1:0-20 are blockcount. + */ +#define BMBT_EXNTFLAG_BITLEN 1 +#define BMBT_STARTOFF_BITLEN 54 +#define BMBT_STARTBLOCK_BITLEN 52 +#define BMBT_BLOCKCOUNT_BITLEN 21 + +typedef struct xfs_bmbt_rec { + __be64 l0, l1; +} xfs_bmbt_rec_t; + +typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ +typedef xfs_bmbt_rec_t xfs_bmdr_rec_t; + +typedef struct xfs_bmbt_rec_host { + __uint64_t l0, l1; +} xfs_bmbt_rec_host_t; + +/* + * Values and macros for delayed-allocation startblock fields. + */ +#define STARTBLOCKVALBITS 17 +#define STARTBLOCKMASKBITS (15 + XFS_BIG_BLKNOS * 20) +#define DSTARTBLOCKMASKBITS (15 + 20) +#define STARTBLOCKMASK \ + (((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) +#define DSTARTBLOCKMASK \ + (((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) + +static inline int isnullstartblock(xfs_fsblock_t x) +{ + return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK; +} + +static inline int isnulldstartblock(xfs_dfsbno_t x) +{ + return ((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK; +} + +static inline xfs_fsblock_t nullstartblock(int k) +{ + ASSERT(k < (1 << STARTBLOCKVALBITS)); + return STARTBLOCKMASK | (k); +} + +static inline xfs_filblks_t startblockval(xfs_fsblock_t x) +{ + return (xfs_filblks_t)((x) & ~STARTBLOCKMASK); +} + +/* + * Possible extent formats. + */ +typedef enum { + XFS_EXTFMT_NOSTATE = 0, + XFS_EXTFMT_HASSTATE +} xfs_exntfmt_t; + +/* + * Possible extent states. + */ +typedef enum { + XFS_EXT_NORM, XFS_EXT_UNWRITTEN, + XFS_EXT_DMAPI_OFFLINE, XFS_EXT_INVALID +} xfs_exntst_t; + +/* * Extent state and extent format macros. */ #define XFS_EXTFMT_INODE(x) \ @@ -33,6 +115,27 @@ struct xfs_trans; #define ISUNWRITTEN(x) ((x)->br_state == XFS_EXT_UNWRITTEN) /* + * Incore version of above. + */ +typedef struct xfs_bmbt_irec +{ + xfs_fileoff_t br_startoff; /* starting file offset */ + xfs_fsblock_t br_startblock; /* starting block number */ + xfs_filblks_t br_blockcount; /* number of blocks */ + xfs_exntst_t br_state; /* extent state */ +} xfs_bmbt_irec_t; + +/* + * Key structure for non-leaf levels of the tree. + */ +typedef struct xfs_bmbt_key { + __be64 br_startoff; /* starting file offset */ +} xfs_bmbt_key_t, xfs_bmdr_key_t; + +/* btree pointer type */ +typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; + +/* * Btree block header size depends on a superblock flag. */ #define XFS_BMBT_BLOCK_LEN(mp) \ @@ -140,4 +243,6 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); +extern const struct xfs_buf_ops xfs_bmbt_buf_ops; + #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 5887e41..97f952c 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -18,31 +18,31 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_trans.h" #include "xfs_extfree_item.h" #include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_bmap_btree.h" #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trans_space.h" #include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_log.h" -#include "xfs_dinode.h" /* Kernel only BMAP related definitions and functions */ @@ -965,12 +965,32 @@ xfs_free_eofblocks( return error; } -int +/* + * xfs_alloc_file_space() + * This routine allocates disk space for the given file. + * + * If alloc_type == 0, this request is for an ALLOCSP type + * request which will change the file size. In this case, no + * DMAPI event will be generated by the call. A TRUNCATE event + * will be generated later by xfs_setattr. + * + * If alloc_type != 0, this request is for a RESVSP type + * request, and a DMAPI DM_EVENT_WRITE will be generated if the + * lower block boundary byte address is less than the file's + * length. + * + * RETURNS: + * 0 on success + * errno on error + * + */ +STATIC int xfs_alloc_file_space( - struct xfs_inode *ip, + xfs_inode_t *ip, xfs_off_t offset, xfs_off_t len, - int alloc_type) + int alloc_type, + int attr_flags) { xfs_mount_t *mp = ip->i_mount; xfs_off_t count; @@ -1212,11 +1232,24 @@ xfs_zero_remaining_bytes( return error; } -int +/* + * xfs_free_file_space() + * This routine frees disk space for the given file. + * + * This routine is only called by xfs_change_file_space + * for an UNRESVSP type call. + * + * RETURNS: + * 0 on success + * errno on error + * + */ +STATIC int xfs_free_file_space( - struct xfs_inode *ip, + xfs_inode_t *ip, xfs_off_t offset, - xfs_off_t len) + xfs_off_t len, + int attr_flags) { int committed; int done; @@ -1234,6 +1267,7 @@ xfs_free_file_space( int rt; xfs_fileoff_t startoffset_fsb; xfs_trans_t *tp; + int need_iolock = 1; mp = ip->i_mount; @@ -1250,15 +1284,20 @@ xfs_free_file_space( startoffset_fsb = XFS_B_TO_FSB(mp, offset); endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len); - /* wait for the completion of any pending DIOs */ - inode_dio_wait(VFS_I(ip)); + if (attr_flags & XFS_ATTR_NOLOCK) + need_iolock = 0; + if (need_iolock) { + xfs_ilock(ip, XFS_IOLOCK_EXCL); + /* wait for the completion of any pending DIOs */ + inode_dio_wait(VFS_I(ip)); + } rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); ioffset = offset & ~(rounding - 1); error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset, -1); if (error) - goto out; + goto out_unlock_iolock; truncate_pagecache_range(VFS_I(ip), ioffset, -1); /* @@ -1272,7 +1311,7 @@ xfs_free_file_space( error = xfs_bmapi_read(ip, startoffset_fsb, 1, &imap, &nimap, 0); if (error) - goto out; + goto out_unlock_iolock; ASSERT(nimap == 0 || nimap == 1); if (nimap && imap.br_startblock != HOLESTARTBLOCK) { xfs_daddr_t block; @@ -1287,7 +1326,7 @@ xfs_free_file_space( error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1, &imap, &nimap, 0); if (error) - goto out; + goto out_unlock_iolock; ASSERT(nimap == 0 || nimap == 1); if (nimap && imap.br_startblock != HOLESTARTBLOCK) { ASSERT(imap.br_startblock != DELAYSTARTBLOCK); @@ -1373,23 +1412,27 @@ xfs_free_file_space( xfs_iunlock(ip, XFS_ILOCK_EXCL); } - out: + out_unlock_iolock: + if (need_iolock) + xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; error0: xfs_bmap_cancel(&free_list); error1: xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - goto out; + xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) : + XFS_ILOCK_EXCL); + return error; } -int +STATIC int xfs_zero_file_space( struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t len) + xfs_off_t len, + int attr_flags) { struct xfs_mount *mp = ip->i_mount; uint granularity; @@ -1410,6 +1453,9 @@ xfs_zero_file_space( ASSERT(start_boundary >= offset); ASSERT(end_boundary <= offset + len); + if (!(attr_flags & XFS_ATTR_NOLOCK)) + xfs_ilock(ip, XFS_IOLOCK_EXCL); + if (start_boundary < end_boundary - 1) { /* punch out the page cache over the conversion range */ truncate_pagecache_range(VFS_I(ip), start_boundary, @@ -1417,16 +1463,16 @@ xfs_zero_file_space( /* convert the blocks */ error = xfs_alloc_file_space(ip, start_boundary, end_boundary - start_boundary - 1, - XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT); + XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT, + attr_flags); if (error) - goto out; + goto out_unlock; /* We've handled the interior of the range, now for the edges */ - if (start_boundary != offset) { + if (start_boundary != offset) error = xfs_iozero(ip, offset, start_boundary - offset); - if (error) - goto out; - } + if (error) + goto out_unlock; if (end_boundary != offset + len) error = xfs_iozero(ip, end_boundary, @@ -1440,12 +1486,197 @@ xfs_zero_file_space( error = xfs_iozero(ip, offset, len); } -out: +out_unlock: + if (!(attr_flags & XFS_ATTR_NOLOCK)) + xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; } /* + * xfs_change_file_space() + * This routine allocates or frees disk space for the given file. + * The user specified parameters are checked for alignment and size + * limitations. + * + * RETURNS: + * 0 on success + * errno on error + * + */ +int +xfs_change_file_space( + xfs_inode_t *ip, + int cmd, + xfs_flock64_t *bf, + xfs_off_t offset, + int attr_flags) +{ + xfs_mount_t *mp = ip->i_mount; + int clrprealloc; + int error; + xfs_fsize_t fsize; + int setprealloc; + xfs_off_t startoffset; + xfs_trans_t *tp; + struct iattr iattr; + + if (!S_ISREG(ip->i_d.di_mode)) + return XFS_ERROR(EINVAL); + + switch (bf->l_whence) { + case 0: /*SEEK_SET*/ + break; + case 1: /*SEEK_CUR*/ + bf->l_start += offset; + break; + case 2: /*SEEK_END*/ + bf->l_start += XFS_ISIZE(ip); + break; + default: + return XFS_ERROR(EINVAL); + } + + /* + * length of <= 0 for resv/unresv/zero is invalid. length for + * alloc/free is ignored completely and we have no idea what userspace + * might have set it to, so set it to zero to allow range + * checks to pass. + */ + switch (cmd) { + case XFS_IOC_ZERO_RANGE: + case XFS_IOC_RESVSP: + case XFS_IOC_RESVSP64: + case XFS_IOC_UNRESVSP: + case XFS_IOC_UNRESVSP64: + if (bf->l_len <= 0) + return XFS_ERROR(EINVAL); + break; + default: + bf->l_len = 0; + break; + } + + if (bf->l_start < 0 || + bf->l_start > mp->m_super->s_maxbytes || + bf->l_start + bf->l_len < 0 || + bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) + return XFS_ERROR(EINVAL); + + bf->l_whence = 0; + + startoffset = bf->l_start; + fsize = XFS_ISIZE(ip); + + setprealloc = clrprealloc = 0; + switch (cmd) { + case XFS_IOC_ZERO_RANGE: + error = xfs_zero_file_space(ip, startoffset, bf->l_len, + attr_flags); + if (error) + return error; + setprealloc = 1; + break; + + case XFS_IOC_RESVSP: + case XFS_IOC_RESVSP64: + error = xfs_alloc_file_space(ip, startoffset, bf->l_len, + XFS_BMAPI_PREALLOC, attr_flags); + if (error) + return error; + setprealloc = 1; + break; + + case XFS_IOC_UNRESVSP: + case XFS_IOC_UNRESVSP64: + if ((error = xfs_free_file_space(ip, startoffset, bf->l_len, + attr_flags))) + return error; + break; + + case XFS_IOC_ALLOCSP: + case XFS_IOC_ALLOCSP64: + case XFS_IOC_FREESP: + case XFS_IOC_FREESP64: + /* + * These operations actually do IO when extending the file, but + * the allocation is done seperately to the zeroing that is + * done. This set of operations need to be serialised against + * other IO operations, such as truncate and buffered IO. We + * need to take the IOLOCK here to serialise the allocation and + * zeroing IO to prevent other IOLOCK holders (e.g. getbmap, + * truncate, direct IO) from racing against the transient + * allocated but not written state we can have here. + */ + xfs_ilock(ip, XFS_IOLOCK_EXCL); + if (startoffset > fsize) { + error = xfs_alloc_file_space(ip, fsize, + startoffset - fsize, 0, + attr_flags | XFS_ATTR_NOLOCK); + if (error) { + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + break; + } + } + + iattr.ia_valid = ATTR_SIZE; + iattr.ia_size = startoffset; + + error = xfs_setattr_size(ip, &iattr, + attr_flags | XFS_ATTR_NOLOCK); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + + if (error) + return error; + + clrprealloc = 1; + break; + + default: + ASSERT(0); + return XFS_ERROR(EINVAL); + } + + /* + * update the inode timestamp, mode, and prealloc flag bits + */ + tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + + if ((attr_flags & XFS_ATTR_DMI) == 0) { + ip->i_d.di_mode &= ~S_ISUID; + + /* + * Note that we don't have to worry about mandatory + * file locking being disabled here because we only + * clear the S_ISGID bit if the Group execute bit is + * on, but if it was on then mandatory locking wouldn't + * have been enabled. + */ + if (ip->i_d.di_mode & S_IXGRP) + ip->i_d.di_mode &= ~S_ISGID; + + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + } + if (setprealloc) + ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; + else if (clrprealloc) + ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + if (attr_flags & XFS_ATTR_SYNC) + xfs_trans_set_sync(tp); + return xfs_trans_commit(tp, 0); +} + +/* * We need to check that the format of the data fork in the temporary inode is * valid for the target inode before doing the swap. This is not a problem with * attr1 because of the fixed fork offset, but attr2 has a dynamically sized diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 900747b..0612609 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -93,12 +93,9 @@ int xfs_bmap_last_extent(struct xfs_trans *tp, struct xfs_inode *ip, int *is_empty); /* preallocation and hole punch interface */ -int xfs_alloc_file_space(struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t len, int alloc_type); -int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t len); -int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t len); +int xfs_change_file_space(struct xfs_inode *ip, int cmd, + xfs_flock64_t *bf, xfs_off_t offset, + int attr_flags); /* EOF block manipulation functions */ bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 9adaae4..5690e10 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -17,16 +17,18 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_buf_item.h" #include "xfs_btree.h" diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 91e34f2..06729b6 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -27,6 +27,73 @@ struct xfs_trans; extern kmem_zone_t *xfs_btree_cur_zone; /* + * This nonsense is to make -wlint happy. + */ +#define XFS_LOOKUP_EQ ((xfs_lookup_t)XFS_LOOKUP_EQi) +#define XFS_LOOKUP_LE ((xfs_lookup_t)XFS_LOOKUP_LEi) +#define XFS_LOOKUP_GE ((xfs_lookup_t)XFS_LOOKUP_GEi) + +#define XFS_BTNUM_BNO ((xfs_btnum_t)XFS_BTNUM_BNOi) +#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi) +#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) +#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) + +/* + * Generic btree header. + * + * This is a combination of the actual format used on disk for short and long + * format btrees. The first three fields are shared by both format, but the + * pointers are different and should be used with care. + * + * To get the size of the actual short or long form headers please use the size + * macros below. Never use sizeof(xfs_btree_block). + * + * The blkno, crc, lsn, owner and uuid fields are only available in filesystems + * with the crc feature bit, and all accesses to them must be conditional on + * that flag. + */ +struct xfs_btree_block { + __be32 bb_magic; /* magic number for block type */ + __be16 bb_level; /* 0 is a leaf */ + __be16 bb_numrecs; /* current # of data records */ + union { + struct { + __be32 bb_leftsib; + __be32 bb_rightsib; + + __be64 bb_blkno; + __be64 bb_lsn; + uuid_t bb_uuid; + __be32 bb_owner; + __le32 bb_crc; + } s; /* short form pointers */ + struct { + __be64 bb_leftsib; + __be64 bb_rightsib; + + __be64 bb_blkno; + __be64 bb_lsn; + uuid_t bb_uuid; + __be64 bb_owner; + __le32 bb_crc; + __be32 bb_pad; /* padding for alignment */ + } l; /* long form pointers */ + } bb_u; /* rest */ +}; + +#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */ +#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */ + +/* sizes of CRC enabled btree blocks */ +#define XFS_BTREE_SBLOCK_CRC_LEN (XFS_BTREE_SBLOCK_LEN + 40) +#define XFS_BTREE_LBLOCK_CRC_LEN (XFS_BTREE_LBLOCK_LEN + 48) + +#define XFS_BTREE_SBLOCK_CRC_OFF \ + offsetof(struct xfs_btree_block, bb_u.s.bb_crc) +#define XFS_BTREE_LBLOCK_CRC_OFF \ + offsetof(struct xfs_btree_block, bb_u.l.bb_crc) + +/* * Generic key, ptr and record wrapper structures. * * These are disk format structures, and are converted where necessary @@ -52,18 +119,6 @@ union xfs_btree_rec { }; /* - * This nonsense is to make -wlint happy. - */ -#define XFS_LOOKUP_EQ ((xfs_lookup_t)XFS_LOOKUP_EQi) -#define XFS_LOOKUP_LE ((xfs_lookup_t)XFS_LOOKUP_LEi) -#define XFS_LOOKUP_GE ((xfs_lookup_t)XFS_LOOKUP_GEi) - -#define XFS_BTNUM_BNO ((xfs_btnum_t)XFS_BTNUM_BNOi) -#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi) -#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) -#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) - -/* * For logging record fields. */ #define XFS_BB_MAGIC (1 << 0) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index c7f0b77..2634700 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -34,13 +34,12 @@ #include <linux/backing-dev.h> #include <linux/freezer.h> -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" #include "xfs_sb.h" +#include "xfs_trans_resv.h" +#include "xfs_log.h" #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_trace.h" -#include "xfs_log.h" static kmem_zone_t *xfs_buf_zone; @@ -591,7 +590,7 @@ found: error = _xfs_buf_map_pages(bp, flags); if (unlikely(error)) { xfs_warn(target->bt_mount, - "%s: failed to map pagesn", __func__); + "%s: failed to map pages\n", __func__); xfs_buf_relse(bp); return NULL; } @@ -810,7 +809,7 @@ xfs_buf_get_uncached( error = _xfs_buf_map_pages(bp, 0); if (unlikely(error)) { xfs_warn(target->bt_mount, - "%s: failed to map pages", __func__); + "%s: failed to map pages\n", __func__); goto fail_free_mem; } @@ -1619,7 +1618,7 @@ xfs_setsize_buftarg_flags( bdevname(btp->bt_bdev, name); xfs_warn(btp->bt_mount, - "Cannot set_blocksize to %u on device %s", + "Cannot set_blocksize to %u on device %s\n", sectorsize, name); return EINVAL; } diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index a64f67b..f1d85cf 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -17,18 +17,17 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_log.h" kmem_zone_t *xfs_buf_item_zone; @@ -809,7 +808,7 @@ xfs_buf_item_init( * Mark bytes first through last inclusive as dirty in the buf * item's bitmap. */ -static void +void xfs_buf_item_log_segment( struct xfs_buf_log_item *bip, uint first, diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 3f3455a..db63710 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -71,6 +71,10 @@ void xfs_buf_attach_iodone(struct xfs_buf *, void xfs_buf_iodone_callbacks(struct xfs_buf *); void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); +void xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *, + enum xfs_blft); +void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, struct xfs_buf *src_bp); + extern kmem_zone_t *xfs_buf_item_zone; #endif /* __XFS_BUF_ITEM_H__ */ diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 796272a..20bf8e8 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -18,20 +18,20 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" +#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_alloc.h" #include "xfs_bmap.h" @@ -129,6 +129,56 @@ xfs_da_state_free(xfs_da_state_t *state) kmem_zone_free(xfs_da_state_zone, state); } +void +xfs_da3_node_hdr_from_disk( + struct xfs_da3_icnode_hdr *to, + struct xfs_da_intnode *from) +{ + ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || + from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)); + + if (from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { + struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from; + + to->forw = be32_to_cpu(hdr3->info.hdr.forw); + to->back = be32_to_cpu(hdr3->info.hdr.back); + to->magic = be16_to_cpu(hdr3->info.hdr.magic); + to->count = be16_to_cpu(hdr3->__count); + to->level = be16_to_cpu(hdr3->__level); + return; + } + to->forw = be32_to_cpu(from->hdr.info.forw); + to->back = be32_to_cpu(from->hdr.info.back); + to->magic = be16_to_cpu(from->hdr.info.magic); + to->count = be16_to_cpu(from->hdr.__count); + to->level = be16_to_cpu(from->hdr.__level); +} + +void +xfs_da3_node_hdr_to_disk( + struct xfs_da_intnode *to, + struct xfs_da3_icnode_hdr *from) +{ + ASSERT(from->magic == XFS_DA_NODE_MAGIC || + from->magic == XFS_DA3_NODE_MAGIC); + + if (from->magic == XFS_DA3_NODE_MAGIC) { + struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to; + + hdr3->info.hdr.forw = cpu_to_be32(from->forw); + hdr3->info.hdr.back = cpu_to_be32(from->back); + hdr3->info.hdr.magic = cpu_to_be16(from->magic); + hdr3->__count = cpu_to_be16(from->count); + hdr3->__level = cpu_to_be16(from->level); + return; + } + to->hdr.info.forw = cpu_to_be32(from->forw); + to->hdr.info.back = cpu_to_be32(from->back); + to->hdr.info.magic = cpu_to_be16(from->magic); + to->hdr.__count = cpu_to_be16(from->count); + to->hdr.__level = cpu_to_be16(from->level); +} + static bool xfs_da3_node_verify( struct xfs_buf *bp) @@ -136,11 +186,8 @@ xfs_da3_node_verify( struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_da_intnode *hdr = bp->b_addr; struct xfs_da3_icnode_hdr ichdr; - const struct xfs_dir_ops *ops; - ops = xfs_dir_get_ops(mp, NULL); - - ops->node_hdr_from_disk(&ichdr, hdr); + xfs_da3_node_hdr_from_disk(&ichdr, hdr); if (xfs_sb_version_hascrc(&mp->m_sb)) { struct xfs_da3_node_hdr *hdr3 = bp->b_addr; @@ -307,12 +354,11 @@ xfs_da3_node_create( struct xfs_da3_icnode_hdr ichdr = {0}; struct xfs_buf *bp; int error; - struct xfs_inode *dp = args->dp; trace_xfs_da_node_create(args); ASSERT(level <= XFS_DA_NODE_MAXDEPTH); - error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork); + error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork); if (error) return(error); bp->b_ops = &xfs_da3_node_buf_ops; @@ -331,9 +377,9 @@ xfs_da3_node_create( } ichdr.level = level; - dp->d_ops->node_hdr_to_disk(node, &ichdr); + xfs_da3_node_hdr_to_disk(node, &ichdr); xfs_trans_log_buf(tp, bp, - XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); + XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node))); *bpp = bp; return(0); @@ -543,8 +589,8 @@ xfs_da3_root_split( oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { struct xfs_da3_icnode_hdr nodehdr; - dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot); - btree = dp->d_ops->node_tree_p(oldroot); + xfs_da3_node_hdr_from_disk(&nodehdr, oldroot); + btree = xfs_da3_node_tree_p(oldroot); size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot); level = nodehdr.level; @@ -558,8 +604,8 @@ xfs_da3_root_split( struct xfs_dir2_leaf_entry *ents; leaf = (xfs_dir2_leaf_t *)oldroot; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + ents = xfs_dir3_leaf_ents_p(leaf); ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); @@ -603,14 +649,14 @@ xfs_da3_root_split( return error; node = bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); + xfs_da3_node_hdr_from_disk(&nodehdr, node); + btree = xfs_da3_node_tree_p(node); btree[0].hashval = cpu_to_be32(blk1->hashval); btree[0].before = cpu_to_be32(blk1->blkno); btree[1].hashval = cpu_to_be32(blk2->hashval); btree[1].before = cpu_to_be32(blk2->blkno); nodehdr.count = 2; - dp->d_ops->node_hdr_to_disk(node, &nodehdr); + xfs_da3_node_hdr_to_disk(node, &nodehdr); #ifdef DEBUG if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || @@ -647,12 +693,11 @@ xfs_da3_node_split( int newcount; int error; int useextra; - struct xfs_inode *dp = state->args->dp; trace_xfs_da_node_split(state->args); node = oldblk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); + xfs_da3_node_hdr_from_disk(&nodehdr, node); /* * With V2 dirs the extra block is data or freespace. @@ -699,7 +744,7 @@ xfs_da3_node_split( * If we had double-split op below us, then add the extra block too. */ node = oldblk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); + xfs_da3_node_hdr_from_disk(&nodehdr, node); if (oldblk->index <= nodehdr.count) { oldblk->index++; xfs_da3_node_add(state, oldblk, addblk); @@ -748,16 +793,15 @@ xfs_da3_node_rebalance( int count; int tmp; int swap = 0; - struct xfs_inode *dp = state->args->dp; trace_xfs_da_node_rebalance(state->args); node1 = blk1->bp->b_addr; node2 = blk2->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); - dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); - btree1 = dp->d_ops->node_tree_p(node1); - btree2 = dp->d_ops->node_tree_p(node2); + xfs_da3_node_hdr_from_disk(&nodehdr1, node1); + xfs_da3_node_hdr_from_disk(&nodehdr2, node2); + btree1 = xfs_da3_node_tree_p(node1); + btree2 = xfs_da3_node_tree_p(node2); /* * Figure out how many entries need to move, and in which direction. @@ -770,10 +814,10 @@ xfs_da3_node_rebalance( tmpnode = node1; node1 = node2; node2 = tmpnode; - dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); - dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); - btree1 = dp->d_ops->node_tree_p(node1); - btree2 = dp->d_ops->node_tree_p(node2); + xfs_da3_node_hdr_from_disk(&nodehdr1, node1); + xfs_da3_node_hdr_from_disk(&nodehdr2, node2); + btree1 = xfs_da3_node_tree_p(node1); + btree2 = xfs_da3_node_tree_p(node2); swap = 1; } @@ -835,14 +879,15 @@ xfs_da3_node_rebalance( /* * Log header of node 1 and all current bits of node 2. */ - dp->d_ops->node_hdr_to_disk(node1, &nodehdr1); + xfs_da3_node_hdr_to_disk(node1, &nodehdr1); xfs_trans_log_buf(tp, blk1->bp, - XFS_DA_LOGRANGE(node1, &node1->hdr, dp->d_ops->node_hdr_size)); + XFS_DA_LOGRANGE(node1, &node1->hdr, + xfs_da3_node_hdr_size(node1))); - dp->d_ops->node_hdr_to_disk(node2, &nodehdr2); + xfs_da3_node_hdr_to_disk(node2, &nodehdr2); xfs_trans_log_buf(tp, blk2->bp, XFS_DA_LOGRANGE(node2, &node2->hdr, - dp->d_ops->node_hdr_size + + xfs_da3_node_hdr_size(node2) + (sizeof(btree2[0]) * nodehdr2.count))); /* @@ -852,10 +897,10 @@ xfs_da3_node_rebalance( if (swap) { node1 = blk1->bp->b_addr; node2 = blk2->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); - dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); - btree1 = dp->d_ops->node_tree_p(node1); - btree2 = dp->d_ops->node_tree_p(node2); + xfs_da3_node_hdr_from_disk(&nodehdr1, node1); + xfs_da3_node_hdr_from_disk(&nodehdr2, node2); + btree1 = xfs_da3_node_tree_p(node1); + btree2 = xfs_da3_node_tree_p(node2); } blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval); blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval); @@ -882,13 +927,12 @@ xfs_da3_node_add( struct xfs_da3_icnode_hdr nodehdr; struct xfs_da_node_entry *btree; int tmp; - struct xfs_inode *dp = state->args->dp; trace_xfs_da_node_add(state->args); node = oldblk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); + xfs_da3_node_hdr_from_disk(&nodehdr, node); + btree = xfs_da3_node_tree_p(node); ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count); ASSERT(newblk->blkno != 0); @@ -911,9 +955,9 @@ xfs_da3_node_add( tmp + sizeof(*btree))); nodehdr.count += 1; - dp->d_ops->node_hdr_to_disk(node, &nodehdr); + xfs_da3_node_hdr_to_disk(node, &nodehdr); xfs_trans_log_buf(state->args->trans, oldblk->bp, - XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); + XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node))); /* * Copy the last hash value from the oldblk to propagate upwards. @@ -1050,7 +1094,6 @@ xfs_da3_root_join( struct xfs_da3_icnode_hdr oldroothdr; struct xfs_da_node_entry *btree; int error; - struct xfs_inode *dp = state->args->dp; trace_xfs_da_root_join(state->args); @@ -1058,7 +1101,7 @@ xfs_da3_root_join( args = state->args; oldroot = root_blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&oldroothdr, oldroot); + xfs_da3_node_hdr_from_disk(&oldroothdr, oldroot); ASSERT(oldroothdr.forw == 0); ASSERT(oldroothdr.back == 0); @@ -1072,10 +1115,10 @@ xfs_da3_root_join( * Read in the (only) child block, then copy those bytes into * the root block's buffer and free the original child block. */ - btree = dp->d_ops->node_tree_p(oldroot); + btree = xfs_da3_node_tree_p(oldroot); child = be32_to_cpu(btree[0].before); ASSERT(child != 0); - error = xfs_da3_node_read(args->trans, dp, child, -1, &bp, + error = xfs_da3_node_read(args->trans, args->dp, child, -1, &bp, args->whichfork); if (error) return error; @@ -1125,7 +1168,6 @@ xfs_da3_node_toosmall( int error; int retval; int i; - struct xfs_inode *dp = state->args->dp; trace_xfs_da_node_toosmall(state->args); @@ -1137,7 +1179,7 @@ xfs_da3_node_toosmall( blk = &state->path.blk[ state->path.active-1 ]; info = blk->bp->b_addr; node = (xfs_da_intnode_t *)info; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); + xfs_da3_node_hdr_from_disk(&nodehdr, node); if (nodehdr.count > (state->node_ents >> 1)) { *action = 0; /* blk over 50%, don't try to join */ return(0); /* blk over 50%, don't try to join */ @@ -1189,13 +1231,13 @@ xfs_da3_node_toosmall( blkno = nodehdr.back; if (blkno == 0) continue; - error = xfs_da3_node_read(state->args->trans, dp, + error = xfs_da3_node_read(state->args->trans, state->args->dp, blkno, -1, &bp, state->args->whichfork); if (error) return(error); node = bp->b_addr; - dp->d_ops->node_hdr_from_disk(&thdr, node); + xfs_da3_node_hdr_from_disk(&thdr, node); xfs_trans_brelse(state->args->trans, bp); if (count - thdr.count >= 0) @@ -1233,7 +1275,6 @@ xfs_da3_node_toosmall( */ STATIC uint xfs_da3_node_lasthash( - struct xfs_inode *dp, struct xfs_buf *bp, int *count) { @@ -1242,12 +1283,12 @@ xfs_da3_node_lasthash( struct xfs_da3_icnode_hdr nodehdr; node = bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); + xfs_da3_node_hdr_from_disk(&nodehdr, node); if (count) *count = nodehdr.count; if (!nodehdr.count) return 0; - btree = dp->d_ops->node_tree_p(node); + btree = xfs_da3_node_tree_p(node); return be32_to_cpu(btree[nodehdr.count - 1].hashval); } @@ -1266,7 +1307,6 @@ xfs_da3_fixhashpath( xfs_dahash_t lasthash=0; int level; int count; - struct xfs_inode *dp = state->args->dp; trace_xfs_da_fixhashpath(state->args); @@ -1279,12 +1319,12 @@ xfs_da3_fixhashpath( return; break; case XFS_DIR2_LEAFN_MAGIC: - lasthash = xfs_dir2_leafn_lasthash(dp, blk->bp, &count); + lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count); if (count == 0) return; break; case XFS_DA_NODE_MAGIC: - lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count); + lasthash = xfs_da3_node_lasthash(blk->bp, &count); if (count == 0) return; break; @@ -1293,8 +1333,8 @@ xfs_da3_fixhashpath( struct xfs_da3_icnode_hdr nodehdr; node = blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); + xfs_da3_node_hdr_from_disk(&nodehdr, node); + btree = xfs_da3_node_tree_p(node); if (be32_to_cpu(btree->hashval) == lasthash) break; blk->hashval = lasthash; @@ -1320,12 +1360,11 @@ xfs_da3_node_remove( struct xfs_da_node_entry *btree; int index; int tmp; - struct xfs_inode *dp = state->args->dp; trace_xfs_da_node_remove(state->args); node = drop_blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); + xfs_da3_node_hdr_from_disk(&nodehdr, node); ASSERT(drop_blk->index < nodehdr.count); ASSERT(drop_blk->index >= 0); @@ -1333,7 +1372,7 @@ xfs_da3_node_remove( * Copy over the offending entry, or just zero it out. */ index = drop_blk->index; - btree = dp->d_ops->node_tree_p(node); + btree = xfs_da3_node_tree_p(node); if (index < nodehdr.count - 1) { tmp = nodehdr.count - index - 1; tmp *= (uint)sizeof(xfs_da_node_entry_t); @@ -1346,9 +1385,9 @@ xfs_da3_node_remove( xfs_trans_log_buf(state->args->trans, drop_blk->bp, XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index]))); nodehdr.count -= 1; - dp->d_ops->node_hdr_to_disk(node, &nodehdr); + xfs_da3_node_hdr_to_disk(node, &nodehdr); xfs_trans_log_buf(state->args->trans, drop_blk->bp, - XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size)); + XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node))); /* * Copy the last hash value from the block to propagate upwards. @@ -1375,16 +1414,15 @@ xfs_da3_node_unbalance( struct xfs_trans *tp; int sindex; int tmp; - struct xfs_inode *dp = state->args->dp; trace_xfs_da_node_unbalance(state->args); drop_node = drop_blk->bp->b_addr; save_node = save_blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&drop_hdr, drop_node); - dp->d_ops->node_hdr_from_disk(&save_hdr, save_node); - drop_btree = dp->d_ops->node_tree_p(drop_node); - save_btree = dp->d_ops->node_tree_p(save_node); + xfs_da3_node_hdr_from_disk(&drop_hdr, drop_node); + xfs_da3_node_hdr_from_disk(&save_hdr, save_node); + drop_btree = xfs_da3_node_tree_p(drop_node); + save_btree = xfs_da3_node_tree_p(save_node); tp = state->args->trans; /* @@ -1418,10 +1456,10 @@ xfs_da3_node_unbalance( memcpy(&save_btree[sindex], &drop_btree[0], tmp); save_hdr.count += drop_hdr.count; - dp->d_ops->node_hdr_to_disk(save_node, &save_hdr); + xfs_da3_node_hdr_to_disk(save_node, &save_hdr); xfs_trans_log_buf(tp, save_blk->bp, XFS_DA_LOGRANGE(save_node, &save_node->hdr, - dp->d_ops->node_hdr_size)); + xfs_da3_node_hdr_size(save_node))); /* * Save the last hashval in the remaining block for upward propagation. @@ -1463,7 +1501,6 @@ xfs_da3_node_lookup_int( int max; int error; int retval; - struct xfs_inode *dp = state->args->dp; args = state->args; @@ -1499,8 +1536,7 @@ xfs_da3_node_lookup_int( if (blk->magic == XFS_DIR2_LEAFN_MAGIC || blk->magic == XFS_DIR3_LEAFN_MAGIC) { blk->magic = XFS_DIR2_LEAFN_MAGIC; - blk->hashval = xfs_dir2_leafn_lasthash(args->dp, - blk->bp, NULL); + blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL); break; } @@ -1511,8 +1547,8 @@ xfs_da3_node_lookup_int( * Search an intermediate node for a match. */ node = blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); + xfs_da3_node_hdr_from_disk(&nodehdr, node); + btree = xfs_da3_node_tree_p(node); max = nodehdr.count; blk->hashval = be32_to_cpu(btree[max - 1].hashval); @@ -1607,7 +1643,6 @@ xfs_da3_node_lookup_int( */ STATIC int xfs_da3_node_order( - struct xfs_inode *dp, struct xfs_buf *node1_bp, struct xfs_buf *node2_bp) { @@ -1620,10 +1655,10 @@ xfs_da3_node_order( node1 = node1_bp->b_addr; node2 = node2_bp->b_addr; - dp->d_ops->node_hdr_from_disk(&node1hdr, node1); - dp->d_ops->node_hdr_from_disk(&node2hdr, node2); - btree1 = dp->d_ops->node_tree_p(node1); - btree2 = dp->d_ops->node_tree_p(node2); + xfs_da3_node_hdr_from_disk(&node1hdr, node1); + xfs_da3_node_hdr_from_disk(&node2hdr, node2); + btree1 = xfs_da3_node_tree_p(node1); + btree2 = xfs_da3_node_tree_p(node2); if (node1hdr.count > 0 && node2hdr.count > 0 && ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) || @@ -1650,7 +1685,6 @@ xfs_da3_blk_link( struct xfs_buf *bp; int before = 0; int error; - struct xfs_inode *dp = state->args->dp; /* * Set up environment. @@ -1668,10 +1702,10 @@ xfs_da3_blk_link( before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp); break; case XFS_DIR2_LEAFN_MAGIC: - before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp); + before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp); break; case XFS_DA_NODE_MAGIC: - before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp); + before = xfs_da3_node_order(old_blk->bp, new_blk->bp); break; } @@ -1686,7 +1720,7 @@ xfs_da3_blk_link( new_info->forw = cpu_to_be32(old_blk->blkno); new_info->back = old_info->back; if (old_info->back) { - error = xfs_da3_node_read(args->trans, dp, + error = xfs_da3_node_read(args->trans, args->dp, be32_to_cpu(old_info->back), -1, &bp, args->whichfork); if (error) @@ -1707,7 +1741,7 @@ xfs_da3_blk_link( new_info->forw = old_info->forw; new_info->back = cpu_to_be32(old_blk->blkno); if (old_info->forw) { - error = xfs_da3_node_read(args->trans, dp, + error = xfs_da3_node_read(args->trans, args->dp, be32_to_cpu(old_info->forw), -1, &bp, args->whichfork); if (error) @@ -1827,7 +1861,6 @@ xfs_da3_path_shift( xfs_dablk_t blkno = 0; int level; int error; - struct xfs_inode *dp = state->args->dp; trace_xfs_da_path_shift(state->args); @@ -1843,8 +1876,8 @@ xfs_da3_path_shift( level = (path->active-1) - 1; /* skip bottom layer in path */ for (blk = &path->blk[level]; level >= 0; blk--, level--) { node = blk->bp->b_addr; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); + xfs_da3_node_hdr_from_disk(&nodehdr, node); + btree = xfs_da3_node_tree_p(node); if (forward && (blk->index < nodehdr.count - 1)) { blk->index++; @@ -1878,7 +1911,7 @@ xfs_da3_path_shift( * Read the next child block. */ blk->blkno = blkno; - error = xfs_da3_node_read(args->trans, dp, blkno, -1, + error = xfs_da3_node_read(args->trans, args->dp, blkno, -1, &blk->bp, args->whichfork); if (error) return(error); @@ -1900,8 +1933,8 @@ xfs_da3_path_shift( case XFS_DA3_NODE_MAGIC: blk->magic = XFS_DA_NODE_MAGIC; node = (xfs_da_intnode_t *)info; - dp->d_ops->node_hdr_from_disk(&nodehdr, node); - btree = dp->d_ops->node_tree_p(node); + xfs_da3_node_hdr_from_disk(&nodehdr, node); + btree = xfs_da3_node_tree_p(node); blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval); if (forward) blk->index = 0; @@ -1914,15 +1947,16 @@ xfs_da3_path_shift( blk->magic = XFS_ATTR_LEAF_MAGIC; ASSERT(level == path->active-1); blk->index = 0; - blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); + blk->hashval = xfs_attr_leaf_lasthash(blk->bp, + NULL); break; case XFS_DIR2_LEAFN_MAGIC: case XFS_DIR3_LEAFN_MAGIC: blk->magic = XFS_DIR2_LEAFN_MAGIC; ASSERT(level == path->active-1); blk->index = 0; - blk->hashval = xfs_dir2_leafn_lasthash(args->dp, - blk->bp, NULL); + blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, + NULL); break; default: ASSERT(0); @@ -2129,7 +2163,7 @@ xfs_da3_swap_lastblock( struct xfs_dir2_leaf *dead_leaf2; struct xfs_da_node_entry *btree; struct xfs_da3_icnode_hdr par_hdr; - struct xfs_inode *dp; + struct xfs_inode *ip; struct xfs_trans *tp; struct xfs_mount *mp; struct xfs_buf *dead_buf; @@ -2153,12 +2187,12 @@ xfs_da3_swap_lastblock( dead_buf = *dead_bufp; dead_blkno = *dead_blknop; tp = args->trans; - dp = args->dp; + ip = args->dp; w = args->whichfork; ASSERT(w == XFS_DATA_FORK); - mp = dp->i_mount; + mp = ip->i_mount; lastoff = mp->m_dirfreeblk; - error = xfs_bmap_last_before(tp, dp, &lastoff, w); + error = xfs_bmap_last_before(tp, ip, &lastoff, w); if (error) return error; if (unlikely(lastoff == 0)) { @@ -2170,7 +2204,7 @@ xfs_da3_swap_lastblock( * Read the last block in the btree space. */ last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs; - error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w); + error = xfs_da3_node_read(tp, ip, last_blkno, -1, &last_buf, w); if (error) return error; /* @@ -2188,16 +2222,16 @@ xfs_da3_swap_lastblock( struct xfs_dir2_leaf_entry *ents; dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, dead_leaf2); - ents = dp->d_ops->leaf_ents_p(dead_leaf2); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, dead_leaf2); + ents = xfs_dir3_leaf_ents_p(dead_leaf2); dead_level = 0; dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval); } else { struct xfs_da3_icnode_hdr deadhdr; dead_node = (xfs_da_intnode_t *)dead_info; - dp->d_ops->node_hdr_from_disk(&deadhdr, dead_node); - btree = dp->d_ops->node_tree_p(dead_node); + xfs_da3_node_hdr_from_disk(&deadhdr, dead_node); + btree = xfs_da3_node_tree_p(dead_node); dead_level = deadhdr.level; dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval); } @@ -2206,7 +2240,7 @@ xfs_da3_swap_lastblock( * If the moved block has a left sibling, fix up the pointers. */ if ((sib_blkno = be32_to_cpu(dead_info->back))) { - error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w); + error = xfs_da3_node_read(tp, ip, sib_blkno, -1, &sib_buf, w); if (error) goto done; sib_info = sib_buf->b_addr; @@ -2228,7 +2262,7 @@ xfs_da3_swap_lastblock( * If the moved block has a right sibling, fix up the pointers. */ if ((sib_blkno = be32_to_cpu(dead_info->forw))) { - error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w); + error = xfs_da3_node_read(tp, ip, sib_blkno, -1, &sib_buf, w); if (error) goto done; sib_info = sib_buf->b_addr; @@ -2252,11 +2286,11 @@ xfs_da3_swap_lastblock( * Walk down the tree looking for the parent of the moved block. */ for (;;) { - error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); + error = xfs_da3_node_read(tp, ip, par_blkno, -1, &par_buf, w); if (error) goto done; par_node = par_buf->b_addr; - dp->d_ops->node_hdr_from_disk(&par_hdr, par_node); + xfs_da3_node_hdr_from_disk(&par_hdr, par_node); if (level >= 0 && level != par_hdr.level + 1) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", XFS_ERRLEVEL_LOW, mp); @@ -2264,7 +2298,7 @@ xfs_da3_swap_lastblock( goto done; } level = par_hdr.level; - btree = dp->d_ops->node_tree_p(par_node); + btree = xfs_da3_node_tree_p(par_node); for (entno = 0; entno < par_hdr.count && be32_to_cpu(btree[entno].hashval) < dead_hash; @@ -2303,18 +2337,18 @@ xfs_da3_swap_lastblock( error = XFS_ERROR(EFSCORRUPTED); goto done; } - error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); + error = xfs_da3_node_read(tp, ip, par_blkno, -1, &par_buf, w); if (error) goto done; par_node = par_buf->b_addr; - dp->d_ops->node_hdr_from_disk(&par_hdr, par_node); + xfs_da3_node_hdr_from_disk(&par_hdr, par_node); if (par_hdr.level != level) { XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", XFS_ERRLEVEL_LOW, mp); error = XFS_ERROR(EFSCORRUPTED); goto done; } - btree = dp->d_ops->node_tree_p(par_node); + btree = xfs_da3_node_tree_p(par_node); entno = 0; } /* diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 6e95ea7..b1f2679 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -23,7 +23,146 @@ struct xfs_bmap_free; struct xfs_inode; struct xfs_trans; struct zone; -struct xfs_dir_ops; + +/*======================================================================== + * Directory Structure when greater than XFS_LBSIZE(mp) bytes. + *========================================================================*/ + +/* + * This structure is common to both leaf nodes and non-leaf nodes in the Btree. + * + * It is used to manage a doubly linked list of all blocks at the same + * level in the Btree, and to identify which type of block this is. + */ +#define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */ +#define XFS_ATTR_LEAF_MAGIC 0xfbee /* magic number: attribute leaf blks */ +#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */ +#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */ + +typedef struct xfs_da_blkinfo { + __be32 forw; /* previous block in list */ + __be32 back; /* following block in list */ + __be16 magic; /* validity check on block */ + __be16 pad; /* unused */ +} xfs_da_blkinfo_t; + +/* + * CRC enabled directory structure types + * + * The headers change size for the additional verification information, but + * otherwise the tree layouts and contents are unchanged. Hence the da btree + * code can use the struct xfs_da_blkinfo for manipulating the tree links and + * magic numbers without modification for both v2 and v3 nodes. + */ +#define XFS_DA3_NODE_MAGIC 0x3ebe /* magic number: non-leaf blocks */ +#define XFS_ATTR3_LEAF_MAGIC 0x3bee /* magic number: attribute leaf blks */ +#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v2 dirlf single blks */ +#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v2 dirlf multi blks */ + +struct xfs_da3_blkinfo { + /* + * the node link manipulation code relies on the fact that the first + * element of this structure is the struct xfs_da_blkinfo so it can + * ignore the differences in the rest of the structures. + */ + struct xfs_da_blkinfo hdr; + __be32 crc; /* CRC of block */ + __be64 blkno; /* first block of the buffer */ + __be64 lsn; /* sequence number of last write */ + uuid_t uuid; /* filesystem we belong to */ + __be64 owner; /* inode that owns the block */ +}; + +/* + * This is the structure of the root and intermediate nodes in the Btree. + * The leaf nodes are defined above. + * + * Entries are not packed. + * + * Since we have duplicate keys, use a binary search but always follow + * all match in the block, not just the first match found. + */ +#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */ + +typedef struct xfs_da_node_hdr { + struct xfs_da_blkinfo info; /* block type, links, etc. */ + __be16 __count; /* count of active entries */ + __be16 __level; /* level above leaves (leaf == 0) */ +} xfs_da_node_hdr_t; + +struct xfs_da3_node_hdr { + struct xfs_da3_blkinfo info; /* block type, links, etc. */ + __be16 __count; /* count of active entries */ + __be16 __level; /* level above leaves (leaf == 0) */ + __be32 __pad32; +}; + +#define XFS_DA3_NODE_CRC_OFF (offsetof(struct xfs_da3_node_hdr, info.crc)) + +typedef struct xfs_da_node_entry { + __be32 hashval; /* hash value for this descendant */ + __be32 before; /* Btree block before this key */ +} xfs_da_node_entry_t; + +typedef struct xfs_da_intnode { + struct xfs_da_node_hdr hdr; + struct xfs_da_node_entry __btree[]; +} xfs_da_intnode_t; + +struct xfs_da3_intnode { + struct xfs_da3_node_hdr hdr; + struct xfs_da_node_entry __btree[]; +}; + +/* + * In-core version of the node header to abstract the differences in the v2 and + * v3 disk format of the headers. Callers need to convert to/from disk format as + * appropriate. + */ +struct xfs_da3_icnode_hdr { + __uint32_t forw; + __uint32_t back; + __uint16_t magic; + __uint16_t count; + __uint16_t level; +}; + +extern void xfs_da3_node_hdr_from_disk(struct xfs_da3_icnode_hdr *to, + struct xfs_da_intnode *from); +extern void xfs_da3_node_hdr_to_disk(struct xfs_da_intnode *to, + struct xfs_da3_icnode_hdr *from); + +static inline int +__xfs_da3_node_hdr_size(bool v3) +{ + if (v3) + return sizeof(struct xfs_da3_node_hdr); + return sizeof(struct xfs_da_node_hdr); +} +static inline int +xfs_da3_node_hdr_size(struct xfs_da_intnode *dap) +{ + bool v3 = dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC); + + return __xfs_da3_node_hdr_size(v3); +} + +static inline struct xfs_da_node_entry * +xfs_da3_node_tree_p(struct xfs_da_intnode *dap) +{ + if (dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { + struct xfs_da3_intnode *dap3 = (struct xfs_da3_intnode *)dap; + return dap3->__btree; + } + return dap->__btree; +} + +extern void xfs_da3_intnode_from_disk(struct xfs_da3_icnode_hdr *to, + struct xfs_da_intnode *from); +extern void xfs_da3_intnode_to_disk(struct xfs_da_intnode *to, + struct xfs_da3_icnode_hdr *from); + +#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize /*======================================================================== * Btree searching and modification structure definitions. @@ -170,6 +309,8 @@ int xfs_da3_node_read(struct xfs_trans *tp, struct xfs_inode *dp, xfs_dablk_t bno, xfs_daddr_t mappedbno, struct xfs_buf **bpp, int which_fork); +extern const struct xfs_buf_ops xfs_da3_node_buf_ops; + /* * Utility routines. */ diff --git a/fs/xfs/xfs_da_format.c b/fs/xfs/xfs_da_format.c deleted file mode 100644 index e6c83e1..0000000 --- a/fs/xfs/xfs_da_format.c +++ /dev/null @@ -1,907 +0,0 @@ -/* - * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. - * Copyright (c) 2013 Red Hat, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_inode.h" -#include "xfs_dir2.h" - -/* - * Shortform directory ops - */ -static int -xfs_dir2_sf_entsize( - struct xfs_dir2_sf_hdr *hdr, - int len) -{ - int count = sizeof(struct xfs_dir2_sf_entry); /* namelen + offset */ - - count += len; /* name */ - count += hdr->i8count ? sizeof(xfs_dir2_ino8_t) : - sizeof(xfs_dir2_ino4_t); /* ino # */ - return count; -} - -static int -xfs_dir3_sf_entsize( - struct xfs_dir2_sf_hdr *hdr, - int len) -{ - return xfs_dir2_sf_entsize(hdr, len) + sizeof(__uint8_t); -} - -static struct xfs_dir2_sf_entry * -xfs_dir2_sf_nextentry( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return (struct xfs_dir2_sf_entry *) - ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen)); -} - -static struct xfs_dir2_sf_entry * -xfs_dir3_sf_nextentry( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return (struct xfs_dir2_sf_entry *) - ((char *)sfep + xfs_dir3_sf_entsize(hdr, sfep->namelen)); -} - - -/* - * For filetype enabled shortform directories, the file type field is stored at - * the end of the name. Because it's only a single byte, endian conversion is - * not necessary. For non-filetype enable directories, the type is always - * unknown and we never store the value. - */ -static __uint8_t -xfs_dir2_sfe_get_ftype( - struct xfs_dir2_sf_entry *sfep) -{ - return XFS_DIR3_FT_UNKNOWN; -} - -static void -xfs_dir2_sfe_put_ftype( - struct xfs_dir2_sf_entry *sfep, - __uint8_t ftype) -{ - ASSERT(ftype < XFS_DIR3_FT_MAX); -} - -static __uint8_t -xfs_dir3_sfe_get_ftype( - struct xfs_dir2_sf_entry *sfep) -{ - __uint8_t ftype; - - ftype = sfep->name[sfep->namelen]; - if (ftype >= XFS_DIR3_FT_MAX) - return XFS_DIR3_FT_UNKNOWN; - return ftype; -} - -static void -xfs_dir3_sfe_put_ftype( - struct xfs_dir2_sf_entry *sfep, - __uint8_t ftype) -{ - ASSERT(ftype < XFS_DIR3_FT_MAX); - - sfep->name[sfep->namelen] = ftype; -} - -/* - * Inode numbers in short-form directories can come in two versions, - * either 4 bytes or 8 bytes wide. These helpers deal with the - * two forms transparently by looking at the headers i8count field. - * - * For 64-bit inode number the most significant byte must be zero. - */ -static xfs_ino_t -xfs_dir2_sf_get_ino( - struct xfs_dir2_sf_hdr *hdr, - xfs_dir2_inou_t *from) -{ - if (hdr->i8count) - return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL; - else - return get_unaligned_be32(&from->i4.i); -} - -static void -xfs_dir2_sf_put_ino( - struct xfs_dir2_sf_hdr *hdr, - xfs_dir2_inou_t *to, - xfs_ino_t ino) -{ - ASSERT((ino & 0xff00000000000000ULL) == 0); - - if (hdr->i8count) - put_unaligned_be64(ino, &to->i8.i); - else - put_unaligned_be32(ino, &to->i4.i); -} - -static xfs_ino_t -xfs_dir2_sf_get_parent_ino( - struct xfs_dir2_sf_hdr *hdr) -{ - return xfs_dir2_sf_get_ino(hdr, &hdr->parent); -} - -static void -xfs_dir2_sf_put_parent_ino( - struct xfs_dir2_sf_hdr *hdr, - xfs_ino_t ino) -{ - xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino); -} - -/* - * In short-form directory entries the inode numbers are stored at variable - * offset behind the entry name. If the entry stores a filetype value, then it - * sits between the name and the inode number. Hence the inode numbers may only - * be accessed through the helpers below. - */ -static xfs_ino_t -xfs_dir2_sfe_get_ino( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return xfs_dir2_sf_get_ino(hdr, - (xfs_dir2_inou_t *)&sfep->name[sfep->namelen]); -} - -static void -xfs_dir2_sfe_put_ino( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep, - xfs_ino_t ino) -{ - xfs_dir2_sf_put_ino(hdr, - (xfs_dir2_inou_t *)&sfep->name[sfep->namelen], ino); -} - -static xfs_ino_t -xfs_dir3_sfe_get_ino( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep) -{ - return xfs_dir2_sf_get_ino(hdr, - (xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1]); -} - -static void -xfs_dir3_sfe_put_ino( - struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep, - xfs_ino_t ino) -{ - xfs_dir2_sf_put_ino(hdr, - (xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1], ino); -} - - -/* - * Directory data block operations - */ - -/* - * For special situations, the dirent size ends up fixed because we always know - * what the size of the entry is. That's true for the "." and "..", and - * therefore we know that they are a fixed size and hence their offsets are - * constant, as is the first entry. - * - * Hence, this calculation is written as a macro to be able to be calculated at - * compile time and so certain offsets can be calculated directly in the - * structure initaliser via the macro. There are two macros - one for dirents - * with ftype and without so there are no unresolvable conditionals in the - * calculations. We also use round_up() as XFS_DIR2_DATA_ALIGN is always a power - * of 2 and the compiler doesn't reject it (unlike roundup()). - */ -#define XFS_DIR2_DATA_ENTSIZE(n) \ - round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \ - sizeof(xfs_dir2_data_off_t)), XFS_DIR2_DATA_ALIGN) - -#define XFS_DIR3_DATA_ENTSIZE(n) \ - round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \ - sizeof(xfs_dir2_data_off_t) + sizeof(__uint8_t)), \ - XFS_DIR2_DATA_ALIGN) - -static int -xfs_dir2_data_entsize( - int n) -{ - return XFS_DIR2_DATA_ENTSIZE(n); -} - -static int -xfs_dir3_data_entsize( - int n) -{ - return XFS_DIR3_DATA_ENTSIZE(n); -} - -static __uint8_t -xfs_dir2_data_get_ftype( - struct xfs_dir2_data_entry *dep) -{ - return XFS_DIR3_FT_UNKNOWN; -} - -static void -xfs_dir2_data_put_ftype( - struct xfs_dir2_data_entry *dep, - __uint8_t ftype) -{ - ASSERT(ftype < XFS_DIR3_FT_MAX); -} - -static __uint8_t -xfs_dir3_data_get_ftype( - struct xfs_dir2_data_entry *dep) -{ - __uint8_t ftype = dep->name[dep->namelen]; - - ASSERT(ftype < XFS_DIR3_FT_MAX); - if (ftype >= XFS_DIR3_FT_MAX) - return XFS_DIR3_FT_UNKNOWN; - return ftype; -} - -static void -xfs_dir3_data_put_ftype( - struct xfs_dir2_data_entry *dep, - __uint8_t type) -{ - ASSERT(type < XFS_DIR3_FT_MAX); - ASSERT(dep->namelen != 0); - - dep->name[dep->namelen] = type; -} - -/* - * Pointer to an entry's tag word. - */ -static __be16 * -xfs_dir2_data_entry_tag_p( - struct xfs_dir2_data_entry *dep) -{ - return (__be16 *)((char *)dep + - xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16)); -} - -static __be16 * -xfs_dir3_data_entry_tag_p( - struct xfs_dir2_data_entry *dep) -{ - return (__be16 *)((char *)dep + - xfs_dir3_data_entsize(dep->namelen) - sizeof(__be16)); -} - -/* - * location of . and .. in data space (always block 0) - */ -static struct xfs_dir2_data_entry * -xfs_dir2_data_dot_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); -} - -static struct xfs_dir2_data_entry * -xfs_dir2_data_dotdot_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR2_DATA_ENTSIZE(1)); -} - -static struct xfs_dir2_data_entry * -xfs_dir2_data_first_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR2_DATA_ENTSIZE(1) + - XFS_DIR2_DATA_ENTSIZE(2)); -} - -static struct xfs_dir2_data_entry * -xfs_dir2_ftype_data_dotdot_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1)); -} - -static struct xfs_dir2_data_entry * -xfs_dir2_ftype_data_first_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1) + - XFS_DIR3_DATA_ENTSIZE(2)); -} - -static struct xfs_dir2_data_entry * -xfs_dir3_data_dot_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); -} - -static struct xfs_dir2_data_entry * -xfs_dir3_data_dotdot_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1)); -} - -static struct xfs_dir2_data_entry * -xfs_dir3_data_first_entry_p( - struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1) + - XFS_DIR3_DATA_ENTSIZE(2)); -} - -static struct xfs_dir2_data_free * -xfs_dir2_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) -{ - return hdr->bestfree; -} - -static struct xfs_dir2_data_free * -xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) -{ - return ((struct xfs_dir3_data_hdr *)hdr)->best_free; -} - -static struct xfs_dir2_data_entry * -xfs_dir2_data_entry_p(struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); -} - -static struct xfs_dir2_data_unused * -xfs_dir2_data_unused_p(struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_unused *) - ((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); -} - -static struct xfs_dir2_data_entry * -xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_entry *) - ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); -} - -static struct xfs_dir2_data_unused * -xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr) -{ - return (struct xfs_dir2_data_unused *) - ((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); -} - - -/* - * Directory Leaf block operations - */ -static int -xfs_dir2_max_leaf_ents(struct xfs_mount *mp) -{ - return (mp->m_dirblksize - sizeof(struct xfs_dir2_leaf_hdr)) / - (uint)sizeof(struct xfs_dir2_leaf_entry); -} - -static struct xfs_dir2_leaf_entry * -xfs_dir2_leaf_ents_p(struct xfs_dir2_leaf *lp) -{ - return lp->__ents; -} - -static int -xfs_dir3_max_leaf_ents(struct xfs_mount *mp) -{ - return (mp->m_dirblksize - sizeof(struct xfs_dir3_leaf_hdr)) / - (uint)sizeof(struct xfs_dir2_leaf_entry); -} - -static struct xfs_dir2_leaf_entry * -xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp) -{ - return ((struct xfs_dir3_leaf *)lp)->__ents; -} - -static void -xfs_dir2_leaf_hdr_from_disk( - struct xfs_dir3_icleaf_hdr *to, - struct xfs_dir2_leaf *from) -{ - to->forw = be32_to_cpu(from->hdr.info.forw); - to->back = be32_to_cpu(from->hdr.info.back); - to->magic = be16_to_cpu(from->hdr.info.magic); - to->count = be16_to_cpu(from->hdr.count); - to->stale = be16_to_cpu(from->hdr.stale); - - ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC || - to->magic == XFS_DIR2_LEAFN_MAGIC); -} - -static void -xfs_dir2_leaf_hdr_to_disk( - struct xfs_dir2_leaf *to, - struct xfs_dir3_icleaf_hdr *from) -{ - ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC || - from->magic == XFS_DIR2_LEAFN_MAGIC); - - to->hdr.info.forw = cpu_to_be32(from->forw); - to->hdr.info.back = cpu_to_be32(from->back); - to->hdr.info.magic = cpu_to_be16(from->magic); - to->hdr.count = cpu_to_be16(from->count); - to->hdr.stale = cpu_to_be16(from->stale); -} - -static void -xfs_dir3_leaf_hdr_from_disk( - struct xfs_dir3_icleaf_hdr *to, - struct xfs_dir2_leaf *from) -{ - struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from; - - to->forw = be32_to_cpu(hdr3->info.hdr.forw); - to->back = be32_to_cpu(hdr3->info.hdr.back); - to->magic = be16_to_cpu(hdr3->info.hdr.magic); - to->count = be16_to_cpu(hdr3->count); - to->stale = be16_to_cpu(hdr3->stale); - - ASSERT(to->magic == XFS_DIR3_LEAF1_MAGIC || - to->magic == XFS_DIR3_LEAFN_MAGIC); -} - -static void -xfs_dir3_leaf_hdr_to_disk( - struct xfs_dir2_leaf *to, - struct xfs_dir3_icleaf_hdr *from) -{ - struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to; - - ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC || - from->magic == XFS_DIR3_LEAFN_MAGIC); - - hdr3->info.hdr.forw = cpu_to_be32(from->forw); - hdr3->info.hdr.back = cpu_to_be32(from->back); - hdr3->info.hdr.magic = cpu_to_be16(from->magic); - hdr3->count = cpu_to_be16(from->count); - hdr3->stale = cpu_to_be16(from->stale); -} - - -/* - * Directory/Attribute Node block operations - */ -static struct xfs_da_node_entry * -xfs_da2_node_tree_p(struct xfs_da_intnode *dap) -{ - return dap->__btree; -} - -static struct xfs_da_node_entry * -xfs_da3_node_tree_p(struct xfs_da_intnode *dap) -{ - return ((struct xfs_da3_intnode *)dap)->__btree; -} - -static void -xfs_da2_node_hdr_from_disk( - struct xfs_da3_icnode_hdr *to, - struct xfs_da_intnode *from) -{ - ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); - to->forw = be32_to_cpu(from->hdr.info.forw); - to->back = be32_to_cpu(from->hdr.info.back); - to->magic = be16_to_cpu(from->hdr.info.magic); - to->count = be16_to_cpu(from->hdr.__count); - to->level = be16_to_cpu(from->hdr.__level); -} - -static void -xfs_da2_node_hdr_to_disk( - struct xfs_da_intnode *to, - struct xfs_da3_icnode_hdr *from) -{ - ASSERT(from->magic == XFS_DA_NODE_MAGIC); - to->hdr.info.forw = cpu_to_be32(from->forw); - to->hdr.info.back = cpu_to_be32(from->back); - to->hdr.info.magic = cpu_to_be16(from->magic); - to->hdr.__count = cpu_to_be16(from->count); - to->hdr.__level = cpu_to_be16(from->level); -} - -static void -xfs_da3_node_hdr_from_disk( - struct xfs_da3_icnode_hdr *to, - struct xfs_da_intnode *from) -{ - struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from; - - ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)); - to->forw = be32_to_cpu(hdr3->info.hdr.forw); - to->back = be32_to_cpu(hdr3->info.hdr.back); - to->magic = be16_to_cpu(hdr3->info.hdr.magic); - to->count = be16_to_cpu(hdr3->__count); - to->level = be16_to_cpu(hdr3->__level); -} - -static void -xfs_da3_node_hdr_to_disk( - struct xfs_da_intnode *to, - struct xfs_da3_icnode_hdr *from) -{ - struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to; - - ASSERT(from->magic == XFS_DA3_NODE_MAGIC); - hdr3->info.hdr.forw = cpu_to_be32(from->forw); - hdr3->info.hdr.back = cpu_to_be32(from->back); - hdr3->info.hdr.magic = cpu_to_be16(from->magic); - hdr3->__count = cpu_to_be16(from->count); - hdr3->__level = cpu_to_be16(from->level); -} - - -/* - * Directory free space block operations - */ -static int -xfs_dir2_free_max_bests(struct xfs_mount *mp) -{ - return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) / - sizeof(xfs_dir2_data_off_t); -} - -static __be16 * -xfs_dir2_free_bests_p(struct xfs_dir2_free *free) -{ - return (__be16 *)((char *)free + sizeof(struct xfs_dir2_free_hdr)); -} - -/* - * Convert data space db to the corresponding free db. - */ -static xfs_dir2_db_t -xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp); -} - -/* - * Convert data space db to the corresponding index in a free db. - */ -static int -xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return db % xfs_dir2_free_max_bests(mp); -} - -static int -xfs_dir3_free_max_bests(struct xfs_mount *mp) -{ - return (mp->m_dirblksize - sizeof(struct xfs_dir3_free_hdr)) / - sizeof(xfs_dir2_data_off_t); -} - -static __be16 * -xfs_dir3_free_bests_p(struct xfs_dir2_free *free) -{ - return (__be16 *)((char *)free + sizeof(struct xfs_dir3_free_hdr)); -} - -/* - * Convert data space db to the corresponding free db. - */ -static xfs_dir2_db_t -xfs_dir3_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir3_free_max_bests(mp); -} - -/* - * Convert data space db to the corresponding index in a free db. - */ -static int -xfs_dir3_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) -{ - return db % xfs_dir3_free_max_bests(mp); -} - -static void -xfs_dir2_free_hdr_from_disk( - struct xfs_dir3_icfree_hdr *to, - struct xfs_dir2_free *from) -{ - to->magic = be32_to_cpu(from->hdr.magic); - to->firstdb = be32_to_cpu(from->hdr.firstdb); - to->nvalid = be32_to_cpu(from->hdr.nvalid); - to->nused = be32_to_cpu(from->hdr.nused); - ASSERT(to->magic == XFS_DIR2_FREE_MAGIC); -} - -static void -xfs_dir2_free_hdr_to_disk( - struct xfs_dir2_free *to, - struct xfs_dir3_icfree_hdr *from) -{ - ASSERT(from->magic == XFS_DIR2_FREE_MAGIC); - - to->hdr.magic = cpu_to_be32(from->magic); - to->hdr.firstdb = cpu_to_be32(from->firstdb); - to->hdr.nvalid = cpu_to_be32(from->nvalid); - to->hdr.nused = cpu_to_be32(from->nused); -} - -static void -xfs_dir3_free_hdr_from_disk( - struct xfs_dir3_icfree_hdr *to, - struct xfs_dir2_free *from) -{ - struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from; - - to->magic = be32_to_cpu(hdr3->hdr.magic); - to->firstdb = be32_to_cpu(hdr3->firstdb); - to->nvalid = be32_to_cpu(hdr3->nvalid); - to->nused = be32_to_cpu(hdr3->nused); - - ASSERT(to->magic == XFS_DIR3_FREE_MAGIC); -} - -static void -xfs_dir3_free_hdr_to_disk( - struct xfs_dir2_free *to, - struct xfs_dir3_icfree_hdr *from) -{ - struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to; - - ASSERT(from->magic == XFS_DIR3_FREE_MAGIC); - - hdr3->hdr.magic = cpu_to_be32(from->magic); - hdr3->firstdb = cpu_to_be32(from->firstdb); - hdr3->nvalid = cpu_to_be32(from->nvalid); - hdr3->nused = cpu_to_be32(from->nused); -} - -static const struct xfs_dir_ops xfs_dir2_ops = { - .sf_entsize = xfs_dir2_sf_entsize, - .sf_nextentry = xfs_dir2_sf_nextentry, - .sf_get_ftype = xfs_dir2_sfe_get_ftype, - .sf_put_ftype = xfs_dir2_sfe_put_ftype, - .sf_get_ino = xfs_dir2_sfe_get_ino, - .sf_put_ino = xfs_dir2_sfe_put_ino, - .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, - .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, - - .data_entsize = xfs_dir2_data_entsize, - .data_get_ftype = xfs_dir2_data_get_ftype, - .data_put_ftype = xfs_dir2_data_put_ftype, - .data_entry_tag_p = xfs_dir2_data_entry_tag_p, - .data_bestfree_p = xfs_dir2_data_bestfree_p, - - .data_dot_offset = sizeof(struct xfs_dir2_data_hdr), - .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR2_DATA_ENTSIZE(1), - .data_first_offset = sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR2_DATA_ENTSIZE(1) + - XFS_DIR2_DATA_ENTSIZE(2), - .data_entry_offset = sizeof(struct xfs_dir2_data_hdr), - - .data_dot_entry_p = xfs_dir2_data_dot_entry_p, - .data_dotdot_entry_p = xfs_dir2_data_dotdot_entry_p, - .data_first_entry_p = xfs_dir2_data_first_entry_p, - .data_entry_p = xfs_dir2_data_entry_p, - .data_unused_p = xfs_dir2_data_unused_p, - - .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr), - .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk, - .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk, - .leaf_max_ents = xfs_dir2_max_leaf_ents, - .leaf_ents_p = xfs_dir2_leaf_ents_p, - - .node_hdr_size = sizeof(struct xfs_da_node_hdr), - .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, - .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, - .node_tree_p = xfs_da2_node_tree_p, - - .free_hdr_size = sizeof(struct xfs_dir2_free_hdr), - .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk, - .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk, - .free_max_bests = xfs_dir2_free_max_bests, - .free_bests_p = xfs_dir2_free_bests_p, - .db_to_fdb = xfs_dir2_db_to_fdb, - .db_to_fdindex = xfs_dir2_db_to_fdindex, -}; - -static const struct xfs_dir_ops xfs_dir2_ftype_ops = { - .sf_entsize = xfs_dir3_sf_entsize, - .sf_nextentry = xfs_dir3_sf_nextentry, - .sf_get_ftype = xfs_dir3_sfe_get_ftype, - .sf_put_ftype = xfs_dir3_sfe_put_ftype, - .sf_get_ino = xfs_dir3_sfe_get_ino, - .sf_put_ino = xfs_dir3_sfe_put_ino, - .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, - .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, - - .data_entsize = xfs_dir3_data_entsize, - .data_get_ftype = xfs_dir3_data_get_ftype, - .data_put_ftype = xfs_dir3_data_put_ftype, - .data_entry_tag_p = xfs_dir3_data_entry_tag_p, - .data_bestfree_p = xfs_dir2_data_bestfree_p, - - .data_dot_offset = sizeof(struct xfs_dir2_data_hdr), - .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1), - .data_first_offset = sizeof(struct xfs_dir2_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1) + - XFS_DIR3_DATA_ENTSIZE(2), - .data_entry_offset = sizeof(struct xfs_dir2_data_hdr), - - .data_dot_entry_p = xfs_dir2_data_dot_entry_p, - .data_dotdot_entry_p = xfs_dir2_ftype_data_dotdot_entry_p, - .data_first_entry_p = xfs_dir2_ftype_data_first_entry_p, - .data_entry_p = xfs_dir2_data_entry_p, - .data_unused_p = xfs_dir2_data_unused_p, - - .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr), - .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk, - .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk, - .leaf_max_ents = xfs_dir2_max_leaf_ents, - .leaf_ents_p = xfs_dir2_leaf_ents_p, - - .node_hdr_size = sizeof(struct xfs_da_node_hdr), - .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, - .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, - .node_tree_p = xfs_da2_node_tree_p, - - .free_hdr_size = sizeof(struct xfs_dir2_free_hdr), - .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk, - .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk, - .free_max_bests = xfs_dir2_free_max_bests, - .free_bests_p = xfs_dir2_free_bests_p, - .db_to_fdb = xfs_dir2_db_to_fdb, - .db_to_fdindex = xfs_dir2_db_to_fdindex, -}; - -static const struct xfs_dir_ops xfs_dir3_ops = { - .sf_entsize = xfs_dir3_sf_entsize, - .sf_nextentry = xfs_dir3_sf_nextentry, - .sf_get_ftype = xfs_dir3_sfe_get_ftype, - .sf_put_ftype = xfs_dir3_sfe_put_ftype, - .sf_get_ino = xfs_dir3_sfe_get_ino, - .sf_put_ino = xfs_dir3_sfe_put_ino, - .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, - .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, - - .data_entsize = xfs_dir3_data_entsize, - .data_get_ftype = xfs_dir3_data_get_ftype, - .data_put_ftype = xfs_dir3_data_put_ftype, - .data_entry_tag_p = xfs_dir3_data_entry_tag_p, - .data_bestfree_p = xfs_dir3_data_bestfree_p, - - .data_dot_offset = sizeof(struct xfs_dir3_data_hdr), - .data_dotdot_offset = sizeof(struct xfs_dir3_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1), - .data_first_offset = sizeof(struct xfs_dir3_data_hdr) + - XFS_DIR3_DATA_ENTSIZE(1) + - XFS_DIR3_DATA_ENTSIZE(2), - .data_entry_offset = sizeof(struct xfs_dir3_data_hdr), - - .data_dot_entry_p = xfs_dir3_data_dot_entry_p, - .data_dotdot_entry_p = xfs_dir3_data_dotdot_entry_p, - .data_first_entry_p = xfs_dir3_data_first_entry_p, - .data_entry_p = xfs_dir3_data_entry_p, - .data_unused_p = xfs_dir3_data_unused_p, - - .leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr), - .leaf_hdr_to_disk = xfs_dir3_leaf_hdr_to_disk, - .leaf_hdr_from_disk = xfs_dir3_leaf_hdr_from_disk, - .leaf_max_ents = xfs_dir3_max_leaf_ents, - .leaf_ents_p = xfs_dir3_leaf_ents_p, - - .node_hdr_size = sizeof(struct xfs_da3_node_hdr), - .node_hdr_to_disk = xfs_da3_node_hdr_to_disk, - .node_hdr_from_disk = xfs_da3_node_hdr_from_disk, - .node_tree_p = xfs_da3_node_tree_p, - - .free_hdr_size = sizeof(struct xfs_dir3_free_hdr), - .free_hdr_to_disk = xfs_dir3_free_hdr_to_disk, - .free_hdr_from_disk = xfs_dir3_free_hdr_from_disk, - .free_max_bests = xfs_dir3_free_max_bests, - .free_bests_p = xfs_dir3_free_bests_p, - .db_to_fdb = xfs_dir3_db_to_fdb, - .db_to_fdindex = xfs_dir3_db_to_fdindex, -}; - -static const struct xfs_dir_ops xfs_dir2_nondir_ops = { - .node_hdr_size = sizeof(struct xfs_da_node_hdr), - .node_hdr_to_disk = xfs_da2_node_hdr_to_disk, - .node_hdr_from_disk = xfs_da2_node_hdr_from_disk, - .node_tree_p = xfs_da2_node_tree_p, -}; - -static const struct xfs_dir_ops xfs_dir3_nondir_ops = { - .node_hdr_size = sizeof(struct xfs_da3_node_hdr), - .node_hdr_to_disk = xfs_da3_node_hdr_to_disk, - .node_hdr_from_disk = xfs_da3_node_hdr_from_disk, - .node_tree_p = xfs_da3_node_tree_p, -}; - -/* - * Return the ops structure according to the current config. If we are passed - * an inode, then that overrides the default config we use which is based on - * feature bits. - */ -const struct xfs_dir_ops * -xfs_dir_get_ops( - struct xfs_mount *mp, - struct xfs_inode *dp) -{ - if (dp) - return dp->d_ops; - if (mp->m_dir_inode_ops) - return mp->m_dir_inode_ops; - if (xfs_sb_version_hascrc(&mp->m_sb)) - return &xfs_dir3_ops; - if (xfs_sb_version_hasftype(&mp->m_sb)) - return &xfs_dir2_ftype_ops; - return &xfs_dir2_ops; -} - -const struct xfs_dir_ops * -xfs_nondir_get_ops( - struct xfs_mount *mp, - struct xfs_inode *dp) -{ - if (dp) - return dp->d_ops; - if (mp->m_nondir_inode_ops) - return mp->m_nondir_inode_ops; - if (xfs_sb_version_hascrc(&mp->m_sb)) - return &xfs_dir3_nondir_ops; - return &xfs_dir2_nondir_ops; -} diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index ce16ef0..edf203a 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -17,24 +17,25 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" #include "xfs_inum.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" +#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_dinode.h" struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; @@ -95,17 +96,13 @@ xfs_dir_mount( ASSERT(xfs_sb_version_hasdirv2(&mp->m_sb)); ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <= XFS_MAX_BLOCKSIZE); - - mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL); - mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL); - mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog); mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog; mp->m_dirdatablk = xfs_dir2_db_to_da(mp, XFS_DIR2_DATA_FIRSTDB(mp)); mp->m_dirleafblk = xfs_dir2_db_to_da(mp, XFS_DIR2_LEAF_FIRSTDB(mp)); mp->m_dirfreeblk = xfs_dir2_db_to_da(mp, XFS_DIR2_FREE_FIRSTDB(mp)); - nodehdr_size = mp->m_dir_inode_ops->node_hdr_size; + nodehdr_size = __xfs_da3_node_hdr_size(xfs_sb_version_hascrc(&mp->m_sb)); mp->m_attr_node_ents = (mp->m_sb.sb_blocksize - nodehdr_size) / (uint)sizeof(xfs_da_node_entry_t); mp->m_dir_node_ents = (mp->m_dirblksize - nodehdr_size) / @@ -116,7 +113,6 @@ xfs_dir_mount( mp->m_dirnameops = &xfs_ascii_ci_nameops; else mp->m_dirnameops = &xfs_default_nameops; - } /* diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h index cec70e0..9910401 100644 --- a/fs/xfs/xfs_dir2.h +++ b/fs/xfs/xfs_dir2.h @@ -32,83 +32,6 @@ struct xfs_dir2_data_unused; extern struct xfs_name xfs_name_dotdot; /* - * directory operations vector for encode/decode routines - */ -struct xfs_dir_ops { - int (*sf_entsize)(struct xfs_dir2_sf_hdr *hdr, int len); - struct xfs_dir2_sf_entry * - (*sf_nextentry)(struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep); - __uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep); - void (*sf_put_ftype)(struct xfs_dir2_sf_entry *sfep, - __uint8_t ftype); - xfs_ino_t (*sf_get_ino)(struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep); - void (*sf_put_ino)(struct xfs_dir2_sf_hdr *hdr, - struct xfs_dir2_sf_entry *sfep, - xfs_ino_t ino); - xfs_ino_t (*sf_get_parent_ino)(struct xfs_dir2_sf_hdr *hdr); - void (*sf_put_parent_ino)(struct xfs_dir2_sf_hdr *hdr, - xfs_ino_t ino); - - int (*data_entsize)(int len); - __uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep); - void (*data_put_ftype)(struct xfs_dir2_data_entry *dep, - __uint8_t ftype); - __be16 * (*data_entry_tag_p)(struct xfs_dir2_data_entry *dep); - struct xfs_dir2_data_free * - (*data_bestfree_p)(struct xfs_dir2_data_hdr *hdr); - - xfs_dir2_data_aoff_t data_dot_offset; - xfs_dir2_data_aoff_t data_dotdot_offset; - xfs_dir2_data_aoff_t data_first_offset; - size_t data_entry_offset; - - struct xfs_dir2_data_entry * - (*data_dot_entry_p)(struct xfs_dir2_data_hdr *hdr); - struct xfs_dir2_data_entry * - (*data_dotdot_entry_p)(struct xfs_dir2_data_hdr *hdr); - struct xfs_dir2_data_entry * - (*data_first_entry_p)(struct xfs_dir2_data_hdr *hdr); - struct xfs_dir2_data_entry * - (*data_entry_p)(struct xfs_dir2_data_hdr *hdr); - struct xfs_dir2_data_unused * - (*data_unused_p)(struct xfs_dir2_data_hdr *hdr); - - int leaf_hdr_size; - void (*leaf_hdr_to_disk)(struct xfs_dir2_leaf *to, - struct xfs_dir3_icleaf_hdr *from); - void (*leaf_hdr_from_disk)(struct xfs_dir3_icleaf_hdr *to, - struct xfs_dir2_leaf *from); - int (*leaf_max_ents)(struct xfs_mount *mp); - struct xfs_dir2_leaf_entry * - (*leaf_ents_p)(struct xfs_dir2_leaf *lp); - - int node_hdr_size; - void (*node_hdr_to_disk)(struct xfs_da_intnode *to, - struct xfs_da3_icnode_hdr *from); - void (*node_hdr_from_disk)(struct xfs_da3_icnode_hdr *to, - struct xfs_da_intnode *from); - struct xfs_da_node_entry * - (*node_tree_p)(struct xfs_da_intnode *dap); - - int free_hdr_size; - void (*free_hdr_to_disk)(struct xfs_dir2_free *to, - struct xfs_dir3_icfree_hdr *from); - void (*free_hdr_from_disk)(struct xfs_dir3_icfree_hdr *to, - struct xfs_dir2_free *from); - int (*free_max_bests)(struct xfs_mount *mp); - __be16 * (*free_bests_p)(struct xfs_dir2_free *free); - xfs_dir2_db_t (*db_to_fdb)(struct xfs_mount *mp, xfs_dir2_db_t db); - int (*db_to_fdindex)(struct xfs_mount *mp, xfs_dir2_db_t db); -}; - -extern const struct xfs_dir_ops * - xfs_dir_get_ops(struct xfs_mount *mp, struct xfs_inode *dp); -extern const struct xfs_dir_ops * - xfs_nondir_get_ops(struct xfs_mount *mp, struct xfs_inode *dp); - -/* * Generic directory interface routines */ extern void xfs_dir_startup(void); @@ -142,30 +65,37 @@ extern int xfs_dir2_sf_to_block(struct xfs_da_args *args); /* * Interface routines used by userspace utilities */ +extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp); +extern void xfs_dir2_sf_put_parent_ino(struct xfs_dir2_sf_hdr *sfp, + xfs_ino_t ino); +extern xfs_ino_t xfs_dir3_sfe_get_ino(struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *sfp, struct xfs_dir2_sf_entry *sfep); +extern void xfs_dir3_sfe_put_ino(struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *hdr, struct xfs_dir2_sf_entry *sfep, + xfs_ino_t ino); + extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r); extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r); extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, struct xfs_buf *bp); -extern void xfs_dir2_data_freescan(struct xfs_inode *dp, +extern void xfs_dir2_data_freescan(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr, int *loghead); -extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_inode *dp, - struct xfs_buf *bp, struct xfs_dir2_data_entry *dep); -extern void xfs_dir2_data_log_header(struct xfs_trans *tp, struct xfs_inode *dp, +extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp, + struct xfs_dir2_data_entry *dep); +extern void xfs_dir2_data_log_header(struct xfs_trans *tp, struct xfs_buf *bp); extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_dir2_data_unused *dup); -extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_inode *dp, - struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, - xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); -extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_inode *dp, - struct xfs_buf *bp, struct xfs_dir2_data_unused *dup, +extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); +extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp, + struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset, + xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); extern struct xfs_dir2_data_free *xfs_dir2_data_freefind( - struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf, - struct xfs_dir2_data_unused *dup); + struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_unused *dup); extern const struct xfs_buf_ops xfs_dir3_block_buf_ops; extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops; diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 90cdbf4..12dad18 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -18,25 +18,25 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_buf_item.h" +#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_cksum.h" -#include "xfs_dinode.h" /* * Local function prototypes. @@ -168,7 +168,6 @@ xfs_dir3_block_init( static void xfs_dir2_block_need_space( - struct xfs_inode *dp, struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_block_tail *btp, struct xfs_dir2_leaf_entry *blp, @@ -184,7 +183,7 @@ xfs_dir2_block_need_space( struct xfs_dir2_data_unused *enddup = NULL; *compact = 0; - bf = dp->d_ops->data_bestfree_p(hdr); + bf = xfs_dir3_data_bestfree_p(hdr); /* * If there are stale entries we'll use one for the leaf. @@ -281,7 +280,6 @@ out: static void xfs_dir2_block_compact( struct xfs_trans *tp, - struct xfs_inode *dp, struct xfs_buf *bp, struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_block_tail *btp, @@ -314,7 +312,7 @@ xfs_dir2_block_compact( *lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1); *lfloghigh -= be32_to_cpu(btp->stale) - 1; be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); - xfs_dir2_data_make_free(tp, dp, bp, + xfs_dir2_data_make_free(tp, bp, (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), needlog, &needscan); @@ -325,7 +323,7 @@ xfs_dir2_block_compact( * This needs to happen before the next call to use_free. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, needlog); + xfs_dir2_data_freescan(tp->t_mountp, hdr, needlog); } /* @@ -371,7 +369,7 @@ xfs_dir2_block_addname( if (error) return error; - len = dp->d_ops->data_entsize(args->namelen); + len = xfs_dir3_data_entsize(mp, args->namelen); /* * Set up pointers to parts of the block. @@ -384,7 +382,7 @@ xfs_dir2_block_addname( * Find out if we can reuse stale entries or whether we need extra * space for entry and new leaf. */ - xfs_dir2_block_need_space(dp, hdr, btp, blp, &tagp, &dup, + xfs_dir2_block_need_space(hdr, btp, blp, &tagp, &dup, &enddup, &compact, len); /* @@ -420,7 +418,7 @@ xfs_dir2_block_addname( * If need to compact the leaf entries, do it now. */ if (compact) { - xfs_dir2_block_compact(tp, dp, bp, hdr, btp, blp, &needlog, + xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog, &lfloghigh, &lfloglow); /* recalculate blp post-compaction */ blp = xfs_dir2_block_leaf_p(btp); @@ -455,7 +453,7 @@ xfs_dir2_block_addname( /* * Mark the space needed for the new leaf entry, now in use. */ - xfs_dir2_data_use_free(tp, dp, bp, enddup, + xfs_dir2_data_use_free(tp, bp, enddup, (xfs_dir2_data_aoff_t) ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) - sizeof(*blp)), @@ -470,7 +468,7 @@ xfs_dir2_block_addname( * This needs to happen before the next call to use_free. */ if (needscan) { - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); needscan = 0; } /* @@ -542,7 +540,7 @@ xfs_dir2_block_addname( /* * Mark space for the data entry used. */ - xfs_dir2_data_use_free(tp, dp, bp, dup, + xfs_dir2_data_use_free(tp, bp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), (xfs_dir2_data_aoff_t)len, &needlog, &needscan); /* @@ -551,18 +549,18 @@ xfs_dir2_block_addname( dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; memcpy(dep->name, args->name, args->namelen); - dp->d_ops->data_put_ftype(dep, args->filetype); - tagp = dp->d_ops->data_entry_tag_p(dep); + xfs_dir3_dirent_put_ftype(mp, dep, args->filetype); + tagp = xfs_dir3_data_entry_tag_p(mp, dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); /* * Clean up the bestfree array and log the header, tail, and entry. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dp, bp); + xfs_dir2_data_log_header(tp, bp); xfs_dir2_block_log_tail(tp, bp); - xfs_dir2_data_log_entry(tp, dp, bp, dep); + xfs_dir2_data_log_entry(tp, bp, dep); xfs_dir3_data_check(dp, bp); return 0; } @@ -644,7 +642,7 @@ xfs_dir2_block_lookup( * Fill in inode number, CI name if appropriate, release the block. */ args->inumber = be64_to_cpu(dep->inumber); - args->filetype = dp->d_ops->data_get_ftype(dep); + args->filetype = xfs_dir3_dirent_get_ftype(mp, dep); error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); xfs_trans_brelse(args->trans, bp); return XFS_ERROR(error); @@ -801,9 +799,9 @@ xfs_dir2_block_removename( * Mark the data entry's space free. */ needlog = needscan = 0; - xfs_dir2_data_make_free(tp, dp, bp, + xfs_dir2_data_make_free(tp, bp, (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), - dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); + xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan); /* * Fix up the block tail. */ @@ -818,9 +816,9 @@ xfs_dir2_block_removename( * Fix up bestfree, log the header if necessary. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dp, bp); + xfs_dir2_data_log_header(tp, bp); xfs_dir3_data_check(dp, bp); /* * See if the size as a shortform is good enough. @@ -877,8 +875,8 @@ xfs_dir2_block_replace( * Change the inode number to the new value. */ dep->inumber = cpu_to_be64(args->inumber); - dp->d_ops->data_put_ftype(dep, args->filetype); - xfs_dir2_data_log_entry(args->trans, dp, bp, dep); + xfs_dir3_dirent_put_ftype(mp, dep, args->filetype); + xfs_dir2_data_log_entry(args->trans, bp, dep); xfs_dir3_data_check(dp, bp); return 0; } @@ -936,8 +934,8 @@ xfs_dir2_leaf_to_block( tp = args->trans; mp = dp->i_mount; leaf = lbp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + ents = xfs_dir3_leaf_ents_p(leaf); ltp = xfs_dir2_leaf_tail_p(mp, leaf); ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC || @@ -951,7 +949,7 @@ xfs_dir2_leaf_to_block( while (dp->i_d.di_size > mp->m_dirblksize) { int hdrsz; - hdrsz = dp->d_ops->data_entry_offset; + hdrsz = xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&mp->m_sb)); bestsp = xfs_dir2_leaf_bests_p(ltp); if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == mp->m_dirblksize - hdrsz) { @@ -1001,7 +999,7 @@ xfs_dir2_leaf_to_block( /* * Use up the space at the end of the block (blp/btp). */ - xfs_dir2_data_use_free(tp, dp, dbp, dup, mp->m_dirblksize - size, size, + xfs_dir2_data_use_free(tp, dbp, dup, mp->m_dirblksize - size, size, &needlog, &needscan); /* * Initialize the block tail. @@ -1025,9 +1023,9 @@ xfs_dir2_leaf_to_block( * Scan the bestfree if we need it and log the data block header. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); + xfs_dir2_data_log_header(tp, dbp); /* * Pitch the old leaf block. */ @@ -1138,9 +1136,9 @@ xfs_dir2_sf_to_block( * The whole thing is initialized to free by the init routine. * Say we're using the leaf and tail area. */ - dup = dp->d_ops->data_unused_p(hdr); + dup = xfs_dir3_data_unused_p(hdr); needlog = needscan = 0; - xfs_dir2_data_use_free(tp, dp, bp, dup, mp->m_dirblksize - i, i, &needlog, + xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog, &needscan); ASSERT(needscan == 0); /* @@ -1154,38 +1152,38 @@ xfs_dir2_sf_to_block( /* * Remove the freespace, we'll manage it. */ - xfs_dir2_data_use_free(tp, dp, bp, dup, + xfs_dir2_data_use_free(tp, bp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), be16_to_cpu(dup->length), &needlog, &needscan); /* * Create entry for . */ - dep = dp->d_ops->data_dot_entry_p(hdr); + dep = xfs_dir3_data_dot_entry_p(mp, hdr); dep->inumber = cpu_to_be64(dp->i_ino); dep->namelen = 1; dep->name[0] = '.'; - dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); - tagp = dp->d_ops->data_entry_tag_p(dep); + xfs_dir3_dirent_put_ftype(mp, dep, XFS_DIR3_FT_DIR); + tagp = xfs_dir3_data_entry_tag_p(mp, dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(tp, dp, bp, dep); + xfs_dir2_data_log_entry(tp, bp, dep); blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, (char *)dep - (char *)hdr)); /* * Create entry for .. */ - dep = dp->d_ops->data_dotdot_entry_p(hdr); - dep->inumber = cpu_to_be64(dp->d_ops->sf_get_parent_ino(sfp)); + dep = xfs_dir3_data_dotdot_entry_p(mp, hdr); + dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp)); dep->namelen = 2; dep->name[0] = dep->name[1] = '.'; - dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); - tagp = dp->d_ops->data_entry_tag_p(dep); + xfs_dir3_dirent_put_ftype(mp, dep, XFS_DIR3_FT_DIR); + tagp = xfs_dir3_data_entry_tag_p(mp, dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(tp, dp, bp, dep); + xfs_dir2_data_log_entry(tp, bp, dep); blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, (char *)dep - (char *)hdr)); - offset = dp->d_ops->data_first_offset; + offset = xfs_dir3_data_first_offset(mp); /* * Loop over existing entries, stuff them in. */ @@ -1216,9 +1214,7 @@ xfs_dir2_sf_to_block( *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16( ((char *)dup - (char *)hdr)); xfs_dir2_data_log_unused(tp, bp, dup); - xfs_dir2_data_freeinsert(hdr, - dp->d_ops->data_bestfree_p(hdr), - dup, &dummy); + xfs_dir2_data_freeinsert(hdr, dup, &dummy); offset += be16_to_cpu(dup->length); continue; } @@ -1226,13 +1222,14 @@ xfs_dir2_sf_to_block( * Copy a real entry. */ dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset); - dep->inumber = cpu_to_be64(dp->d_ops->sf_get_ino(sfp, sfep)); + dep->inumber = cpu_to_be64(xfs_dir3_sfe_get_ino(mp, sfp, sfep)); dep->namelen = sfep->namelen; - dp->d_ops->data_put_ftype(dep, dp->d_ops->sf_get_ftype(sfep)); + xfs_dir3_dirent_put_ftype(mp, dep, + xfs_dir3_sfe_get_ftype(mp, sfp, sfep)); memcpy(dep->name, sfep->name, dep->namelen); - tagp = dp->d_ops->data_entry_tag_p(dep); + tagp = xfs_dir3_data_entry_tag_p(mp, dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(tp, dp, bp, dep); + xfs_dir2_data_log_entry(tp, bp, dep); name.name = sfep->name; name.len = sfep->namelen; blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops-> @@ -1243,7 +1240,7 @@ xfs_dir2_sf_to_block( if (++i == sfp->count) sfep = NULL; else - sfep = dp->d_ops->sf_nextentry(sfp, sfep); + sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); } /* Done with the temporary buffer */ kmem_free(sfp); diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index 70acff4..47e1326 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -18,19 +18,20 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" -#include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" @@ -62,18 +63,11 @@ __xfs_dir3_data_check( char *p; /* current data position */ int stale; /* count of stale leaves */ struct xfs_name name; - const struct xfs_dir_ops *ops; mp = bp->b_target->bt_mount; - - /* - * We can be passed a null dp here from a verifier, so we need to go the - * hard way to get them. - */ - ops = xfs_dir_get_ops(mp, dp); - hdr = bp->b_addr; - p = (char *)ops->data_entry_p(hdr); + bf = xfs_dir3_data_bestfree_p(hdr); + p = (char *)xfs_dir3_data_entry_p(hdr); switch (hdr->magic) { case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): @@ -81,16 +75,6 @@ __xfs_dir3_data_check( btp = xfs_dir2_block_tail_p(mp, hdr); lep = xfs_dir2_block_leaf_p(btp); endp = (char *)lep; - - /* - * The number of leaf entries is limited by the size of the - * block and the amount of space used by the data entries. - * We don't know how much space is used by the data entries yet, - * so just ensure that the count falls somewhere inside the - * block right now. - */ - XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) < - ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry)); break; case cpu_to_be32(XFS_DIR3_DATA_MAGIC): case cpu_to_be32(XFS_DIR2_DATA_MAGIC): @@ -101,11 +85,10 @@ __xfs_dir3_data_check( return EFSCORRUPTED; } + count = lastfree = freeseen = 0; /* * Account for zero bestfree entries. */ - bf = ops->data_bestfree_p(hdr); - count = lastfree = freeseen = 0; if (!bf[0].length) { XFS_WANT_CORRUPTED_RETURN(!bf[0].offset); freeseen |= 1 << 0; @@ -138,7 +121,7 @@ __xfs_dir3_data_check( XFS_WANT_CORRUPTED_RETURN( be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == (char *)dup - (char *)hdr); - dfp = xfs_dir2_data_freefind(hdr, bf, dup); + dfp = xfs_dir2_data_freefind(hdr, dup); if (dfp) { i = (int)(dfp - bf); XFS_WANT_CORRUPTED_RETURN( @@ -164,10 +147,10 @@ __xfs_dir3_data_check( XFS_WANT_CORRUPTED_RETURN( !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); XFS_WANT_CORRUPTED_RETURN( - be16_to_cpu(*ops->data_entry_tag_p(dep)) == + be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep)) == (char *)dep - (char *)hdr); XFS_WANT_CORRUPTED_RETURN( - ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX); + xfs_dir3_dirent_get_ftype(mp, dep) < XFS_DIR3_FT_MAX); count++; lastfree = 0; if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || @@ -185,7 +168,7 @@ __xfs_dir3_data_check( } XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count)); } - p += ops->data_entsize(dep->namelen); + p += xfs_dir3_data_entsize(mp, dep->namelen); } /* * Need to have seen all the entries and all the bestfree slots. @@ -344,18 +327,19 @@ xfs_dir3_data_readahead( */ xfs_dir2_data_free_t * xfs_dir2_data_freefind( - struct xfs_dir2_data_hdr *hdr, /* data block header */ - struct xfs_dir2_data_free *bf, /* bestfree table pointer */ - struct xfs_dir2_data_unused *dup) /* unused space */ + xfs_dir2_data_hdr_t *hdr, /* data block */ + xfs_dir2_data_unused_t *dup) /* data unused entry */ { xfs_dir2_data_free_t *dfp; /* bestfree entry */ xfs_dir2_data_aoff_t off; /* offset value needed */ + struct xfs_dir2_data_free *bf; #ifdef DEBUG int matched; /* matched the value */ int seenzero; /* saw a 0 bestfree entry */ #endif off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); + bf = xfs_dir3_data_bestfree_p(hdr); #ifdef DEBUG /* @@ -415,11 +399,11 @@ xfs_dir2_data_freefind( */ xfs_dir2_data_free_t * /* entry inserted */ xfs_dir2_data_freeinsert( - struct xfs_dir2_data_hdr *hdr, /* data block pointer */ - struct xfs_dir2_data_free *dfp, /* bestfree table pointer */ - struct xfs_dir2_data_unused *dup, /* unused space */ + xfs_dir2_data_hdr_t *hdr, /* data block pointer */ + xfs_dir2_data_unused_t *dup, /* unused space */ int *loghead) /* log the data header (out) */ { + xfs_dir2_data_free_t *dfp; /* bestfree table pointer */ xfs_dir2_data_free_t new; /* new bestfree entry */ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || @@ -427,6 +411,7 @@ xfs_dir2_data_freeinsert( hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); + dfp = xfs_dir3_data_bestfree_p(hdr); new.length = dup->length; new.offset = cpu_to_be16((char *)dup - (char *)hdr); @@ -459,11 +444,11 @@ xfs_dir2_data_freeinsert( */ STATIC void xfs_dir2_data_freeremove( - struct xfs_dir2_data_hdr *hdr, /* data block header */ - struct xfs_dir2_data_free *bf, /* bestfree table pointer */ - struct xfs_dir2_data_free *dfp, /* bestfree entry pointer */ + xfs_dir2_data_hdr_t *hdr, /* data block header */ + xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */ int *loghead) /* out: log data header */ { + struct xfs_dir2_data_free *bf; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || @@ -473,6 +458,7 @@ xfs_dir2_data_freeremove( /* * It's the first entry, slide the next 2 up. */ + bf = xfs_dir3_data_bestfree_p(hdr); if (dfp == &bf[0]) { bf[0] = bf[1]; bf[1] = bf[2]; @@ -500,9 +486,9 @@ xfs_dir2_data_freeremove( */ void xfs_dir2_data_freescan( - struct xfs_inode *dp, - struct xfs_dir2_data_hdr *hdr, - int *loghead) + xfs_mount_t *mp, /* filesystem mount point */ + xfs_dir2_data_hdr_t *hdr, /* data block header */ + int *loghead) /* out: log data header */ { xfs_dir2_block_tail_t *btp; /* block tail */ xfs_dir2_data_entry_t *dep; /* active data entry */ @@ -519,19 +505,19 @@ xfs_dir2_data_freescan( /* * Start by clearing the table. */ - bf = dp->d_ops->data_bestfree_p(hdr); + bf = xfs_dir3_data_bestfree_p(hdr); memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT); *loghead = 1; /* * Set up pointers. */ - p = (char *)dp->d_ops->data_entry_p(hdr); + p = (char *)xfs_dir3_data_entry_p(hdr); if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { - btp = xfs_dir2_block_tail_p(dp->i_mount, hdr); + btp = xfs_dir2_block_tail_p(mp, hdr); endp = (char *)xfs_dir2_block_leaf_p(btp); } else - endp = (char *)hdr + dp->i_mount->m_dirblksize; + endp = (char *)hdr + mp->m_dirblksize; /* * Loop over the block's entries. */ @@ -543,7 +529,7 @@ xfs_dir2_data_freescan( if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); - xfs_dir2_data_freeinsert(hdr, bf, dup, loghead); + xfs_dir2_data_freeinsert(hdr, dup, loghead); p += be16_to_cpu(dup->length); } /* @@ -552,8 +538,8 @@ xfs_dir2_data_freescan( else { dep = (xfs_dir2_data_entry_t *)p; ASSERT((char *)dep - (char *)hdr == - be16_to_cpu(*dp->d_ops->data_entry_tag_p(dep))); - p += dp->d_ops->data_entsize(dep->namelen); + be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep))); + p += xfs_dir3_data_entsize(mp, dep->namelen); } } } @@ -608,8 +594,8 @@ xfs_dir3_data_init( } else hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); - bf = dp->d_ops->data_bestfree_p(hdr); - bf[0].offset = cpu_to_be16(dp->d_ops->data_entry_offset); + bf = xfs_dir3_data_bestfree_p(hdr); + bf[0].offset = cpu_to_be16(xfs_dir3_data_entry_offset(hdr)); for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { bf[i].length = 0; bf[i].offset = 0; @@ -618,17 +604,17 @@ xfs_dir3_data_init( /* * Set up an unused entry for the block's body. */ - dup = dp->d_ops->data_unused_p(hdr); + dup = xfs_dir3_data_unused_p(hdr); dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); - t = mp->m_dirblksize - (uint)dp->d_ops->data_entry_offset; + t = mp->m_dirblksize - (uint)xfs_dir3_data_entry_offset(hdr); bf[0].length = cpu_to_be16(t); dup->length = cpu_to_be16(t); *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr); /* * Log it and return it. */ - xfs_dir2_data_log_header(tp, dp, bp); + xfs_dir2_data_log_header(tp, bp); xfs_dir2_data_log_unused(tp, bp, dup); *bpp = bp; return 0; @@ -640,11 +626,11 @@ xfs_dir3_data_init( void xfs_dir2_data_log_entry( struct xfs_trans *tp, - struct xfs_inode *dp, struct xfs_buf *bp, xfs_dir2_data_entry_t *dep) /* data entry pointer */ { struct xfs_dir2_data_hdr *hdr = bp->b_addr; + struct xfs_mount *mp = tp->t_mountp; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || @@ -652,7 +638,7 @@ xfs_dir2_data_log_entry( hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr), - (uint)((char *)(dp->d_ops->data_entry_tag_p(dep) + 1) - + (uint)((char *)(xfs_dir3_data_entry_tag_p(mp, dep) + 1) - (char *)hdr - 1)); } @@ -662,19 +648,16 @@ xfs_dir2_data_log_entry( void xfs_dir2_data_log_header( struct xfs_trans *tp, - struct xfs_inode *dp, struct xfs_buf *bp) { -#ifdef DEBUG - struct xfs_dir2_data_hdr *hdr = bp->b_addr; + xfs_dir2_data_hdr_t *hdr = bp->b_addr; ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); -#endif - xfs_trans_log_buf(tp, bp, 0, dp->d_ops->data_entry_offset - 1); + xfs_trans_log_buf(tp, bp, 0, xfs_dir3_data_entry_offset(hdr) - 1); } /* @@ -715,7 +698,6 @@ xfs_dir2_data_log_unused( void xfs_dir2_data_make_free( struct xfs_trans *tp, - struct xfs_inode *dp, struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, /* starting byte offset */ xfs_dir2_data_aoff_t len, /* length in bytes */ @@ -753,7 +735,7 @@ xfs_dir2_data_make_free( * If this isn't the start of the block, then back up to * the previous entry and see if it's free. */ - if (offset > dp->d_ops->data_entry_offset) { + if (offset > xfs_dir3_data_entry_offset(hdr)) { __be16 *tagp; /* tag just before us */ tagp = (__be16 *)((char *)hdr + offset) - 1; @@ -779,15 +761,15 @@ xfs_dir2_data_make_free( * Previous and following entries are both free, * merge everything into a single free entry. */ - bf = dp->d_ops->data_bestfree_p(hdr); + bf = xfs_dir3_data_bestfree_p(hdr); if (prevdup && postdup) { xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */ /* * See if prevdup and/or postdup are in bestfree table. */ - dfp = xfs_dir2_data_freefind(hdr, bf, prevdup); - dfp2 = xfs_dir2_data_freefind(hdr, bf, postdup); + dfp = xfs_dir2_data_freefind(hdr, prevdup); + dfp2 = xfs_dir2_data_freefind(hdr, postdup); /* * We need a rescan unless there are exactly 2 free entries * namely our two. Then we know what's happening, otherwise @@ -815,13 +797,12 @@ xfs_dir2_data_make_free( ASSERT(dfp2 == dfp); dfp2 = &bf[1]; } - xfs_dir2_data_freeremove(hdr, bf, dfp2, needlogp); - xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); + xfs_dir2_data_freeremove(hdr, dfp2, needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); /* * Now insert the new entry. */ - dfp = xfs_dir2_data_freeinsert(hdr, bf, prevdup, - needlogp); + dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp); ASSERT(dfp == &bf[0]); ASSERT(dfp->length == prevdup->length); ASSERT(!dfp[1].length); @@ -832,7 +813,7 @@ xfs_dir2_data_make_free( * The entry before us is free, merge with it. */ else if (prevdup) { - dfp = xfs_dir2_data_freefind(hdr, bf, prevdup); + dfp = xfs_dir2_data_freefind(hdr, prevdup); be16_add_cpu(&prevdup->length, len); *xfs_dir2_data_unused_tag_p(prevdup) = cpu_to_be16((char *)prevdup - (char *)hdr); @@ -843,8 +824,8 @@ xfs_dir2_data_make_free( * the old one and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); - xfs_dir2_data_freeinsert(hdr, bf, prevdup, needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); + xfs_dir2_data_freeinsert(hdr, prevdup, needlogp); } /* * Otherwise we need a scan if the new entry is big enough. @@ -858,7 +839,7 @@ xfs_dir2_data_make_free( * The following entry is free, merge with it. */ else if (postdup) { - dfp = xfs_dir2_data_freefind(hdr, bf, postdup); + dfp = xfs_dir2_data_freefind(hdr, postdup); newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset); newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); @@ -871,8 +852,8 @@ xfs_dir2_data_make_free( * the old one and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); - xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); + xfs_dir2_data_freeinsert(hdr, newdup, needlogp); } /* * Otherwise we need a scan if the new entry is big enough. @@ -892,7 +873,7 @@ xfs_dir2_data_make_free( *xfs_dir2_data_unused_tag_p(newdup) = cpu_to_be16((char *)newdup - (char *)hdr); xfs_dir2_data_log_unused(tp, bp, newdup); - xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp); + xfs_dir2_data_freeinsert(hdr, newdup, needlogp); } *needscanp = needscan; } @@ -903,7 +884,6 @@ xfs_dir2_data_make_free( void xfs_dir2_data_use_free( struct xfs_trans *tp, - struct xfs_inode *dp, struct xfs_buf *bp, xfs_dir2_data_unused_t *dup, /* unused entry */ xfs_dir2_data_aoff_t offset, /* starting offset to use */ @@ -933,9 +913,9 @@ xfs_dir2_data_use_free( /* * Look up the entry in the bestfree table. */ + dfp = xfs_dir2_data_freefind(hdr, dup); oldlen = be16_to_cpu(dup->length); - bf = dp->d_ops->data_bestfree_p(hdr); - dfp = xfs_dir2_data_freefind(hdr, bf, dup); + bf = xfs_dir3_data_bestfree_p(hdr); ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length)); /* * Check for alignment with front and back of the entry. @@ -952,8 +932,7 @@ xfs_dir2_data_use_free( if (dfp) { needscan = (bf[2].offset != 0); if (!needscan) - xfs_dir2_data_freeremove(hdr, bf, dfp, - needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); } } /* @@ -971,9 +950,8 @@ xfs_dir2_data_use_free( * If it was in the table, remove it and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); - dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, - needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); + dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp); ASSERT(dfp != NULL); ASSERT(dfp->length == newdup->length); ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); @@ -999,9 +977,8 @@ xfs_dir2_data_use_free( * If it was in the table, remove it and add the new one. */ if (dfp) { - xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); - dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, - needlogp); + xfs_dir2_data_freeremove(hdr, dfp, needlogp); + dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp); ASSERT(dfp != NULL); ASSERT(dfp->length == newdup->length); ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); @@ -1040,11 +1017,9 @@ xfs_dir2_data_use_free( if (dfp) { needscan = (bf[2].length != 0); if (!needscan) { - xfs_dir2_data_freeremove(hdr, bf, dfp, - needlogp); - xfs_dir2_data_freeinsert(hdr, bf, newdup, - needlogp); - xfs_dir2_data_freeinsert(hdr, bf, newdup2, + xfs_dir2_data_freeremove(hdr, dfp, needlogp); + xfs_dir2_data_freeinsert(hdr, newdup, needlogp); + xfs_dir2_data_freeinsert(hdr, newdup2, needlogp); } } diff --git a/fs/xfs/xfs_da_format.h b/fs/xfs/xfs_dir2_format.h index a19d3f8..9cf6738 100644 --- a/fs/xfs/xfs_da_format.h +++ b/fs/xfs/xfs_dir2_format.h @@ -16,113 +16,8 @@ * along with this program; if not, write the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef __XFS_DA_FORMAT_H__ -#define __XFS_DA_FORMAT_H__ - -/*======================================================================== - * Directory Structure when greater than XFS_LBSIZE(mp) bytes. - *========================================================================*/ - -/* - * This structure is common to both leaf nodes and non-leaf nodes in the Btree. - * - * It is used to manage a doubly linked list of all blocks at the same - * level in the Btree, and to identify which type of block this is. - */ -#define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */ -#define XFS_ATTR_LEAF_MAGIC 0xfbee /* magic number: attribute leaf blks */ -#define XFS_DIR2_LEAF1_MAGIC 0xd2f1 /* magic number: v2 dirlf single blks */ -#define XFS_DIR2_LEAFN_MAGIC 0xd2ff /* magic number: v2 dirlf multi blks */ - -typedef struct xfs_da_blkinfo { - __be32 forw; /* previous block in list */ - __be32 back; /* following block in list */ - __be16 magic; /* validity check on block */ - __be16 pad; /* unused */ -} xfs_da_blkinfo_t; - -/* - * CRC enabled directory structure types - * - * The headers change size for the additional verification information, but - * otherwise the tree layouts and contents are unchanged. Hence the da btree - * code can use the struct xfs_da_blkinfo for manipulating the tree links and - * magic numbers without modification for both v2 and v3 nodes. - */ -#define XFS_DA3_NODE_MAGIC 0x3ebe /* magic number: non-leaf blocks */ -#define XFS_ATTR3_LEAF_MAGIC 0x3bee /* magic number: attribute leaf blks */ -#define XFS_DIR3_LEAF1_MAGIC 0x3df1 /* magic number: v2 dirlf single blks */ -#define XFS_DIR3_LEAFN_MAGIC 0x3dff /* magic number: v2 dirlf multi blks */ - -struct xfs_da3_blkinfo { - /* - * the node link manipulation code relies on the fact that the first - * element of this structure is the struct xfs_da_blkinfo so it can - * ignore the differences in the rest of the structures. - */ - struct xfs_da_blkinfo hdr; - __be32 crc; /* CRC of block */ - __be64 blkno; /* first block of the buffer */ - __be64 lsn; /* sequence number of last write */ - uuid_t uuid; /* filesystem we belong to */ - __be64 owner; /* inode that owns the block */ -}; - -/* - * This is the structure of the root and intermediate nodes in the Btree. - * The leaf nodes are defined above. - * - * Entries are not packed. - * - * Since we have duplicate keys, use a binary search but always follow - * all match in the block, not just the first match found. - */ -#define XFS_DA_NODE_MAXDEPTH 5 /* max depth of Btree */ - -typedef struct xfs_da_node_hdr { - struct xfs_da_blkinfo info; /* block type, links, etc. */ - __be16 __count; /* count of active entries */ - __be16 __level; /* level above leaves (leaf == 0) */ -} xfs_da_node_hdr_t; - -struct xfs_da3_node_hdr { - struct xfs_da3_blkinfo info; /* block type, links, etc. */ - __be16 __count; /* count of active entries */ - __be16 __level; /* level above leaves (leaf == 0) */ - __be32 __pad32; -}; - -#define XFS_DA3_NODE_CRC_OFF (offsetof(struct xfs_da3_node_hdr, info.crc)) - -typedef struct xfs_da_node_entry { - __be32 hashval; /* hash value for this descendant */ - __be32 before; /* Btree block before this key */ -} xfs_da_node_entry_t; - -typedef struct xfs_da_intnode { - struct xfs_da_node_hdr hdr; - struct xfs_da_node_entry __btree[]; -} xfs_da_intnode_t; - -struct xfs_da3_intnode { - struct xfs_da3_node_hdr hdr; - struct xfs_da_node_entry __btree[]; -}; - -/* - * In-core version of the node header to abstract the differences in the v2 and - * v3 disk format of the headers. Callers need to convert to/from disk format as - * appropriate. - */ -struct xfs_da3_icnode_hdr { - __uint32_t forw; - __uint32_t back; - __uint16_t magic; - __uint16_t count; - __uint16_t level; -}; - -#define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize +#ifndef __XFS_DIR2_FORMAT_H__ +#define __XFS_DIR2_FORMAT_H__ /* * Directory version 2. @@ -294,6 +189,79 @@ xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr) ((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count)); } +static inline int +xfs_dir3_sf_entsize( + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *hdr, + int len) +{ + int count = sizeof(struct xfs_dir2_sf_entry); /* namelen + offset */ + + count += len; /* name */ + count += hdr->i8count ? sizeof(xfs_dir2_ino8_t) : + sizeof(xfs_dir2_ino4_t); /* ino # */ + if (xfs_sb_version_hasftype(&mp->m_sb)) + count += sizeof(__uint8_t); /* file type */ + return count; +} + +static inline struct xfs_dir2_sf_entry * +xfs_dir3_sf_nextentry( + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + return (struct xfs_dir2_sf_entry *) + ((char *)sfep + xfs_dir3_sf_entsize(mp, hdr, sfep->namelen)); +} + +/* + * in dir3 shortform directories, the file type field is stored at a variable + * offset after the inode number. Because it's only a single byte, endian + * conversion is not necessary. + */ +static inline __uint8_t * +xfs_dir3_sfe_ftypep( + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + return (__uint8_t *)&sfep->name[sfep->namelen]; +} + +static inline __uint8_t +xfs_dir3_sfe_get_ftype( + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + __uint8_t *ftp; + + if (!xfs_sb_version_hasftype(&mp->m_sb)) + return XFS_DIR3_FT_UNKNOWN; + + ftp = xfs_dir3_sfe_ftypep(hdr, sfep); + if (*ftp >= XFS_DIR3_FT_MAX) + return XFS_DIR3_FT_UNKNOWN; + return *ftp; +} + +static inline void +xfs_dir3_sfe_put_ftype( + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep, + __uint8_t ftype) +{ + __uint8_t *ftp; + + ASSERT(ftype < XFS_DIR3_FT_MAX); + + if (!xfs_sb_version_hasftype(&mp->m_sb)) + return; + ftp = xfs_dir3_sfe_ftypep(hdr, sfep); + *ftp = ftype; +} + /* * Data block structures. * @@ -377,6 +345,17 @@ struct xfs_dir3_data_hdr { #define XFS_DIR3_DATA_CRC_OFF offsetof(struct xfs_dir3_data_hdr, hdr.crc) +static inline struct xfs_dir2_data_free * +xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) +{ + if (hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { + struct xfs_dir3_data_hdr *hdr3 = (struct xfs_dir3_data_hdr *)hdr; + return hdr3->best_free; + } + return hdr->bestfree; +} + /* * Active entry in a data block. * @@ -410,6 +389,72 @@ typedef struct xfs_dir2_data_unused { } xfs_dir2_data_unused_t; /* + * Size of a data entry. + */ +static inline int +__xfs_dir3_data_entsize( + bool ftype, + int n) +{ + int size = offsetof(struct xfs_dir2_data_entry, name[0]); + + size += n; + size += sizeof(xfs_dir2_data_off_t); + if (ftype) + size += sizeof(__uint8_t); + return roundup(size, XFS_DIR2_DATA_ALIGN); +} +static inline int +xfs_dir3_data_entsize( + struct xfs_mount *mp, + int n) +{ + bool ftype = xfs_sb_version_hasftype(&mp->m_sb) ? true : false; + return __xfs_dir3_data_entsize(ftype, n); +} + +static inline __uint8_t +xfs_dir3_dirent_get_ftype( + struct xfs_mount *mp, + struct xfs_dir2_data_entry *dep) +{ + if (xfs_sb_version_hasftype(&mp->m_sb)) { + __uint8_t type = dep->name[dep->namelen]; + + ASSERT(type < XFS_DIR3_FT_MAX); + if (type < XFS_DIR3_FT_MAX) + return type; + + } + return XFS_DIR3_FT_UNKNOWN; +} + +static inline void +xfs_dir3_dirent_put_ftype( + struct xfs_mount *mp, + struct xfs_dir2_data_entry *dep, + __uint8_t type) +{ + ASSERT(type < XFS_DIR3_FT_MAX); + ASSERT(dep->namelen != 0); + + if (xfs_sb_version_hasftype(&mp->m_sb)) + dep->name[dep->namelen] = type; +} + +/* + * Pointer to an entry's tag word. + */ +static inline __be16 * +xfs_dir3_data_entry_tag_p( + struct xfs_mount *mp, + struct xfs_dir2_data_entry *dep) +{ + return (__be16 *)((char *)dep + + xfs_dir3_data_entsize(mp, dep->namelen) - sizeof(__be16)); +} + +/* * Pointer to a freespace's tag word. */ static inline __be16 * @@ -419,6 +464,93 @@ xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup) be16_to_cpu(dup->length) - sizeof(__be16)); } +static inline size_t +xfs_dir3_data_hdr_size(bool dir3) +{ + if (dir3) + return sizeof(struct xfs_dir3_data_hdr); + return sizeof(struct xfs_dir2_data_hdr); +} + +static inline size_t +xfs_dir3_data_entry_offset(struct xfs_dir2_data_hdr *hdr) +{ + bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || + hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); + return xfs_dir3_data_hdr_size(dir3); +} + +static inline struct xfs_dir2_data_entry * +xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + xfs_dir3_data_entry_offset(hdr)); +} + +static inline struct xfs_dir2_data_unused * +xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_unused *) + ((char *)hdr + xfs_dir3_data_entry_offset(hdr)); +} + +/* + * Offsets of . and .. in data space (always block 0) + * + * XXX: there is scope for significant optimisation of the logic here. Right + * now we are checking for "dir3 format" over and over again. Ideally we should + * only do it once for each operation. + */ +static inline xfs_dir2_data_aoff_t +xfs_dir3_data_dot_offset(struct xfs_mount *mp) +{ + return xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&mp->m_sb)); +} + +static inline xfs_dir2_data_aoff_t +xfs_dir3_data_dotdot_offset(struct xfs_mount *mp) +{ + return xfs_dir3_data_dot_offset(mp) + + xfs_dir3_data_entsize(mp, 1); +} + +static inline xfs_dir2_data_aoff_t +xfs_dir3_data_first_offset(struct xfs_mount *mp) +{ + return xfs_dir3_data_dotdot_offset(mp) + + xfs_dir3_data_entsize(mp, 2); +} + +/* + * location of . and .. in data space (always block 0) + */ +static inline struct xfs_dir2_data_entry * +xfs_dir3_data_dot_entry_p( + struct xfs_mount *mp, + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + xfs_dir3_data_dot_offset(mp)); +} + +static inline struct xfs_dir2_data_entry * +xfs_dir3_data_dotdot_entry_p( + struct xfs_mount *mp, + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + xfs_dir3_data_dotdot_offset(mp)); +} + +static inline struct xfs_dir2_data_entry * +xfs_dir3_data_first_entry_p( + struct xfs_mount *mp, + struct xfs_dir2_data_hdr *hdr) +{ + return (struct xfs_dir2_data_entry *) + ((char *)hdr + xfs_dir3_data_first_offset(mp)); +} + /* * Leaf block structures. * @@ -513,6 +645,39 @@ struct xfs_dir3_leaf { #define XFS_DIR3_LEAF_CRC_OFF offsetof(struct xfs_dir3_leaf_hdr, info.crc) +extern void xfs_dir3_leaf_hdr_from_disk(struct xfs_dir3_icleaf_hdr *to, + struct xfs_dir2_leaf *from); + +static inline int +xfs_dir3_leaf_hdr_size(struct xfs_dir2_leaf *lp) +{ + if (lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || + lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) + return sizeof(struct xfs_dir3_leaf_hdr); + return sizeof(struct xfs_dir2_leaf_hdr); +} + +static inline int +xfs_dir3_max_leaf_ents(struct xfs_mount *mp, struct xfs_dir2_leaf *lp) +{ + return (mp->m_dirblksize - xfs_dir3_leaf_hdr_size(lp)) / + (uint)sizeof(struct xfs_dir2_leaf_entry); +} + +/* + * Get address of the bestcount field in the single-leaf block. + */ +static inline struct xfs_dir2_leaf_entry * +xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp) +{ + if (lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || + lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { + struct xfs_dir3_leaf *lp3 = (struct xfs_dir3_leaf *)lp; + return lp3->__ents; + } + return lp->__ents; +} + /* * Get address of the bestcount field in the single-leaf block. */ @@ -704,6 +869,48 @@ struct xfs_dir3_icfree_hdr { }; +void xfs_dir3_free_hdr_from_disk(struct xfs_dir3_icfree_hdr *to, + struct xfs_dir2_free *from); + +static inline int +xfs_dir3_free_hdr_size(struct xfs_mount *mp) +{ + if (xfs_sb_version_hascrc(&mp->m_sb)) + return sizeof(struct xfs_dir3_free_hdr); + return sizeof(struct xfs_dir2_free_hdr); +} + +static inline int +xfs_dir3_free_max_bests(struct xfs_mount *mp) +{ + return (mp->m_dirblksize - xfs_dir3_free_hdr_size(mp)) / + sizeof(xfs_dir2_data_off_t); +} + +static inline __be16 * +xfs_dir3_free_bests_p(struct xfs_mount *mp, struct xfs_dir2_free *free) +{ + return (__be16 *)((char *)free + xfs_dir3_free_hdr_size(mp)); +} + +/* + * Convert data space db to the corresponding free db. + */ +static inline xfs_dir2_db_t +xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir3_free_max_bests(mp); +} + +/* + * Convert data space db to the corresponding index in a free db. + */ +static inline int +xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) +{ + return db % xfs_dir3_free_max_bests(mp); +} + /* * Single block format. * @@ -754,262 +961,4 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp) return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count); } - -/* - * Attribute storage layout - * - * Attribute lists are structured around Btrees where all the data - * elements are in the leaf nodes. Attribute names are hashed into an int, - * then that int is used as the index into the Btree. Since the hashval - * of an attribute name may not be unique, we may have duplicate keys. The - * internal links in the Btree are logical block offsets into the file. - * - *======================================================================== - * Attribute structure when equal to XFS_LBSIZE(mp) bytes. - *======================================================================== - * - * Struct leaf_entry's are packed from the top. Name/values grow from the - * bottom but are not packed. The freemap contains run-length-encoded entries - * for the free bytes after the leaf_entry's, but only the N largest such, - * smaller runs are dropped. When the freemap doesn't show enough space - * for an allocation, we compact the name/value area and try again. If we - * still don't have enough space, then we have to split the block. The - * name/value structs (both local and remote versions) must be 32bit aligned. - * - * Since we have duplicate hash keys, for each key that matches, compare - * the actual name string. The root and intermediate node search always - * takes the first-in-the-block key match found, so we should only have - * to work "forw"ard. If none matches, continue with the "forw"ard leaf - * nodes until the hash key changes or the attribute name is found. - * - * We store the fact that an attribute is a ROOT/USER/SECURE attribute in - * the leaf_entry. The namespaces are independent only because we also look - * at the namespace bit when we are looking for a matching attribute name. - * - * We also store an "incomplete" bit in the leaf_entry. It shows that an - * attribute is in the middle of being created and should not be shown to - * the user if we crash during the time that the bit is set. We clear the - * bit when we have finished setting up the attribute. We do this because - * we cannot create some large attributes inside a single transaction, and we - * need some indication that we weren't finished if we crash in the middle. - */ -#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */ - -typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */ - __be16 base; /* base of free region */ - __be16 size; /* length of free region */ -} xfs_attr_leaf_map_t; - -typedef struct xfs_attr_leaf_hdr { /* constant-structure header block */ - xfs_da_blkinfo_t info; /* block type, links, etc. */ - __be16 count; /* count of active leaf_entry's */ - __be16 usedbytes; /* num bytes of names/values stored */ - __be16 firstused; /* first used byte in name area */ - __u8 holes; /* != 0 if blk needs compaction */ - __u8 pad1; - xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE]; - /* N largest free regions */ -} xfs_attr_leaf_hdr_t; - -typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */ - __be32 hashval; /* hash value of name */ - __be16 nameidx; /* index into buffer of name/value */ - __u8 flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */ - __u8 pad2; /* unused pad byte */ -} xfs_attr_leaf_entry_t; - -typedef struct xfs_attr_leaf_name_local { - __be16 valuelen; /* number of bytes in value */ - __u8 namelen; /* length of name bytes */ - __u8 nameval[1]; /* name/value bytes */ -} xfs_attr_leaf_name_local_t; - -typedef struct xfs_attr_leaf_name_remote { - __be32 valueblk; /* block number of value bytes */ - __be32 valuelen; /* number of bytes in value */ - __u8 namelen; /* length of name bytes */ - __u8 name[1]; /* name bytes */ -} xfs_attr_leaf_name_remote_t; - -typedef struct xfs_attr_leafblock { - xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */ - xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */ - xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */ - xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */ -} xfs_attr_leafblock_t; - -/* - * CRC enabled leaf structures. Called "version 3" structures to match the - * version number of the directory and dablk structures for this feature, and - * attr2 is already taken by the variable inode attribute fork size feature. - */ -struct xfs_attr3_leaf_hdr { - struct xfs_da3_blkinfo info; - __be16 count; - __be16 usedbytes; - __be16 firstused; - __u8 holes; - __u8 pad1; - struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE]; - __be32 pad2; /* 64 bit alignment */ -}; - -#define XFS_ATTR3_LEAF_CRC_OFF (offsetof(struct xfs_attr3_leaf_hdr, info.crc)) - -struct xfs_attr3_leafblock { - struct xfs_attr3_leaf_hdr hdr; - struct xfs_attr_leaf_entry entries[1]; - - /* - * The rest of the block contains the following structures after the - * leaf entries, growing from the bottom up. The variables are never - * referenced, the locations accessed purely from helper functions. - * - * struct xfs_attr_leaf_name_local - * struct xfs_attr_leaf_name_remote - */ -}; - -/* - * incore, neutral version of the attribute leaf header - */ -struct xfs_attr3_icleaf_hdr { - __uint32_t forw; - __uint32_t back; - __uint16_t magic; - __uint16_t count; - __uint16_t usedbytes; - __uint16_t firstused; - __u8 holes; - struct { - __uint16_t base; - __uint16_t size; - } freemap[XFS_ATTR_LEAF_MAPSIZE]; -}; - -/* - * Flags used in the leaf_entry[i].flags field. - * NOTE: the INCOMPLETE bit must not collide with the flags bits specified - * on the system call, they are "or"ed together for various operations. - */ -#define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */ -#define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */ -#define XFS_ATTR_SECURE_BIT 2 /* limit access to secure attrs */ -#define XFS_ATTR_INCOMPLETE_BIT 7 /* attr in middle of create/delete */ -#define XFS_ATTR_LOCAL (1 << XFS_ATTR_LOCAL_BIT) -#define XFS_ATTR_ROOT (1 << XFS_ATTR_ROOT_BIT) -#define XFS_ATTR_SECURE (1 << XFS_ATTR_SECURE_BIT) -#define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT) - -/* - * Conversion macros for converting namespace bits from argument flags - * to ondisk flags. - */ -#define XFS_ATTR_NSP_ARGS_MASK (ATTR_ROOT | ATTR_SECURE) -#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE) -#define XFS_ATTR_NSP_ONDISK(flags) ((flags) & XFS_ATTR_NSP_ONDISK_MASK) -#define XFS_ATTR_NSP_ARGS(flags) ((flags) & XFS_ATTR_NSP_ARGS_MASK) -#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x) (((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\ - ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0)) -#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x) (((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\ - ((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0)) - -/* - * Alignment for namelist and valuelist entries (since they are mixed - * there can be only one alignment value) - */ -#define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t)) - -static inline int -xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp) -{ - if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) - return sizeof(struct xfs_attr3_leaf_hdr); - return sizeof(struct xfs_attr_leaf_hdr); -} - -static inline struct xfs_attr_leaf_entry * -xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp) -{ - if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) - return &((struct xfs_attr3_leafblock *)leafp)->entries[0]; - return &leafp->entries[0]; -} - -/* - * Cast typed pointers for "local" and "remote" name/value structs. - */ -static inline char * -xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx) -{ - struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp); - - return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)]; -} - -static inline xfs_attr_leaf_name_remote_t * -xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx) -{ - return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx); -} - -static inline xfs_attr_leaf_name_local_t * -xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx) -{ - return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx); -} - -/* - * Calculate total bytes used (including trailing pad for alignment) for - * a "local" name/value structure, a "remote" name/value structure, and - * a pointer which might be either. - */ -static inline int xfs_attr_leaf_entsize_remote(int nlen) -{ - return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \ - XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); -} - -static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen) -{ - return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) + - XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); -} - -static inline int xfs_attr_leaf_entsize_local_max(int bsize) -{ - return (((bsize) >> 1) + ((bsize) >> 2)); -} - - - -/* - * Remote attribute block format definition - * - * There is one of these headers per filesystem block in a remote attribute. - * This is done to ensure there is a 1:1 mapping between the attribute value - * length and the number of blocks needed to store the attribute. This makes the - * verification of a buffer a little more complex, but greatly simplifies the - * allocation, reading and writing of these attributes as we don't have to guess - * the number of blocks needed to store the attribute data. - */ -#define XFS_ATTR3_RMT_MAGIC 0x5841524d /* XARM */ - -struct xfs_attr3_rmt_hdr { - __be32 rm_magic; - __be32 rm_offset; - __be32 rm_bytes; - __be32 rm_crc; - uuid_t rm_uuid; - __be64 rm_owner; - __be64 rm_blkno; - __be64 rm_lsn; -}; - -#define XFS_ATTR3_RMT_CRC_OFF offsetof(struct xfs_attr3_rmt_hdr, rm_crc) - -#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize) \ - ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ - sizeof(struct xfs_attr3_rmt_hdr) : 0)) - -#endif /* __XFS_DA_FORMAT_H__ */ +#endif /* __XFS_DIR2_FORMAT_H__ */ diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index ae47ec6..1021c83 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -18,21 +18,23 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_bmap.h" +#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" @@ -50,21 +52,21 @@ static void xfs_dir3_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp); * Pop an assert if something is wrong. */ #ifdef DEBUG -#define xfs_dir3_leaf_check(dp, bp) \ +#define xfs_dir3_leaf_check(mp, bp) \ do { \ - if (!xfs_dir3_leaf1_check((dp), (bp))) \ + if (!xfs_dir3_leaf1_check((mp), (bp))) \ ASSERT(0); \ } while (0); STATIC bool xfs_dir3_leaf1_check( - struct xfs_inode *dp, + struct xfs_mount *mp, struct xfs_buf *bp) { struct xfs_dir2_leaf *leaf = bp->b_addr; struct xfs_dir3_icleaf_hdr leafhdr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; @@ -73,16 +75,71 @@ xfs_dir3_leaf1_check( } else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC) return false; - return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); + return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf); } #else -#define xfs_dir3_leaf_check(dp, bp) +#define xfs_dir3_leaf_check(mp, bp) #endif +void +xfs_dir3_leaf_hdr_from_disk( + struct xfs_dir3_icleaf_hdr *to, + struct xfs_dir2_leaf *from) +{ + if (from->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || + from->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) { + to->forw = be32_to_cpu(from->hdr.info.forw); + to->back = be32_to_cpu(from->hdr.info.back); + to->magic = be16_to_cpu(from->hdr.info.magic); + to->count = be16_to_cpu(from->hdr.count); + to->stale = be16_to_cpu(from->hdr.stale); + } else { + struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from; + + to->forw = be32_to_cpu(hdr3->info.hdr.forw); + to->back = be32_to_cpu(hdr3->info.hdr.back); + to->magic = be16_to_cpu(hdr3->info.hdr.magic); + to->count = be16_to_cpu(hdr3->count); + to->stale = be16_to_cpu(hdr3->stale); + } + + ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC || + to->magic == XFS_DIR3_LEAF1_MAGIC || + to->magic == XFS_DIR2_LEAFN_MAGIC || + to->magic == XFS_DIR3_LEAFN_MAGIC); +} + +void +xfs_dir3_leaf_hdr_to_disk( + struct xfs_dir2_leaf *to, + struct xfs_dir3_icleaf_hdr *from) +{ + ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC || + from->magic == XFS_DIR3_LEAF1_MAGIC || + from->magic == XFS_DIR2_LEAFN_MAGIC || + from->magic == XFS_DIR3_LEAFN_MAGIC); + + if (from->magic == XFS_DIR2_LEAF1_MAGIC || + from->magic == XFS_DIR2_LEAFN_MAGIC) { + to->hdr.info.forw = cpu_to_be32(from->forw); + to->hdr.info.back = cpu_to_be32(from->back); + to->hdr.info.magic = cpu_to_be16(from->magic); + to->hdr.count = cpu_to_be16(from->count); + to->hdr.stale = cpu_to_be16(from->stale); + } else { + struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to; + + hdr3->info.hdr.forw = cpu_to_be32(from->forw); + hdr3->info.hdr.back = cpu_to_be32(from->back); + hdr3->info.hdr.magic = cpu_to_be16(from->magic); + hdr3->count = cpu_to_be16(from->count); + hdr3->stale = cpu_to_be16(from->stale); + } +} + bool xfs_dir3_leaf_check_int( struct xfs_mount *mp, - struct xfs_inode *dp, struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf) { @@ -90,21 +147,8 @@ xfs_dir3_leaf_check_int( xfs_dir2_leaf_tail_t *ltp; int stale; int i; - const struct xfs_dir_ops *ops; - struct xfs_dir3_icleaf_hdr leafhdr; - /* - * we can be passed a null dp here from a verifier, so we need to go the - * hard way to get them. - */ - ops = xfs_dir_get_ops(mp, dp); - - if (!hdr) { - ops->leaf_hdr_from_disk(&leafhdr, leaf); - hdr = &leafhdr; - } - - ents = ops->leaf_ents_p(leaf); + ents = xfs_dir3_leaf_ents_p(leaf); ltp = xfs_dir2_leaf_tail_p(mp, leaf); /* @@ -112,7 +156,7 @@ xfs_dir3_leaf_check_int( * Should factor in the size of the bests table as well. * We can deduce a value for that from di_size. */ - if (hdr->count > ops->leaf_max_ents(mp)) + if (hdr->count > xfs_dir3_max_leaf_ents(mp, leaf)) return false; /* Leaves and bests don't overlap in leaf format. */ @@ -148,6 +192,7 @@ xfs_dir3_leaf_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_dir2_leaf *leaf = bp->b_addr; + struct xfs_dir3_icleaf_hdr leafhdr; ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC); @@ -169,7 +214,8 @@ xfs_dir3_leaf_verify( return false; } - return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf); } static void @@ -355,7 +401,7 @@ xfs_dir3_leaf_get_buf( return error; xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic); - xfs_dir3_leaf_log_header(tp, dp, bp); + xfs_dir3_leaf_log_header(tp, bp); if (magic == XFS_DIR2_LEAF1_MAGIC) xfs_dir3_leaf_log_tail(tp, bp); *bpp = bp; @@ -416,31 +462,31 @@ xfs_dir2_block_to_leaf( xfs_dir3_data_check(dp, dbp); btp = xfs_dir2_block_tail_p(mp, hdr); blp = xfs_dir2_block_leaf_p(btp); - bf = dp->d_ops->data_bestfree_p(hdr); - ents = dp->d_ops->leaf_ents_p(leaf); + bf = xfs_dir3_data_bestfree_p(hdr); + ents = xfs_dir3_leaf_ents_p(leaf); /* * Set the counts in the leaf header. */ - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); leafhdr.count = be32_to_cpu(btp->count); leafhdr.stale = be32_to_cpu(btp->stale); - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, lbp); + xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(tp, lbp); /* * Could compact these but I think we always do the conversion * after squeezing out stale entries. */ memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(tp, dp, lbp, 0, leafhdr.count - 1); + xfs_dir3_leaf_log_ents(tp, lbp, 0, leafhdr.count - 1); needscan = 0; needlog = 1; /* * Make the space formerly occupied by the leaf entries and block * tail be free. */ - xfs_dir2_data_make_free(tp, dp, dbp, + xfs_dir2_data_make_free(tp, dbp, (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize - (char *)blp), @@ -456,7 +502,7 @@ xfs_dir2_block_to_leaf( hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC); if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); /* * Set up leaf tail and bests table. */ @@ -468,8 +514,8 @@ xfs_dir2_block_to_leaf( * Log the data header and leaf bests table. */ if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); - xfs_dir3_leaf_check(dp, lbp); + xfs_dir2_data_log_header(tp, dbp); + xfs_dir3_leaf_check(mp, lbp); xfs_dir3_data_check(dp, dbp); xfs_dir3_leaf_log_bests(tp, lbp, 0, 0); return 0; @@ -653,10 +699,10 @@ xfs_dir2_leaf_addname( index = xfs_dir2_leaf_search_hash(args, lbp); leaf = lbp->b_addr; ltp = xfs_dir2_leaf_tail_p(mp, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = xfs_dir3_leaf_ents_p(leaf); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); bestsp = xfs_dir2_leaf_bests_p(ltp); - length = dp->d_ops->data_entsize(args->namelen); + length = xfs_dir3_data_entsize(mp, args->namelen); /* * See if there are any entries with the same hash value @@ -818,7 +864,7 @@ xfs_dir2_leaf_addname( else xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block); hdr = dbp->b_addr; - bf = dp->d_ops->data_bestfree_p(hdr); + bf = xfs_dir3_data_bestfree_p(hdr); bestsp[use_block] = bf[0].length; grown = 1; } else { @@ -834,7 +880,7 @@ xfs_dir2_leaf_addname( return error; } hdr = dbp->b_addr; - bf = dp->d_ops->data_bestfree_p(hdr); + bf = xfs_dir3_data_bestfree_p(hdr); grown = 0; } /* @@ -847,7 +893,7 @@ xfs_dir2_leaf_addname( /* * Mark the initial part of our freespace in use for the new entry. */ - xfs_dir2_data_use_free(tp, dp, dbp, dup, + xfs_dir2_data_use_free(tp, dbp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, &needlog, &needscan); /* @@ -857,20 +903,20 @@ xfs_dir2_leaf_addname( dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); - dp->d_ops->data_put_ftype(dep, args->filetype); - tagp = dp->d_ops->data_entry_tag_p(dep); + xfs_dir3_dirent_put_ftype(mp, dep, args->filetype); + tagp = xfs_dir3_data_entry_tag_p(mp, dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); /* * Need to scan fix up the bestfree table. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); /* * Need to log the data block's header. */ if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); - xfs_dir2_data_log_entry(tp, dp, dbp, dep); + xfs_dir2_data_log_header(tp, dbp); + xfs_dir2_data_log_entry(tp, dbp, dep); /* * If the bests table needs to be changed, do it. * Log the change unless we've already done that. @@ -893,10 +939,10 @@ xfs_dir2_leaf_addname( /* * Log the leaf fields and give up the buffers. */ - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, lbp); - xfs_dir3_leaf_log_ents(tp, dp, lbp, lfloglow, lfloghigh); - xfs_dir3_leaf_check(dp, lbp); + xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(tp, lbp); + xfs_dir3_leaf_log_ents(tp, lbp, lfloglow, lfloghigh); + xfs_dir3_leaf_check(mp, lbp); xfs_dir3_data_check(dp, dbp); return 0; } @@ -916,7 +962,6 @@ xfs_dir3_leaf_compact( int loglow; /* first leaf entry to log */ int to; /* target leaf index */ struct xfs_dir2_leaf_entry *ents; - struct xfs_inode *dp = args->dp; leaf = bp->b_addr; if (!leafhdr->stale) @@ -925,7 +970,7 @@ xfs_dir3_leaf_compact( /* * Compress out the stale entries in place. */ - ents = dp->d_ops->leaf_ents_p(leaf); + ents = xfs_dir3_leaf_ents_p(leaf); for (from = to = 0, loglow = -1; from < leafhdr->count; from++) { if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) continue; @@ -946,10 +991,10 @@ xfs_dir3_leaf_compact( leafhdr->count -= leafhdr->stale; leafhdr->stale = 0; - dp->d_ops->leaf_hdr_to_disk(leaf, leafhdr); - xfs_dir3_leaf_log_header(args->trans, dp, bp); + xfs_dir3_leaf_hdr_to_disk(leaf, leafhdr); + xfs_dir3_leaf_log_header(args->trans, bp); if (loglow != -1) - xfs_dir3_leaf_log_ents(args->trans, dp, bp, loglow, to - 1); + xfs_dir3_leaf_log_ents(args->trans, bp, loglow, to - 1); } /* @@ -1076,11 +1121,10 @@ xfs_dir3_leaf_log_bests( */ void xfs_dir3_leaf_log_ents( - struct xfs_trans *tp, - struct xfs_inode *dp, - struct xfs_buf *bp, - int first, - int last) + xfs_trans_t *tp, /* transaction pointer */ + struct xfs_buf *bp, /* leaf buffer */ + int first, /* first entry to log */ + int last) /* last entry to log */ { xfs_dir2_leaf_entry_t *firstlep; /* pointer to first entry */ xfs_dir2_leaf_entry_t *lastlep; /* pointer to last entry */ @@ -1092,7 +1136,7 @@ xfs_dir3_leaf_log_ents( leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); - ents = dp->d_ops->leaf_ents_p(leaf); + ents = xfs_dir3_leaf_ents_p(leaf); firstlep = &ents[first]; lastlep = &ents[last]; xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf), @@ -1105,7 +1149,6 @@ xfs_dir3_leaf_log_ents( void xfs_dir3_leaf_log_header( struct xfs_trans *tp, - struct xfs_inode *dp, struct xfs_buf *bp) { struct xfs_dir2_leaf *leaf = bp->b_addr; @@ -1116,7 +1159,7 @@ xfs_dir3_leaf_log_header( leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), - dp->d_ops->leaf_hdr_size - 1); + xfs_dir3_leaf_hdr_size(leaf) - 1); } /* @@ -1171,9 +1214,9 @@ xfs_dir2_leaf_lookup( } tp = args->trans; dp = args->dp; - xfs_dir3_leaf_check(dp, lbp); + xfs_dir3_leaf_check(dp->i_mount, lbp); leaf = lbp->b_addr; - ents = dp->d_ops->leaf_ents_p(leaf); + ents = xfs_dir3_leaf_ents_p(leaf); /* * Get to the leaf entry and contained data entry address. */ @@ -1189,7 +1232,7 @@ xfs_dir2_leaf_lookup( * Return the found inode number & CI name if appropriate */ args->inumber = be64_to_cpu(dep->inumber); - args->filetype = dp->d_ops->data_get_ftype(dep); + args->filetype = xfs_dir3_dirent_get_ftype(dp->i_mount, dep); error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); xfs_trans_brelse(tp, dbp); xfs_trans_brelse(tp, lbp); @@ -1236,9 +1279,9 @@ xfs_dir2_leaf_lookup_int( *lbpp = lbp; leaf = lbp->b_addr; - xfs_dir3_leaf_check(dp, lbp); - ents = dp->d_ops->leaf_ents_p(leaf); - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + xfs_dir3_leaf_check(mp, lbp); + ents = xfs_dir3_leaf_ents_p(leaf); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); /* * Look for the first leaf entry with our hash value. @@ -1372,9 +1415,9 @@ xfs_dir2_leaf_removename( leaf = lbp->b_addr; hdr = dbp->b_addr; xfs_dir3_data_check(dp, dbp); - bf = dp->d_ops->data_bestfree_p(hdr); - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + bf = xfs_dir3_data_bestfree_p(hdr); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + ents = xfs_dir3_leaf_ents_p(leaf); /* * Point to the leaf entry, use that to point to the data entry. */ @@ -1390,27 +1433,27 @@ xfs_dir2_leaf_removename( /* * Mark the former data entry unused. */ - xfs_dir2_data_make_free(tp, dp, dbp, + xfs_dir2_data_make_free(tp, dbp, (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), - dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); + xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan); /* * We just mark the leaf entry stale by putting a null in it. */ leafhdr.stale++; - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, lbp); + xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(tp, lbp); lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); - xfs_dir3_leaf_log_ents(tp, dp, lbp, index, index); + xfs_dir3_leaf_log_ents(tp, lbp, index, index); /* * Scan the freespace in the data block again if necessary, * log the data block header if necessary. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); + xfs_dir2_data_log_header(tp, dbp); /* * If the longest freespace in the data block has changed, * put the new value in the bests table and log that. @@ -1424,7 +1467,7 @@ xfs_dir2_leaf_removename( * If the data block is now empty then get rid of the data block. */ if (be16_to_cpu(bf[0].length) == - mp->m_dirblksize - dp->d_ops->data_entry_offset) { + mp->m_dirblksize - xfs_dir3_data_entry_offset(hdr)) { ASSERT(db != mp->m_dirdatablk); if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { /* @@ -1435,7 +1478,7 @@ xfs_dir2_leaf_removename( */ if (error == ENOSPC && args->total == 0) error = 0; - xfs_dir3_leaf_check(dp, lbp); + xfs_dir3_leaf_check(mp, lbp); return error; } dbp = NULL; @@ -1469,7 +1512,7 @@ xfs_dir2_leaf_removename( else if (db != mp->m_dirdatablk) dbp = NULL; - xfs_dir3_leaf_check(dp, lbp); + xfs_dir3_leaf_check(mp, lbp); /* * See if we can convert to block form. */ @@ -1504,7 +1547,7 @@ xfs_dir2_leaf_replace( } dp = args->dp; leaf = lbp->b_addr; - ents = dp->d_ops->leaf_ents_p(leaf); + ents = xfs_dir3_leaf_ents_p(leaf); /* * Point to the leaf entry, get data address from it. */ @@ -1520,10 +1563,10 @@ xfs_dir2_leaf_replace( * Put the new inode number in, log it. */ dep->inumber = cpu_to_be64(args->inumber); - dp->d_ops->data_put_ftype(dep, args->filetype); + xfs_dir3_dirent_put_ftype(dp->i_mount, dep, args->filetype); tp = args->trans; - xfs_dir2_data_log_entry(tp, dp, dbp, dep); - xfs_dir3_leaf_check(dp, lbp); + xfs_dir2_data_log_entry(tp, dbp, dep); + xfs_dir3_leaf_check(dp->i_mount, lbp); xfs_trans_brelse(tp, lbp); return 0; } @@ -1549,8 +1592,8 @@ xfs_dir2_leaf_search_hash( struct xfs_dir3_icleaf_hdr leafhdr; leaf = lbp->b_addr; - ents = args->dp->d_ops->leaf_ents_p(leaf); - args->dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + ents = xfs_dir3_leaf_ents_p(leaf); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); /* * Note, the table cannot be empty, so we have to go through the loop. @@ -1618,12 +1661,12 @@ xfs_dir2_leaf_trim_data( #ifdef DEBUG { struct xfs_dir2_data_hdr *hdr = dbp->b_addr; - struct xfs_dir2_data_free *bf = dp->d_ops->data_bestfree_p(hdr); + struct xfs_dir2_data_free *bf = xfs_dir3_data_bestfree_p(hdr); ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)); ASSERT(be16_to_cpu(bf[0].length) == - mp->m_dirblksize - dp->d_ops->data_entry_offset); + mp->m_dirblksize - xfs_dir3_data_entry_offset(hdr)); ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); } #endif @@ -1739,7 +1782,7 @@ xfs_dir2_node_to_leaf( return 0; lbp = state->path.blk[0].bp; leaf = lbp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); @@ -1751,7 +1794,7 @@ xfs_dir2_node_to_leaf( if (error) return error; free = fbp->b_addr; - dp->d_ops->free_hdr_from_disk(&freehdr, free); + xfs_dir3_free_hdr_from_disk(&freehdr, free); ASSERT(!freehdr.firstdb); @@ -1785,14 +1828,14 @@ xfs_dir2_node_to_leaf( /* * Set up the leaf bests table. */ - memcpy(xfs_dir2_leaf_bests_p(ltp), dp->d_ops->free_bests_p(free), + memcpy(xfs_dir2_leaf_bests_p(ltp), xfs_dir3_free_bests_p(mp, free), freehdr.nvalid * sizeof(xfs_dir2_data_off_t)); - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, lbp); + xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(tp, lbp); xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); xfs_dir3_leaf_log_tail(tp, lbp); - xfs_dir3_leaf_check(dp, lbp); + xfs_dir3_leaf_check(mp, lbp); /* * Get rid of the freespace block. diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 56369d4..4c3dba7 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -18,21 +18,22 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_bmap.h" +#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_cksum.h" @@ -54,21 +55,21 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args, * Check internal consistency of a leafn block. */ #ifdef DEBUG -#define xfs_dir3_leaf_check(dp, bp) \ +#define xfs_dir3_leaf_check(mp, bp) \ do { \ - if (!xfs_dir3_leafn_check((dp), (bp))) \ + if (!xfs_dir3_leafn_check((mp), (bp))) \ ASSERT(0); \ } while (0); static bool xfs_dir3_leafn_check( - struct xfs_inode *dp, + struct xfs_mount *mp, struct xfs_buf *bp) { struct xfs_dir2_leaf *leaf = bp->b_addr; struct xfs_dir3_icleaf_hdr leafhdr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) { struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; @@ -77,10 +78,10 @@ xfs_dir3_leafn_check( } else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC) return false; - return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf); + return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf); } #else -#define xfs_dir3_leaf_check(dp, bp) +#define xfs_dir3_leaf_check(mp, bp) #endif static bool @@ -192,6 +193,53 @@ xfs_dir2_free_try_read( return __xfs_dir3_free_read(tp, dp, fbno, -2, bpp); } + +void +xfs_dir3_free_hdr_from_disk( + struct xfs_dir3_icfree_hdr *to, + struct xfs_dir2_free *from) +{ + if (from->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)) { + to->magic = be32_to_cpu(from->hdr.magic); + to->firstdb = be32_to_cpu(from->hdr.firstdb); + to->nvalid = be32_to_cpu(from->hdr.nvalid); + to->nused = be32_to_cpu(from->hdr.nused); + } else { + struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from; + + to->magic = be32_to_cpu(hdr3->hdr.magic); + to->firstdb = be32_to_cpu(hdr3->firstdb); + to->nvalid = be32_to_cpu(hdr3->nvalid); + to->nused = be32_to_cpu(hdr3->nused); + } + + ASSERT(to->magic == XFS_DIR2_FREE_MAGIC || + to->magic == XFS_DIR3_FREE_MAGIC); +} + +static void +xfs_dir3_free_hdr_to_disk( + struct xfs_dir2_free *to, + struct xfs_dir3_icfree_hdr *from) +{ + ASSERT(from->magic == XFS_DIR2_FREE_MAGIC || + from->magic == XFS_DIR3_FREE_MAGIC); + + if (from->magic == XFS_DIR2_FREE_MAGIC) { + to->hdr.magic = cpu_to_be32(from->magic); + to->hdr.firstdb = cpu_to_be32(from->firstdb); + to->hdr.nvalid = cpu_to_be32(from->nvalid); + to->hdr.nused = cpu_to_be32(from->nused); + } else { + struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to; + + hdr3->hdr.magic = cpu_to_be32(from->magic); + hdr3->firstdb = cpu_to_be32(from->firstdb); + hdr3->nvalid = cpu_to_be32(from->nvalid); + hdr3->nused = cpu_to_be32(from->nused); + } +} + static int xfs_dir3_free_get_buf( struct xfs_trans *tp, @@ -229,7 +277,7 @@ xfs_dir3_free_get_buf( uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid); } else hdr.magic = XFS_DIR2_FREE_MAGIC; - dp->d_ops->free_hdr_to_disk(bp->b_addr, &hdr); + xfs_dir3_free_hdr_to_disk(bp->b_addr, &hdr); *bpp = bp; return 0; } @@ -240,7 +288,6 @@ xfs_dir3_free_get_buf( STATIC void xfs_dir2_free_log_bests( struct xfs_trans *tp, - struct xfs_inode *dp, struct xfs_buf *bp, int first, /* first entry to log */ int last) /* last entry to log */ @@ -249,7 +296,7 @@ xfs_dir2_free_log_bests( __be16 *bests; free = bp->b_addr; - bests = dp->d_ops->free_bests_p(free); + bests = xfs_dir3_free_bests_p(tp->t_mountp, free); ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); xfs_trans_log_buf(tp, bp, @@ -264,7 +311,6 @@ xfs_dir2_free_log_bests( static void xfs_dir2_free_log_header( struct xfs_trans *tp, - struct xfs_inode *dp, struct xfs_buf *bp) { #ifdef DEBUG @@ -274,7 +320,7 @@ xfs_dir2_free_log_header( ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); #endif - xfs_trans_log_buf(tp, bp, 0, dp->d_ops->free_hdr_size - 1); + xfs_trans_log_buf(tp, bp, 0, xfs_dir3_free_hdr_size(tp->t_mountp) - 1); } /* @@ -323,7 +369,7 @@ xfs_dir2_leaf_to_node( return error; free = fbp->b_addr; - dp->d_ops->free_hdr_from_disk(&freehdr, free); + xfs_dir3_free_hdr_from_disk(&freehdr, free); leaf = lbp->b_addr; ltp = xfs_dir2_leaf_tail_p(mp, leaf); ASSERT(be32_to_cpu(ltp->bestcount) <= @@ -334,7 +380,7 @@ xfs_dir2_leaf_to_node( * Count active entries. */ from = xfs_dir2_leaf_bests_p(ltp); - to = dp->d_ops->free_bests_p(free); + to = xfs_dir3_free_bests_p(mp, free); for (i = n = 0; i < be32_to_cpu(ltp->bestcount); i++, from++, to++) { if ((off = be16_to_cpu(*from)) != NULLDATAOFF) n++; @@ -347,9 +393,9 @@ xfs_dir2_leaf_to_node( freehdr.nused = n; freehdr.nvalid = be32_to_cpu(ltp->bestcount); - dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); - xfs_dir2_free_log_bests(tp, dp, fbp, 0, freehdr.nvalid - 1); - xfs_dir2_free_log_header(tp, dp, fbp); + xfs_dir3_free_hdr_to_disk(fbp->b_addr, &freehdr); + xfs_dir2_free_log_bests(tp, fbp, 0, freehdr.nvalid - 1); + xfs_dir2_free_log_header(tp, fbp); /* * Converting the leaf to a leafnode is just a matter of changing the @@ -363,8 +409,8 @@ xfs_dir2_leaf_to_node( leaf->hdr.info.magic = cpu_to_be16(XFS_DIR3_LEAFN_MAGIC); lbp->b_ops = &xfs_dir3_leafn_buf_ops; xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAFN_BUF); - xfs_dir3_leaf_log_header(tp, dp, lbp); - xfs_dir3_leaf_check(dp, lbp); + xfs_dir3_leaf_log_header(tp, lbp); + xfs_dir3_leaf_check(mp, lbp); return 0; } @@ -397,8 +443,8 @@ xfs_dir2_leafn_add( mp = dp->i_mount; tp = args->trans; leaf = bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + ents = xfs_dir3_leaf_ents_p(leaf); /* * Quick check just to make sure we are not going to index @@ -414,7 +460,7 @@ xfs_dir2_leafn_add( * a compact. */ - if (leafhdr.count == dp->d_ops->leaf_max_ents(mp)) { + if (leafhdr.count == xfs_dir3_max_leaf_ents(mp, leaf)) { if (!leafhdr.stale) return XFS_ERROR(ENOSPC); compact = leafhdr.stale > 1; @@ -452,30 +498,30 @@ xfs_dir2_leafn_add( lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, args->blkno, args->index)); - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, bp); - xfs_dir3_leaf_log_ents(tp, dp, bp, lfloglow, lfloghigh); - xfs_dir3_leaf_check(dp, bp); + xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(tp, bp); + xfs_dir3_leaf_log_ents(tp, bp, lfloglow, lfloghigh); + xfs_dir3_leaf_check(mp, bp); return 0; } #ifdef DEBUG static void xfs_dir2_free_hdr_check( - struct xfs_inode *dp, + struct xfs_mount *mp, struct xfs_buf *bp, xfs_dir2_db_t db) { struct xfs_dir3_icfree_hdr hdr; - dp->d_ops->free_hdr_from_disk(&hdr, bp->b_addr); + xfs_dir3_free_hdr_from_disk(&hdr, bp->b_addr); - ASSERT((hdr.firstdb % dp->d_ops->free_max_bests(dp->i_mount)) == 0); + ASSERT((hdr.firstdb % xfs_dir3_free_max_bests(mp)) == 0); ASSERT(hdr.firstdb <= db); ASSERT(db < hdr.firstdb + hdr.nvalid); } #else -#define xfs_dir2_free_hdr_check(dp, bp, db) +#define xfs_dir2_free_hdr_check(mp, dp, db) #endif /* DEBUG */ /* @@ -484,7 +530,6 @@ xfs_dir2_free_hdr_check( */ xfs_dahash_t /* hash value */ xfs_dir2_leafn_lasthash( - struct xfs_inode *dp, struct xfs_buf *bp, /* leaf buffer */ int *count) /* count of entries in leaf */ { @@ -492,7 +537,7 @@ xfs_dir2_leafn_lasthash( struct xfs_dir2_leaf_entry *ents; struct xfs_dir3_icleaf_hdr leafhdr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC || leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); @@ -502,7 +547,7 @@ xfs_dir2_leafn_lasthash( if (!leafhdr.count) return 0; - ents = dp->d_ops->leaf_ents_p(leaf); + ents = xfs_dir3_leaf_ents_p(leaf); return be32_to_cpu(ents[leafhdr.count - 1].hashval); } @@ -539,10 +584,10 @@ xfs_dir2_leafn_lookup_for_addname( tp = args->trans; mp = dp->i_mount; leaf = bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + ents = xfs_dir3_leaf_ents_p(leaf); - xfs_dir3_leaf_check(dp, bp); + xfs_dir3_leaf_check(mp, bp); ASSERT(leafhdr.count > 0); /* @@ -560,7 +605,7 @@ xfs_dir2_leafn_lookup_for_addname( ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) || free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); } - length = dp->d_ops->data_entsize(args->namelen); + length = xfs_dir3_data_entsize(mp, args->namelen); /* * Loop over leaf entries with the right hash value. */ @@ -592,7 +637,7 @@ xfs_dir2_leafn_lookup_for_addname( * Convert the data block to the free block * holding its freespace information. */ - newfdb = dp->d_ops->db_to_fdb(mp, newdb); + newfdb = xfs_dir2_db_to_fdb(mp, newdb); /* * If it's not the one we have in hand, read it in. */ @@ -610,16 +655,16 @@ xfs_dir2_leafn_lookup_for_addname( return error; free = curbp->b_addr; - xfs_dir2_free_hdr_check(dp, curbp, curdb); + xfs_dir2_free_hdr_check(mp, curbp, curdb); } /* * Get the index for our entry. */ - fi = dp->d_ops->db_to_fdindex(mp, curdb); + fi = xfs_dir2_db_to_fdindex(mp, curdb); /* * If it has room, return it. */ - bests = dp->d_ops->free_bests_p(free); + bests = xfs_dir3_free_bests_p(mp, free); if (unlikely(bests[fi] == cpu_to_be16(NULLDATAOFF))) { XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", XFS_ERRLEVEL_LOW, mp); @@ -689,10 +734,10 @@ xfs_dir2_leafn_lookup_for_entry( tp = args->trans; mp = dp->i_mount; leaf = bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + ents = xfs_dir3_leaf_ents_p(leaf); - xfs_dir3_leaf_check(dp, bp); + xfs_dir3_leaf_check(mp, bp); ASSERT(leafhdr.count > 0); /* @@ -771,7 +816,7 @@ xfs_dir2_leafn_lookup_for_entry( xfs_trans_brelse(tp, state->extrablk.bp); args->cmpresult = cmp; args->inumber = be64_to_cpu(dep->inumber); - args->filetype = dp->d_ops->data_get_ftype(dep); + args->filetype = xfs_dir3_dirent_get_ftype(mp, dep); *indexp = index; state->extravalid = 1; state->extrablk.bp = curbp; @@ -862,7 +907,7 @@ xfs_dir3_leafn_moveents( if (start_d < dhdr->count) { memmove(&dents[start_d + count], &dents[start_d], (dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(tp, args->dp, bp_d, start_d + count, + xfs_dir3_leaf_log_ents(tp, bp_d, start_d + count, count + dhdr->count - 1); } /* @@ -884,8 +929,7 @@ xfs_dir3_leafn_moveents( */ memcpy(&dents[start_d], &sents[start_s], count * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(tp, args->dp, bp_d, - start_d, start_d + count - 1); + xfs_dir3_leaf_log_ents(tp, bp_d, start_d, start_d + count - 1); /* * If there are source entries after the ones we copied, @@ -894,8 +938,7 @@ xfs_dir3_leafn_moveents( if (start_s + count < shdr->count) { memmove(&sents[start_s], &sents[start_s + count], count * sizeof(xfs_dir2_leaf_entry_t)); - xfs_dir3_leaf_log_ents(tp, args->dp, bp_s, - start_s, start_s + count - 1); + xfs_dir3_leaf_log_ents(tp, bp_s, start_s, start_s + count - 1); } /* @@ -913,7 +956,6 @@ xfs_dir3_leafn_moveents( */ int /* sort order */ xfs_dir2_leafn_order( - struct xfs_inode *dp, struct xfs_buf *leaf1_bp, /* leaf1 buffer */ struct xfs_buf *leaf2_bp) /* leaf2 buffer */ { @@ -924,10 +966,10 @@ xfs_dir2_leafn_order( struct xfs_dir3_icleaf_hdr hdr1; struct xfs_dir3_icleaf_hdr hdr2; - dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1); - dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2); - ents1 = dp->d_ops->leaf_ents_p(leaf1); - ents2 = dp->d_ops->leaf_ents_p(leaf2); + xfs_dir3_leaf_hdr_from_disk(&hdr1, leaf1); + xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf2); + ents1 = xfs_dir3_leaf_ents_p(leaf1); + ents2 = xfs_dir3_leaf_ents_p(leaf2); if (hdr1.count > 0 && hdr2.count > 0 && (be32_to_cpu(ents2[0].hashval) < be32_to_cpu(ents1[0].hashval) || @@ -965,13 +1007,12 @@ xfs_dir2_leafn_rebalance( struct xfs_dir2_leaf_entry *ents2; struct xfs_dir3_icleaf_hdr hdr1; struct xfs_dir3_icleaf_hdr hdr2; - struct xfs_inode *dp = state->args->dp; args = state->args; /* * If the block order is wrong, swap the arguments. */ - if ((swap = xfs_dir2_leafn_order(dp, blk1->bp, blk2->bp))) { + if ((swap = xfs_dir2_leafn_order(blk1->bp, blk2->bp))) { xfs_da_state_blk_t *tmp; /* temp for block swap */ tmp = blk1; @@ -980,10 +1021,10 @@ xfs_dir2_leafn_rebalance( } leaf1 = blk1->bp->b_addr; leaf2 = blk2->bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1); - dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2); - ents1 = dp->d_ops->leaf_ents_p(leaf1); - ents2 = dp->d_ops->leaf_ents_p(leaf2); + xfs_dir3_leaf_hdr_from_disk(&hdr1, leaf1); + xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf2); + ents1 = xfs_dir3_leaf_ents_p(leaf1); + ents2 = xfs_dir3_leaf_ents_p(leaf2); oldsum = hdr1.count + hdr2.count; #if defined(DEBUG) || defined(XFS_WARN) @@ -1029,13 +1070,13 @@ xfs_dir2_leafn_rebalance( ASSERT(hdr1.stale + hdr2.stale == oldstale); /* log the changes made when moving the entries */ - dp->d_ops->leaf_hdr_to_disk(leaf1, &hdr1); - dp->d_ops->leaf_hdr_to_disk(leaf2, &hdr2); - xfs_dir3_leaf_log_header(args->trans, dp, blk1->bp); - xfs_dir3_leaf_log_header(args->trans, dp, blk2->bp); + xfs_dir3_leaf_hdr_to_disk(leaf1, &hdr1); + xfs_dir3_leaf_hdr_to_disk(leaf2, &hdr2); + xfs_dir3_leaf_log_header(args->trans, blk1->bp); + xfs_dir3_leaf_log_header(args->trans, blk2->bp); - xfs_dir3_leaf_check(dp, blk1->bp); - xfs_dir3_leaf_check(dp, blk2->bp); + xfs_dir3_leaf_check(args->dp->i_mount, blk1->bp); + xfs_dir3_leaf_check(args->dp->i_mount, blk2->bp); /* * Mark whether we're inserting into the old or new leaf. @@ -1056,11 +1097,11 @@ xfs_dir2_leafn_rebalance( * Finally sanity check just to make sure we are not returning a * negative index */ - if (blk2->index < 0) { + if(blk2->index < 0) { state->inleaf = 1; blk2->index = 0; - xfs_alert(dp->i_mount, - "%s: picked the wrong leaf? reverting original leaf: blk1->index %d", + xfs_alert(args->dp->i_mount, + "%s: picked the wrong leaf? reverting original leaf: blk1->index %d\n", __func__, blk1->index); } } @@ -1079,17 +1120,17 @@ xfs_dir3_data_block_free( int logfree = 0; __be16 *bests; struct xfs_dir3_icfree_hdr freehdr; - struct xfs_inode *dp = args->dp; - dp->d_ops->free_hdr_from_disk(&freehdr, free); - bests = dp->d_ops->free_bests_p(free); + xfs_dir3_free_hdr_from_disk(&freehdr, free); + + bests = xfs_dir3_free_bests_p(tp->t_mountp, free); if (hdr) { /* * Data block is not empty, just set the free entry to the new * value. */ bests[findex] = cpu_to_be16(longest); - xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex); + xfs_dir2_free_log_bests(tp, fbp, findex, findex); return 0; } @@ -1116,8 +1157,8 @@ xfs_dir3_data_block_free( logfree = 1; } - dp->d_ops->free_hdr_to_disk(free, &freehdr); - xfs_dir2_free_log_header(tp, dp, fbp); + xfs_dir3_free_hdr_to_disk(free, &freehdr); + xfs_dir2_free_log_header(tp, fbp); /* * If there are no useful entries left in the block, get rid of the @@ -1141,7 +1182,7 @@ xfs_dir3_data_block_free( /* Log the free entry that changed, unless we got rid of it. */ if (logfree) - xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex); + xfs_dir2_free_log_bests(tp, fbp, findex, findex); return 0; } @@ -1181,8 +1222,8 @@ xfs_dir2_leafn_remove( tp = args->trans; mp = dp->i_mount; leaf = bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + ents = xfs_dir3_leaf_ents_p(leaf); /* * Point to the entry we're removing. @@ -1202,11 +1243,11 @@ xfs_dir2_leafn_remove( * Log the leaf block changes. */ leafhdr.stale++; - dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); - xfs_dir3_leaf_log_header(tp, dp, bp); + xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); + xfs_dir3_leaf_log_header(tp, bp); lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); - xfs_dir3_leaf_log_ents(tp, dp, bp, index, index); + xfs_dir3_leaf_log_ents(tp, bp, index, index); /* * Make the data entry free. Keep track of the longest freespace @@ -1215,19 +1256,19 @@ xfs_dir2_leafn_remove( dbp = dblk->bp; hdr = dbp->b_addr; dep = (xfs_dir2_data_entry_t *)((char *)hdr + off); - bf = dp->d_ops->data_bestfree_p(hdr); + bf = xfs_dir3_data_bestfree_p(hdr); longest = be16_to_cpu(bf[0].length); needlog = needscan = 0; - xfs_dir2_data_make_free(tp, dp, dbp, off, - dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan); + xfs_dir2_data_make_free(tp, dbp, off, + xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan); /* * Rescan the data block freespaces for bestfree. * Log the data block header if needed. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); + xfs_dir2_data_log_header(tp, dbp); xfs_dir3_data_check(dp, dbp); /* * If the longest data block freespace changes, need to update @@ -1244,7 +1285,7 @@ xfs_dir2_leafn_remove( * Convert the data block number to a free block, * read in the free block. */ - fdb = dp->d_ops->db_to_fdb(mp, db); + fdb = xfs_dir2_db_to_fdb(mp, db); error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(mp, fdb), &fbp); if (error) @@ -1253,22 +1294,22 @@ xfs_dir2_leafn_remove( #ifdef DEBUG { struct xfs_dir3_icfree_hdr freehdr; - dp->d_ops->free_hdr_from_disk(&freehdr, free); - ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(mp) * + xfs_dir3_free_hdr_from_disk(&freehdr, free); + ASSERT(freehdr.firstdb == xfs_dir3_free_max_bests(mp) * (fdb - XFS_DIR2_FREE_FIRSTDB(mp))); } #endif /* * Calculate which entry we need to fix. */ - findex = dp->d_ops->db_to_fdindex(mp, db); + findex = xfs_dir2_db_to_fdindex(mp, db); longest = be16_to_cpu(bf[0].length); /* * If the data block is now empty we can get rid of it * (usually). */ if (longest == mp->m_dirblksize - - dp->d_ops->data_entry_offset) { + xfs_dir3_data_entry_offset(hdr)) { /* * Try to punch out the data block. */ @@ -1295,12 +1336,12 @@ xfs_dir2_leafn_remove( return error; } - xfs_dir3_leaf_check(dp, bp); + xfs_dir3_leaf_check(mp, bp); /* * Return indication of whether this leaf block is empty enough * to justify trying to join it with a neighbor. */ - *rval = (dp->d_ops->leaf_hdr_size + + *rval = (xfs_dir3_leaf_hdr_size(leaf) + (uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) < mp->m_dir_magicpct; return 0; @@ -1319,14 +1360,13 @@ xfs_dir2_leafn_split( xfs_dablk_t blkno; /* new leaf block number */ int error; /* error return value */ xfs_mount_t *mp; /* filesystem mount point */ - struct xfs_inode *dp; /* * Allocate space for a new leaf node. */ args = state->args; - dp = args->dp; - mp = dp->i_mount; + mp = args->dp->i_mount; + ASSERT(args != NULL); ASSERT(oldblk->magic == XFS_DIR2_LEAFN_MAGIC); error = xfs_da_grow_inode(args, &blkno); if (error) { @@ -1361,10 +1401,10 @@ xfs_dir2_leafn_split( /* * Update last hashval in each block since we added the name. */ - oldblk->hashval = xfs_dir2_leafn_lasthash(dp, oldblk->bp, NULL); - newblk->hashval = xfs_dir2_leafn_lasthash(dp, newblk->bp, NULL); - xfs_dir3_leaf_check(dp, oldblk->bp); - xfs_dir3_leaf_check(dp, newblk->bp); + oldblk->hashval = xfs_dir2_leafn_lasthash(oldblk->bp, NULL); + newblk->hashval = xfs_dir2_leafn_lasthash(newblk->bp, NULL); + xfs_dir3_leaf_check(mp, oldblk->bp); + xfs_dir3_leaf_check(mp, newblk->bp); return error; } @@ -1394,7 +1434,6 @@ xfs_dir2_leafn_toosmall( int rval; /* result from path_shift */ struct xfs_dir3_icleaf_hdr leafhdr; struct xfs_dir2_leaf_entry *ents; - struct xfs_inode *dp = state->args->dp; /* * Check for the degenerate case of the block being over 50% full. @@ -1403,12 +1442,12 @@ xfs_dir2_leafn_toosmall( */ blk = &state->path.blk[state->path.active - 1]; leaf = blk->bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); - xfs_dir3_leaf_check(dp, blk->bp); + xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); + ents = xfs_dir3_leaf_ents_p(leaf); + xfs_dir3_leaf_check(state->args->dp->i_mount, blk->bp); count = leafhdr.count - leafhdr.stale; - bytes = dp->d_ops->leaf_hdr_size + count * sizeof(ents[0]); + bytes = xfs_dir3_leaf_hdr_size(leaf) + count * sizeof(ents[0]); if (bytes > (state->blocksize >> 1)) { /* * Blk over 50%, don't try to join. @@ -1453,7 +1492,7 @@ xfs_dir2_leafn_toosmall( /* * Read the sibling leaf block. */ - error = xfs_dir3_leafn_read(state->args->trans, dp, + error = xfs_dir3_leafn_read(state->args->trans, state->args->dp, blkno, -1, &bp); if (error) return error; @@ -1465,8 +1504,8 @@ xfs_dir2_leafn_toosmall( bytes = state->blocksize - (state->blocksize >> 2); leaf = bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf); - ents = dp->d_ops->leaf_ents_p(leaf); + xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf); + ents = xfs_dir3_leaf_ents_p(leaf); count += hdr2.count - hdr2.stale; bytes -= count * sizeof(ents[0]); @@ -1520,7 +1559,6 @@ xfs_dir2_leafn_unbalance( struct xfs_dir3_icleaf_hdr drophdr; struct xfs_dir2_leaf_entry *sents; struct xfs_dir2_leaf_entry *dents; - struct xfs_inode *dp = state->args->dp; args = state->args; ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); @@ -1528,10 +1566,10 @@ xfs_dir2_leafn_unbalance( drop_leaf = drop_blk->bp->b_addr; save_leaf = save_blk->bp->b_addr; - dp->d_ops->leaf_hdr_from_disk(&savehdr, save_leaf); - dp->d_ops->leaf_hdr_from_disk(&drophdr, drop_leaf); - sents = dp->d_ops->leaf_ents_p(save_leaf); - dents = dp->d_ops->leaf_ents_p(drop_leaf); + xfs_dir3_leaf_hdr_from_disk(&savehdr, save_leaf); + xfs_dir3_leaf_hdr_from_disk(&drophdr, drop_leaf); + sents = xfs_dir3_leaf_ents_p(save_leaf); + dents = xfs_dir3_leaf_ents_p(drop_leaf); /* * If there are any stale leaf entries, take this opportunity @@ -1546,7 +1584,7 @@ xfs_dir2_leafn_unbalance( * Move the entries from drop to the appropriate end of save. */ drop_blk->hashval = be32_to_cpu(dents[drophdr.count - 1].hashval); - if (xfs_dir2_leafn_order(dp, save_blk->bp, drop_blk->bp)) + if (xfs_dir2_leafn_order(save_blk->bp, drop_blk->bp)) xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0, save_blk->bp, &savehdr, sents, 0, drophdr.count); @@ -1557,13 +1595,13 @@ xfs_dir2_leafn_unbalance( save_blk->hashval = be32_to_cpu(sents[savehdr.count - 1].hashval); /* log the changes made when moving the entries */ - dp->d_ops->leaf_hdr_to_disk(save_leaf, &savehdr); - dp->d_ops->leaf_hdr_to_disk(drop_leaf, &drophdr); - xfs_dir3_leaf_log_header(args->trans, dp, save_blk->bp); - xfs_dir3_leaf_log_header(args->trans, dp, drop_blk->bp); + xfs_dir3_leaf_hdr_to_disk(save_leaf, &savehdr); + xfs_dir3_leaf_hdr_to_disk(drop_leaf, &drophdr); + xfs_dir3_leaf_log_header(args->trans, save_blk->bp); + xfs_dir3_leaf_log_header(args->trans, drop_blk->bp); - xfs_dir3_leaf_check(dp, save_blk->bp); - xfs_dir3_leaf_check(dp, drop_blk->bp); + xfs_dir3_leaf_check(args->dp->i_mount, save_blk->bp); + xfs_dir3_leaf_check(args->dp->i_mount, drop_blk->bp); } /* @@ -1674,7 +1712,7 @@ xfs_dir2_node_addname_int( dp = args->dp; mp = dp->i_mount; tp = args->trans; - length = dp->d_ops->data_entsize(args->namelen); + length = xfs_dir3_data_entsize(mp, args->namelen); /* * If we came in with a freespace block that means that lookup * found an entry with our hash value. This is the freespace @@ -1688,8 +1726,8 @@ xfs_dir2_node_addname_int( ifbno = fblk->blkno; free = fbp->b_addr; findex = fblk->index; - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); + bests = xfs_dir3_free_bests_p(mp, free); + xfs_dir3_free_hdr_from_disk(&freehdr, free); /* * This means the free entry showed that the data block had @@ -1781,8 +1819,8 @@ xfs_dir2_node_addname_int( * and the freehdr are actually initialised if they are placed * there, so we have to do it here to avoid warnings. Blech. */ - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); + bests = xfs_dir3_free_bests_p(mp, free); + xfs_dir3_free_hdr_from_disk(&freehdr, free); if (be16_to_cpu(bests[findex]) != NULLDATAOFF && be16_to_cpu(bests[findex]) >= length) dbno = freehdr.firstdb + findex; @@ -1833,7 +1871,7 @@ xfs_dir2_node_addname_int( * Get the freespace block corresponding to the data block * that was just allocated. */ - fbno = dp->d_ops->db_to_fdb(mp, dbno); + fbno = xfs_dir2_db_to_fdb(mp, dbno); error = xfs_dir2_free_try_read(tp, dp, xfs_dir2_db_to_da(mp, fbno), &fbp); @@ -1850,12 +1888,12 @@ xfs_dir2_node_addname_int( if (error) return error; - if (unlikely(dp->d_ops->db_to_fdb(mp, dbno) != fbno)) { + if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) { xfs_alert(mp, "%s: dir ino %llu needed freesp block %lld for\n" " data block %lld, got %lld ifbno %llu lastfbno %d", __func__, (unsigned long long)dp->i_ino, - (long long)dp->d_ops->db_to_fdb(mp, dbno), + (long long)xfs_dir2_db_to_fdb(mp, dbno), (long long)dbno, (long long)fbno, (unsigned long long)ifbno, lastfbno); if (fblk) { @@ -1880,30 +1918,30 @@ xfs_dir2_node_addname_int( if (error) return error; free = fbp->b_addr; - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); + bests = xfs_dir3_free_bests_p(mp, free); + xfs_dir3_free_hdr_from_disk(&freehdr, free); /* * Remember the first slot as our empty slot. */ freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) * - dp->d_ops->free_max_bests(mp); + xfs_dir3_free_max_bests(mp); } else { free = fbp->b_addr; - bests = dp->d_ops->free_bests_p(free); - dp->d_ops->free_hdr_from_disk(&freehdr, free); + bests = xfs_dir3_free_bests_p(mp, free); + xfs_dir3_free_hdr_from_disk(&freehdr, free); } /* * Set the freespace block index from the data block number. */ - findex = dp->d_ops->db_to_fdindex(mp, dbno); + findex = xfs_dir2_db_to_fdindex(mp, dbno); /* * If it's after the end of the current entries in the * freespace block, extend that table. */ if (findex >= freehdr.nvalid) { - ASSERT(findex < dp->d_ops->free_max_bests(mp)); + ASSERT(findex < xfs_dir3_free_max_bests(mp)); freehdr.nvalid = findex + 1; /* * Tag new entry so nused will go up. @@ -1916,8 +1954,8 @@ xfs_dir2_node_addname_int( */ if (bests[findex] == cpu_to_be16(NULLDATAOFF)) { freehdr.nused++; - dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); - xfs_dir2_free_log_header(tp, dp, fbp); + xfs_dir3_free_hdr_to_disk(fbp->b_addr, &freehdr); + xfs_dir2_free_log_header(tp, fbp); } /* * Update the real value in the table. @@ -1925,7 +1963,7 @@ xfs_dir2_node_addname_int( * change again. */ hdr = dbp->b_addr; - bf = dp->d_ops->data_bestfree_p(hdr); + bf = xfs_dir3_data_bestfree_p(hdr); bests[findex] = bf[0].length; logfree = 1; } @@ -1947,7 +1985,7 @@ xfs_dir2_node_addname_int( if (error) return error; hdr = dbp->b_addr; - bf = dp->d_ops->data_bestfree_p(hdr); + bf = xfs_dir3_data_bestfree_p(hdr); logfree = 0; } ASSERT(be16_to_cpu(bf[0].length) >= length); @@ -1960,7 +1998,7 @@ xfs_dir2_node_addname_int( /* * Mark the first part of the unused space, inuse for us. */ - xfs_dir2_data_use_free(tp, dp, dbp, dup, + xfs_dir2_data_use_free(tp, dbp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length, &needlog, &needscan); /* @@ -1970,24 +2008,24 @@ xfs_dir2_node_addname_int( dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; memcpy(dep->name, args->name, dep->namelen); - dp->d_ops->data_put_ftype(dep, args->filetype); - tagp = dp->d_ops->data_entry_tag_p(dep); + xfs_dir3_dirent_put_ftype(mp, dep, args->filetype); + tagp = xfs_dir3_data_entry_tag_p(mp, dep); *tagp = cpu_to_be16((char *)dep - (char *)hdr); - xfs_dir2_data_log_entry(tp, dp, dbp, dep); + xfs_dir2_data_log_entry(tp, dbp, dep); /* * Rescan the block for bestfree if needed. */ if (needscan) - xfs_dir2_data_freescan(dp, hdr, &needlog); + xfs_dir2_data_freescan(mp, hdr, &needlog); /* * Log the data block header if needed. */ if (needlog) - xfs_dir2_data_log_header(tp, dp, dbp); + xfs_dir2_data_log_header(tp, dbp); /* * If the freespace entry is now wrong, update it. */ - bests = dp->d_ops->free_bests_p(free); /* gcc is so stupid */ + bests = xfs_dir3_free_bests_p(mp, free); /* gcc is so stupid */ if (be16_to_cpu(bests[findex]) != be16_to_cpu(bf[0].length)) { bests[findex] = bf[0].length; logfree = 1; @@ -1996,7 +2034,7 @@ xfs_dir2_node_addname_int( * Log the freespace entry if needed. */ if (logfree) - xfs_dir2_free_log_bests(tp, dp, fbp, findex, findex); + xfs_dir2_free_log_bests(tp, fbp, findex, findex); /* * Return the data block and offset in args, then drop the data block. */ @@ -2174,7 +2212,7 @@ xfs_dir2_node_replace( blk = &state->path.blk[state->path.active - 1]; ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); leaf = blk->bp->b_addr; - ents = args->dp->d_ops->leaf_ents_p(leaf); + ents = xfs_dir3_leaf_ents_p(leaf); lep = &ents[blk->index]; ASSERT(state->extravalid); /* @@ -2191,9 +2229,8 @@ xfs_dir2_node_replace( * Fill in the new inode number and log the entry. */ dep->inumber = cpu_to_be64(inum); - args->dp->d_ops->data_put_ftype(dep, args->filetype); - xfs_dir2_data_log_entry(args->trans, args->dp, - state->extrablk.bp, dep); + xfs_dir3_dirent_put_ftype(state->mp, dep, args->filetype); + xfs_dir2_data_log_entry(args->trans, state->extrablk.bp, dep); rval = 0; } /* @@ -2248,7 +2285,7 @@ xfs_dir2_node_trim_free( if (!bp) return 0; free = bp->b_addr; - dp->d_ops->free_hdr_from_disk(&freehdr, free); + xfs_dir3_free_hdr_from_disk(&freehdr, free); /* * If there are used entries, there's nothing to do. diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h index 8b9d228..1bad84c 100644 --- a/fs/xfs/xfs_dir2_priv.h +++ b/fs/xfs/xfs_dir2_priv.h @@ -59,8 +59,7 @@ extern int xfs_dir3_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp, extern struct xfs_dir2_data_free * xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, - struct xfs_dir2_data_free *bf, struct xfs_dir2_data_unused *dup, - int *loghead); + struct xfs_dir2_data_unused *dup, int *loghead); extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno, struct xfs_buf **bpp); @@ -77,9 +76,9 @@ extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr, int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno, struct xfs_buf **bpp, __uint16_t magic); -extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_inode *dp, - struct xfs_buf *bp, int first, int last); -extern void xfs_dir3_leaf_log_header(struct xfs_trans *tp, struct xfs_inode *dp, +extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp, + int first, int last); +extern void xfs_dir3_leaf_log_header(struct xfs_trans *tp, struct xfs_buf *bp); extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args); extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); @@ -94,18 +93,21 @@ xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr, int lowstale, int highstale, int *lfloglow, int *lfloghigh); extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); -extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp, +extern void xfs_dir3_leaf_hdr_from_disk(struct xfs_dir3_icleaf_hdr *to, + struct xfs_dir2_leaf *from); +extern void xfs_dir3_leaf_hdr_to_disk(struct xfs_dir2_leaf *to, + struct xfs_dir3_icleaf_hdr *from); +extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf); /* xfs_dir2_node.c */ extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args, struct xfs_buf *lbp); -extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_inode *dp, - struct xfs_buf *bp, int *count); +extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_buf *bp, int *count); extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp, struct xfs_da_args *args, int *indexp, struct xfs_da_state *state); -extern int xfs_dir2_leafn_order(struct xfs_inode *dp, struct xfs_buf *leaf1_bp, +extern int xfs_dir2_leafn_order(struct xfs_buf *leaf1_bp, struct xfs_buf *leaf2_bp); extern int xfs_dir2_leafn_split(struct xfs_da_state *state, struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index c4e50c6..8f84153 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -18,23 +18,23 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_bmap.h" -#include "xfs_trans.h" -#include "xfs_dinode.h" /* * Directory file type support functions @@ -119,9 +119,9 @@ xfs_dir2_sf_getdents( * mp->m_dirdatablk. */ dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, - dp->d_ops->data_dot_offset); + xfs_dir3_data_dot_offset(mp)); dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, - dp->d_ops->data_dotdot_offset); + xfs_dir3_data_dotdot_offset(mp)); /* * Put . entry unless we're starting past it. @@ -136,7 +136,7 @@ xfs_dir2_sf_getdents( * Put .. entry unless we're starting past it. */ if (ctx->pos <= dotdot_offset) { - ino = dp->d_ops->sf_get_parent_ino(sfp); + ino = xfs_dir2_sf_get_parent_ino(sfp); ctx->pos = dotdot_offset & 0x7fffffff; if (!dir_emit(ctx, "..", 2, ino, DT_DIR)) return 0; @@ -153,17 +153,17 @@ xfs_dir2_sf_getdents( xfs_dir2_sf_get_offset(sfep)); if (ctx->pos > off) { - sfep = dp->d_ops->sf_nextentry(sfp, sfep); + sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); continue; } - ino = dp->d_ops->sf_get_ino(sfp, sfep); - filetype = dp->d_ops->sf_get_ftype(sfep); + ino = xfs_dir3_sfe_get_ino(mp, sfp, sfep); + filetype = xfs_dir3_sfe_get_ftype(mp, sfp, sfep); ctx->pos = off & 0x7fffffff; if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino, xfs_dir3_get_dtype(mp, filetype))) return 0; - sfep = dp->d_ops->sf_nextentry(sfp, sfep); + sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); } ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & @@ -213,7 +213,7 @@ xfs_dir2_block_getdents( * Set up values for the loop. */ btp = xfs_dir2_block_tail_p(mp, hdr); - ptr = (char *)dp->d_ops->data_entry_p(hdr); + ptr = (char *)xfs_dir3_data_entry_p(hdr); endptr = (char *)xfs_dir2_block_leaf_p(btp); /* @@ -237,7 +237,7 @@ xfs_dir2_block_getdents( /* * Bump pointer for the next iteration. */ - ptr += dp->d_ops->data_entsize(dep->namelen); + ptr += xfs_dir3_data_entsize(mp, dep->namelen); /* * The entry is before the desired starting point, skip it. */ @@ -248,7 +248,7 @@ xfs_dir2_block_getdents( (char *)dep - (char *)hdr); ctx->pos = cook & 0x7fffffff; - filetype = dp->d_ops->data_get_ftype(dep); + filetype = xfs_dir3_dirent_get_ftype(mp, dep); /* * If it didn't fit, set the final offset to here & return. */ @@ -578,13 +578,13 @@ xfs_dir2_leaf_getdents( /* * Find our position in the block. */ - ptr = (char *)dp->d_ops->data_entry_p(hdr); + ptr = (char *)xfs_dir3_data_entry_p(hdr); byteoff = xfs_dir2_byte_to_off(mp, curoff); /* * Skip past the header. */ if (byteoff == 0) - curoff += dp->d_ops->data_entry_offset; + curoff += xfs_dir3_data_entry_offset(hdr); /* * Skip past entries until we reach our offset. */ @@ -601,7 +601,7 @@ xfs_dir2_leaf_getdents( } dep = (xfs_dir2_data_entry_t *)ptr; length = - dp->d_ops->data_entsize(dep->namelen); + xfs_dir3_data_entsize(mp, dep->namelen); ptr += length; } /* @@ -632,8 +632,8 @@ xfs_dir2_leaf_getdents( } dep = (xfs_dir2_data_entry_t *)ptr; - length = dp->d_ops->data_entsize(dep->namelen); - filetype = dp->d_ops->data_get_ftype(dep); + length = xfs_dir3_data_entsize(mp, dep->namelen); + filetype = xfs_dir3_dirent_get_ftype(mp, dep); ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; if (!dir_emit(ctx, (char *)dep->name, dep->namelen, diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index aafc6e4..3ef6d40 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -17,22 +17,22 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_error.h" +#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_trace.h" -#include "xfs_dinode.h" /* * Prototypes for internal functions. @@ -57,6 +57,89 @@ static void xfs_dir2_sf_toino8(xfs_da_args_t *args); #endif /* XFS_BIG_INUMS */ /* + * Inode numbers in short-form directories can come in two versions, + * either 4 bytes or 8 bytes wide. These helpers deal with the + * two forms transparently by looking at the headers i8count field. + * + * For 64-bit inode number the most significant byte must be zero. + */ +static xfs_ino_t +xfs_dir2_sf_get_ino( + struct xfs_dir2_sf_hdr *hdr, + xfs_dir2_inou_t *from) +{ + if (hdr->i8count) + return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL; + else + return get_unaligned_be32(&from->i4.i); +} + +static void +xfs_dir2_sf_put_ino( + struct xfs_dir2_sf_hdr *hdr, + xfs_dir2_inou_t *to, + xfs_ino_t ino) +{ + ASSERT((ino & 0xff00000000000000ULL) == 0); + + if (hdr->i8count) + put_unaligned_be64(ino, &to->i8.i); + else + put_unaligned_be32(ino, &to->i4.i); +} + +xfs_ino_t +xfs_dir2_sf_get_parent_ino( + struct xfs_dir2_sf_hdr *hdr) +{ + return xfs_dir2_sf_get_ino(hdr, &hdr->parent); +} + +void +xfs_dir2_sf_put_parent_ino( + struct xfs_dir2_sf_hdr *hdr, + xfs_ino_t ino) +{ + xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino); +} + +/* + * In short-form directory entries the inode numbers are stored at variable + * offset behind the entry name. If the entry stores a filetype value, then it + * sits between the name and the inode number. Hence the inode numbers may only + * be accessed through the helpers below. + */ +static xfs_dir2_inou_t * +xfs_dir3_sfe_inop( + struct xfs_mount *mp, + struct xfs_dir2_sf_entry *sfep) +{ + __uint8_t *ptr = &sfep->name[sfep->namelen]; + if (xfs_sb_version_hasftype(&mp->m_sb)) + ptr++; + return (xfs_dir2_inou_t *)ptr; +} + +xfs_ino_t +xfs_dir3_sfe_get_ino( + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep) +{ + return xfs_dir2_sf_get_ino(hdr, xfs_dir3_sfe_inop(mp, sfep)); +} + +void +xfs_dir3_sfe_put_ino( + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *hdr, + struct xfs_dir2_sf_entry *sfep, + xfs_ino_t ino) +{ + xfs_dir2_sf_put_ino(hdr, xfs_dir3_sfe_inop(mp, sfep), ino); +} + +/* * Given a block directory (dp/block), calculate its size as a shortform (sf) * directory and a header for the sf directory, if it will fit it the * space currently present in the inode. If it won't fit, the output @@ -143,7 +226,7 @@ xfs_dir2_block_sfsize( */ sfhp->count = count; sfhp->i8count = i8count; - dp->d_ops->sf_put_parent_ino(sfhp, parent); + xfs_dir2_sf_put_parent_ino(sfhp, parent); return size; } @@ -210,7 +293,7 @@ xfs_dir2_block_to_sf( * Set up to loop over the block's entries. */ btp = xfs_dir2_block_tail_p(mp, hdr); - ptr = (char *)dp->d_ops->data_entry_p(hdr); + ptr = (char *)xfs_dir3_data_entry_p(hdr); endptr = (char *)xfs_dir2_block_leaf_p(btp); sfep = xfs_dir2_sf_firstentry(sfp); /* @@ -238,7 +321,7 @@ xfs_dir2_block_to_sf( else if (dep->namelen == 2 && dep->name[0] == '.' && dep->name[1] == '.') ASSERT(be64_to_cpu(dep->inumber) == - dp->d_ops->sf_get_parent_ino(sfp)); + xfs_dir2_sf_get_parent_ino(sfp)); /* * Normal entry, copy it into shortform. */ @@ -248,14 +331,14 @@ xfs_dir2_block_to_sf( (xfs_dir2_data_aoff_t) ((char *)dep - (char *)hdr)); memcpy(sfep->name, dep->name, dep->namelen); - dp->d_ops->sf_put_ino(sfp, sfep, - be64_to_cpu(dep->inumber)); - dp->d_ops->sf_put_ftype(sfep, - dp->d_ops->data_get_ftype(dep)); + xfs_dir3_sfe_put_ino(mp, sfp, sfep, + be64_to_cpu(dep->inumber)); + xfs_dir3_sfe_put_ftype(mp, sfp, sfep, + xfs_dir3_dirent_get_ftype(mp, dep)); - sfep = dp->d_ops->sf_nextentry(sfp, sfep); + sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); } - ptr += dp->d_ops->data_entsize(dep->namelen); + ptr += xfs_dir3_data_entsize(mp, dep->namelen); } ASSERT((char *)sfep - (char *)sfp == size); xfs_dir2_sf_check(args); @@ -306,7 +389,7 @@ xfs_dir2_sf_addname( /* * Compute entry (and change in) size. */ - add_entsize = dp->d_ops->sf_entsize(sfp, args->namelen); + add_entsize = xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen); incr_isize = add_entsize; objchange = 0; #if XFS_BIG_INUMS @@ -400,7 +483,8 @@ xfs_dir2_sf_addname_easy( /* * Grow the in-inode space. */ - xfs_idata_realloc(dp, dp->d_ops->sf_entsize(sfp, args->namelen), + xfs_idata_realloc(dp, + xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen), XFS_DATA_FORK); /* * Need to set up again due to realloc of the inode data. @@ -413,8 +497,8 @@ xfs_dir2_sf_addname_easy( sfep->namelen = args->namelen; xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, args->name, sfep->namelen); - dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); - dp->d_ops->sf_put_ftype(sfep, args->filetype); + xfs_dir3_sfe_put_ino(dp->i_mount, sfp, sfep, args->inumber); + xfs_dir3_sfe_put_ftype(dp->i_mount, sfp, sfep, args->filetype); /* * Update the header and inode. @@ -473,13 +557,13 @@ xfs_dir2_sf_addname_hard( * to insert the new entry. * If it's going to end up at the end then oldsfep will point there. */ - for (offset = dp->d_ops->data_first_offset, + for (offset = xfs_dir3_data_first_offset(mp), oldsfep = xfs_dir2_sf_firstentry(oldsfp), - add_datasize = dp->d_ops->data_entsize(args->namelen), + add_datasize = xfs_dir3_data_entsize(mp, args->namelen), eof = (char *)oldsfep == &buf[old_isize]; !eof; - offset = new_offset + dp->d_ops->data_entsize(oldsfep->namelen), - oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep), + offset = new_offset + xfs_dir3_data_entsize(mp, oldsfep->namelen), + oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep), eof = (char *)oldsfep == &buf[old_isize]) { new_offset = xfs_dir2_sf_get_offset(oldsfep); if (offset + add_datasize <= new_offset) @@ -508,8 +592,8 @@ xfs_dir2_sf_addname_hard( sfep->namelen = args->namelen; xfs_dir2_sf_put_offset(sfep, offset); memcpy(sfep->name, args->name, sfep->namelen); - dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); - dp->d_ops->sf_put_ftype(sfep, args->filetype); + xfs_dir3_sfe_put_ino(mp, sfp, sfep, args->inumber); + xfs_dir3_sfe_put_ftype(mp, sfp, sfep, args->filetype); sfp->count++; #if XFS_BIG_INUMS if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) @@ -519,7 +603,7 @@ xfs_dir2_sf_addname_hard( * If there's more left to copy, do that. */ if (!eof) { - sfep = dp->d_ops->sf_nextentry(sfp, sfep); + sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); memcpy(sfep, oldsfep, old_isize - nbytes); } kmem_free(buf); @@ -555,8 +639,8 @@ xfs_dir2_sf_addname_pick( mp = dp->i_mount; sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - size = dp->d_ops->data_entsize(args->namelen); - offset = dp->d_ops->data_first_offset; + size = xfs_dir3_data_entsize(mp, args->namelen); + offset = xfs_dir3_data_first_offset(mp); sfep = xfs_dir2_sf_firstentry(sfp); holefit = 0; /* @@ -568,8 +652,8 @@ xfs_dir2_sf_addname_pick( if (!holefit) holefit = offset + size <= xfs_dir2_sf_get_offset(sfep); offset = xfs_dir2_sf_get_offset(sfep) + - dp->d_ops->data_entsize(sfep->namelen); - sfep = dp->d_ops->sf_nextentry(sfp, sfep); + xfs_dir3_data_entsize(mp, sfep->namelen); + sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); } /* * Calculate data bytes used excluding the new entry, if this @@ -629,20 +713,21 @@ xfs_dir2_sf_check( mp = dp->i_mount; sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - offset = dp->d_ops->data_first_offset; - ino = dp->d_ops->sf_get_parent_ino(sfp); + offset = xfs_dir3_data_first_offset(mp); + ino = xfs_dir2_sf_get_parent_ino(sfp); i8count = ino > XFS_DIR2_MAX_SHORT_INUM; for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { + i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep)) { ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset); - ino = dp->d_ops->sf_get_ino(sfp, sfep); + ino = xfs_dir3_sfe_get_ino(mp, sfp, sfep); i8count += ino > XFS_DIR2_MAX_SHORT_INUM; offset = xfs_dir2_sf_get_offset(sfep) + - dp->d_ops->data_entsize(sfep->namelen); - ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX); + xfs_dir3_data_entsize(mp, sfep->namelen); + ASSERT(xfs_dir3_sfe_get_ftype(mp, sfp, sfep) < + XFS_DIR3_FT_MAX); } ASSERT(i8count == sfp->i8count); ASSERT(XFS_BIG_INUMS || i8count == 0); @@ -698,7 +783,7 @@ xfs_dir2_sf_create( /* * Now can put in the inode number, since i8count is set. */ - dp->d_ops->sf_put_parent_ino(sfp, pino); + xfs_dir2_sf_put_parent_ino(sfp, pino); sfp->count = 0; dp->i_d.di_size = size; xfs_dir2_sf_check(args); @@ -753,7 +838,7 @@ xfs_dir2_sf_lookup( */ if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { - args->inumber = dp->d_ops->sf_get_parent_ino(sfp); + args->inumber = xfs_dir2_sf_get_parent_ino(sfp); args->cmpresult = XFS_CMP_EXACT; args->filetype = XFS_DIR3_FT_DIR; return XFS_ERROR(EEXIST); @@ -763,7 +848,7 @@ xfs_dir2_sf_lookup( */ ci_sfep = NULL; for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { + i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) { /* * Compare name and if it's an exact match, return the inode * number. If it's the first case-insensitive match, store the @@ -773,8 +858,10 @@ xfs_dir2_sf_lookup( sfep->namelen); if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { args->cmpresult = cmp; - args->inumber = dp->d_ops->sf_get_ino(sfp, sfep); - args->filetype = dp->d_ops->sf_get_ftype(sfep); + args->inumber = xfs_dir3_sfe_get_ino(dp->i_mount, + sfp, sfep); + args->filetype = xfs_dir3_sfe_get_ftype(dp->i_mount, + sfp, sfep); if (cmp == XFS_CMP_EXACT) return XFS_ERROR(EEXIST); ci_sfep = sfep; @@ -830,10 +917,10 @@ xfs_dir2_sf_removename( * Find the one we're deleting. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { + i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) { if (xfs_da_compname(args, sfep->name, sfep->namelen) == XFS_CMP_EXACT) { - ASSERT(dp->d_ops->sf_get_ino(sfp, sfep) == + ASSERT(xfs_dir3_sfe_get_ino(dp->i_mount, sfp, sfep) == args->inumber); break; } @@ -847,7 +934,7 @@ xfs_dir2_sf_removename( * Calculate sizes. */ byteoff = (int)((char *)sfep - (char *)sfp); - entsize = dp->d_ops->sf_entsize(sfp, args->namelen); + entsize = xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen); newsize = oldsize - entsize; /* * Copy the part if any after the removed entry, sliding it down. @@ -954,25 +1041,28 @@ xfs_dir2_sf_replace( if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { #if XFS_BIG_INUMS || defined(DEBUG) - ino = dp->d_ops->sf_get_parent_ino(sfp); + ino = xfs_dir2_sf_get_parent_ino(sfp); ASSERT(args->inumber != ino); #endif - dp->d_ops->sf_put_parent_ino(sfp, args->inumber); + xfs_dir2_sf_put_parent_ino(sfp, args->inumber); } /* * Normal entry, look for the name. */ else { for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) { + i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) { if (xfs_da_compname(args, sfep->name, sfep->namelen) == XFS_CMP_EXACT) { #if XFS_BIG_INUMS || defined(DEBUG) - ino = dp->d_ops->sf_get_ino(sfp, sfep); + ino = xfs_dir3_sfe_get_ino(dp->i_mount, + sfp, sfep); ASSERT(args->inumber != ino); #endif - dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); - dp->d_ops->sf_put_ftype(sfep, args->filetype); + xfs_dir3_sfe_put_ino(dp->i_mount, sfp, sfep, + args->inumber); + xfs_dir3_sfe_put_ftype(dp->i_mount, sfp, sfep, + args->filetype); break; } } @@ -1075,21 +1165,22 @@ xfs_dir2_sf_toino4( */ sfp->count = oldsfp->count; sfp->i8count = 0; - dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp)); + xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp)); /* * Copy the entries field by field. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), oldsfep = xfs_dir2_sf_firstentry(oldsfp); i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep), - oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) { + i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep), + oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep)) { sfep->namelen = oldsfep->namelen; sfep->offset = oldsfep->offset; memcpy(sfep->name, oldsfep->name, sfep->namelen); - dp->d_ops->sf_put_ino(sfp, sfep, - dp->d_ops->sf_get_ino(oldsfp, oldsfep)); - dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep)); + xfs_dir3_sfe_put_ino(mp, sfp, sfep, + xfs_dir3_sfe_get_ino(mp, oldsfp, oldsfep)); + xfs_dir3_sfe_put_ftype(mp, sfp, sfep, + xfs_dir3_sfe_get_ftype(mp, oldsfp, oldsfep)); } /* * Clean up the inode. @@ -1153,21 +1244,22 @@ xfs_dir2_sf_toino8( */ sfp->count = oldsfp->count; sfp->i8count = 1; - dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp)); + xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp)); /* * Copy the entries field by field. */ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), oldsfep = xfs_dir2_sf_firstentry(oldsfp); i < sfp->count; - i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep), - oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) { + i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep), + oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep)) { sfep->namelen = oldsfep->namelen; sfep->offset = oldsfep->offset; memcpy(sfep->name, oldsfep->name, sfep->namelen); - dp->d_ops->sf_put_ino(sfp, sfep, - dp->d_ops->sf_get_ino(oldsfp, oldsfep)); - dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep)); + xfs_dir3_sfe_put_ino(mp, sfp, sfep, + xfs_dir3_sfe_get_ino(mp, oldsfp, oldsfep)); + xfs_dir3_sfe_put_ftype(mp, sfp, sfep, + xfs_dir3_sfe_get_ftype(mp, oldsfp, oldsfep)); } /* * Clean up the inode. diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 8367d6d..45560ee 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -17,21 +17,22 @@ */ #include "xfs.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_quota.h" -#include "xfs_inode.h" -#include "xfs_btree.h" #include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_inode.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_extent_busy.h" #include "xfs_discard.h" #include "xfs_trace.h" -#include "xfs_log.h" STATIC int xfs_trim_extents( diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 6b1e695..1ee776d 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -18,28 +18,28 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_shared.h" -#include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" #include "xfs_inode.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" +#include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_trans.h" +#include "xfs_itable.h" +#include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" #include "xfs_trans_priv.h" #include "xfs_qm.h" #include "xfs_cksum.h" #include "xfs_trace.h" -#include "xfs_log.h" -#include "xfs_bmap_btree.h" /* * Lock order: @@ -292,6 +292,118 @@ xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp) dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5; } +STATIC bool +xfs_dquot_buf_verify_crc( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; + int ndquots; + int i; + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return true; + + /* + * if we are in log recovery, the quota subsystem has not been + * initialised so we have no quotainfo structure. In that case, we need + * to manually calculate the number of dquots in the buffer. + */ + if (mp->m_quotainfo) + ndquots = mp->m_quotainfo->qi_dqperchunk; + else + ndquots = xfs_qm_calc_dquots_per_chunk(mp, + XFS_BB_TO_FSB(mp, bp->b_length)); + + for (i = 0; i < ndquots; i++, d++) { + if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), + XFS_DQUOT_CRC_OFF)) + return false; + if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid)) + return false; + } + return true; +} + +STATIC bool +xfs_dquot_buf_verify( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; + xfs_dqid_t id = 0; + int ndquots; + int i; + + /* + * if we are in log recovery, the quota subsystem has not been + * initialised so we have no quotainfo structure. In that case, we need + * to manually calculate the number of dquots in the buffer. + */ + if (mp->m_quotainfo) + ndquots = mp->m_quotainfo->qi_dqperchunk; + else + ndquots = xfs_qm_calc_dquots_per_chunk(mp, bp->b_length); + + /* + * On the first read of the buffer, verify that each dquot is valid. + * We don't know what the id of the dquot is supposed to be, just that + * they should be increasing monotonically within the buffer. If the + * first id is corrupt, then it will fail on the second dquot in the + * buffer so corruptions could point to the wrong dquot in this case. + */ + for (i = 0; i < ndquots; i++) { + struct xfs_disk_dquot *ddq; + int error; + + ddq = &d[i].dd_diskdq; + + if (i == 0) + id = be32_to_cpu(ddq->d_id); + + error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, + "xfs_dquot_buf_verify"); + if (error) + return false; + } + return true; +} + +static void +xfs_dquot_buf_read_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); + } +} + +/* + * we don't calculate the CRC here as that is done when the dquot is flushed to + * the buffer after the update is done. This ensures that the dquot in the + * buffer always has an up-to-date CRC value. + */ +void +xfs_dquot_buf_write_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (!xfs_dquot_buf_verify(mp, bp)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); + return; + } +} + +const struct xfs_buf_ops xfs_dquot_buf_ops = { + .verify_read = xfs_dquot_buf_read_verify, + .verify_write = xfs_dquot_buf_write_verify, +}; + /* * Allocate a block and fill it with dquots. * This is called when the bmapi finds a hole. @@ -402,7 +514,6 @@ xfs_qm_dqalloc( return (error); } - STATIC int xfs_qm_dqrepair( struct xfs_mount *mp, @@ -436,7 +547,7 @@ xfs_qm_dqrepair( /* Do the actual repair of dquots in this buffer */ for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) { ddq = &d[i].dd_diskdq; - error = xfs_dqcheck(mp, ddq, firstid + i, + error = xfs_qm_dqcheck(mp, ddq, firstid + i, dqp->dq_flags & XFS_DQ_ALLTYPES, XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair"); if (error) { @@ -1022,7 +1133,7 @@ xfs_qm_dqflush( /* * A simple sanity check in case we got a corrupted dquot.. */ - error = xfs_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, + error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, XFS_QMOPT_DOWARN, "dqflush (incore copy)"); if (error) { xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index d22ed00..55abbca 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -172,4 +172,6 @@ static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp) return dqp; } +extern const struct xfs_buf_ops xfs_dquot_buf_ops; + #endif /* __XFS_DQUOT_H__ */ diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/xfs_dquot_buf.c deleted file mode 100644 index d401457..0000000 --- a/fs/xfs/xfs_dquot_buf.c +++ /dev/null @@ -1,288 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * Copyright (c) 2013 Red Hat, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_inode.h" -#include "xfs_quota.h" -#include "xfs_trans.h" -#include "xfs_qm.h" -#include "xfs_error.h" -#include "xfs_cksum.h" -#include "xfs_trace.h" - -int -xfs_calc_dquots_per_chunk( - struct xfs_mount *mp, - unsigned int nbblks) /* basic block units */ -{ - unsigned int ndquots; - - ASSERT(nbblks > 0); - ndquots = BBTOB(nbblks); - do_div(ndquots, sizeof(xfs_dqblk_t)); - - return ndquots; -} - -/* - * Do some primitive error checking on ondisk dquot data structures. - */ -int -xfs_dqcheck( - struct xfs_mount *mp, - xfs_disk_dquot_t *ddq, - xfs_dqid_t id, - uint type, /* used only when IO_dorepair is true */ - uint flags, - char *str) -{ - xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; - int errs = 0; - - /* - * We can encounter an uninitialized dquot buffer for 2 reasons: - * 1. If we crash while deleting the quotainode(s), and those blks got - * used for user data. This is because we take the path of regular - * file deletion; however, the size field of quotainodes is never - * updated, so all the tricks that we play in itruncate_finish - * don't quite matter. - * - * 2. We don't play the quota buffers when there's a quotaoff logitem. - * But the allocation will be replayed so we'll end up with an - * uninitialized quota block. - * - * This is all fine; things are still consistent, and we haven't lost - * any quota information. Just don't complain about bad dquot blks. - */ - if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", - str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); - errs++; - } - if (ddq->d_version != XFS_DQUOT_VERSION) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", - str, id, ddq->d_version, XFS_DQUOT_VERSION); - errs++; - } - - if (ddq->d_flags != XFS_DQ_USER && - ddq->d_flags != XFS_DQ_PROJ && - ddq->d_flags != XFS_DQ_GROUP) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : XFS dquot ID 0x%x, unknown flags 0x%x", - str, id, ddq->d_flags); - errs++; - } - - if (id != -1 && id != be32_to_cpu(ddq->d_id)) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : ondisk-dquot 0x%p, ID mismatch: " - "0x%x expected, found id 0x%x", - str, ddq, id, be32_to_cpu(ddq->d_id)); - errs++; - } - - if (!errs && ddq->d_id) { - if (ddq->d_blk_softlimit && - be64_to_cpu(ddq->d_bcount) > - be64_to_cpu(ddq->d_blk_softlimit)) { - if (!ddq->d_btimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - if (ddq->d_ino_softlimit && - be64_to_cpu(ddq->d_icount) > - be64_to_cpu(ddq->d_ino_softlimit)) { - if (!ddq->d_itimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - if (ddq->d_rtb_softlimit && - be64_to_cpu(ddq->d_rtbcount) > - be64_to_cpu(ddq->d_rtb_softlimit)) { - if (!ddq->d_rtbtimer) { - if (flags & XFS_QMOPT_DOWARN) - xfs_alert(mp, - "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", - str, (int)be32_to_cpu(ddq->d_id), ddq); - errs++; - } - } - } - - if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) - return errs; - - if (flags & XFS_QMOPT_DOWARN) - xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); - - /* - * Typically, a repair is only requested by quotacheck. - */ - ASSERT(id != -1); - ASSERT(flags & XFS_QMOPT_DQREPAIR); - memset(d, 0, sizeof(xfs_dqblk_t)); - - d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); - d->dd_diskdq.d_version = XFS_DQUOT_VERSION; - d->dd_diskdq.d_flags = type; - d->dd_diskdq.d_id = cpu_to_be32(id); - - if (xfs_sb_version_hascrc(&mp->m_sb)) { - uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); - xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), - XFS_DQUOT_CRC_OFF); - } - - return errs; -} - -STATIC bool -xfs_dquot_buf_verify_crc( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; - int ndquots; - int i; - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return true; - - /* - * if we are in log recovery, the quota subsystem has not been - * initialised so we have no quotainfo structure. In that case, we need - * to manually calculate the number of dquots in the buffer. - */ - if (mp->m_quotainfo) - ndquots = mp->m_quotainfo->qi_dqperchunk; - else - ndquots = xfs_calc_dquots_per_chunk(mp, - XFS_BB_TO_FSB(mp, bp->b_length)); - - for (i = 0; i < ndquots; i++, d++) { - if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), - XFS_DQUOT_CRC_OFF)) - return false; - if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid)) - return false; - } - return true; -} - -STATIC bool -xfs_dquot_buf_verify( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; - xfs_dqid_t id = 0; - int ndquots; - int i; - - /* - * if we are in log recovery, the quota subsystem has not been - * initialised so we have no quotainfo structure. In that case, we need - * to manually calculate the number of dquots in the buffer. - */ - if (mp->m_quotainfo) - ndquots = mp->m_quotainfo->qi_dqperchunk; - else - ndquots = xfs_calc_dquots_per_chunk(mp, bp->b_length); - - /* - * On the first read of the buffer, verify that each dquot is valid. - * We don't know what the id of the dquot is supposed to be, just that - * they should be increasing monotonically within the buffer. If the - * first id is corrupt, then it will fail on the second dquot in the - * buffer so corruptions could point to the wrong dquot in this case. - */ - for (i = 0; i < ndquots; i++) { - struct xfs_disk_dquot *ddq; - int error; - - ddq = &d[i].dd_diskdq; - - if (i == 0) - id = be32_to_cpu(ddq->d_id); - - error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, - "xfs_dquot_buf_verify"); - if (error) - return false; - } - return true; -} - -static void -xfs_dquot_buf_read_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - - if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - } -} - -/* - * we don't calculate the CRC here as that is done when the dquot is flushed to - * the buffer after the update is done. This ensures that the dquot in the - * buffer always has an up-to-date CRC value. - */ -static void -xfs_dquot_buf_write_verify( - struct xfs_buf *bp) -{ - struct xfs_mount *mp = bp->b_target->bt_mount; - - if (!xfs_dquot_buf_verify(mp, bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - return; - } -} - -const struct xfs_buf_ops xfs_dquot_buf_ops = { - .verify_read = xfs_dquot_buf_read_verify, - .verify_write = xfs_dquot_buf_write_verify, -}; - diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 92e5f62..e838d84 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -18,19 +18,23 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" #include "xfs_inode.h" -#include "xfs_quota.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_trans.h" +#include "xfs_itable.h" +#include "xfs_attr.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" #include "xfs_qm.h" -#include "xfs_log.h" static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip) { diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 9995b80..1123d93f 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -16,13 +16,16 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" -#include "xfs_format.h" #include "xfs_fs.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" #include "xfs_error.h" #ifdef DEBUG @@ -156,7 +159,7 @@ xfs_error_report( { if (level <= xfs_error_level) { xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, - "Internal error %s at line %d of file %s. Caller 0x%p", + "Internal error %s at line %d of file %s. Caller 0x%p\n", tag, linenum, filename, ra); xfs_stack_trace(); diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 1399e18..066df42 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -16,21 +16,21 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_export.h" +#include "xfs_bmap_btree.h" #include "xfs_inode.h" -#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_log.h" /* * Note that we only accept fileids which are long enough rather than allow diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index fd22f69..e43708e 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -19,18 +19,17 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_shared.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" #include "xfs_alloc.h" +#include "xfs_inode.h" #include "xfs_extent_busy.h" #include "xfs_trace.h" -#include "xfs_trans.h" -#include "xfs_log.h" void xfs_extent_busy_insert( diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h index bfff284..985412d 100644 --- a/fs/xfs/xfs_extent_busy.h +++ b/fs/xfs/xfs_extent_busy.h @@ -20,10 +20,6 @@ #ifndef __XFS_EXTENT_BUSY_H__ #define __XFS_EXTENT_BUSY_H__ -struct xfs_mount; -struct xfs_trans; -struct xfs_alloc_arg; - /* * Busy block/extent entry. Indexed by a rbtree in perag to mark blocks that * have been freed but whose transactions aren't committed to disk yet. diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 3680d04..dc53e8f 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -17,14 +17,14 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_buf_item.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_trans.h" #include "xfs_trans_priv.h" -#include "xfs_buf_item.h" #include "xfs_extfree_item.h" diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 52c91e1..4c749ab 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -17,27 +17,25 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_trans.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc.h" +#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_error.h" +#include "xfs_da_btree.h" +#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_ioctl.h" #include "xfs_trace.h" -#include "xfs_log.h" -#include "xfs_dinode.h" #include <linux/aio.h> #include <linux/dcache.h> @@ -807,64 +805,44 @@ out: STATIC long xfs_file_fallocate( - struct file *file, - int mode, - loff_t offset, - loff_t len) + struct file *file, + int mode, + loff_t offset, + loff_t len) { - struct inode *inode = file_inode(file); - struct xfs_inode *ip = XFS_I(inode); - struct xfs_trans *tp; - long error; - loff_t new_size = 0; + struct inode *inode = file_inode(file); + long error; + loff_t new_size = 0; + xfs_flock64_t bf; + xfs_inode_t *ip = XFS_I(inode); + int cmd = XFS_IOC_RESVSP; + int attr_flags = XFS_ATTR_NOLOCK; - if (!S_ISREG(inode->i_mode)) - return -EINVAL; if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; + bf.l_whence = 0; + bf.l_start = offset; + bf.l_len = len; + xfs_ilock(ip, XFS_IOLOCK_EXCL); - if (mode & FALLOC_FL_PUNCH_HOLE) { - error = xfs_free_file_space(ip, offset, len); - if (error) - goto out_unlock; - } else { - if (!(mode & FALLOC_FL_KEEP_SIZE) && - offset + len > i_size_read(inode)) { - new_size = offset + len; - error = -inode_newsize_ok(inode, new_size); - if (error) - goto out_unlock; - } - error = xfs_alloc_file_space(ip, offset, len, - XFS_BMAPI_PREALLOC); + if (mode & FALLOC_FL_PUNCH_HOLE) + cmd = XFS_IOC_UNRESVSP; + + /* check the new inode size is valid before allocating */ + if (!(mode & FALLOC_FL_KEEP_SIZE) && + offset + len > i_size_read(inode)) { + new_size = offset + len; + error = inode_newsize_ok(inode, new_size); if (error) goto out_unlock; } - tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); - error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - goto out_unlock; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - ip->i_d.di_mode &= ~S_ISUID; - if (ip->i_d.di_mode & S_IXGRP) - ip->i_d.di_mode &= ~S_ISGID; - - if (!(mode & FALLOC_FL_PUNCH_HOLE)) - ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; - - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (file->f_flags & O_DSYNC) - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); + attr_flags |= XFS_ATTR_SYNC; + + error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags); if (error) goto out_unlock; @@ -874,12 +852,12 @@ xfs_file_fallocate( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; - error = xfs_setattr_size(ip, &iattr); + error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK); } out_unlock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return -error; + return error; } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 12b6e77..ce78e65 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -16,19 +16,19 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" +#include "xfs_bmap_btree.h" +#include "xfs_inum.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" #include "xfs_ag.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_inum.h" -#include "xfs_inode.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_alloc.h" #include "xfs_mru_cache.h" -#include "xfs_dinode.h" #include "xfs_filestream.h" #include "xfs_trace.h" diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h index b6ab5a3..35c08ff 100644 --- a/fs/xfs/xfs_format.h +++ b/fs/xfs/xfs_format.h @@ -156,259 +156,14 @@ struct xfs_dsymlink_hdr { ((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ sizeof(struct xfs_dsymlink_hdr) : 0)) - -/* - * Allocation Btree format definitions - * - * There are two on-disk btrees, one sorted by blockno and one sorted - * by blockcount and blockno. All blocks look the same to make the code - * simpler; if we have time later, we'll make the optimizations. - */ -#define XFS_ABTB_MAGIC 0x41425442 /* 'ABTB' for bno tree */ -#define XFS_ABTB_CRC_MAGIC 0x41423342 /* 'AB3B' */ -#define XFS_ABTC_MAGIC 0x41425443 /* 'ABTC' for cnt tree */ -#define XFS_ABTC_CRC_MAGIC 0x41423343 /* 'AB3C' */ - -/* - * Data record/key structure - */ -typedef struct xfs_alloc_rec { - __be32 ar_startblock; /* starting block number */ - __be32 ar_blockcount; /* count of free blocks */ -} xfs_alloc_rec_t, xfs_alloc_key_t; - -typedef struct xfs_alloc_rec_incore { - xfs_agblock_t ar_startblock; /* starting block number */ - xfs_extlen_t ar_blockcount; /* count of free blocks */ -} xfs_alloc_rec_incore_t; - -/* btree pointer type */ -typedef __be32 xfs_alloc_ptr_t; - -/* - * Block numbers in the AG: - * SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3. - */ -#define XFS_BNO_BLOCK(mp) ((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1)) -#define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1)) - - -/* - * Inode Allocation Btree format definitions - * - * There is a btree for the inode map per allocation group. - */ -#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ -#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */ - -typedef __uint64_t xfs_inofree_t; -#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) -#define XFS_INODES_PER_CHUNK_LOG (XFS_NBBYLOG + 3) -#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1) -#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i)) - -static inline xfs_inofree_t xfs_inobt_maskn(int i, int n) -{ - return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i; -} - -/* - * Data record structure - */ -typedef struct xfs_inobt_rec { - __be32 ir_startino; /* starting inode number */ - __be32 ir_freecount; /* count of free inodes (set bits) */ - __be64 ir_free; /* free inode mask */ -} xfs_inobt_rec_t; - -typedef struct xfs_inobt_rec_incore { - xfs_agino_t ir_startino; /* starting inode number */ - __int32_t ir_freecount; /* count of free inodes (set bits) */ - xfs_inofree_t ir_free; /* free inode mask */ -} xfs_inobt_rec_incore_t; - - -/* - * Key structure - */ -typedef struct xfs_inobt_key { - __be32 ir_startino; /* starting inode number */ -} xfs_inobt_key_t; - -/* btree pointer type */ -typedef __be32 xfs_inobt_ptr_t; - -/* - * block numbers in the AG. - */ -#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) -#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) - - - -/* - * BMAP Btree format definitions - * - * This includes both the root block definition that sits inside an inode fork - * and the record/pointer formats for the leaf/node in the blocks. - */ -#define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */ -#define XFS_BMAP_CRC_MAGIC 0x424d4133 /* 'BMA3' */ - -/* - * Bmap root header, on-disk form only. - */ -typedef struct xfs_bmdr_block { - __be16 bb_level; /* 0 is a leaf */ - __be16 bb_numrecs; /* current # of data records */ -} xfs_bmdr_block_t; - -/* - * Bmap btree record and extent descriptor. - * l0:63 is an extent flag (value 1 indicates non-normal). - * l0:9-62 are startoff. - * l0:0-8 and l1:21-63 are startblock. - * l1:0-20 are blockcount. - */ -#define BMBT_EXNTFLAG_BITLEN 1 -#define BMBT_STARTOFF_BITLEN 54 -#define BMBT_STARTBLOCK_BITLEN 52 -#define BMBT_BLOCKCOUNT_BITLEN 21 - -typedef struct xfs_bmbt_rec { - __be64 l0, l1; -} xfs_bmbt_rec_t; - -typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ -typedef xfs_bmbt_rec_t xfs_bmdr_rec_t; - -typedef struct xfs_bmbt_rec_host { - __uint64_t l0, l1; -} xfs_bmbt_rec_host_t; - -/* - * Values and macros for delayed-allocation startblock fields. - */ -#define STARTBLOCKVALBITS 17 -#define STARTBLOCKMASKBITS (15 + XFS_BIG_BLKNOS * 20) -#define DSTARTBLOCKMASKBITS (15 + 20) -#define STARTBLOCKMASK \ - (((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) -#define DSTARTBLOCKMASK \ - (((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) - -static inline int isnullstartblock(xfs_fsblock_t x) -{ - return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK; -} - -static inline int isnulldstartblock(xfs_dfsbno_t x) -{ - return ((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK; -} - -static inline xfs_fsblock_t nullstartblock(int k) -{ - ASSERT(k < (1 << STARTBLOCKVALBITS)); - return STARTBLOCKMASK | (k); -} - -static inline xfs_filblks_t startblockval(xfs_fsblock_t x) -{ - return (xfs_filblks_t)((x) & ~STARTBLOCKMASK); -} - -/* - * Possible extent formats. - */ -typedef enum { - XFS_EXTFMT_NOSTATE = 0, - XFS_EXTFMT_HASSTATE -} xfs_exntfmt_t; - -/* - * Possible extent states. - */ -typedef enum { - XFS_EXT_NORM, XFS_EXT_UNWRITTEN, - XFS_EXT_DMAPI_OFFLINE, XFS_EXT_INVALID -} xfs_exntst_t; - -/* - * Incore version of above. - */ -typedef struct xfs_bmbt_irec -{ - xfs_fileoff_t br_startoff; /* starting file offset */ - xfs_fsblock_t br_startblock; /* starting block number */ - xfs_filblks_t br_blockcount; /* number of blocks */ - xfs_exntst_t br_state; /* extent state */ -} xfs_bmbt_irec_t; - -/* - * Key structure for non-leaf levels of the tree. - */ -typedef struct xfs_bmbt_key { - __be64 br_startoff; /* starting file offset */ -} xfs_bmbt_key_t, xfs_bmdr_key_t; - -/* btree pointer type */ -typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; - - -/* - * Generic Btree block format definitions - * - * This is a combination of the actual format used on disk for short and long - * format btrees. The first three fields are shared by both format, but the - * pointers are different and should be used with care. - * - * To get the size of the actual short or long form headers please use the size - * macros below. Never use sizeof(xfs_btree_block). - * - * The blkno, crc, lsn, owner and uuid fields are only available in filesystems - * with the crc feature bit, and all accesses to them must be conditional on - * that flag. - */ -struct xfs_btree_block { - __be32 bb_magic; /* magic number for block type */ - __be16 bb_level; /* 0 is a leaf */ - __be16 bb_numrecs; /* current # of data records */ - union { - struct { - __be32 bb_leftsib; - __be32 bb_rightsib; - - __be64 bb_blkno; - __be64 bb_lsn; - uuid_t bb_uuid; - __be32 bb_owner; - __le32 bb_crc; - } s; /* short form pointers */ - struct { - __be64 bb_leftsib; - __be64 bb_rightsib; - - __be64 bb_blkno; - __be64 bb_lsn; - uuid_t bb_uuid; - __be64 bb_owner; - __le32 bb_crc; - __be32 bb_pad; /* padding for alignment */ - } l; /* long form pointers */ - } bb_u; /* rest */ -}; - -#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */ -#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */ - -/* sizes of CRC enabled btree blocks */ -#define XFS_BTREE_SBLOCK_CRC_LEN (XFS_BTREE_SBLOCK_LEN + 40) -#define XFS_BTREE_LBLOCK_CRC_LEN (XFS_BTREE_LBLOCK_LEN + 48) - -#define XFS_BTREE_SBLOCK_CRC_OFF \ - offsetof(struct xfs_btree_block, bb_u.s.bb_crc) -#define XFS_BTREE_LBLOCK_CRC_OFF \ - offsetof(struct xfs_btree_block, bb_u.l.bb_crc) +int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen); +int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, + uint32_t size, struct xfs_buf *bp); +bool xfs_symlink_hdr_ok(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, + uint32_t size, struct xfs_buf *bp); +void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, + struct xfs_inode *ip, struct xfs_ifork *ifp); + +extern const struct xfs_buf_ops xfs_symlink_buf_ops; #endif /* __XFS_FORMAT_H__ */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index c5fc116..18272c7 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -233,11 +233,11 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */ #define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */ #define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */ -#define XFS_FSOP_GEOM_FLAGS_PROJID32 0x0800 /* 32-bit project IDs */ +#define XFS_FSOP_GEOM_FLAGS_PROJID32 0x0800 /* 32-bit project IDs */ #define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */ #define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ #define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */ -#define XFS_FSOP_GEOM_FLAGS_FTYPE 0x10000 /* inode directory types */ + /* * Minimum and maximum sizes need for growth checks. diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index a6e54b3..e64ee52 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -17,29 +17,28 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_trans.h" #include "xfs_inode_item.h" -#include "xfs_error.h" #include "xfs_btree.h" -#include "xfs_alloc_btree.h" +#include "xfs_error.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_fsops.h" #include "xfs_itable.h" #include "xfs_trans_space.h" #include "xfs_rtalloc.h" -#include "xfs_trace.h" -#include "xfs_log.h" -#include "xfs_dinode.h" #include "xfs_filestream.h" +#include "xfs_trace.h" /* * File system operations @@ -102,9 +101,7 @@ xfs_fs_geometry( (xfs_sb_version_hasprojid32bit(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) | (xfs_sb_version_hascrc(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_V5SB : 0) | - (xfs_sb_version_hasftype(&mp->m_sb) ? - XFS_FSOP_GEOM_FLAGS_FTYPE : 0); + XFS_FSOP_GEOM_FLAGS_V5SB : 0); geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? mp->m_sb.sb_logsectsize : BBSIZE; geo->rtsectsize = mp->m_sb.sb_blocksize; @@ -156,7 +153,7 @@ xfs_growfs_data_private( xfs_buf_t *bp; int bucket; int dpct; - int error, saved_error = 0; + int error; xfs_agnumber_t nagcount; xfs_agnumber_t nagimax = 0; xfs_rfsblock_t nb, nb_mod; @@ -499,33 +496,29 @@ xfs_growfs_data_private( error = ENOMEM; } - /* - * If we get an error reading or writing alternate superblocks, - * continue. xfs_repair chooses the "best" superblock based - * on most matches; if we break early, we'll leave more - * superblocks un-updated than updated, and xfs_repair may - * pick them over the properly-updated primary. - */ if (error) { xfs_warn(mp, "error %d reading secondary superblock for ag %d", error, agno); - saved_error = error; - continue; + break; } xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, XFS_SB_ALL_BITS); + /* + * If we get an error writing out the alternate superblocks, + * just issue a warning and continue. The real work is + * already done and committed. + */ error = xfs_bwrite(bp); xfs_buf_relse(bp); if (error) { xfs_warn(mp, "write error %d updating secondary superblock for ag %d", error, agno); - saved_error = error; - continue; + break; /* no point in continuing */ } } - return saved_error ? saved_error : error; + return error; error0: xfs_trans_cancel(tp, XFS_TRANS_ABORT); diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index e87719c..ccf2fb1 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -17,30 +17,29 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" #include "xfs_bit.h" +#include "xfs_log.h" #include "xfs_inum.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_ialloc.h" -#include "xfs_ialloc_btree.h" #include "xfs_alloc.h" #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_bmap.h" #include "xfs_cksum.h" -#include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_icreate_item.h" #include "xfs_icache.h" -#include "xfs_dinode.h" -#include "xfs_trace.h" /* @@ -1628,9 +1627,8 @@ xfs_read_agi( { int error; - trace_xfs_read_agi(mp, agno); - ASSERT(agno != NULLAGNUMBER); + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops); @@ -1653,8 +1651,6 @@ xfs_ialloc_read_agi( struct xfs_perag *pag; /* per allocation group data */ int error; - trace_xfs_ialloc_read_agi(mp, agno); - error = xfs_read_agi(mp, tp, agno, bpp); if (error) return error; diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index a8f76a5..68c0732 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -23,7 +23,6 @@ struct xfs_dinode; struct xfs_imap; struct xfs_mount; struct xfs_trans; -struct xfs_btree_cur; /* * Allocation parameters for inode allocation. @@ -43,7 +42,7 @@ struct xfs_btree_cur; static inline struct xfs_dinode * xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o) { - return (struct xfs_dinode *) + return (xfs_dinode_t *) (xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog)); } @@ -159,4 +158,6 @@ int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_agblock_t length, unsigned int gen); +extern const struct xfs_buf_ops xfs_agi_buf_ops; + #endif /* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index c8fa5bb..5448eb6 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -17,23 +17,24 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_ialloc.h" -#include "xfs_ialloc_btree.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_cksum.h" -#include "xfs_trans.h" STATIC int diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index f38b220..3ac36b76 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -27,6 +27,55 @@ struct xfs_btree_cur; struct xfs_mount; /* + * There is a btree for the inode map per allocation group. + */ +#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ +#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */ + +typedef __uint64_t xfs_inofree_t; +#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) +#define XFS_INODES_PER_CHUNK_LOG (XFS_NBBYLOG + 3) +#define XFS_INOBT_ALL_FREE ((xfs_inofree_t)-1) +#define XFS_INOBT_MASK(i) ((xfs_inofree_t)1 << (i)) + +static inline xfs_inofree_t xfs_inobt_maskn(int i, int n) +{ + return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i; +} + +/* + * Data record structure + */ +typedef struct xfs_inobt_rec { + __be32 ir_startino; /* starting inode number */ + __be32 ir_freecount; /* count of free inodes (set bits) */ + __be64 ir_free; /* free inode mask */ +} xfs_inobt_rec_t; + +typedef struct xfs_inobt_rec_incore { + xfs_agino_t ir_startino; /* starting inode number */ + __int32_t ir_freecount; /* count of free inodes (set bits) */ + xfs_inofree_t ir_free; /* free inode mask */ +} xfs_inobt_rec_incore_t; + + +/* + * Key structure + */ +typedef struct xfs_inobt_key { + __be32 ir_startino; /* starting inode number */ +} xfs_inobt_key_t; + +/* btree pointer type */ +typedef __be32 xfs_inobt_ptr_t; + +/* + * block numbers in the AG. + */ +#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) +#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) + +/* * Btree block header size depends on a superblock flag. */ #define XFS_INOBT_BLOCK_LEN(mp) \ @@ -61,4 +110,6 @@ extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); +extern const struct xfs_buf_ops xfs_inobt_buf_ops; + #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 98d3524..474807a 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -18,19 +18,24 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_log_priv.h" #include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" #include "xfs_inode.h" +#include "xfs_dinode.h" #include "xfs_error.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" +#include "xfs_filestream.h" #include "xfs_inode_item.h" #include "xfs_quota.h" #include "xfs_trace.h" +#include "xfs_fsops.h" #include "xfs_icache.h" #include "xfs_bmap_util.h" @@ -495,6 +500,11 @@ xfs_inode_ag_walk_grab( if (!igrab(inode)) return ENOENT; + if (is_bad_inode(inode)) { + IRELE(ip); + return ENOENT; + } + /* inode is valid */ return 0; @@ -908,6 +918,8 @@ restart: xfs_iflock(ip); } + if (is_bad_inode(VFS_I(ip))) + goto reclaim; if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { xfs_iunpin_wait(ip); xfs_iflush_abort(ip, false); diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index d2eaccf..5a5a593 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -17,14 +17,13 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_error.h" #include "xfs_icreate_item.h" diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 001aa89..e3d7538 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -19,38 +19,39 @@ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" #include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_trans_space.h" +#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_inode.h" -#include "xfs_da_format.h" #include "xfs_da_btree.h" +#include "xfs_dir2_format.h" #include "xfs_dir2.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" #include "xfs_attr_sf.h" #include "xfs_attr.h" -#include "xfs_trans_space.h" -#include "xfs_trans.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" #include "xfs_buf_item.h" #include "xfs_inode_item.h" +#include "xfs_btree.h" +#include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_error.h" #include "xfs_quota.h" -#include "xfs_dinode.h" #include "xfs_filestream.h" #include "xfs_cksum.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_symlink.h" -#include "xfs_trans_priv.h" -#include "xfs_log.h" -#include "xfs_bmap_btree.h" kmem_zone_t *xfs_inode_zone; @@ -1662,126 +1663,6 @@ xfs_release( } /* - * xfs_inactive_truncate - * - * Called to perform a truncate when an inode becomes unlinked. - */ -STATIC int -xfs_inactive_truncate( - struct xfs_inode *ip) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; - int error; - - tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); - if (error) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); - xfs_trans_cancel(tp, 0); - return error; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, 0); - - /* - * Log the inode size first to prevent stale data exposure in the event - * of a system crash before the truncate completes. See the related - * comment in xfs_setattr_size() for details. - */ - ip->i_d.di_size = 0; - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); - if (error) - goto error_trans_cancel; - - ASSERT(ip->i_d.di_nextents == 0); - - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - if (error) - goto error_unlock; - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return 0; - -error_trans_cancel: - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); -error_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return error; -} - -/* - * xfs_inactive_ifree() - * - * Perform the inode free when an inode is unlinked. - */ -STATIC int -xfs_inactive_ifree( - struct xfs_inode *ip) -{ - xfs_bmap_free_t free_list; - xfs_fsblock_t first_block; - int committed; - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; - int error; - - tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0); - if (error) { - ASSERT(XFS_FORCED_SHUTDOWN(mp)); - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); - return error; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, 0); - - xfs_bmap_init(&free_list, &first_block); - error = xfs_ifree(tp, ip, &free_list); - if (error) { - /* - * If we fail to free the inode, shut down. The cancel - * might do that, we need to make sure. Otherwise the - * inode might be lost for a long time or forever. - */ - if (!XFS_FORCED_SHUTDOWN(mp)) { - xfs_notice(mp, "%s: xfs_ifree returned error %d", - __func__, error); - xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); - } - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return error; - } - - /* - * Credit the quota account(s). The inode is gone. - */ - xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); - - /* - * Just ignore errors at this point. There is nothing we can - * do except to try to keep going. Make sure it's not a silent - * error. - */ - error = xfs_bmap_finish(&tp, &free_list, &committed); - if (error) - xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", - __func__, error); - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - if (error) - xfs_notice(mp, "%s: xfs_trans_commit returned error %d", - __func__, error); - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return 0; -} - -/* * xfs_inactive * * This is called when the vnode reference count for the vnode @@ -1789,11 +1670,16 @@ xfs_inactive_ifree( * now be truncated. Also, we clear all of the read-ahead state * kept for the inode here since the file is now closed. */ -void +int xfs_inactive( xfs_inode_t *ip) { + xfs_bmap_free_t free_list; + xfs_fsblock_t first_block; + int committed; + struct xfs_trans *tp; struct xfs_mount *mp; + struct xfs_trans_res *resp; int error; int truncate = 0; @@ -1801,17 +1687,19 @@ xfs_inactive( * If the inode is already free, then there can be nothing * to clean up here. */ - if (ip->i_d.di_mode == 0) { + if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) { ASSERT(ip->i_df.if_real_bytes == 0); ASSERT(ip->i_df.if_broot_bytes == 0); - return; + return VN_INACTIVE_CACHE; } mp = ip->i_mount; + error = 0; + /* If this is a read-only mount, don't do this (would generate I/O) */ if (mp->m_flags & XFS_MOUNT_RDONLY) - return; + goto out; if (ip->i_d.di_nlink != 0) { /* @@ -1819,10 +1707,12 @@ xfs_inactive( * cache. Post-eof blocks must be freed, lest we end up with * broken free space accounting. */ - if (xfs_can_free_eofblocks(ip, true)) - xfs_free_eofblocks(mp, ip, false); - - return; + if (xfs_can_free_eofblocks(ip, true)) { + error = xfs_free_eofblocks(mp, ip, false); + if (error) + return VN_INACTIVE_CACHE; + } + goto out; } if (S_ISREG(ip->i_d.di_mode) && @@ -1832,14 +1722,36 @@ xfs_inactive( error = xfs_qm_dqattach(ip, 0); if (error) - return; + return VN_INACTIVE_CACHE; - if (S_ISLNK(ip->i_d.di_mode)) - error = xfs_inactive_symlink(ip); - else if (truncate) - error = xfs_inactive_truncate(ip); - if (error) - return; + tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); + resp = (truncate || S_ISLNK(ip->i_d.di_mode)) ? + &M_RES(mp)->tr_itruncate : &M_RES(mp)->tr_ifree; + + error = xfs_trans_reserve(tp, resp, 0, 0); + if (error) { + ASSERT(XFS_FORCED_SHUTDOWN(mp)); + xfs_trans_cancel(tp, 0); + return VN_INACTIVE_CACHE; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + + if (S_ISLNK(ip->i_d.di_mode)) { + error = xfs_inactive_symlink(ip, &tp); + if (error) + goto out_cancel; + } else if (truncate) { + ip->i_d.di_size = 0; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); + if (error) + goto out_cancel; + + ASSERT(ip->i_d.di_nextents == 0); + } /* * If there are attributes associated with the file then blow them away @@ -1850,9 +1762,25 @@ xfs_inactive( if (ip->i_d.di_anextents > 0) { ASSERT(ip->i_d.di_forkoff != 0); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + if (error) + goto out_unlock; + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + error = xfs_attr_inactive(ip); if (error) - return; + goto out; + + tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + goto out; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); } if (ip->i_afp) @@ -1863,14 +1791,52 @@ xfs_inactive( /* * Free the inode. */ - error = xfs_inactive_ifree(ip); - if (error) - return; + xfs_bmap_init(&free_list, &first_block); + error = xfs_ifree(tp, ip, &free_list); + if (error) { + /* + * If we fail to free the inode, shut down. The cancel + * might do that, we need to make sure. Otherwise the + * inode might be lost for a long time or forever. + */ + if (!XFS_FORCED_SHUTDOWN(mp)) { + xfs_notice(mp, "%s: xfs_ifree returned error %d", + __func__, error); + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + } + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); + } else { + /* + * Credit the quota account(s). The inode is gone. + */ + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); + + /* + * Just ignore errors at this point. There is nothing we can + * do except to try to keep going. Make sure it's not a silent + * error. + */ + error = xfs_bmap_finish(&tp, &free_list, &committed); + if (error) + xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", + __func__, error); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + if (error) + xfs_notice(mp, "%s: xfs_trans_commit returned error %d", + __func__, error); + } /* * Release the dquots held by inode, if any. */ xfs_qm_dqdetach(ip); +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL); +out: + return VN_INACTIVE_CACHE; +out_cancel: + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); + goto out_unlock; } /* @@ -2404,33 +2370,6 @@ xfs_iunpin_wait( __xfs_iunpin_wait(ip); } -/* - * Removing an inode from the namespace involves removing the directory entry - * and dropping the link count on the inode. Removing the directory entry can - * result in locking an AGF (directory blocks were freed) and removing a link - * count can result in placing the inode on an unlinked list which results in - * locking an AGI. - * - * The big problem here is that we have an ordering constraint on AGF and AGI - * locking - inode allocation locks the AGI, then can allocate a new extent for - * new inodes, locking the AGF after the AGI. Similarly, freeing the inode - * removes the inode from the unlinked list, requiring that we lock the AGI - * first, and then freeing the inode can result in an inode chunk being freed - * and hence freeing disk space requiring that we lock an AGF. - * - * Hence the ordering that is imposed by other parts of the code is AGI before - * AGF. This means we cannot remove the directory entry before we drop the inode - * reference count and put it on the unlinked list as this results in a lock - * order of AGF then AGI, and this can deadlock against inode allocation and - * freeing. Therefore we must drop the link counts before we remove the - * directory entry. - * - * This is still safe from a transactional point of view - it is not until we - * get to xfs_bmap_finish() that we have the possibility of multiple - * transactions in this operation. Hence as long as we remove the directory - * entry and drop the link count in the first transaction of the remove - * operation, there are no transactional constraints on the ordering here. - */ int xfs_remove( xfs_inode_t *dp, @@ -2500,7 +2439,6 @@ xfs_remove( /* * If we're removing a directory perform some additional validation. */ - cancel_flags |= XFS_TRANS_ABORT; if (is_dir) { ASSERT(ip->i_d.di_nlink >= 2); if (ip->i_d.di_nlink != 2) { @@ -2511,16 +2449,31 @@ xfs_remove( error = XFS_ERROR(ENOTEMPTY); goto out_trans_cancel; } + } - /* Drop the link from ip's "..". */ + xfs_bmap_init(&free_list, &first_block); + error = xfs_dir_removename(tp, dp, name, ip->i_ino, + &first_block, &free_list, resblks); + if (error) { + ASSERT(error != ENOENT); + goto out_bmap_cancel; + } + xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + + if (is_dir) { + /* + * Drop the link from ip's "..". + */ error = xfs_droplink(tp, dp); if (error) - goto out_trans_cancel; + goto out_bmap_cancel; - /* Drop the "." link from ip to self. */ + /* + * Drop the "." link from ip to self. + */ error = xfs_droplink(tp, ip); if (error) - goto out_trans_cancel; + goto out_bmap_cancel; } else { /* * When removing a non-directory we need to log the parent @@ -2529,24 +2482,20 @@ xfs_remove( */ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); } - xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - /* Drop the link from dp to ip. */ + /* + * Drop the link from dp to ip. + */ error = xfs_droplink(tp, ip); if (error) - goto out_trans_cancel; + goto out_bmap_cancel; - /* Determine if this is the last link while the inode is locked */ + /* + * Determine if this is the last link while + * we are in the transaction. + */ link_zero = (ip->i_d.di_nlink == 0); - xfs_bmap_init(&free_list, &first_block); - error = xfs_dir_removename(tp, dp, name, ip->i_ino, - &first_block, &free_list, resblks); - if (error) { - ASSERT(error != ENOENT); - goto out_bmap_cancel; - } - /* * If this is a synchronous mount, make sure that the * remove transaction goes to disk before returning to @@ -2576,6 +2525,7 @@ xfs_remove( out_bmap_cancel: xfs_bmap_cancel(&free_list); + cancel_flags |= XFS_TRANS_ABORT; out_trans_cancel: xfs_trans_cancel(tp, cancel_flags); std_return: diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 9e6efccb..4a91358 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -24,6 +24,7 @@ /* * Kernel only inode definitions */ + struct xfs_dinode; struct xfs_inode; struct xfs_buf; @@ -49,9 +50,6 @@ typedef struct xfs_inode { xfs_ifork_t *i_afp; /* attribute fork pointer */ xfs_ifork_t i_df; /* data fork */ - /* operations vectors */ - const struct xfs_dir_ops *d_ops; /* directory ops vector */ - /* Transaction and locking information. */ struct xfs_inode_log_item *i_itemp; /* logging information */ mrlock_t i_lock; /* inode lock */ @@ -318,7 +316,7 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) int xfs_release(struct xfs_inode *ip); -void xfs_inactive(struct xfs_inode *ip); +int xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); int xfs_create(struct xfs_inode *dp, struct xfs_name *name, diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c index 4fc9f39..63382d3 100644 --- a/fs/xfs/xfs_inode_buf.c +++ b/fs/xfs/xfs_inode_buf.c @@ -17,20 +17,20 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_error.h" #include "xfs_cksum.h" #include "xfs_icache.h" -#include "xfs_trans.h" #include "xfs_ialloc.h" -#include "xfs_dinode.h" /* * Check that none of the inode's in the buffer have a next diff --git a/fs/xfs/xfs_inode_buf.h b/fs/xfs/xfs_inode_buf.h index 9308c47..abba0ae 100644 --- a/fs/xfs/xfs_inode_buf.h +++ b/fs/xfs/xfs_inode_buf.h @@ -47,4 +47,7 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); #define xfs_inobp_check(mp, bp) #endif /* DEBUG */ +extern const struct xfs_buf_ops xfs_inode_buf_ops; +extern const struct xfs_buf_ops xfs_inode_buf_ra_ops; + #endif /* __XFS_INODE_BUF_H__ */ diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/xfs_inode_fork.c index cfee14a..02f1083 100644 --- a/fs/xfs/xfs_inode_fork.c +++ b/fs/xfs/xfs_inode_fork.c @@ -20,21 +20,31 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" #include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_trans.h" +#include "xfs_buf_item.h" #include "xfs_inode_item.h" -#include "xfs_bmap_btree.h" +#include "xfs_btree.h" +#include "xfs_alloc.h" +#include "xfs_ialloc.h" #include "xfs_bmap.h" #include "xfs_error.h" +#include "xfs_quota.h" +#include "xfs_filestream.h" +#include "xfs_cksum.h" #include "xfs_trace.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" +#include "xfs_icache.h" kmem_zone_t *xfs_ifork_zone; @@ -1021,14 +1031,15 @@ xfs_iext_add( * the next index needed in the indirection array. */ else { - uint count = ext_diff; + int count = ext_diff; while (count) { erp = xfs_iext_irec_new(ifp, erp_idx); - erp->er_extcount = min(count, XFS_LINEAR_EXTS); - count -= erp->er_extcount; - if (count) + erp->er_extcount = count; + count -= MIN(count, (int)XFS_LINEAR_EXTS); + if (count) { erp_idx++; + } } } } @@ -1348,7 +1359,7 @@ xfs_iext_remove_indirect( void xfs_iext_realloc_direct( xfs_ifork_t *ifp, /* inode fork pointer */ - int new_size) /* new size of extents after adding */ + int new_size) /* new size of extents */ { int rnew_size; /* real new size of extents */ @@ -1386,8 +1397,13 @@ xfs_iext_realloc_direct( rnew_size - ifp->if_real_bytes); } } - /* Switch from the inline extent buffer to a direct extent list */ + /* + * Switch from the inline extent buffer to a direct + * extent list. Be sure to include the inline extent + * bytes in new_size. + */ else { + new_size += ifp->if_bytes; if (!is_power_of_2(new_size)) { rnew_size = roundup_pow_of_two(new_size); } diff --git a/fs/xfs/xfs_inode_fork.h b/fs/xfs/xfs_inode_fork.h index eb329a1..28661a0 100644 --- a/fs/xfs/xfs_inode_fork.h +++ b/fs/xfs/xfs_inode_fork.h @@ -19,7 +19,6 @@ #define __XFS_INODE_FORK_H__ struct xfs_inode_log_item; -struct xfs_dinode; /* * The following xfs_ext_irec_t struct introduces a second (top) level diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 7c0d391f..3780811 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -17,19 +17,19 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_trans_priv.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_trans_priv.h" -#include "xfs_dinode.h" kmem_zone_t *xfs_ili_zone; /* inode log item zone */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 4d61340..668e8f4 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -17,31 +17,32 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_alloc.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_ioctl.h" -#include "xfs_alloc.h" #include "xfs_rtalloc.h" #include "xfs_itable.h" #include "xfs_error.h" #include "xfs_attr.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" +#include "xfs_buf_item.h" #include "xfs_fsops.h" #include "xfs_discard.h" #include "xfs_quota.h" +#include "xfs_inode_item.h" #include "xfs_export.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_symlink.h" -#include "xfs_dinode.h" -#include "xfs_trans.h" #include <linux/capability.h> #include <linux/dcache.h> @@ -640,11 +641,7 @@ xfs_ioc_space( unsigned int cmd, xfs_flock64_t *bf) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; - struct iattr iattr; - bool setprealloc = false; - bool clrprealloc = false; + int attr_flags = 0; int error; /* @@ -664,128 +661,19 @@ xfs_ioc_space( if (!S_ISREG(inode->i_mode)) return -XFS_ERROR(EINVAL); - error = mnt_want_write_file(filp); - if (error) - return error; - - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - switch (bf->l_whence) { - case 0: /*SEEK_SET*/ - break; - case 1: /*SEEK_CUR*/ - bf->l_start += filp->f_pos; - break; - case 2: /*SEEK_END*/ - bf->l_start += XFS_ISIZE(ip); - break; - default: - error = XFS_ERROR(EINVAL); - goto out_unlock; - } - - /* - * length of <= 0 for resv/unresv/zero is invalid. length for - * alloc/free is ignored completely and we have no idea what userspace - * might have set it to, so set it to zero to allow range - * checks to pass. - */ - switch (cmd) { - case XFS_IOC_ZERO_RANGE: - case XFS_IOC_RESVSP: - case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP: - case XFS_IOC_UNRESVSP64: - if (bf->l_len <= 0) { - error = XFS_ERROR(EINVAL); - goto out_unlock; - } - break; - default: - bf->l_len = 0; - break; - } - - if (bf->l_start < 0 || - bf->l_start > mp->m_super->s_maxbytes || - bf->l_start + bf->l_len < 0 || - bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) { - error = XFS_ERROR(EINVAL); - goto out_unlock; - } - - switch (cmd) { - case XFS_IOC_ZERO_RANGE: - error = xfs_zero_file_space(ip, bf->l_start, bf->l_len); - if (!error) - setprealloc = true; - break; - case XFS_IOC_RESVSP: - case XFS_IOC_RESVSP64: - error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len, - XFS_BMAPI_PREALLOC); - if (!error) - setprealloc = true; - break; - case XFS_IOC_UNRESVSP: - case XFS_IOC_UNRESVSP64: - error = xfs_free_file_space(ip, bf->l_start, bf->l_len); - break; - case XFS_IOC_ALLOCSP: - case XFS_IOC_ALLOCSP64: - case XFS_IOC_FREESP: - case XFS_IOC_FREESP64: - if (bf->l_start > XFS_ISIZE(ip)) { - error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), - bf->l_start - XFS_ISIZE(ip), 0); - if (error) - goto out_unlock; - } + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) + attr_flags |= XFS_ATTR_NONBLOCK; - iattr.ia_valid = ATTR_SIZE; - iattr.ia_size = bf->l_start; + if (filp->f_flags & O_DSYNC) + attr_flags |= XFS_ATTR_SYNC; - error = xfs_setattr_size(ip, &iattr); - if (!error) - clrprealloc = true; - break; - default: - ASSERT(0); - error = XFS_ERROR(EINVAL); - } + if (ioflags & IO_INVIS) + attr_flags |= XFS_ATTR_DMI; + error = mnt_want_write_file(filp); if (error) - goto out_unlock; - - tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - goto out_unlock; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - - if (!(ioflags & IO_INVIS)) { - ip->i_d.di_mode &= ~S_ISUID; - if (ip->i_d.di_mode & S_IXGRP) - ip->i_d.di_mode &= ~S_ISGID; - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - } - - if (setprealloc) - ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; - else if (clrprealloc) - ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (filp->f_flags & O_DSYNC) - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - -out_unlock: - xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return error; + error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags); mnt_drop_write_file(filp); return -error; } diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index e8fb123..f671f7e 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -22,13 +22,14 @@ #include <asm/uaccess.h> #include "xfs.h" #include "xfs_fs.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" #include "xfs_vnode.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_itable.h" #include "xfs_error.h" diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 22d1cbe..8d4d49b 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -17,28 +17,34 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_inode_item.h" #include "xfs_btree.h" -#include "xfs_bmap_btree.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" +#include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_trans.h" +#include "xfs_itable.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" #include "xfs_trans_space.h" #include "xfs_iomap.h" #include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_quota.h" #include "xfs_dquot_item.h" #include "xfs_dquot.h" -#include "xfs_dinode.h" #define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ @@ -104,7 +110,7 @@ xfs_alert_fsblock_zero( xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO, "Access to block zero in inode %llu " "start_block: %llx start_off: %llx " - "blkcnt: %llx extent-state: %x", + "blkcnt: %llx extent-state: %x\n", (unsigned long long)ip->i_ino, (unsigned long long)imap->br_startblock, (unsigned long long)imap->br_startoff, @@ -649,6 +655,7 @@ int xfs_iomap_write_allocate( xfs_inode_t *ip, xfs_off_t offset, + size_t count, xfs_bmbt_irec_t *imap) { xfs_mount_t *mp = ip->i_mount; diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 411fbb8..8061576 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -21,12 +21,12 @@ struct xfs_inode; struct xfs_bmbt_irec; -int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, +extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, struct xfs_bmbt_irec *, int); -int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, +extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, struct xfs_bmbt_irec *); -int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, +extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t, struct xfs_bmbt_irec *); -int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); +extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); #endif /* __XFS_IOMAP_H__*/ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 27e0e54..2b8952d 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -17,28 +17,32 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_acl.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" #include "xfs_mount.h" -#include "xfs_da_format.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_acl.h" -#include "xfs_quota.h" +#include "xfs_rtalloc.h" #include "xfs_error.h" +#include "xfs_itable.h" #include "xfs_attr.h" -#include "xfs_trans.h" +#include "xfs_buf_item.h" +#include "xfs_inode_item.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_symlink.h" #include "xfs_da_btree.h" +#include "xfs_dir2_format.h" #include "xfs_dir2_priv.h" -#include "xfs_dinode.h" #include <linux/capability.h> #include <linux/xattr.h> @@ -705,7 +709,8 @@ out_dqrele: int xfs_setattr_size( struct xfs_inode *ip, - struct iattr *iattr) + struct iattr *iattr, + int flags) { struct xfs_mount *mp = ip->i_mount; struct inode *inode = VFS_I(ip); @@ -728,11 +733,15 @@ xfs_setattr_size( if (error) return XFS_ERROR(error); - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); ASSERT(S_ISREG(ip->i_d.di_mode)); ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); + if (!(flags & XFS_ATTR_NOLOCK)) { + lock_flags |= XFS_IOLOCK_EXCL; + xfs_ilock(ip, lock_flags); + } + oldsize = inode->i_size; newsize = iattr->ia_size; @@ -741,11 +750,12 @@ xfs_setattr_size( */ if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { if (!(mask & (ATTR_CTIME|ATTR_MTIME))) - return 0; + goto out_unlock; /* * Use the regular setattr path to update the timestamps. */ + xfs_iunlock(ip, lock_flags); iattr->ia_valid &= ~ATTR_SIZE; return xfs_setattr_nonsize(ip, iattr, 0); } @@ -755,7 +765,7 @@ xfs_setattr_size( */ error = xfs_qm_dqattach(ip, 0); if (error) - return error; + goto out_unlock; /* * Now we can make the changes. Before we join the inode to the @@ -773,7 +783,7 @@ xfs_setattr_size( */ error = xfs_zero_eof(ip, newsize, oldsize); if (error) - return error; + goto out_unlock; } /* @@ -792,7 +802,7 @@ xfs_setattr_size( error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ip->i_d.di_size, newsize); if (error) - return error; + goto out_unlock; } /* @@ -802,7 +812,7 @@ xfs_setattr_size( error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); if (error) - return error; + goto out_unlock; tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); @@ -906,21 +916,12 @@ out_trans_cancel: STATIC int xfs_vn_setattr( - struct dentry *dentry, - struct iattr *iattr) + struct dentry *dentry, + struct iattr *iattr) { - struct xfs_inode *ip = XFS_I(dentry->d_inode); - int error; - - if (iattr->ia_valid & ATTR_SIZE) { - xfs_ilock(ip, XFS_IOLOCK_EXCL); - error = xfs_setattr_size(ip, iattr); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - } else { - error = xfs_setattr_nonsize(ip, iattr, 0); - } - - return -error; + if (iattr->ia_valid & ATTR_SIZE) + return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0); + return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0); } STATIC int @@ -1168,7 +1169,6 @@ xfs_setup_inode( struct xfs_inode *ip) { struct inode *inode = &ip->i_vnode; - gfp_t gfp_mask; inode->i_ino = ip->i_ino; inode->i_state = I_NEW; @@ -1204,7 +1204,6 @@ xfs_setup_inode( inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; xfs_diflags_to_iflags(inode, ip); - ip->d_ops = ip->i_mount->m_nondir_inode_ops; switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_op = &xfs_inode_operations; @@ -1217,7 +1216,6 @@ xfs_setup_inode( else inode->i_op = &xfs_dir_inode_operations; inode->i_fop = &xfs_dir_file_operations; - ip->d_ops = ip->i_mount->m_dir_inode_ops; break; case S_IFLNK: inode->i_op = &xfs_symlink_inode_operations; @@ -1231,14 +1229,6 @@ xfs_setup_inode( } /* - * Ensure all page cache allocations are done from GFP_NOFS context to - * prevent direct reclaim recursion back into the filesystem and blowing - * stacks or deadlocking. - */ - gfp_mask = mapping_gfp_mask(inode->i_mapping); - mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS))); - - /* * If there is no attribute fork no ACL can exist on this inode, * and it can't have any file capabilities attached to it either. */ diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h index d2c5057..d81fb41 100644 --- a/fs/xfs/xfs_iops.h +++ b/fs/xfs/xfs_iops.h @@ -30,10 +30,14 @@ extern void xfs_setup_inode(struct xfs_inode *); /* * Internal setattr interfaces. */ -#define XFS_ATTR_NOACL 0x01 /* Don't call xfs_acl_chmod */ +#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ +#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if op would block */ +#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ +#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */ +#define XFS_ATTR_SYNC 0x10 /* synchronous operation required */ extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags); -extern int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap); +extern int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags); #endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index c237ad1..084b3e1 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -17,23 +17,24 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" #include "xfs_inum.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_btree.h" #include "xfs_ialloc.h" -#include "xfs_ialloc_btree.h" #include "xfs_itable.h" #include "xfs_error.h" +#include "xfs_btree.h" #include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_dinode.h" STATIC int xfs_internal_inum( diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 8497a00..a2dea108 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -17,19 +17,21 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_error.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -#include "xfs_log.h" #include "xfs_log_priv.h" +#include "xfs_buf_item.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" #include "xfs_log_recover.h" +#include "xfs_trans_priv.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_trace.h" #include "xfs_fsops.h" @@ -998,34 +1000,27 @@ xfs_log_space_wake( } /* - * Determine if we have a transaction that has gone to disk that needs to be - * covered. To begin the transition to the idle state firstly the log needs to - * be idle. That means the CIL, the AIL and the iclogs needs to be empty before - * we start attempting to cover the log. + * Determine if we have a transaction that has gone to disk + * that needs to be covered. To begin the transition to the idle state + * firstly the log needs to be idle (no AIL and nothing in the iclogs). + * If we are then in a state where covering is needed, the caller is informed + * that dummy transactions are required to move the log into the idle state. * - * Only if we are then in a state where covering is needed, the caller is - * informed that dummy transactions are required to move the log into the idle - * state. - * - * If there are any items in the AIl or CIL, then we do not want to attempt to - * cover the log as we may be in a situation where there isn't log space - * available to run a dummy transaction and this can lead to deadlocks when the - * tail of the log is pinned by an item that is modified in the CIL. Hence - * there's no point in running a dummy transaction at this point because we - * can't start trying to idle the log until both the CIL and AIL are empty. + * Because this is called as part of the sync process, we should also indicate + * that dummy transactions should be issued in anything but the covered or + * idle states. This ensures that the log tail is accurately reflected in + * the log at the end of the sync, hence if a crash occurrs avoids replay + * of transactions where the metadata is already on disk. */ int xfs_log_need_covered(xfs_mount_t *mp) { - struct xlog *log = mp->m_log; int needed = 0; + struct xlog *log = mp->m_log; if (!xfs_fs_writable(mp)) return 0; - if (!xlog_cil_empty(log)) - return 0; - spin_lock(&log->l_icloglock); switch (log->l_covered_state) { case XLOG_STATE_COVER_DONE: @@ -1034,17 +1029,14 @@ xfs_log_need_covered(xfs_mount_t *mp) break; case XLOG_STATE_COVER_NEED: case XLOG_STATE_COVER_NEED2: - if (xfs_ail_min_lsn(log->l_ailp)) - break; - if (!xlog_iclogs_empty(log)) - break; - - needed = 1; - if (log->l_covered_state == XLOG_STATE_COVER_NEED) - log->l_covered_state = XLOG_STATE_COVER_DONE; - else - log->l_covered_state = XLOG_STATE_COVER_DONE2; - break; + if (!xfs_ail_min_lsn(log->l_ailp) && + xlog_iclogs_empty(log)) { + if (log->l_covered_state == XLOG_STATE_COVER_NEED) + log->l_covered_state = XLOG_STATE_COVER_DONE; + else + log->l_covered_state = XLOG_STATE_COVER_DONE2; + } + /* FALLTHRU */ default: needed = 1; break; @@ -1076,7 +1068,6 @@ xlog_assign_tail_lsn_locked( tail_lsn = lip->li_lsn; else tail_lsn = atomic64_read(&log->l_last_sync_lsn); - trace_xfs_log_assign_tail_lsn(log, tail_lsn); atomic64_set(&log->l_tail_lsn, tail_lsn); return tail_lsn; } @@ -1988,7 +1979,7 @@ xlog_print_tic_res( for (i = 0; i < ticket->t_res_num; i++) { uint r_type = ticket->t_res_arr[i].r_type; - xfs_warn(mp, "region[%u]: %s - %u bytes", i, + xfs_warn(mp, "region[%u]: %s - %u bytes\n", i, ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ? "bad-rtype" : res_type_str[r_type-1]), ticket->t_res_arr[i].r_len); @@ -3711,9 +3702,11 @@ xlog_verify_iclog( /* check validity of iclog pointers */ spin_lock(&log->l_icloglock); icptr = log->l_iclog; - for (i = 0; i < log->l_iclog_bufs; i++, icptr = icptr->ic_next) - ASSERT(icptr); - + for (i=0; i < log->l_iclog_bufs; i++) { + if (icptr == NULL) + xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); + icptr = icptr->ic_next; + } if (icptr != log->l_iclog) xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__); spin_unlock(&log->l_icloglock); diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index e148719..1c45848 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -18,6 +18,8 @@ #ifndef __XFS_LOG_H__ #define __XFS_LOG_H__ +#include "xfs_log_format.h" + struct xfs_log_vec { struct xfs_log_vec *lv_next; /* next lv in build list */ int lv_niovecs; /* number of iovecs in lv */ @@ -80,7 +82,11 @@ struct xlog_ticket; struct xfs_log_item; struct xfs_item_ops; struct xfs_trans; -struct xfs_log_callback; + +void xfs_log_item_init(struct xfs_mount *mp, + struct xfs_log_item *item, + int type, + const struct xfs_item_ops *ops); xfs_lsn_t xfs_log_done(struct xfs_mount *mp, struct xlog_ticket *ticket, @@ -108,7 +114,7 @@ xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp); void xfs_log_space_wake(struct xfs_mount *mp); int xfs_log_notify(struct xfs_mount *mp, struct xlog_in_core *iclog, - struct xfs_log_callback *callback_entry); + xfs_log_callback_t *callback_entry); int xfs_log_release_iclog(struct xfs_mount *mp, struct xlog_in_core *iclog); int xfs_log_reserve(struct xfs_mount *mp, diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 5eb51fc..cfe9797 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -17,9 +17,11 @@ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_log_format.h" -#include "xfs_shared.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_log_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" @@ -27,10 +29,6 @@ #include "xfs_alloc.h" #include "xfs_extent_busy.h" #include "xfs_discard.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -#include "xfs_log.h" -#include "xfs_log_priv.h" /* * Allocate a new ticket. Failing to get a new ticket makes it really hard to @@ -713,20 +711,6 @@ xlog_cil_push_foreground( xlog_cil_push(log); } -bool -xlog_cil_empty( - struct xlog *log) -{ - struct xfs_cil *cil = log->l_cilp; - bool empty = false; - - spin_lock(&cil->xc_push_lock); - if (list_empty(&cil->xc_cil)) - empty = true; - spin_unlock(&cil->xc_push_lock); - return empty; -} - /* * Commit a transaction with the given vector to the Committed Item List. * diff --git a/fs/xfs/xfs_log_format.h b/fs/xfs/xfs_log_format.h index f0969c7..ca7e28a 100644 --- a/fs/xfs/xfs_log_format.h +++ b/fs/xfs/xfs_log_format.h @@ -234,6 +234,178 @@ typedef struct xfs_trans_header { { XFS_LI_ICREATE, "XFS_LI_ICREATE" } /* + * Transaction types. Used to distinguish types of buffers. + */ +#define XFS_TRANS_SETATTR_NOT_SIZE 1 +#define XFS_TRANS_SETATTR_SIZE 2 +#define XFS_TRANS_INACTIVE 3 +#define XFS_TRANS_CREATE 4 +#define XFS_TRANS_CREATE_TRUNC 5 +#define XFS_TRANS_TRUNCATE_FILE 6 +#define XFS_TRANS_REMOVE 7 +#define XFS_TRANS_LINK 8 +#define XFS_TRANS_RENAME 9 +#define XFS_TRANS_MKDIR 10 +#define XFS_TRANS_RMDIR 11 +#define XFS_TRANS_SYMLINK 12 +#define XFS_TRANS_SET_DMATTRS 13 +#define XFS_TRANS_GROWFS 14 +#define XFS_TRANS_STRAT_WRITE 15 +#define XFS_TRANS_DIOSTRAT 16 +/* 17 was XFS_TRANS_WRITE_SYNC */ +#define XFS_TRANS_WRITEID 18 +#define XFS_TRANS_ADDAFORK 19 +#define XFS_TRANS_ATTRINVAL 20 +#define XFS_TRANS_ATRUNCATE 21 +#define XFS_TRANS_ATTR_SET 22 +#define XFS_TRANS_ATTR_RM 23 +#define XFS_TRANS_ATTR_FLAG 24 +#define XFS_TRANS_CLEAR_AGI_BUCKET 25 +#define XFS_TRANS_QM_SBCHANGE 26 +/* + * Dummy entries since we use the transaction type to index into the + * trans_type[] in xlog_recover_print_trans_head() + */ +#define XFS_TRANS_DUMMY1 27 +#define XFS_TRANS_DUMMY2 28 +#define XFS_TRANS_QM_QUOTAOFF 29 +#define XFS_TRANS_QM_DQALLOC 30 +#define XFS_TRANS_QM_SETQLIM 31 +#define XFS_TRANS_QM_DQCLUSTER 32 +#define XFS_TRANS_QM_QINOCREATE 33 +#define XFS_TRANS_QM_QUOTAOFF_END 34 +#define XFS_TRANS_SB_UNIT 35 +#define XFS_TRANS_FSYNC_TS 36 +#define XFS_TRANS_GROWFSRT_ALLOC 37 +#define XFS_TRANS_GROWFSRT_ZERO 38 +#define XFS_TRANS_GROWFSRT_FREE 39 +#define XFS_TRANS_SWAPEXT 40 +#define XFS_TRANS_SB_COUNT 41 +#define XFS_TRANS_CHECKPOINT 42 +#define XFS_TRANS_ICREATE 43 +#define XFS_TRANS_TYPE_MAX 43 +/* new transaction types need to be reflected in xfs_logprint(8) */ + +#define XFS_TRANS_TYPES \ + { XFS_TRANS_SETATTR_NOT_SIZE, "SETATTR_NOT_SIZE" }, \ + { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ + { XFS_TRANS_INACTIVE, "INACTIVE" }, \ + { XFS_TRANS_CREATE, "CREATE" }, \ + { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ + { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ + { XFS_TRANS_REMOVE, "REMOVE" }, \ + { XFS_TRANS_LINK, "LINK" }, \ + { XFS_TRANS_RENAME, "RENAME" }, \ + { XFS_TRANS_MKDIR, "MKDIR" }, \ + { XFS_TRANS_RMDIR, "RMDIR" }, \ + { XFS_TRANS_SYMLINK, "SYMLINK" }, \ + { XFS_TRANS_SET_DMATTRS, "SET_DMATTRS" }, \ + { XFS_TRANS_GROWFS, "GROWFS" }, \ + { XFS_TRANS_STRAT_WRITE, "STRAT_WRITE" }, \ + { XFS_TRANS_DIOSTRAT, "DIOSTRAT" }, \ + { XFS_TRANS_WRITEID, "WRITEID" }, \ + { XFS_TRANS_ADDAFORK, "ADDAFORK" }, \ + { XFS_TRANS_ATTRINVAL, "ATTRINVAL" }, \ + { XFS_TRANS_ATRUNCATE, "ATRUNCATE" }, \ + { XFS_TRANS_ATTR_SET, "ATTR_SET" }, \ + { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \ + { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \ + { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \ + { XFS_TRANS_QM_SBCHANGE, "QM_SBCHANGE" }, \ + { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \ + { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \ + { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \ + { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \ + { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \ + { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \ + { XFS_TRANS_SB_UNIT, "SB_UNIT" }, \ + { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \ + { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \ + { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \ + { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ + { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ + { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \ + { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \ + { XFS_TRANS_DUMMY1, "DUMMY1" }, \ + { XFS_TRANS_DUMMY2, "DUMMY2" }, \ + { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } + +/* + * This structure is used to track log items associated with + * a transaction. It points to the log item and keeps some + * flags to track the state of the log item. It also tracks + * the amount of space needed to log the item it describes + * once we get to commit processing (see xfs_trans_commit()). + */ +struct xfs_log_item_desc { + struct xfs_log_item *lid_item; + struct list_head lid_trans; + unsigned char lid_flags; +}; + +#define XFS_LID_DIRTY 0x1 + +/* + * Values for t_flags. + */ +#define XFS_TRANS_DIRTY 0x01 /* something needs to be logged */ +#define XFS_TRANS_SB_DIRTY 0x02 /* superblock is modified */ +#define XFS_TRANS_PERM_LOG_RES 0x04 /* xact took a permanent log res */ +#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ +#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ +#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ +#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer + count in superblock */ + +/* + * Values for call flags parameter. + */ +#define XFS_TRANS_RELEASE_LOG_RES 0x4 +#define XFS_TRANS_ABORT 0x8 + +/* + * Field values for xfs_trans_mod_sb. + */ +#define XFS_TRANS_SB_ICOUNT 0x00000001 +#define XFS_TRANS_SB_IFREE 0x00000002 +#define XFS_TRANS_SB_FDBLOCKS 0x00000004 +#define XFS_TRANS_SB_RES_FDBLOCKS 0x00000008 +#define XFS_TRANS_SB_FREXTENTS 0x00000010 +#define XFS_TRANS_SB_RES_FREXTENTS 0x00000020 +#define XFS_TRANS_SB_DBLOCKS 0x00000040 +#define XFS_TRANS_SB_AGCOUNT 0x00000080 +#define XFS_TRANS_SB_IMAXPCT 0x00000100 +#define XFS_TRANS_SB_REXTSIZE 0x00000200 +#define XFS_TRANS_SB_RBMBLOCKS 0x00000400 +#define XFS_TRANS_SB_RBLOCKS 0x00000800 +#define XFS_TRANS_SB_REXTENTS 0x00001000 +#define XFS_TRANS_SB_REXTSLOG 0x00002000 + +/* + * Here we centralize the specification of XFS meta-data buffer + * reference count values. This determine how hard the buffer + * cache tries to hold onto the buffer. + */ +#define XFS_AGF_REF 4 +#define XFS_AGI_REF 4 +#define XFS_AGFL_REF 3 +#define XFS_INO_BTREE_REF 3 +#define XFS_ALLOC_BTREE_REF 2 +#define XFS_BMAP_BTREE_REF 2 +#define XFS_DIR_BTREE_REF 2 +#define XFS_INO_REF 2 +#define XFS_ATTR_BTREE_REF 1 +#define XFS_DQUOT_REF 1 + +/* + * Flags for xfs_trans_ichgtime(). + */ +#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ +#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ +#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */ + + +/* * Inode Log Item Format definitions. * * This is the structure used to lay out an inode log item in the @@ -625,6 +797,7 @@ typedef struct xfs_qoff_logformat { char qf_pad[12]; /* padding for future */ } xfs_qoff_logformat_t; + /* * Disk quotas status in m_qflags, and also sb_qflags. 16 bits. */ @@ -676,4 +849,8 @@ struct xfs_icreate_log { __be32 icl_gen; /* inode generation number to use */ }; +int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes); +int xfs_log_calc_minimum_size(struct xfs_mount *); + + #endif /* __XFS_LOG_FORMAT_H__ */ diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 9bc403a..136654b 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -22,7 +22,6 @@ struct xfs_buf; struct xlog; struct xlog_ticket; struct xfs_mount; -struct xfs_log_callback; /* * Flags for log structure @@ -228,8 +227,8 @@ typedef struct xlog_in_core { /* Callback structures need their own cacheline */ spinlock_t ic_callback_lock ____cacheline_aligned_in_smp; - struct xfs_log_callback *ic_callback; - struct xfs_log_callback **ic_callback_tail; + xfs_log_callback_t *ic_callback; + xfs_log_callback_t **ic_callback_tail; /* reference counts need their own cacheline */ atomic_t ic_refcnt ____cacheline_aligned_in_smp; @@ -255,7 +254,7 @@ struct xfs_cil_ctx { int space_used; /* aggregate size of regions */ struct list_head busy_extents; /* busy extents in chkpt */ struct xfs_log_vec *lv_chain; /* logvecs being pushed */ - struct xfs_log_callback log_cb; /* completion callback hook. */ + xfs_log_callback_t log_cb; /* completion callback hook. */ struct list_head committing; /* ctx committing list */ }; @@ -515,10 +514,12 @@ xlog_assign_grant_head(atomic64_t *head, int cycle, int space) /* * Committed Item List interfaces */ -int xlog_cil_init(struct xlog *log); -void xlog_cil_init_post_recovery(struct xlog *log); -void xlog_cil_destroy(struct xlog *log); -bool xlog_cil_empty(struct xlog *log); +int +xlog_cil_init(struct xlog *log); +void +xlog_cil_init_post_recovery(struct xlog *log); +void +xlog_cil_destroy(struct xlog *log); /* * CIL force routines diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b6b669d..3979749 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -17,34 +17,42 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_log.h" #include "xfs_inum.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" +#include "xfs_error.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_trans.h" -#include "xfs_log.h" +#include "xfs_inode_item.h" +#include "xfs_alloc.h" +#include "xfs_ialloc.h" #include "xfs_log_priv.h" +#include "xfs_buf_item.h" #include "xfs_log_recover.h" -#include "xfs_inode_item.h" #include "xfs_extfree_item.h" #include "xfs_trans_priv.h" -#include "xfs_alloc.h" -#include "xfs_ialloc.h" #include "xfs_quota.h" #include "xfs_cksum.h" #include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" -#include "xfs_error.h" +#include "xfs_icreate_item.h" + +/* Need all the magic numbers and buffer ops structures from these headers */ +#include "xfs_symlink.h" +#include "xfs_da_btree.h" +#include "xfs_dir2_format.h" #include "xfs_dir2.h" +#include "xfs_attr_leaf.h" +#include "xfs_attr_remote.h" #define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) @@ -297,9 +305,9 @@ xlog_header_check_dump( xfs_mount_t *mp, xlog_rec_header_t *head) { - xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d", + xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d\n", __func__, &mp->m_sb.sb_uuid, XLOG_FMT); - xfs_debug(mp, " log : uuid = %pU, fmt = %d", + xfs_debug(mp, " log : uuid = %pU, fmt = %d\n", &head->h_fs_uuid, be32_to_cpu(head->h_fmt)); } #else @@ -2354,7 +2362,7 @@ xlog_recover_do_reg_buffer( item->ri_buf[i].i_len, __func__); goto next; } - error = xfs_dqcheck(mp, item->ri_buf[i].i_addr, + error = xfs_qm_dqcheck(mp, item->ri_buf[i].i_addr, -1, 0, XFS_QMOPT_DOWARN, "dquot_buf_recover"); if (error) @@ -2386,6 +2394,133 @@ xlog_recover_do_reg_buffer( } /* + * Do some primitive error checking on ondisk dquot data structures. + */ +int +xfs_qm_dqcheck( + struct xfs_mount *mp, + xfs_disk_dquot_t *ddq, + xfs_dqid_t id, + uint type, /* used only when IO_dorepair is true */ + uint flags, + char *str) +{ + xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; + int errs = 0; + + /* + * We can encounter an uninitialized dquot buffer for 2 reasons: + * 1. If we crash while deleting the quotainode(s), and those blks got + * used for user data. This is because we take the path of regular + * file deletion; however, the size field of quotainodes is never + * updated, so all the tricks that we play in itruncate_finish + * don't quite matter. + * + * 2. We don't play the quota buffers when there's a quotaoff logitem. + * But the allocation will be replayed so we'll end up with an + * uninitialized quota block. + * + * This is all fine; things are still consistent, and we haven't lost + * any quota information. Just don't complain about bad dquot blks. + */ + if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", + str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); + errs++; + } + if (ddq->d_version != XFS_DQUOT_VERSION) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", + str, id, ddq->d_version, XFS_DQUOT_VERSION); + errs++; + } + + if (ddq->d_flags != XFS_DQ_USER && + ddq->d_flags != XFS_DQ_PROJ && + ddq->d_flags != XFS_DQ_GROUP) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : XFS dquot ID 0x%x, unknown flags 0x%x", + str, id, ddq->d_flags); + errs++; + } + + if (id != -1 && id != be32_to_cpu(ddq->d_id)) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : ondisk-dquot 0x%p, ID mismatch: " + "0x%x expected, found id 0x%x", + str, ddq, id, be32_to_cpu(ddq->d_id)); + errs++; + } + + if (!errs && ddq->d_id) { + if (ddq->d_blk_softlimit && + be64_to_cpu(ddq->d_bcount) > + be64_to_cpu(ddq->d_blk_softlimit)) { + if (!ddq->d_btimer) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", + str, (int)be32_to_cpu(ddq->d_id), ddq); + errs++; + } + } + if (ddq->d_ino_softlimit && + be64_to_cpu(ddq->d_icount) > + be64_to_cpu(ddq->d_ino_softlimit)) { + if (!ddq->d_itimer) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", + str, (int)be32_to_cpu(ddq->d_id), ddq); + errs++; + } + } + if (ddq->d_rtb_softlimit && + be64_to_cpu(ddq->d_rtbcount) > + be64_to_cpu(ddq->d_rtb_softlimit)) { + if (!ddq->d_rtbtimer) { + if (flags & XFS_QMOPT_DOWARN) + xfs_alert(mp, + "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", + str, (int)be32_to_cpu(ddq->d_id), ddq); + errs++; + } + } + } + + if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) + return errs; + + if (flags & XFS_QMOPT_DOWARN) + xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); + + /* + * Typically, a repair is only requested by quotacheck. + */ + ASSERT(id != -1); + ASSERT(flags & XFS_QMOPT_DQREPAIR); + memset(d, 0, sizeof(xfs_dqblk_t)); + + d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); + d->dd_diskdq.d_version = XFS_DQUOT_VERSION; + d->dd_diskdq.d_flags = type; + d->dd_diskdq.d_id = cpu_to_be32(id); + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); + xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), + XFS_DQUOT_CRC_OFF); + } + + return errs; +} + +/* * Perform a dquot buffer recovery. * Simple algorithm: if we have found a QUOTAOFF log item of the same type * (ie. USR or GRP), then just toss this buffer away; don't recover it. @@ -2990,7 +3125,7 @@ xlog_recover_dquot_pass2( */ dq_f = item->ri_buf[0].i_addr; ASSERT(dq_f); - error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, + error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, "xlog_recover_dquot_pass2 (log copy)"); if (error) return XFS_ERROR(EIO); @@ -3010,7 +3145,7 @@ xlog_recover_dquot_pass2( * was among a chunk of dquots created earlier, and we did some * minimal initialization then. */ - error = xfs_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, + error = xfs_qm_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, "xlog_recover_dquot_pass2"); if (error) { xfs_buf_relse(bp); @@ -3942,7 +4077,7 @@ xlog_unpack_data_crc( if (crc != rhead->h_crc) { if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { xfs_alert(log->l_mp, - "log record CRC mismatch: found 0x%x, expected 0x%x.", + "log record CRC mismatch: found 0x%x, expected 0x%x.\n", le32_to_cpu(rhead->h_crc), le32_to_cpu(crc)); xfs_hex_dump(dp, 32); diff --git a/fs/xfs/xfs_log_rlimit.c b/fs/xfs/xfs_log_rlimit.c index 2af1a0a..bbcec0b 100644 --- a/fs/xfs/xfs_log_rlimit.c +++ b/fs/xfs/xfs_log_rlimit.c @@ -17,19 +17,16 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_ag.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_trans_space.h" +#include "xfs_bmap_btree.h" #include "xfs_inode.h" #include "xfs_da_btree.h" #include "xfs_attr_leaf.h" -#include "xfs_bmap_btree.h" /* * Calculate the maximum length in bytes that would be required for a local diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index 63ca2f0..9163dc1 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -17,8 +17,9 @@ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 02df7b4..5dcc680 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -17,31 +17,35 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_log.h" #include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_inode.h" +#include "xfs_da_btree.h" +#include "xfs_dir2_format.h" #include "xfs_dir2.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_btree.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_rtalloc.h" #include "xfs_bmap.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -#include "xfs_log.h" #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_fsops.h" #include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_dinode.h" +#include "xfs_cksum.h" +#include "xfs_buf_item.h" #ifdef HAVE_PERCPU_SB @@ -719,22 +723,8 @@ xfs_mountfs( * Set the inode cluster size. * This may still be overridden by the file system * block size if it is larger than the chosen cluster size. - * - * For v5 filesystems, scale the cluster size with the inode size to - * keep a constant ratio of inode per cluster buffer, but only if mkfs - * has set the inode alignment value appropriately for larger cluster - * sizes. */ mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; - if (xfs_sb_version_hascrc(&mp->m_sb)) { - int new_size = mp->m_inode_cluster_size; - - new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; - if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) - mp->m_inode_cluster_size = new_size; - xfs_info(mp, "Using inode cluster size of %d bytes", - mp->m_inode_cluster_size); - } /* * Set inode alignment fields diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a466c5e..1fa0584 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -26,7 +26,6 @@ struct xfs_mru_cache; struct xfs_nameops; struct xfs_ail; struct xfs_quotainfo; -struct xfs_dir_ops; #ifdef HAVE_PERCPU_SB @@ -112,7 +111,7 @@ typedef struct xfs_mount { __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ __uint8_t m_agno_log; /* log #ag's */ __uint8_t m_agino_log; /* #bits for agino in inum */ - uint m_inode_cluster_size;/* min inode buf size */ + __uint16_t m_inode_cluster_size;/* min inode buf size */ uint m_blockmask; /* sb_blocksize-1 */ uint m_blockwsize; /* sb_blocksize in words */ uint m_blockwmask; /* blockwsize-1 */ @@ -149,8 +148,6 @@ typedef struct xfs_mount { int m_dir_magicpct; /* 37% of the dir blocksize */ __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ - const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */ - const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */ int m_dirblksize; /* directory block sz--bytes */ int m_dirblkfsbs; /* directory block sz--fsbs */ xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 14a4996..3e6c2e6 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -17,28 +17,31 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_ialloc.h" #include "xfs_itable.h" -#include "xfs_quota.h" +#include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_bmap.h" -#include "xfs_bmap_btree.h" -#include "xfs_trans.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" #include "xfs_trans_space.h" #include "xfs_qm.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_cksum.h" -#include "xfs_dinode.h" /* * The global quota manager. There is only one of these for the entire @@ -661,6 +664,20 @@ xfs_qm_dqdetach( } } +int +xfs_qm_calc_dquots_per_chunk( + struct xfs_mount *mp, + unsigned int nbblks) /* basic block units */ +{ + unsigned int ndquots; + + ASSERT(nbblks > 0); + ndquots = BBTOB(nbblks); + do_div(ndquots, sizeof(xfs_dqblk_t)); + + return ndquots; +} + struct xfs_qm_isolate { struct list_head buffers; struct list_head dispose; @@ -841,7 +858,7 @@ xfs_qm_init_quotainfo( /* Precalc some constants */ qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); - qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(mp, + qinf->qi_dqperchunk = xfs_qm_calc_dquots_per_chunk(mp, qinf->qi_dqchunklen); mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); @@ -1075,10 +1092,10 @@ xfs_qm_reset_dqcounts( /* * Do a sanity check, and if needed, repair the dqblk. Don't * output any warnings because it's perfectly possible to - * find uninitialised dquot blks. See comment in xfs_dqcheck. + * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. */ - xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, - "xfs_quotacheck"); + (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, + "xfs_quotacheck"); ddq->d_bcount = 0; ddq->d_icount = 0; ddq->d_rtbcount = 0; diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index a788b66..2b602df 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -103,6 +103,8 @@ xfs_dq_to_quota_inode(struct xfs_dquot *dqp) return NULL; } +extern int xfs_qm_calc_dquots_per_chunk(struct xfs_mount *mp, + unsigned int nbblks); extern void xfs_trans_mod_dquot(struct xfs_trans *, struct xfs_dquot *, uint, long); extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index e9be63a..3af50cc 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -18,15 +18,21 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_alloc.h" #include "xfs_quota.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" #include "xfs_inode.h" +#include "xfs_itable.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_trans.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" #include "xfs_qm.h" diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 437c919..8174aad 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -20,18 +20,24 @@ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" #include "xfs_inode.h" -#include "xfs_trans.h" +#include "xfs_inode_item.h" +#include "xfs_itable.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_quota.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" #include "xfs_qm.h" #include "xfs_trace.h" #include "xfs_icache.h" @@ -281,7 +287,7 @@ xfs_qm_scall_trunc_qfiles( int error = 0, error2 = 0; if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { - xfs_debug(mp, "%s: flags=%x m_qflags=%x", + xfs_debug(mp, "%s: flags=%x m_qflags=%x\n", __func__, flags, mp->m_qflags); return XFS_ERROR(EINVAL); } @@ -319,7 +325,7 @@ xfs_qm_scall_quotaon( sbflags = 0; if (flags == 0) { - xfs_debug(mp, "%s: zero flags, m_qflags=%x", + xfs_debug(mp, "%s: zero flags, m_qflags=%x\n", __func__, mp->m_qflags); return XFS_ERROR(EINVAL); } @@ -342,7 +348,7 @@ xfs_qm_scall_quotaon( (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && (flags & XFS_PQUOTA_ENFD))) { xfs_debug(mp, - "%s: Can't enforce without acct, flags=%x sbflags=%x", + "%s: Can't enforce without acct, flags=%x sbflags=%x\n", __func__, flags, mp->m_sb.sb_qflags); return XFS_ERROR(EINVAL); } @@ -642,7 +648,7 @@ xfs_qm_scall_setqlim( q->qi_bsoftlimit = soft; } } else { - xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft); + xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft); } hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : @@ -658,7 +664,7 @@ xfs_qm_scall_setqlim( q->qi_rtbsoftlimit = soft; } } else { - xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft); + xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft); } hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? @@ -675,7 +681,7 @@ xfs_qm_scall_setqlim( q->qi_isoftlimit = soft; } } else { - xfs_debug(mp, "ihard %Ld < isoft %Ld", hard, soft); + xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft); } /* diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 5376dd4..e7d84d2 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -150,6 +150,10 @@ static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp, xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, pd, nb, ni, \ f | XFS_QMOPT_RES_REGBLKS) +extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *, + xfs_dqid_t, uint, uint, char *); extern int xfs_mount_reset_sbqflags(struct xfs_mount *); +extern const struct xfs_buf_ops xfs_dquot_buf_ops; + #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/xfs_quota_defs.h b/fs/xfs/xfs_quota_defs.h index b3b2b10..e6b0d6e 100644 --- a/fs/xfs/xfs_quota_defs.h +++ b/fs/xfs/xfs_quota_defs.h @@ -154,8 +154,4 @@ typedef __uint16_t xfs_qwarncnt_t; (XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA) #define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) -extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, - xfs_dqid_t id, uint type, uint flags, char *str); -extern int xfs_calc_dquots_per_chunk(struct xfs_mount *mp, unsigned int nbblks); - #endif /* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index af33caf..1326d81 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -17,14 +17,15 @@ */ #include "xfs.h" #include "xfs_format.h" -#include "xfs_log_format.h" #include "xfs_trans_resv.h" +#include "xfs_log.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_inode.h" #include "xfs_quota.h" #include "xfs_trans.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" #include "xfs_qm.h" #include <linux/quota.h> diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index a6a76b2..6f9e63c 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -17,260 +17,172 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc.h" +#include "xfs_rtalloc.h" +#include "xfs_fsops.h" #include "xfs_error.h" -#include "xfs_trans.h" +#include "xfs_inode_item.h" #include "xfs_trans_space.h" #include "xfs_trace.h" #include "xfs_buf.h" #include "xfs_icache.h" -#include "xfs_dinode.h" -#include "xfs_rtalloc.h" /* - * Read and return the summary information for a given extent size, - * bitmap block combination. - * Keeps track of a current summary block, so we don't keep reading - * it from the buffer cache. + * Prototypes for internal functions. */ -STATIC int /* error */ -xfs_rtget_summary( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - int log, /* log2 of extent size */ - xfs_rtblock_t bbno, /* bitmap block number */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - xfs_suminfo_t *sum) /* out: summary info for this block */ -{ - xfs_buf_t *bp; /* buffer for summary block */ - int error; /* error value */ - xfs_fsblock_t sb; /* summary fsblock */ - int so; /* index into the summary file */ - xfs_suminfo_t *sp; /* pointer to returned data */ - /* - * Compute entry number in the summary file. - */ - so = XFS_SUMOFFS(mp, log, bbno); - /* - * Compute the block number in the summary file. - */ - sb = XFS_SUMOFFSTOBLOCK(mp, so); - /* - * If we have an old buffer, and the block number matches, use that. - */ - if (rbpp && *rbpp && *rsb == sb) - bp = *rbpp; - /* - * Otherwise we have to get the buffer. - */ - else { - /* - * If there was an old one, get rid of it first. - */ - if (rbpp && *rbpp) - xfs_trans_brelse(tp, *rbpp); - error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); - if (error) { - return error; - } - /* - * Remember this buffer and block for the next call. - */ - if (rbpp) { - *rbpp = bp; - *rsb = sb; - } - } - /* - * Point to the summary information & copy it out. - */ - sp = XFS_SUMPTR(mp, bp, so); - *sum = *sp; - /* - * Drop the buffer if we're not asked to remember it. - */ - if (!rbpp) - xfs_trans_brelse(tp, bp); - return 0; -} +STATIC int xfs_rtallocate_range(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t, + xfs_extlen_t, xfs_buf_t **, xfs_fsblock_t *); +STATIC int xfs_rtany_summary(xfs_mount_t *, xfs_trans_t *, int, int, + xfs_rtblock_t, xfs_buf_t **, xfs_fsblock_t *, int *); +STATIC int xfs_rtcheck_range(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t, + xfs_extlen_t, int, xfs_rtblock_t *, int *); +STATIC int xfs_rtfind_back(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t, + xfs_rtblock_t, xfs_rtblock_t *); +STATIC int xfs_rtfind_forw(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t, + xfs_rtblock_t, xfs_rtblock_t *); +STATIC int xfs_rtget_summary( xfs_mount_t *, xfs_trans_t *, int, + xfs_rtblock_t, xfs_buf_t **, xfs_fsblock_t *, xfs_suminfo_t *); +STATIC int xfs_rtmodify_range(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t, + xfs_extlen_t, int); +STATIC int xfs_rtmodify_summary(xfs_mount_t *, xfs_trans_t *, int, + xfs_rtblock_t, int, xfs_buf_t **, xfs_fsblock_t *); /* - * Return whether there are any free extents in the size range given - * by low and high, for the bitmap block bbno. + * Internal functions. + */ + +/* + * Allocate space to the bitmap or summary file, and zero it, for growfs. */ STATIC int /* error */ -xfs_rtany_summary( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - int low, /* low log2 extent size */ - int high, /* high log2 extent size */ - xfs_rtblock_t bbno, /* bitmap block number */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb, /* in/out: summary block number */ - int *stat) /* out: any good extents here? */ +xfs_growfs_rt_alloc( + xfs_mount_t *mp, /* file system mount point */ + xfs_extlen_t oblocks, /* old count of blocks */ + xfs_extlen_t nblocks, /* new count of blocks */ + xfs_inode_t *ip) /* inode (bitmap/summary) */ { - int error; /* error value */ - int log; /* loop counter, log2 of ext. size */ - xfs_suminfo_t sum; /* summary data */ + xfs_fileoff_t bno; /* block number in file */ + xfs_buf_t *bp; /* temporary buffer for zeroing */ + int committed; /* transaction committed flag */ + xfs_daddr_t d; /* disk block address */ + int error; /* error return value */ + xfs_fsblock_t firstblock; /* first block allocated in xaction */ + xfs_bmap_free_t flist; /* list of freed blocks */ + xfs_fsblock_t fsbno; /* filesystem block for bno */ + xfs_bmbt_irec_t map; /* block map output */ + int nmap; /* number of block maps */ + int resblks; /* space reservation */ /* - * Loop over logs of extent sizes. Order is irrelevant. + * Allocate space to the file, as necessary. */ - for (log = low; log <= high; log++) { + while (oblocks < nblocks) { + int cancelflags = 0; + xfs_trans_t *tp; + + tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC); + resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks); /* - * Get one summary datum. + * Reserve space & log for one extent added to the file. */ - error = xfs_rtget_summary(mp, tp, log, bbno, rbpp, rsb, &sum); - if (error) { - return error; - } + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, + resblks, 0); + if (error) + goto error_cancel; + cancelflags = XFS_TRANS_RELEASE_LOG_RES; /* - * If there are any, return success. + * Lock the inode. */ - if (sum) { - *stat = 1; - return 0; - } - } - /* - * Found nothing, return failure. - */ - *stat = 0; - return 0; -} - - -/* - * Copy and transform the summary file, given the old and new - * parameters in the mount structures. - */ -STATIC int /* error */ -xfs_rtcopy_summary( - xfs_mount_t *omp, /* old file system mount point */ - xfs_mount_t *nmp, /* new file system mount point */ - xfs_trans_t *tp) /* transaction pointer */ -{ - xfs_rtblock_t bbno; /* bitmap block number */ - xfs_buf_t *bp; /* summary buffer */ - int error; /* error return value */ - int log; /* summary level number (log length) */ - xfs_suminfo_t sum; /* summary data */ - xfs_fsblock_t sumbno; /* summary block number */ + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - bp = NULL; - for (log = omp->m_rsumlevels - 1; log >= 0; log--) { - for (bbno = omp->m_sb.sb_rbmblocks - 1; - (xfs_srtblock_t)bbno >= 0; - bbno--) { - error = xfs_rtget_summary(omp, tp, log, bbno, &bp, - &sumbno, &sum); - if (error) - return error; - if (sum == 0) - continue; - error = xfs_rtmodify_summary(omp, tp, log, bbno, -sum, - &bp, &sumbno); + xfs_bmap_init(&flist, &firstblock); + /* + * Allocate blocks to the bitmap file. + */ + nmap = 1; + cancelflags |= XFS_TRANS_ABORT; + error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, + XFS_BMAPI_METADATA, &firstblock, + resblks, &map, &nmap, &flist); + if (!error && nmap < 1) + error = XFS_ERROR(ENOSPC); + if (error) + goto error_cancel; + /* + * Free any blocks freed up in the transaction, then commit. + */ + error = xfs_bmap_finish(&tp, &flist, &committed); + if (error) + goto error_cancel; + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + if (error) + goto error; + /* + * Now we need to clear the allocated blocks. + * Do this one block per transaction, to keep it simple. + */ + cancelflags = 0; + for (bno = map.br_startoff, fsbno = map.br_startblock; + bno < map.br_startoff + map.br_blockcount; + bno++, fsbno++) { + tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ZERO); + /* + * Reserve log for one block zeroing. + */ + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtzero, + 0, 0); if (error) - return error; - error = xfs_rtmodify_summary(nmp, tp, log, bbno, sum, - &bp, &sumbno); + goto error_cancel; + /* + * Lock the bitmap inode. + */ + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + /* + * Get a buffer for the block. + */ + d = XFS_FSB_TO_DADDR(mp, fsbno); + bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, + mp->m_bsize, 0); + if (bp == NULL) { + error = XFS_ERROR(EIO); +error_cancel: + xfs_trans_cancel(tp, cancelflags); + goto error; + } + memset(bp->b_addr, 0, mp->m_sb.sb_blocksize); + xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); + /* + * Commit the transaction. + */ + error = xfs_trans_commit(tp, 0); if (error) - return error; - ASSERT(sum > 0); + goto error; } + /* + * Go on to the next extent, if any. + */ + oblocks = map.br_startoff + map.br_blockcount; } return 0; -} -/* - * Mark an extent specified by start and len allocated. - * Updates all the summary information as well as the bitmap. - */ -STATIC int /* error */ -xfs_rtallocate_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* start block to allocate */ - xfs_extlen_t len, /* length to allocate */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb) /* in/out: summary block number */ -{ - xfs_rtblock_t end; /* end of the allocated extent */ - int error; /* error value */ - xfs_rtblock_t postblock = 0; /* first block allocated > end */ - xfs_rtblock_t preblock = 0; /* first block allocated < start */ - end = start + len - 1; - /* - * Assume we're allocating out of the middle of a free extent. - * We need to find the beginning and end of the extent so we can - * properly update the summary. - */ - error = xfs_rtfind_back(mp, tp, start, 0, &preblock); - if (error) { - return error; - } - /* - * Find the next allocated block (end of free extent). - */ - error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, - &postblock); - if (error) { - return error; - } - /* - * Decrement the summary information corresponding to the entire - * (old) free extent. - */ - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock + 1 - preblock), - XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); - if (error) { - return error; - } - /* - * If there are blocks not being allocated at the front of the - * old extent, add summary data for them to be free. - */ - if (preblock < start) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(start - preblock), - XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); - if (error) { - return error; - } - } - /* - * If there are blocks not being allocated at the end of the - * old extent, add summary data for them to be free. - */ - if (postblock > end) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock - end), - XFS_BITTOBLOCK(mp, end + 1), 1, rbpp, rsb); - if (error) { - return error; - } - } - /* - * Modify the bitmap to mark this extent allocated. - */ - error = xfs_rtmodify_range(mp, tp, start, len, 0); +error: return error; } @@ -809,126 +721,1112 @@ xfs_rtallocate_extent_size( } /* - * Allocate space to the bitmap or summary file, and zero it, for growfs. + * Mark an extent specified by start and len allocated. + * Updates all the summary information as well as the bitmap. */ STATIC int /* error */ -xfs_growfs_rt_alloc( +xfs_rtallocate_range( xfs_mount_t *mp, /* file system mount point */ - xfs_extlen_t oblocks, /* old count of blocks */ - xfs_extlen_t nblocks, /* new count of blocks */ - xfs_inode_t *ip) /* inode (bitmap/summary) */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* start block to allocate */ + xfs_extlen_t len, /* length to allocate */ + xfs_buf_t **rbpp, /* in/out: summary block buffer */ + xfs_fsblock_t *rsb) /* in/out: summary block number */ { - xfs_fileoff_t bno; /* block number in file */ - xfs_buf_t *bp; /* temporary buffer for zeroing */ - int committed; /* transaction committed flag */ - xfs_daddr_t d; /* disk block address */ - int error; /* error return value */ - xfs_fsblock_t firstblock; /* first block allocated in xaction */ - xfs_bmap_free_t flist; /* list of freed blocks */ - xfs_fsblock_t fsbno; /* filesystem block for bno */ - xfs_bmbt_irec_t map; /* block map output */ - int nmap; /* number of block maps */ - int resblks; /* space reservation */ + xfs_rtblock_t end; /* end of the allocated extent */ + int error; /* error value */ + xfs_rtblock_t postblock = 0; /* first block allocated > end */ + xfs_rtblock_t preblock = 0; /* first block allocated < start */ + end = start + len - 1; /* - * Allocate space to the file, as necessary. + * Assume we're allocating out of the middle of a free extent. + * We need to find the beginning and end of the extent so we can + * properly update the summary. */ - while (oblocks < nblocks) { - int cancelflags = 0; - xfs_trans_t *tp; + error = xfs_rtfind_back(mp, tp, start, 0, &preblock); + if (error) { + return error; + } + /* + * Find the next allocated block (end of free extent). + */ + error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, + &postblock); + if (error) { + return error; + } + /* + * Decrement the summary information corresponding to the entire + * (old) free extent. + */ + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(postblock + 1 - preblock), + XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); + if (error) { + return error; + } + /* + * If there are blocks not being allocated at the front of the + * old extent, add summary data for them to be free. + */ + if (preblock < start) { + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(start - preblock), + XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); + if (error) { + return error; + } + } + /* + * If there are blocks not being allocated at the end of the + * old extent, add summary data for them to be free. + */ + if (postblock > end) { + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(postblock - end), + XFS_BITTOBLOCK(mp, end + 1), 1, rbpp, rsb); + if (error) { + return error; + } + } + /* + * Modify the bitmap to mark this extent allocated. + */ + error = xfs_rtmodify_range(mp, tp, start, len, 0); + return error; +} - tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC); - resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks); +/* + * Return whether there are any free extents in the size range given + * by low and high, for the bitmap block bbno. + */ +STATIC int /* error */ +xfs_rtany_summary( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ + int low, /* low log2 extent size */ + int high, /* high log2 extent size */ + xfs_rtblock_t bbno, /* bitmap block number */ + xfs_buf_t **rbpp, /* in/out: summary block buffer */ + xfs_fsblock_t *rsb, /* in/out: summary block number */ + int *stat) /* out: any good extents here? */ +{ + int error; /* error value */ + int log; /* loop counter, log2 of ext. size */ + xfs_suminfo_t sum; /* summary data */ + + /* + * Loop over logs of extent sizes. Order is irrelevant. + */ + for (log = low; log <= high; log++) { /* - * Reserve space & log for one extent added to the file. + * Get one summary datum. */ - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, - resblks, 0); - if (error) - goto error_cancel; - cancelflags = XFS_TRANS_RELEASE_LOG_RES; + error = xfs_rtget_summary(mp, tp, log, bbno, rbpp, rsb, &sum); + if (error) { + return error; + } /* - * Lock the inode. + * If there are any, return success. */ - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + if (sum) { + *stat = 1; + return 0; + } + } + /* + * Found nothing, return failure. + */ + *stat = 0; + return 0; +} - xfs_bmap_init(&flist, &firstblock); +/* + * Get a buffer for the bitmap or summary file block specified. + * The buffer is returned read and locked. + */ +STATIC int /* error */ +xfs_rtbuf_get( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t block, /* block number in bitmap or summary */ + int issum, /* is summary not bitmap */ + xfs_buf_t **bpp) /* output: buffer for the block */ +{ + xfs_buf_t *bp; /* block buffer, result */ + xfs_inode_t *ip; /* bitmap or summary inode */ + xfs_bmbt_irec_t map; + int nmap = 1; + int error; /* error value */ + + ip = issum ? mp->m_rsumip : mp->m_rbmip; + + error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK); + if (error) + return error; + + ASSERT(map.br_startblock != NULLFSBLOCK); + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, map.br_startblock), + mp->m_bsize, 0, &bp, NULL); + if (error) + return error; + ASSERT(!xfs_buf_geterror(bp)); + *bpp = bp; + return 0; +} + +#ifdef DEBUG +/* + * Check that the given extent (block range) is allocated already. + */ +STATIC int /* error */ +xfs_rtcheck_alloc_range( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t bno, /* starting block number of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat) /* out: 1 for allocated, 0 for not */ +{ + xfs_rtblock_t new; /* dummy for xfs_rtcheck_range */ + + return xfs_rtcheck_range(mp, tp, bno, len, 0, &new, stat); +} +#endif + +/* + * Check that the given range is either all allocated (val = 0) or + * all free (val = 1). + */ +STATIC int /* error */ +xfs_rtcheck_range( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block number of extent */ + xfs_extlen_t len, /* length of extent */ + int val, /* 1 for free, 0 for allocated */ + xfs_rtblock_t *new, /* out: first block not matching */ + int *stat) /* out: 1 for matches, 0 for not */ +{ + xfs_rtword_t *b; /* current word in buffer */ + int bit; /* bit number in the word */ + xfs_rtblock_t block; /* bitmap block number */ + xfs_buf_t *bp; /* buf for the block */ + xfs_rtword_t *bufp; /* starting word in buffer */ + int error; /* error value */ + xfs_rtblock_t i; /* current bit number rel. to start */ + xfs_rtblock_t lastbit; /* last useful bit in word */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t wdiff; /* difference from wanted value */ + int word; /* word number in the buffer */ + + /* + * Compute starting bitmap block number + */ + block = XFS_BITTOBLOCK(mp, start); + /* + * Read the bitmap block. + */ + error = xfs_rtbuf_get(mp, tp, block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + /* + * Compute the starting word's address, and starting bit. + */ + word = XFS_BITTOWORD(mp, start); + b = &bufp[word]; + bit = (int)(start & (XFS_NBWORD - 1)); + /* + * 0 (allocated) => all zero's; 1 (free) => all one's. + */ + val = -val; + /* + * If not starting on a word boundary, deal with the first + * (partial) word. + */ + if (bit) { /* - * Allocate blocks to the bitmap file. + * Compute first bit not examined. */ - nmap = 1; - cancelflags |= XFS_TRANS_ABORT; - error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, - XFS_BMAPI_METADATA, &firstblock, - resblks, &map, &nmap, &flist); - if (!error && nmap < 1) - error = XFS_ERROR(ENOSPC); - if (error) - goto error_cancel; + lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); /* - * Free any blocks freed up in the transaction, then commit. + * Mask of relevant bits. */ - error = xfs_bmap_finish(&tp, &flist, &committed); - if (error) - goto error_cancel; - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - if (error) - goto error; + mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; /* - * Now we need to clear the allocated blocks. - * Do this one block per transaction, to keep it simple. + * Compute difference between actual and desired value. */ - cancelflags = 0; - for (bno = map.br_startoff, fsbno = map.br_startblock; - bno < map.br_startoff + map.br_blockcount; - bno++, fsbno++) { - tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ZERO); + if ((wdiff = (*b ^ val) & mask)) { /* - * Reserve log for one block zeroing. + * Different, compute first wrong bit and return. */ - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtzero, - 0, 0); - if (error) - goto error_cancel; + xfs_trans_brelse(tp, bp); + i = XFS_RTLOBIT(wdiff) - bit; + *new = start + i; + *stat = 0; + return 0; + } + i = lastbit - bit; + /* + * Go on to next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { /* - * Lock the bitmap inode. + * If done with this block, get the next one. */ - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + b = bufp = bp->b_addr; + word = 0; + } else { /* - * Get a buffer for the block. + * Go on to the next word in the buffer. */ - d = XFS_FSB_TO_DADDR(mp, fsbno); - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, - mp->m_bsize, 0); - if (bp == NULL) { - error = XFS_ERROR(EIO); -error_cancel: - xfs_trans_cancel(tp, cancelflags); - goto error; + b++; + } + } else { + /* + * Starting on a word boundary, no partial word. + */ + i = 0; + } + /* + * Loop over whole words in buffers. When we use up one buffer + * we move on to the next one. + */ + while (len - i >= XFS_NBWORD) { + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = *b ^ val)) { + /* + * Different, compute first wrong bit and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_RTLOBIT(wdiff); + *new = start + i; + *stat = 0; + return 0; + } + i += XFS_NBWORD; + /* + * Go on to next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * If done with this block, get the next one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; } - memset(bp->b_addr, 0, mp->m_sb.sb_blocksize); - xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); + b = bufp = bp->b_addr; + word = 0; + } else { /* - * Commit the transaction. + * Go on to the next word in the buffer. */ - error = xfs_trans_commit(tp, 0); + b++; + } + } + /* + * If not ending on a word boundary, deal with the last + * (partial) word. + */ + if ((lastbit = len - i)) { + /* + * Mask of relevant bits. + */ + mask = ((xfs_rtword_t)1 << lastbit) - 1; + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = (*b ^ val) & mask)) { + /* + * Different, compute first wrong bit and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_RTLOBIT(wdiff); + *new = start + i; + *stat = 0; + return 0; + } else + i = len; + } + /* + * Successful, return. + */ + xfs_trans_brelse(tp, bp); + *new = start + i; + *stat = 1; + return 0; +} + +/* + * Copy and transform the summary file, given the old and new + * parameters in the mount structures. + */ +STATIC int /* error */ +xfs_rtcopy_summary( + xfs_mount_t *omp, /* old file system mount point */ + xfs_mount_t *nmp, /* new file system mount point */ + xfs_trans_t *tp) /* transaction pointer */ +{ + xfs_rtblock_t bbno; /* bitmap block number */ + xfs_buf_t *bp; /* summary buffer */ + int error; /* error return value */ + int log; /* summary level number (log length) */ + xfs_suminfo_t sum; /* summary data */ + xfs_fsblock_t sumbno; /* summary block number */ + + bp = NULL; + for (log = omp->m_rsumlevels - 1; log >= 0; log--) { + for (bbno = omp->m_sb.sb_rbmblocks - 1; + (xfs_srtblock_t)bbno >= 0; + bbno--) { + error = xfs_rtget_summary(omp, tp, log, bbno, &bp, + &sumbno, &sum); if (error) - goto error; + return error; + if (sum == 0) + continue; + error = xfs_rtmodify_summary(omp, tp, log, bbno, -sum, + &bp, &sumbno); + if (error) + return error; + error = xfs_rtmodify_summary(nmp, tp, log, bbno, sum, + &bp, &sumbno); + if (error) + return error; + ASSERT(sum > 0); + } + } + return 0; +} + +/* + * Searching backward from start to limit, find the first block whose + * allocated/free state is different from start's. + */ +STATIC int /* error */ +xfs_rtfind_back( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to look at */ + xfs_rtblock_t limit, /* last block to look at */ + xfs_rtblock_t *rtblock) /* out: start block found */ +{ + xfs_rtword_t *b; /* current word in buffer */ + int bit; /* bit number in the word */ + xfs_rtblock_t block; /* bitmap block number */ + xfs_buf_t *bp; /* buf for the block */ + xfs_rtword_t *bufp; /* starting word in buffer */ + int error; /* error value */ + xfs_rtblock_t firstbit; /* first useful bit in the word */ + xfs_rtblock_t i; /* current bit number rel. to start */ + xfs_rtblock_t len; /* length of inspected area */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t want; /* mask for "good" values */ + xfs_rtword_t wdiff; /* difference from wanted value */ + int word; /* word number in the buffer */ + + /* + * Compute and read in starting bitmap block for starting block. + */ + block = XFS_BITTOBLOCK(mp, start); + error = xfs_rtbuf_get(mp, tp, block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + /* + * Get the first word's index & point to it. + */ + word = XFS_BITTOWORD(mp, start); + b = &bufp[word]; + bit = (int)(start & (XFS_NBWORD - 1)); + len = start - limit + 1; + /* + * Compute match value, based on the bit at start: if 1 (free) + * then all-ones, else all-zeroes. + */ + want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; + /* + * If the starting position is not word-aligned, deal with the + * partial word. + */ + if (bit < XFS_NBWORD - 1) { + /* + * Calculate first (leftmost) bit number to look at, + * and mask for all the relevant bits in this word. + */ + firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0); + mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) << + firstbit; + /* + * Calculate the difference between the value there + * and what we're looking for. + */ + if ((wdiff = (*b ^ want) & mask)) { + /* + * Different. Mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i = bit - XFS_RTHIBIT(wdiff); + *rtblock = start - i + 1; + return 0; } + i = bit - firstbit + 1; /* - * Go on to the next extent, if any. + * Go on to previous block if that's where the previous word is + * and we need the previous word. */ - oblocks = map.br_startoff + map.br_blockcount; + if (--word == -1 && i < len) { + /* + * If done with this block, get the previous one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + word = XFS_BLOCKWMASK(mp); + b = &bufp[word]; + } else { + /* + * Go on to the previous word in the buffer. + */ + b--; + } + } else { + /* + * Starting on a word boundary, no partial word. + */ + i = 0; + } + /* + * Loop over whole words in buffers. When we use up one buffer + * we move on to the previous one. + */ + while (len - i >= XFS_NBWORD) { + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = *b ^ want)) { + /* + * Different, mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); + *rtblock = start - i + 1; + return 0; + } + i += XFS_NBWORD; + /* + * Go on to previous block if that's where the previous word is + * and we need the previous word. + */ + if (--word == -1 && i < len) { + /* + * If done with this block, get the previous one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + word = XFS_BLOCKWMASK(mp); + b = &bufp[word]; + } else { + /* + * Go on to the previous word in the buffer. + */ + b--; + } } + /* + * If not ending on a word boundary, deal with the last + * (partial) word. + */ + if (len - i) { + /* + * Calculate first (leftmost) bit number to look at, + * and mask for all the relevant bits in this word. + */ + firstbit = XFS_NBWORD - (len - i); + mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit; + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = (*b ^ want) & mask)) { + /* + * Different, mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); + *rtblock = start - i + 1; + return 0; + } else + i = len; + } + /* + * No match, return that we scanned the whole area. + */ + xfs_trans_brelse(tp, bp); + *rtblock = start - i + 1; return 0; +} -error: +/* + * Searching forward from start to limit, find the first block whose + * allocated/free state is different from start's. + */ +STATIC int /* error */ +xfs_rtfind_forw( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to look at */ + xfs_rtblock_t limit, /* last block to look at */ + xfs_rtblock_t *rtblock) /* out: start block found */ +{ + xfs_rtword_t *b; /* current word in buffer */ + int bit; /* bit number in the word */ + xfs_rtblock_t block; /* bitmap block number */ + xfs_buf_t *bp; /* buf for the block */ + xfs_rtword_t *bufp; /* starting word in buffer */ + int error; /* error value */ + xfs_rtblock_t i; /* current bit number rel. to start */ + xfs_rtblock_t lastbit; /* last useful bit in the word */ + xfs_rtblock_t len; /* length of inspected area */ + xfs_rtword_t mask; /* mask of relevant bits for value */ + xfs_rtword_t want; /* mask for "good" values */ + xfs_rtword_t wdiff; /* difference from wanted value */ + int word; /* word number in the buffer */ + + /* + * Compute and read in starting bitmap block for starting block. + */ + block = XFS_BITTOBLOCK(mp, start); + error = xfs_rtbuf_get(mp, tp, block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + /* + * Get the first word's index & point to it. + */ + word = XFS_BITTOWORD(mp, start); + b = &bufp[word]; + bit = (int)(start & (XFS_NBWORD - 1)); + len = limit - start + 1; + /* + * Compute match value, based on the bit at start: if 1 (free) + * then all-ones, else all-zeroes. + */ + want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; + /* + * If the starting position is not word-aligned, deal with the + * partial word. + */ + if (bit) { + /* + * Calculate last (rightmost) bit number to look at, + * and mask for all the relevant bits in this word. + */ + lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); + mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; + /* + * Calculate the difference between the value there + * and what we're looking for. + */ + if ((wdiff = (*b ^ want) & mask)) { + /* + * Different. Mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i = XFS_RTLOBIT(wdiff) - bit; + *rtblock = start + i - 1; + return 0; + } + i = lastbit - bit; + /* + * Go on to next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * If done with this block, get the previous one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + b = bufp = bp->b_addr; + word = 0; + } else { + /* + * Go on to the previous word in the buffer. + */ + b++; + } + } else { + /* + * Starting on a word boundary, no partial word. + */ + i = 0; + } + /* + * Loop over whole words in buffers. When we use up one buffer + * we move on to the next one. + */ + while (len - i >= XFS_NBWORD) { + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = *b ^ want)) { + /* + * Different, mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_RTLOBIT(wdiff); + *rtblock = start + i - 1; + return 0; + } + i += XFS_NBWORD; + /* + * Go on to next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * If done with this block, get the next one. + */ + xfs_trans_brelse(tp, bp); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + b = bufp = bp->b_addr; + word = 0; + } else { + /* + * Go on to the next word in the buffer. + */ + b++; + } + } + /* + * If not ending on a word boundary, deal with the last + * (partial) word. + */ + if ((lastbit = len - i)) { + /* + * Calculate mask for all the relevant bits in this word. + */ + mask = ((xfs_rtword_t)1 << lastbit) - 1; + /* + * Compute difference between actual and desired value. + */ + if ((wdiff = (*b ^ want) & mask)) { + /* + * Different, mark where we are and return. + */ + xfs_trans_brelse(tp, bp); + i += XFS_RTLOBIT(wdiff); + *rtblock = start + i - 1; + return 0; + } else + i = len; + } + /* + * No match, return that we scanned the whole area. + */ + xfs_trans_brelse(tp, bp); + *rtblock = start + i - 1; + return 0; +} + +/* + * Mark an extent specified by start and len freed. + * Updates all the summary information as well as the bitmap. + */ +STATIC int /* error */ +xfs_rtfree_range( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to free */ + xfs_extlen_t len, /* length to free */ + xfs_buf_t **rbpp, /* in/out: summary block buffer */ + xfs_fsblock_t *rsb) /* in/out: summary block number */ +{ + xfs_rtblock_t end; /* end of the freed extent */ + int error; /* error value */ + xfs_rtblock_t postblock; /* first block freed > end */ + xfs_rtblock_t preblock; /* first block freed < start */ + + end = start + len - 1; + /* + * Modify the bitmap to mark this extent freed. + */ + error = xfs_rtmodify_range(mp, tp, start, len, 1); + if (error) { + return error; + } + /* + * Assume we're freeing out of the middle of an allocated extent. + * We need to find the beginning and end of the extent so we can + * properly update the summary. + */ + error = xfs_rtfind_back(mp, tp, start, 0, &preblock); + if (error) { + return error; + } + /* + * Find the next allocated block (end of allocated extent). + */ + error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, + &postblock); + if (error) + return error; + /* + * If there are blocks not being freed at the front of the + * old extent, add summary data for them to be allocated. + */ + if (preblock < start) { + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(start - preblock), + XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); + if (error) { + return error; + } + } + /* + * If there are blocks not being freed at the end of the + * old extent, add summary data for them to be allocated. + */ + if (postblock > end) { + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(postblock - end), + XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb); + if (error) { + return error; + } + } + /* + * Increment the summary information corresponding to the entire + * (new) free extent. + */ + error = xfs_rtmodify_summary(mp, tp, + XFS_RTBLOCKLOG(postblock + 1 - preblock), + XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); return error; } /* + * Read and return the summary information for a given extent size, + * bitmap block combination. + * Keeps track of a current summary block, so we don't keep reading + * it from the buffer cache. + */ +STATIC int /* error */ +xfs_rtget_summary( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ + int log, /* log2 of extent size */ + xfs_rtblock_t bbno, /* bitmap block number */ + xfs_buf_t **rbpp, /* in/out: summary block buffer */ + xfs_fsblock_t *rsb, /* in/out: summary block number */ + xfs_suminfo_t *sum) /* out: summary info for this block */ +{ + xfs_buf_t *bp; /* buffer for summary block */ + int error; /* error value */ + xfs_fsblock_t sb; /* summary fsblock */ + int so; /* index into the summary file */ + xfs_suminfo_t *sp; /* pointer to returned data */ + + /* + * Compute entry number in the summary file. + */ + so = XFS_SUMOFFS(mp, log, bbno); + /* + * Compute the block number in the summary file. + */ + sb = XFS_SUMOFFSTOBLOCK(mp, so); + /* + * If we have an old buffer, and the block number matches, use that. + */ + if (rbpp && *rbpp && *rsb == sb) + bp = *rbpp; + /* + * Otherwise we have to get the buffer. + */ + else { + /* + * If there was an old one, get rid of it first. + */ + if (rbpp && *rbpp) + xfs_trans_brelse(tp, *rbpp); + error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); + if (error) { + return error; + } + /* + * Remember this buffer and block for the next call. + */ + if (rbpp) { + *rbpp = bp; + *rsb = sb; + } + } + /* + * Point to the summary information & copy it out. + */ + sp = XFS_SUMPTR(mp, bp, so); + *sum = *sp; + /* + * Drop the buffer if we're not asked to remember it. + */ + if (!rbpp) + xfs_trans_brelse(tp, bp); + return 0; +} + +/* + * Set the given range of bitmap bits to the given value. + * Do whatever I/O and logging is required. + */ +STATIC int /* error */ +xfs_rtmodify_range( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t start, /* starting block to modify */ + xfs_extlen_t len, /* length of extent to modify */ + int val) /* 1 for free, 0 for allocated */ +{ + xfs_rtword_t *b; /* current word in buffer */ + int bit; /* bit number in the word */ + xfs_rtblock_t block; /* bitmap block number */ + xfs_buf_t *bp; /* buf for the block */ + xfs_rtword_t *bufp; /* starting word in buffer */ + int error; /* error value */ + xfs_rtword_t *first; /* first used word in the buffer */ + int i; /* current bit number rel. to start */ + int lastbit; /* last useful bit in word */ + xfs_rtword_t mask; /* mask o frelevant bits for value */ + int word; /* word number in the buffer */ + + /* + * Compute starting bitmap block number. + */ + block = XFS_BITTOBLOCK(mp, start); + /* + * Read the bitmap block, and point to its data. + */ + error = xfs_rtbuf_get(mp, tp, block, 0, &bp); + if (error) { + return error; + } + bufp = bp->b_addr; + /* + * Compute the starting word's address, and starting bit. + */ + word = XFS_BITTOWORD(mp, start); + first = b = &bufp[word]; + bit = (int)(start & (XFS_NBWORD - 1)); + /* + * 0 (allocated) => all zeroes; 1 (free) => all ones. + */ + val = -val; + /* + * If not starting on a word boundary, deal with the first + * (partial) word. + */ + if (bit) { + /* + * Compute first bit not changed and mask of relevant bits. + */ + lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); + mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; + /* + * Set/clear the active bits. + */ + if (val) + *b |= mask; + else + *b &= ~mask; + i = lastbit - bit; + /* + * Go on to the next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * Log the changed part of this block. + * Get the next one. + */ + xfs_trans_log_buf(tp, bp, + (uint)((char *)first - (char *)bufp), + (uint)((char *)b - (char *)bufp)); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + first = b = bufp = bp->b_addr; + word = 0; + } else { + /* + * Go on to the next word in the buffer + */ + b++; + } + } else { + /* + * Starting on a word boundary, no partial word. + */ + i = 0; + } + /* + * Loop over whole words in buffers. When we use up one buffer + * we move on to the next one. + */ + while (len - i >= XFS_NBWORD) { + /* + * Set the word value correctly. + */ + *b = val; + i += XFS_NBWORD; + /* + * Go on to the next block if that's where the next word is + * and we need the next word. + */ + if (++word == XFS_BLOCKWSIZE(mp) && i < len) { + /* + * Log the changed part of this block. + * Get the next one. + */ + xfs_trans_log_buf(tp, bp, + (uint)((char *)first - (char *)bufp), + (uint)((char *)b - (char *)bufp)); + error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); + if (error) { + return error; + } + first = b = bufp = bp->b_addr; + word = 0; + } else { + /* + * Go on to the next word in the buffer + */ + b++; + } + } + /* + * If not ending on a word boundary, deal with the last + * (partial) word. + */ + if ((lastbit = len - i)) { + /* + * Compute a mask of relevant bits. + */ + bit = 0; + mask = ((xfs_rtword_t)1 << lastbit) - 1; + /* + * Set/clear the active bits. + */ + if (val) + *b |= mask; + else + *b &= ~mask; + b++; + } + /* + * Log any remaining changed bytes. + */ + if (b > first) + xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp), + (uint)((char *)b - (char *)bufp - 1)); + return 0; +} + +/* + * Read and modify the summary information for a given extent size, + * bitmap block combination. + * Keeps track of a current summary block, so we don't keep reading + * it from the buffer cache. + */ +STATIC int /* error */ +xfs_rtmodify_summary( + xfs_mount_t *mp, /* file system mount point */ + xfs_trans_t *tp, /* transaction pointer */ + int log, /* log2 of extent size */ + xfs_rtblock_t bbno, /* bitmap block number */ + int delta, /* change to make to summary info */ + xfs_buf_t **rbpp, /* in/out: summary block buffer */ + xfs_fsblock_t *rsb) /* in/out: summary block number */ +{ + xfs_buf_t *bp; /* buffer for the summary block */ + int error; /* error value */ + xfs_fsblock_t sb; /* summary fsblock */ + int so; /* index into the summary file */ + xfs_suminfo_t *sp; /* pointer to returned data */ + + /* + * Compute entry number in the summary file. + */ + so = XFS_SUMOFFS(mp, log, bbno); + /* + * Compute the block number in the summary file. + */ + sb = XFS_SUMOFFSTOBLOCK(mp, so); + /* + * If we have an old buffer, and the block number matches, use that. + */ + if (rbpp && *rbpp && *rsb == sb) + bp = *rbpp; + /* + * Otherwise we have to get the buffer. + */ + else { + /* + * If there was an old one, get rid of it first. + */ + if (rbpp && *rbpp) + xfs_trans_brelse(tp, *rbpp); + error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); + if (error) { + return error; + } + /* + * Remember this buffer and block for the next call. + */ + if (rbpp) { + *rbpp = bp; + *rsb = sb; + } + } + /* + * Point to the summary information, modify and log it. + */ + sp = XFS_SUMPTR(mp, bp, so); + *sp += delta; + xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr), + (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1)); + return 0; +} + +/* * Visible (exported) functions. */ @@ -1231,6 +2129,66 @@ xfs_rtallocate_extent( } /* + * Free an extent in the realtime subvolume. Length is expressed in + * realtime extents, as is the block number. + */ +int /* error */ +xfs_rtfree_extent( + xfs_trans_t *tp, /* transaction pointer */ + xfs_rtblock_t bno, /* starting block number to free */ + xfs_extlen_t len) /* length of extent freed */ +{ + int error; /* error value */ + xfs_mount_t *mp; /* file system mount structure */ + xfs_fsblock_t sb; /* summary file block number */ + xfs_buf_t *sumbp; /* summary file block buffer */ + + mp = tp->t_mountp; + + ASSERT(mp->m_rbmip->i_itemp != NULL); + ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); + +#ifdef DEBUG + /* + * Check to see that this whole range is currently allocated. + */ + { + int stat; /* result from checking range */ + + error = xfs_rtcheck_alloc_range(mp, tp, bno, len, &stat); + if (error) { + return error; + } + ASSERT(stat); + } +#endif + sumbp = NULL; + /* + * Free the range of realtime blocks. + */ + error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb); + if (error) { + return error; + } + /* + * Mark more blocks free in the superblock. + */ + xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len); + /* + * If we've now freed all the blocks, reset the file sequence + * number to 0. + */ + if (tp->t_frextents_delta + mp->m_sb.sb_frextents == + mp->m_sb.sb_rextents) { + if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) + mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; + *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0; + xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); + } + return 0; +} + +/* * Initialize realtime fields in the mount structure. */ int /* error */ diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 752b63d..b2a1a24 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -95,30 +95,6 @@ xfs_growfs_rt( struct xfs_mount *mp, /* file system mount structure */ xfs_growfs_rt_t *in); /* user supplied growfs struct */ -/* - * From xfs_rtbitmap.c - */ -int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t block, int issum, struct xfs_buf **bpp); -int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_extlen_t len, int val, - xfs_rtblock_t *new, int *stat); -int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_rtblock_t limit, - xfs_rtblock_t *rtblock); -int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_rtblock_t limit, - xfs_rtblock_t *rtblock); -int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_extlen_t len, int val); -int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, - xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp, - xfs_fsblock_t *rsb); -int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_extlen_t len, - struct xfs_buf **rbpp, xfs_fsblock_t *rsb); - - #else # define xfs_rtallocate_extent(t,b,min,max,l,a,f,p,rb) (ENOSYS) # define xfs_rtfree_extent(t,b,l) (ENOSYS) diff --git a/fs/xfs/xfs_rtbitmap.c b/fs/xfs/xfs_rtbitmap.c deleted file mode 100644 index b1f2fe8..0000000 --- a/fs/xfs/xfs_rtbitmap.c +++ /dev/null @@ -1,974 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_bmap_util.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc.h" -#include "xfs_error.h" -#include "xfs_trans.h" -#include "xfs_trans_space.h" -#include "xfs_trace.h" -#include "xfs_buf.h" -#include "xfs_icache.h" -#include "xfs_dinode.h" -#include "xfs_rtalloc.h" - - -/* - * Realtime allocator bitmap functions shared with userspace. - */ - -/* - * Get a buffer for the bitmap or summary file block specified. - * The buffer is returned read and locked. - */ -int -xfs_rtbuf_get( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t block, /* block number in bitmap or summary */ - int issum, /* is summary not bitmap */ - xfs_buf_t **bpp) /* output: buffer for the block */ -{ - xfs_buf_t *bp; /* block buffer, result */ - xfs_inode_t *ip; /* bitmap or summary inode */ - xfs_bmbt_irec_t map; - int nmap = 1; - int error; /* error value */ - - ip = issum ? mp->m_rsumip : mp->m_rbmip; - - error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK); - if (error) - return error; - - ASSERT(map.br_startblock != NULLFSBLOCK); - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - XFS_FSB_TO_DADDR(mp, map.br_startblock), - mp->m_bsize, 0, &bp, NULL); - if (error) - return error; - ASSERT(!xfs_buf_geterror(bp)); - *bpp = bp; - return 0; -} - -/* - * Searching backward from start to limit, find the first block whose - * allocated/free state is different from start's. - */ -int -xfs_rtfind_back( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to look at */ - xfs_rtblock_t limit, /* last block to look at */ - xfs_rtblock_t *rtblock) /* out: start block found */ -{ - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtblock_t firstbit; /* first useful bit in the word */ - xfs_rtblock_t i; /* current bit number rel. to start */ - xfs_rtblock_t len; /* length of inspected area */ - xfs_rtword_t mask; /* mask of relevant bits for value */ - xfs_rtword_t want; /* mask for "good" values */ - xfs_rtword_t wdiff; /* difference from wanted value */ - int word; /* word number in the buffer */ - - /* - * Compute and read in starting bitmap block for starting block. - */ - block = XFS_BITTOBLOCK(mp, start); - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - /* - * Get the first word's index & point to it. - */ - word = XFS_BITTOWORD(mp, start); - b = &bufp[word]; - bit = (int)(start & (XFS_NBWORD - 1)); - len = start - limit + 1; - /* - * Compute match value, based on the bit at start: if 1 (free) - * then all-ones, else all-zeroes. - */ - want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; - /* - * If the starting position is not word-aligned, deal with the - * partial word. - */ - if (bit < XFS_NBWORD - 1) { - /* - * Calculate first (leftmost) bit number to look at, - * and mask for all the relevant bits in this word. - */ - firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0); - mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) << - firstbit; - /* - * Calculate the difference between the value there - * and what we're looking for. - */ - if ((wdiff = (*b ^ want) & mask)) { - /* - * Different. Mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i = bit - XFS_RTHIBIT(wdiff); - *rtblock = start - i + 1; - return 0; - } - i = bit - firstbit + 1; - /* - * Go on to previous block if that's where the previous word is - * and we need the previous word. - */ - if (--word == -1 && i < len) { - /* - * If done with this block, get the previous one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - word = XFS_BLOCKWMASK(mp); - b = &bufp[word]; - } else { - /* - * Go on to the previous word in the buffer. - */ - b--; - } - } else { - /* - * Starting on a word boundary, no partial word. - */ - i = 0; - } - /* - * Loop over whole words in buffers. When we use up one buffer - * we move on to the previous one. - */ - while (len - i >= XFS_NBWORD) { - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = *b ^ want)) { - /* - * Different, mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); - *rtblock = start - i + 1; - return 0; - } - i += XFS_NBWORD; - /* - * Go on to previous block if that's where the previous word is - * and we need the previous word. - */ - if (--word == -1 && i < len) { - /* - * If done with this block, get the previous one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - word = XFS_BLOCKWMASK(mp); - b = &bufp[word]; - } else { - /* - * Go on to the previous word in the buffer. - */ - b--; - } - } - /* - * If not ending on a word boundary, deal with the last - * (partial) word. - */ - if (len - i) { - /* - * Calculate first (leftmost) bit number to look at, - * and mask for all the relevant bits in this word. - */ - firstbit = XFS_NBWORD - (len - i); - mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit; - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = (*b ^ want) & mask)) { - /* - * Different, mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); - *rtblock = start - i + 1; - return 0; - } else - i = len; - } - /* - * No match, return that we scanned the whole area. - */ - xfs_trans_brelse(tp, bp); - *rtblock = start - i + 1; - return 0; -} - -/* - * Searching forward from start to limit, find the first block whose - * allocated/free state is different from start's. - */ -int -xfs_rtfind_forw( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to look at */ - xfs_rtblock_t limit, /* last block to look at */ - xfs_rtblock_t *rtblock) /* out: start block found */ -{ - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtblock_t i; /* current bit number rel. to start */ - xfs_rtblock_t lastbit; /* last useful bit in the word */ - xfs_rtblock_t len; /* length of inspected area */ - xfs_rtword_t mask; /* mask of relevant bits for value */ - xfs_rtword_t want; /* mask for "good" values */ - xfs_rtword_t wdiff; /* difference from wanted value */ - int word; /* word number in the buffer */ - - /* - * Compute and read in starting bitmap block for starting block. - */ - block = XFS_BITTOBLOCK(mp, start); - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - /* - * Get the first word's index & point to it. - */ - word = XFS_BITTOWORD(mp, start); - b = &bufp[word]; - bit = (int)(start & (XFS_NBWORD - 1)); - len = limit - start + 1; - /* - * Compute match value, based on the bit at start: if 1 (free) - * then all-ones, else all-zeroes. - */ - want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; - /* - * If the starting position is not word-aligned, deal with the - * partial word. - */ - if (bit) { - /* - * Calculate last (rightmost) bit number to look at, - * and mask for all the relevant bits in this word. - */ - lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); - mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; - /* - * Calculate the difference between the value there - * and what we're looking for. - */ - if ((wdiff = (*b ^ want) & mask)) { - /* - * Different. Mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i = XFS_RTLOBIT(wdiff) - bit; - *rtblock = start + i - 1; - return 0; - } - i = lastbit - bit; - /* - * Go on to next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * If done with this block, get the previous one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the previous word in the buffer. - */ - b++; - } - } else { - /* - * Starting on a word boundary, no partial word. - */ - i = 0; - } - /* - * Loop over whole words in buffers. When we use up one buffer - * we move on to the next one. - */ - while (len - i >= XFS_NBWORD) { - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = *b ^ want)) { - /* - * Different, mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i += XFS_RTLOBIT(wdiff); - *rtblock = start + i - 1; - return 0; - } - i += XFS_NBWORD; - /* - * Go on to next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * If done with this block, get the next one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer. - */ - b++; - } - } - /* - * If not ending on a word boundary, deal with the last - * (partial) word. - */ - if ((lastbit = len - i)) { - /* - * Calculate mask for all the relevant bits in this word. - */ - mask = ((xfs_rtword_t)1 << lastbit) - 1; - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = (*b ^ want) & mask)) { - /* - * Different, mark where we are and return. - */ - xfs_trans_brelse(tp, bp); - i += XFS_RTLOBIT(wdiff); - *rtblock = start + i - 1; - return 0; - } else - i = len; - } - /* - * No match, return that we scanned the whole area. - */ - xfs_trans_brelse(tp, bp); - *rtblock = start + i - 1; - return 0; -} - -/* - * Read and modify the summary information for a given extent size, - * bitmap block combination. - * Keeps track of a current summary block, so we don't keep reading - * it from the buffer cache. - */ -int -xfs_rtmodify_summary( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - int log, /* log2 of extent size */ - xfs_rtblock_t bbno, /* bitmap block number */ - int delta, /* change to make to summary info */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb) /* in/out: summary block number */ -{ - xfs_buf_t *bp; /* buffer for the summary block */ - int error; /* error value */ - xfs_fsblock_t sb; /* summary fsblock */ - int so; /* index into the summary file */ - xfs_suminfo_t *sp; /* pointer to returned data */ - - /* - * Compute entry number in the summary file. - */ - so = XFS_SUMOFFS(mp, log, bbno); - /* - * Compute the block number in the summary file. - */ - sb = XFS_SUMOFFSTOBLOCK(mp, so); - /* - * If we have an old buffer, and the block number matches, use that. - */ - if (rbpp && *rbpp && *rsb == sb) - bp = *rbpp; - /* - * Otherwise we have to get the buffer. - */ - else { - /* - * If there was an old one, get rid of it first. - */ - if (rbpp && *rbpp) - xfs_trans_brelse(tp, *rbpp); - error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); - if (error) { - return error; - } - /* - * Remember this buffer and block for the next call. - */ - if (rbpp) { - *rbpp = bp; - *rsb = sb; - } - } - /* - * Point to the summary information, modify and log it. - */ - sp = XFS_SUMPTR(mp, bp, so); - *sp += delta; - xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr), - (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1)); - return 0; -} - -/* - * Set the given range of bitmap bits to the given value. - * Do whatever I/O and logging is required. - */ -int -xfs_rtmodify_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to modify */ - xfs_extlen_t len, /* length of extent to modify */ - int val) /* 1 for free, 0 for allocated */ -{ - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtword_t *first; /* first used word in the buffer */ - int i; /* current bit number rel. to start */ - int lastbit; /* last useful bit in word */ - xfs_rtword_t mask; /* mask o frelevant bits for value */ - int word; /* word number in the buffer */ - - /* - * Compute starting bitmap block number. - */ - block = XFS_BITTOBLOCK(mp, start); - /* - * Read the bitmap block, and point to its data. - */ - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - /* - * Compute the starting word's address, and starting bit. - */ - word = XFS_BITTOWORD(mp, start); - first = b = &bufp[word]; - bit = (int)(start & (XFS_NBWORD - 1)); - /* - * 0 (allocated) => all zeroes; 1 (free) => all ones. - */ - val = -val; - /* - * If not starting on a word boundary, deal with the first - * (partial) word. - */ - if (bit) { - /* - * Compute first bit not changed and mask of relevant bits. - */ - lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); - mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; - /* - * Set/clear the active bits. - */ - if (val) - *b |= mask; - else - *b &= ~mask; - i = lastbit - bit; - /* - * Go on to the next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * Log the changed part of this block. - * Get the next one. - */ - xfs_trans_log_buf(tp, bp, - (uint)((char *)first - (char *)bufp), - (uint)((char *)b - (char *)bufp)); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - first = b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer - */ - b++; - } - } else { - /* - * Starting on a word boundary, no partial word. - */ - i = 0; - } - /* - * Loop over whole words in buffers. When we use up one buffer - * we move on to the next one. - */ - while (len - i >= XFS_NBWORD) { - /* - * Set the word value correctly. - */ - *b = val; - i += XFS_NBWORD; - /* - * Go on to the next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * Log the changed part of this block. - * Get the next one. - */ - xfs_trans_log_buf(tp, bp, - (uint)((char *)first - (char *)bufp), - (uint)((char *)b - (char *)bufp)); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - first = b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer - */ - b++; - } - } - /* - * If not ending on a word boundary, deal with the last - * (partial) word. - */ - if ((lastbit = len - i)) { - /* - * Compute a mask of relevant bits. - */ - bit = 0; - mask = ((xfs_rtword_t)1 << lastbit) - 1; - /* - * Set/clear the active bits. - */ - if (val) - *b |= mask; - else - *b &= ~mask; - b++; - } - /* - * Log any remaining changed bytes. - */ - if (b > first) - xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp), - (uint)((char *)b - (char *)bufp - 1)); - return 0; -} - -/* - * Mark an extent specified by start and len freed. - * Updates all the summary information as well as the bitmap. - */ -int -xfs_rtfree_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block to free */ - xfs_extlen_t len, /* length to free */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ - xfs_fsblock_t *rsb) /* in/out: summary block number */ -{ - xfs_rtblock_t end; /* end of the freed extent */ - int error; /* error value */ - xfs_rtblock_t postblock; /* first block freed > end */ - xfs_rtblock_t preblock; /* first block freed < start */ - - end = start + len - 1; - /* - * Modify the bitmap to mark this extent freed. - */ - error = xfs_rtmodify_range(mp, tp, start, len, 1); - if (error) { - return error; - } - /* - * Assume we're freeing out of the middle of an allocated extent. - * We need to find the beginning and end of the extent so we can - * properly update the summary. - */ - error = xfs_rtfind_back(mp, tp, start, 0, &preblock); - if (error) { - return error; - } - /* - * Find the next allocated block (end of allocated extent). - */ - error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, - &postblock); - if (error) - return error; - /* - * If there are blocks not being freed at the front of the - * old extent, add summary data for them to be allocated. - */ - if (preblock < start) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(start - preblock), - XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); - if (error) { - return error; - } - } - /* - * If there are blocks not being freed at the end of the - * old extent, add summary data for them to be allocated. - */ - if (postblock > end) { - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock - end), - XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb); - if (error) { - return error; - } - } - /* - * Increment the summary information corresponding to the entire - * (new) free extent. - */ - error = xfs_rtmodify_summary(mp, tp, - XFS_RTBLOCKLOG(postblock + 1 - preblock), - XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); - return error; -} - -/* - * Check that the given range is either all allocated (val = 0) or - * all free (val = 1). - */ -int -xfs_rtcheck_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t start, /* starting block number of extent */ - xfs_extlen_t len, /* length of extent */ - int val, /* 1 for free, 0 for allocated */ - xfs_rtblock_t *new, /* out: first block not matching */ - int *stat) /* out: 1 for matches, 0 for not */ -{ - xfs_rtword_t *b; /* current word in buffer */ - int bit; /* bit number in the word */ - xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ - xfs_rtword_t *bufp; /* starting word in buffer */ - int error; /* error value */ - xfs_rtblock_t i; /* current bit number rel. to start */ - xfs_rtblock_t lastbit; /* last useful bit in word */ - xfs_rtword_t mask; /* mask of relevant bits for value */ - xfs_rtword_t wdiff; /* difference from wanted value */ - int word; /* word number in the buffer */ - - /* - * Compute starting bitmap block number - */ - block = XFS_BITTOBLOCK(mp, start); - /* - * Read the bitmap block. - */ - error = xfs_rtbuf_get(mp, tp, block, 0, &bp); - if (error) { - return error; - } - bufp = bp->b_addr; - /* - * Compute the starting word's address, and starting bit. - */ - word = XFS_BITTOWORD(mp, start); - b = &bufp[word]; - bit = (int)(start & (XFS_NBWORD - 1)); - /* - * 0 (allocated) => all zero's; 1 (free) => all one's. - */ - val = -val; - /* - * If not starting on a word boundary, deal with the first - * (partial) word. - */ - if (bit) { - /* - * Compute first bit not examined. - */ - lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); - /* - * Mask of relevant bits. - */ - mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = (*b ^ val) & mask)) { - /* - * Different, compute first wrong bit and return. - */ - xfs_trans_brelse(tp, bp); - i = XFS_RTLOBIT(wdiff) - bit; - *new = start + i; - *stat = 0; - return 0; - } - i = lastbit - bit; - /* - * Go on to next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * If done with this block, get the next one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer. - */ - b++; - } - } else { - /* - * Starting on a word boundary, no partial word. - */ - i = 0; - } - /* - * Loop over whole words in buffers. When we use up one buffer - * we move on to the next one. - */ - while (len - i >= XFS_NBWORD) { - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = *b ^ val)) { - /* - * Different, compute first wrong bit and return. - */ - xfs_trans_brelse(tp, bp); - i += XFS_RTLOBIT(wdiff); - *new = start + i; - *stat = 0; - return 0; - } - i += XFS_NBWORD; - /* - * Go on to next block if that's where the next word is - * and we need the next word. - */ - if (++word == XFS_BLOCKWSIZE(mp) && i < len) { - /* - * If done with this block, get the next one. - */ - xfs_trans_brelse(tp, bp); - error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); - if (error) { - return error; - } - b = bufp = bp->b_addr; - word = 0; - } else { - /* - * Go on to the next word in the buffer. - */ - b++; - } - } - /* - * If not ending on a word boundary, deal with the last - * (partial) word. - */ - if ((lastbit = len - i)) { - /* - * Mask of relevant bits. - */ - mask = ((xfs_rtword_t)1 << lastbit) - 1; - /* - * Compute difference between actual and desired value. - */ - if ((wdiff = (*b ^ val) & mask)) { - /* - * Different, compute first wrong bit and return. - */ - xfs_trans_brelse(tp, bp); - i += XFS_RTLOBIT(wdiff); - *new = start + i; - *stat = 0; - return 0; - } else - i = len; - } - /* - * Successful, return. - */ - xfs_trans_brelse(tp, bp); - *new = start + i; - *stat = 1; - return 0; -} - -#ifdef DEBUG -/* - * Check that the given extent (block range) is allocated already. - */ -STATIC int /* error */ -xfs_rtcheck_alloc_range( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number of extent */ - xfs_extlen_t len) /* length of extent */ -{ - xfs_rtblock_t new; /* dummy for xfs_rtcheck_range */ - int stat; - int error; - - error = xfs_rtcheck_range(mp, tp, bno, len, 0, &new, &stat); - if (error) - return error; - ASSERT(stat); - return 0; -} -#else -#define xfs_rtcheck_alloc_range(m,t,b,l) (0) -#endif -/* - * Free an extent in the realtime subvolume. Length is expressed in - * realtime extents, as is the block number. - */ -int /* error */ -xfs_rtfree_extent( - xfs_trans_t *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number to free */ - xfs_extlen_t len) /* length of extent freed */ -{ - int error; /* error value */ - xfs_mount_t *mp; /* file system mount structure */ - xfs_fsblock_t sb; /* summary file block number */ - xfs_buf_t *sumbp = NULL; /* summary file block buffer */ - - mp = tp->t_mountp; - - ASSERT(mp->m_rbmip->i_itemp != NULL); - ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); - - error = xfs_rtcheck_alloc_range(mp, tp, bno, len); - if (error) - return error; - - /* - * Free the range of realtime blocks. - */ - error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb); - if (error) { - return error; - } - /* - * Mark more blocks free in the superblock. - */ - xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len); - /* - * If we've now freed all the blocks, reset the file sequence - * number to 0. - */ - if (tp->t_frextents_delta + mp->m_sb.sb_frextents == - mp->m_sb.sb_rextents) { - if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) - mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; - *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0; - xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); - } - return 0; -} - diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c index b7c9aea..a5b59d9 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/xfs_sb.c @@ -17,26 +17,34 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_da_btree.h" +#include "xfs_dir2_format.h" +#include "xfs_dir2.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_btree.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" +#include "xfs_rtalloc.h" +#include "xfs_bmap.h" #include "xfs_error.h" +#include "xfs_quota.h" +#include "xfs_fsops.h" #include "xfs_trace.h" #include "xfs_cksum.h" -#include "xfs_trans.h" #include "xfs_buf_item.h" -#include "xfs_dinode.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. @@ -241,13 +249,13 @@ xfs_mount_validate_sb( if (xfs_sb_version_has_pquotino(sbp)) { if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) { xfs_notice(mp, - "Version 5 of Super block has XFS_OQUOTA bits."); + "Version 5 of Super block has XFS_OQUOTA bits.\n"); return XFS_ERROR(EFSCORRUPTED); } } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) { xfs_notice(mp, -"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits."); +"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits.\n"); return XFS_ERROR(EFSCORRUPTED); } @@ -588,11 +596,6 @@ xfs_sb_verify( * single bit error could clear the feature bit and unused parts of the * superblock are supposed to be zero. Hence a non-null crc field indicates that * we've potentially lost a feature bit and we should check it anyway. - * - * However, past bugs (i.e. in growfs) left non-zeroed regions beyond the - * last field in V4 secondary superblocks. So for secondary superblocks, - * we are more forgiving, and ignore CRC failures if the primary doesn't - * indicate that the fs version is V5. */ static void xfs_sb_read_verify( @@ -613,21 +616,16 @@ xfs_sb_read_verify( if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize), offsetof(struct xfs_sb, sb_crc))) { - /* Only fail bad secondaries on a known V5 filesystem */ - if (bp->b_bn != XFS_SB_DADDR && - xfs_sb_version_hascrc(&mp->m_sb)) { - error = EFSCORRUPTED; - goto out_error; - } + error = EFSCORRUPTED; + goto out_error; } } error = xfs_sb_verify(bp, true); out_error: if (error) { - if (error != EWRONGFS) - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - mp, bp->b_addr); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, + mp, bp->b_addr); xfs_buf_ioerror(bp, error); } } diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 35061d4..6835b44 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -699,4 +699,7 @@ extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); +extern const struct xfs_buf_ops xfs_sb_buf_ops; +extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops; + #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/xfs_shared.h deleted file mode 100644 index 8c5035a1..0000000 --- a/fs/xfs/xfs_shared.h +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * Copyright (c) 2013 Red Hat, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SHARED_H__ -#define __XFS_SHARED_H__ - -/* - * Definitions shared between kernel and userspace that don't fit into any other - * header file that is shared with userspace. - */ -struct xfs_ifork; -struct xfs_buf; -struct xfs_buf_ops; -struct xfs_mount; -struct xfs_trans; -struct xfs_inode; - -/* - * Buffer verifier operations are widely used, including userspace tools - */ -extern const struct xfs_buf_ops xfs_agf_buf_ops; -extern const struct xfs_buf_ops xfs_agi_buf_ops; -extern const struct xfs_buf_ops xfs_agf_buf_ops; -extern const struct xfs_buf_ops xfs_agfl_buf_ops; -extern const struct xfs_buf_ops xfs_allocbt_buf_ops; -extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops; -extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops; -extern const struct xfs_buf_ops xfs_bmbt_buf_ops; -extern const struct xfs_buf_ops xfs_da3_node_buf_ops; -extern const struct xfs_buf_ops xfs_dquot_buf_ops; -extern const struct xfs_buf_ops xfs_symlink_buf_ops; -extern const struct xfs_buf_ops xfs_agi_buf_ops; -extern const struct xfs_buf_ops xfs_inobt_buf_ops; -extern const struct xfs_buf_ops xfs_inode_buf_ops; -extern const struct xfs_buf_ops xfs_inode_buf_ra_ops; -extern const struct xfs_buf_ops xfs_dquot_buf_ops; -extern const struct xfs_buf_ops xfs_sb_buf_ops; -extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops; -extern const struct xfs_buf_ops xfs_symlink_buf_ops; - -/* - * Transaction types. Used to distinguish types of buffers. These never reach - * the log. - */ -#define XFS_TRANS_SETATTR_NOT_SIZE 1 -#define XFS_TRANS_SETATTR_SIZE 2 -#define XFS_TRANS_INACTIVE 3 -#define XFS_TRANS_CREATE 4 -#define XFS_TRANS_CREATE_TRUNC 5 -#define XFS_TRANS_TRUNCATE_FILE 6 -#define XFS_TRANS_REMOVE 7 -#define XFS_TRANS_LINK 8 -#define XFS_TRANS_RENAME 9 -#define XFS_TRANS_MKDIR 10 -#define XFS_TRANS_RMDIR 11 -#define XFS_TRANS_SYMLINK 12 -#define XFS_TRANS_SET_DMATTRS 13 -#define XFS_TRANS_GROWFS 14 -#define XFS_TRANS_STRAT_WRITE 15 -#define XFS_TRANS_DIOSTRAT 16 -/* 17 was XFS_TRANS_WRITE_SYNC */ -#define XFS_TRANS_WRITEID 18 -#define XFS_TRANS_ADDAFORK 19 -#define XFS_TRANS_ATTRINVAL 20 -#define XFS_TRANS_ATRUNCATE 21 -#define XFS_TRANS_ATTR_SET 22 -#define XFS_TRANS_ATTR_RM 23 -#define XFS_TRANS_ATTR_FLAG 24 -#define XFS_TRANS_CLEAR_AGI_BUCKET 25 -#define XFS_TRANS_QM_SBCHANGE 26 -/* - * Dummy entries since we use the transaction type to index into the - * trans_type[] in xlog_recover_print_trans_head() - */ -#define XFS_TRANS_DUMMY1 27 -#define XFS_TRANS_DUMMY2 28 -#define XFS_TRANS_QM_QUOTAOFF 29 -#define XFS_TRANS_QM_DQALLOC 30 -#define XFS_TRANS_QM_SETQLIM 31 -#define XFS_TRANS_QM_DQCLUSTER 32 -#define XFS_TRANS_QM_QINOCREATE 33 -#define XFS_TRANS_QM_QUOTAOFF_END 34 -#define XFS_TRANS_SB_UNIT 35 -#define XFS_TRANS_FSYNC_TS 36 -#define XFS_TRANS_GROWFSRT_ALLOC 37 -#define XFS_TRANS_GROWFSRT_ZERO 38 -#define XFS_TRANS_GROWFSRT_FREE 39 -#define XFS_TRANS_SWAPEXT 40 -#define XFS_TRANS_SB_COUNT 41 -#define XFS_TRANS_CHECKPOINT 42 -#define XFS_TRANS_ICREATE 43 -#define XFS_TRANS_TYPE_MAX 43 -/* new transaction types need to be reflected in xfs_logprint(8) */ - -#define XFS_TRANS_TYPES \ - { XFS_TRANS_SETATTR_NOT_SIZE, "SETATTR_NOT_SIZE" }, \ - { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ - { XFS_TRANS_INACTIVE, "INACTIVE" }, \ - { XFS_TRANS_CREATE, "CREATE" }, \ - { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ - { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ - { XFS_TRANS_REMOVE, "REMOVE" }, \ - { XFS_TRANS_LINK, "LINK" }, \ - { XFS_TRANS_RENAME, "RENAME" }, \ - { XFS_TRANS_MKDIR, "MKDIR" }, \ - { XFS_TRANS_RMDIR, "RMDIR" }, \ - { XFS_TRANS_SYMLINK, "SYMLINK" }, \ - { XFS_TRANS_SET_DMATTRS, "SET_DMATTRS" }, \ - { XFS_TRANS_GROWFS, "GROWFS" }, \ - { XFS_TRANS_STRAT_WRITE, "STRAT_WRITE" }, \ - { XFS_TRANS_DIOSTRAT, "DIOSTRAT" }, \ - { XFS_TRANS_WRITEID, "WRITEID" }, \ - { XFS_TRANS_ADDAFORK, "ADDAFORK" }, \ - { XFS_TRANS_ATTRINVAL, "ATTRINVAL" }, \ - { XFS_TRANS_ATRUNCATE, "ATRUNCATE" }, \ - { XFS_TRANS_ATTR_SET, "ATTR_SET" }, \ - { XFS_TRANS_ATTR_RM, "ATTR_RM" }, \ - { XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \ - { XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \ - { XFS_TRANS_QM_SBCHANGE, "QM_SBCHANGE" }, \ - { XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \ - { XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \ - { XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \ - { XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \ - { XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \ - { XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \ - { XFS_TRANS_SB_UNIT, "SB_UNIT" }, \ - { XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \ - { XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \ - { XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \ - { XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \ - { XFS_TRANS_SWAPEXT, "SWAPEXT" }, \ - { XFS_TRANS_SB_COUNT, "SB_COUNT" }, \ - { XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \ - { XFS_TRANS_DUMMY1, "DUMMY1" }, \ - { XFS_TRANS_DUMMY2, "DUMMY2" }, \ - { XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" } - -/* - * This structure is used to track log items associated with - * a transaction. It points to the log item and keeps some - * flags to track the state of the log item. It also tracks - * the amount of space needed to log the item it describes - * once we get to commit processing (see xfs_trans_commit()). - */ -struct xfs_log_item_desc { - struct xfs_log_item *lid_item; - struct list_head lid_trans; - unsigned char lid_flags; -}; - -#define XFS_LID_DIRTY 0x1 - -/* log size calculation functions */ -int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes); -int xfs_log_calc_minimum_size(struct xfs_mount *); - - -/* - * Values for t_flags. - */ -#define XFS_TRANS_DIRTY 0x01 /* something needs to be logged */ -#define XFS_TRANS_SB_DIRTY 0x02 /* superblock is modified */ -#define XFS_TRANS_PERM_LOG_RES 0x04 /* xact took a permanent log res */ -#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ -#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ -#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ -#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer - count in superblock */ -/* - * Values for call flags parameter. - */ -#define XFS_TRANS_RELEASE_LOG_RES 0x4 -#define XFS_TRANS_ABORT 0x8 - -/* - * Field values for xfs_trans_mod_sb. - */ -#define XFS_TRANS_SB_ICOUNT 0x00000001 -#define XFS_TRANS_SB_IFREE 0x00000002 -#define XFS_TRANS_SB_FDBLOCKS 0x00000004 -#define XFS_TRANS_SB_RES_FDBLOCKS 0x00000008 -#define XFS_TRANS_SB_FREXTENTS 0x00000010 -#define XFS_TRANS_SB_RES_FREXTENTS 0x00000020 -#define XFS_TRANS_SB_DBLOCKS 0x00000040 -#define XFS_TRANS_SB_AGCOUNT 0x00000080 -#define XFS_TRANS_SB_IMAXPCT 0x00000100 -#define XFS_TRANS_SB_REXTSIZE 0x00000200 -#define XFS_TRANS_SB_RBMBLOCKS 0x00000400 -#define XFS_TRANS_SB_RBLOCKS 0x00000800 -#define XFS_TRANS_SB_REXTENTS 0x00001000 -#define XFS_TRANS_SB_REXTSLOG 0x00002000 - -/* - * Here we centralize the specification of XFS meta-data buffer reference count - * values. This determines how hard the buffer cache tries to hold onto the - * buffer. - */ -#define XFS_AGF_REF 4 -#define XFS_AGI_REF 4 -#define XFS_AGFL_REF 3 -#define XFS_INO_BTREE_REF 3 -#define XFS_ALLOC_BTREE_REF 2 -#define XFS_BMAP_BTREE_REF 2 -#define XFS_DIR_BTREE_REF 2 -#define XFS_INO_REF 2 -#define XFS_ATTR_BTREE_REF 1 -#define XFS_DQUOT_REF 1 - -/* - * Flags for xfs_trans_ichgtime(). - */ -#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ -#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ -#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */ - - -/* - * Symlink decoding/encoding functions - */ -int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen); -int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, - uint32_t size, struct xfs_buf *bp); -bool xfs_symlink_hdr_ok(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, - uint32_t size, struct xfs_buf *bp); -void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, - struct xfs_inode *ip, struct xfs_ifork *ifp); - -#endif /* __XFS_SHARED_H__ */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f317488..15188cc 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -17,26 +17,34 @@ */ #include "xfs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" #include "xfs_inum.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" #include "xfs_mount.h" -#include "xfs_da_format.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_ialloc.h" #include "xfs_bmap.h" -#include "xfs_alloc.h" +#include "xfs_rtalloc.h" #include "xfs_error.h" +#include "xfs_itable.h" #include "xfs_fsops.h" -#include "xfs_trans.h" +#include "xfs_attr.h" #include "xfs_buf_item.h" -#include "xfs_log.h" #include "xfs_log_priv.h" +#include "xfs_trans_priv.h" +#include "xfs_filestream.h" #include "xfs_da_btree.h" +#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_extfree_item.h" #include "xfs_mru_cache.h" @@ -44,9 +52,6 @@ #include "xfs_icache.h" #include "xfs_trace.h" #include "xfs_icreate_item.h" -#include "xfs_dinode.h" -#include "xfs_filestream.h" -#include "xfs_quota.h" #include <linux/namei.h> #include <linux/init.h> @@ -913,7 +918,7 @@ xfs_flush_inodes( struct super_block *sb = mp->m_super; if (down_read_trylock(&sb->s_umount)) { - sync_inodes_sb(sb, jiffies); + sync_inodes_sb(sb); up_read(&sb->s_umount); } } @@ -941,6 +946,10 @@ xfs_fs_destroy_inode( XFS_STATS_INC(vn_reclaim); + /* bad inode, get out here ASAP */ + if (is_bad_inode(inode)) + goto out_reclaim; + ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); /* @@ -956,6 +965,7 @@ xfs_fs_destroy_inode( * this more efficiently than we can here, so simply let background * reclaim tear down all inodes. */ +out_reclaim: xfs_inode_set_reclaim_tag(ip); } @@ -1155,7 +1165,7 @@ xfs_restore_resvblks(struct xfs_mount *mp) * Note: xfs_log_quiesce() stops background log work - the callers must ensure * it is started again when appropriate. */ -static void +void xfs_quiesce_attr( struct xfs_mount *mp) { @@ -1236,7 +1246,7 @@ xfs_fs_remount( */ #if 0 xfs_info(mp, - "mount option \"%s\" not supported for remount", p); + "mount option \"%s\" not supported for remount\n", p); return -EINVAL; #else break; @@ -1481,6 +1491,10 @@ xfs_fs_fill_super( error = ENOENT; goto out_unmount; } + if (is_bad_inode(root)) { + error = EINVAL; + goto out_unmount; + } sb->s_root = d_make_root(root); if (!sb->s_root) { error = ENOMEM; diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 14e58f2..f622a97 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -17,31 +17,31 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" -#include "xfs_shared.h" #include "xfs_fs.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2_format.h" #include "xfs_dir2.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_bmap.h" -#include "xfs_bmap_btree.h" #include "xfs_bmap_util.h" #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trans_space.h" #include "xfs_trace.h" #include "xfs_symlink.h" -#include "xfs_trans.h" -#include "xfs_log.h" -#include "xfs_dinode.h" +#include "xfs_buf_item.h" /* ----- Kernel only functions below ----- */ STATIC int @@ -424,7 +424,8 @@ xfs_symlink( */ STATIC int xfs_inactive_symlink_rmt( - struct xfs_inode *ip) + xfs_inode_t *ip, + xfs_trans_t **tpp) { xfs_buf_t *bp; int committed; @@ -436,9 +437,11 @@ xfs_inactive_symlink_rmt( xfs_mount_t *mp; xfs_bmbt_irec_t mval[XFS_SYMLINK_MAPS]; int nmaps; + xfs_trans_t *ntp; int size; xfs_trans_t *tp; + tp = *tpp; mp = ip->i_mount; ASSERT(ip->i_df.if_flags & XFS_IFEXTENTS); /* @@ -450,16 +453,6 @@ xfs_inactive_symlink_rmt( */ ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2); - tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); - error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return error; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, 0); - /* * Lock the inode, fix the size, and join it to the transaction. * Hold it so in the normal path, we still have it locked for @@ -478,7 +471,7 @@ xfs_inactive_symlink_rmt( error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size), mval, &nmaps, 0); if (error) - goto error_trans_cancel; + goto error0; /* * Invalidate the block(s). No validation is done. */ @@ -488,24 +481,22 @@ xfs_inactive_symlink_rmt( XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0); if (!bp) { error = ENOMEM; - goto error_bmap_cancel; + goto error1; } xfs_trans_binval(tp, bp); } /* * Unmap the dead block(s) to the free_list. */ - error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, - &first_block, &free_list, &done); - if (error) - goto error_bmap_cancel; + if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, + &first_block, &free_list, &done))) + goto error1; ASSERT(done); /* * Commit the first transaction. This logs the EFI and the inode. */ - error = xfs_bmap_finish(&tp, &free_list, &committed); - if (error) - goto error_bmap_cancel; + if ((error = xfs_bmap_finish(&tp, &free_list, &committed))) + goto error1; /* * The transaction must have been committed, since there were * actually extents freed by xfs_bunmapi. See xfs_bmap_finish. @@ -520,13 +511,26 @@ xfs_inactive_symlink_rmt( xfs_trans_ijoin(tp, ip, 0); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); /* + * Get a new, empty transaction to return to our caller. + */ + ntp = xfs_trans_dup(tp); + /* * Commit the transaction containing extent freeing and EFDs. + * If we get an error on the commit here or on the reserve below, + * we need to unlock the inode since the new transaction doesn't + * have the inode attached. */ - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + error = xfs_trans_commit(tp, 0); + tp = ntp; if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); - goto error_unlock; + goto error0; } + /* + * transaction commit worked ok so we can drop the extra ticket + * reference that we gained in xfs_trans_dup() + */ + xfs_log_ticket_put(tp->t_ticket); /* * Remove the memory for extent descriptions (just bookkeeping). @@ -534,16 +538,23 @@ xfs_inactive_symlink_rmt( if (ip->i_df.if_bytes) xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK); ASSERT(ip->i_df.if_bytes == 0); + /* + * Put an itruncate log reservation in the new transaction + * for our caller. + */ + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); + if (error) { + ASSERT(XFS_FORCED_SHUTDOWN(mp)); + goto error0; + } - xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + *tpp = tp; return 0; -error_bmap_cancel: + error1: xfs_bmap_cancel(&free_list); -error_trans_cancel: - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); -error_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL); + error0: return error; } @@ -552,46 +563,41 @@ error_unlock: */ int xfs_inactive_symlink( - struct xfs_inode *ip) + struct xfs_inode *ip, + struct xfs_trans **tp) { struct xfs_mount *mp = ip->i_mount; int pathlen; trace_xfs_inactive_symlink(ip); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - xfs_ilock(ip, XFS_ILOCK_EXCL); - /* * Zero length symlinks _can_ exist. */ pathlen = (int)ip->i_d.di_size; - if (!pathlen) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (!pathlen) return 0; - } if (pathlen < 0 || pathlen > MAXPATHLEN) { xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)", __func__, (unsigned long long)ip->i_ino, pathlen); - xfs_iunlock(ip, XFS_ILOCK_EXCL); ASSERT(0); return XFS_ERROR(EFSCORRUPTED); } if (ip->i_df.if_flags & XFS_IFINLINE) { - if (ip->i_df.if_bytes > 0) + if (ip->i_df.if_bytes > 0) xfs_idata_realloc(ip, -(ip->i_df.if_bytes), XFS_DATA_FORK); - xfs_iunlock(ip, XFS_ILOCK_EXCL); ASSERT(ip->i_df.if_bytes == 0); return 0; } - xfs_iunlock(ip, XFS_ILOCK_EXCL); - /* remove the remote symlink */ - return xfs_inactive_symlink_rmt(ip); + return xfs_inactive_symlink_rmt(ip, tp); } diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h index e75245d..99338ba 100644 --- a/fs/xfs/xfs_symlink.h +++ b/fs/xfs/xfs_symlink.h @@ -22,6 +22,6 @@ int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, const char *target_path, umode_t mode, struct xfs_inode **ipp); int xfs_readlink(struct xfs_inode *ip, char *link); -int xfs_inactive_symlink(struct xfs_inode *ip); +int xfs_inactive_symlink(struct xfs_inode *ip, struct xfs_trans **tpp); #endif /* __XFS_SYMLINK_H */ diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c index bf59a2b..01c85e3 100644 --- a/fs/xfs/xfs_symlink_remote.c +++ b/fs/xfs/xfs_symlink_remote.c @@ -19,9 +19,8 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_shared.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_ag.h" #include "xfs_sb.h" #include "xfs_mount.h" @@ -31,7 +30,6 @@ #include "xfs_trace.h" #include "xfs_symlink.h" #include "xfs_cksum.h" -#include "xfs_trans.h" #include "xfs_buf_item.h" diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index dee3279..5d7b3e4 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -17,16 +17,19 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" +#include "xfs_types.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_mount.h" #include "xfs_da_btree.h" #include "xfs_ialloc.h" #include "xfs_itable.h" @@ -34,8 +37,6 @@ #include "xfs_bmap.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" -#include "xfs_trans.h" -#include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_buf_item.h" #include "xfs_quota.h" @@ -45,7 +46,6 @@ #include "xfs_dquot.h" #include "xfs_log_recover.h" #include "xfs_inode_item.h" -#include "xfs_bmap_btree.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 425dfa4..47910e6 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -31,8 +31,8 @@ struct xfs_da_args; struct xfs_da_node_entry; struct xfs_dquot; struct xfs_log_item; -struct xlog; struct xlog_ticket; +struct xlog; struct xlog_recover; struct xlog_recover_item; struct xfs_buf_log_format; @@ -135,31 +135,6 @@ DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); DEFINE_PERAG_REF_EVENT(xfs_perag_set_eofblocks); DEFINE_PERAG_REF_EVENT(xfs_perag_clear_eofblocks); -DECLARE_EVENT_CLASS(xfs_ag_class, - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno), - TP_ARGS(mp, agno), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_agnumber_t, agno) - ), - TP_fast_assign( - __entry->dev = mp->m_super->s_dev; - __entry->agno = agno; - ), - TP_printk("dev %d:%d agno %u", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno) -); -#define DEFINE_AG_EVENT(name) \ -DEFINE_EVENT(xfs_ag_class, name, \ - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno), \ - TP_ARGS(mp, agno)) - -DEFINE_AG_EVENT(xfs_read_agf); -DEFINE_AG_EVENT(xfs_alloc_read_agf); -DEFINE_AG_EVENT(xfs_read_agi); -DEFINE_AG_EVENT(xfs_ialloc_read_agi); - TRACE_EVENT(xfs_attr_list_node_descend, TP_PROTO(struct xfs_attr_list_context *ctx, struct xfs_da_node_entry *btree), @@ -963,63 +938,6 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned); DEFINE_LOG_ITEM_EVENT(xfs_ail_locked); DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); -DECLARE_EVENT_CLASS(xfs_ail_class, - TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), - TP_ARGS(lip, old_lsn, new_lsn), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(void *, lip) - __field(uint, type) - __field(uint, flags) - __field(xfs_lsn_t, old_lsn) - __field(xfs_lsn_t, new_lsn) - ), - TP_fast_assign( - __entry->dev = lip->li_mountp->m_super->s_dev; - __entry->lip = lip; - __entry->type = lip->li_type; - __entry->flags = lip->li_flags; - __entry->old_lsn = old_lsn; - __entry->new_lsn = new_lsn; - ), - TP_printk("dev %d:%d lip 0x%p old lsn %d/%d new lsn %d/%d type %s flags %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->lip, - CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn), - CYCLE_LSN(__entry->new_lsn), BLOCK_LSN(__entry->new_lsn), - __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), - __print_flags(__entry->flags, "|", XFS_LI_FLAGS)) -) - -#define DEFINE_AIL_EVENT(name) \ -DEFINE_EVENT(xfs_ail_class, name, \ - TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), \ - TP_ARGS(lip, old_lsn, new_lsn)) -DEFINE_AIL_EVENT(xfs_ail_insert); -DEFINE_AIL_EVENT(xfs_ail_move); -DEFINE_AIL_EVENT(xfs_ail_delete); - -TRACE_EVENT(xfs_log_assign_tail_lsn, - TP_PROTO(struct xlog *log, xfs_lsn_t new_lsn), - TP_ARGS(log, new_lsn), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_lsn_t, new_lsn) - __field(xfs_lsn_t, old_lsn) - __field(xfs_lsn_t, last_sync_lsn) - ), - TP_fast_assign( - __entry->dev = log->l_mp->m_super->s_dev; - __entry->new_lsn = new_lsn; - __entry->old_lsn = atomic64_read(&log->l_tail_lsn); - __entry->last_sync_lsn = atomic64_read(&log->l_last_sync_lsn); - ), - TP_printk("dev %d:%d new tail lsn %d/%d, old lsn %d/%d, last sync %d/%d", - MAJOR(__entry->dev), MINOR(__entry->dev), - CYCLE_LSN(__entry->new_lsn), BLOCK_LSN(__entry->new_lsn), - CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn), - CYCLE_LSN(__entry->last_sync_lsn), BLOCK_LSN(__entry->last_sync_lsn)) -) DECLARE_EVENT_CLASS(xfs_file_class, TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c812c5c..5411e01 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -18,21 +18,32 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_error.h" +#include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_alloc.h" #include "xfs_extent_busy.h" +#include "xfs_bmap.h" #include "xfs_quota.h" -#include "xfs_trans.h" +#include "xfs_qm.h" #include "xfs_trans_priv.h" -#include "xfs_log.h" +#include "xfs_trans_space.h" +#include "xfs_inode_item.h" +#include "xfs_log_priv.h" +#include "xfs_buf_item.h" #include "xfs_trace.h" -#include "xfs_error.h" kmem_zone_t *xfs_trans_zone; kmem_zone_t *xfs_log_item_desc_zone; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 9b96d35..09cf40b 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -18,6 +18,10 @@ #ifndef __XFS_TRANS_H__ #define __XFS_TRANS_H__ +struct xfs_log_item; + +#include "xfs_trans_resv.h" + /* kernel only transaction subsystem defines */ struct xfs_buf; @@ -73,9 +77,6 @@ struct xfs_item_ops { void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); }; -void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, - int type, const struct xfs_item_ops *ops); - /* * Return values for the iop_push() routines. */ @@ -84,12 +85,18 @@ void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, #define XFS_ITEM_LOCKED 2 #define XFS_ITEM_FLUSHING 3 +/* + * This is the type of function which can be given to xfs_trans_callback() + * to be called upon the transaction's commit to disk. + */ +typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *); /* * This is the structure maintained for every active transaction. */ typedef struct xfs_trans { unsigned int t_magic; /* magic number */ + xfs_log_callback_t t_logcb; /* log callback struct */ unsigned int t_type; /* transaction type */ unsigned int t_log_res; /* amt of log space resvd */ unsigned int t_log_count; /* count for perm log res */ @@ -125,6 +132,7 @@ typedef struct xfs_trans { int64_t t_rextents_delta;/* superblocks rextents chg */ int64_t t_rextslog_delta;/* superblocks rextslog chg */ struct list_head t_items; /* log item descriptors */ + xfs_trans_header_t t_header; /* header for in-log trans */ struct list_head t_busy; /* list of busy extents */ unsigned long t_pflags; /* saved process flags state */ } xfs_trans_t; @@ -229,16 +237,10 @@ void xfs_trans_log_efd_extent(xfs_trans_t *, xfs_fsblock_t, xfs_extlen_t); int xfs_trans_commit(xfs_trans_t *, uint flags); -int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); void xfs_trans_cancel(xfs_trans_t *, int); int xfs_trans_ail_init(struct xfs_mount *); void xfs_trans_ail_destroy(struct xfs_mount *); -void xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *, - enum xfs_blft); -void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, - struct xfs_buf *src_bp); - extern kmem_zone_t *xfs_trans_zone; extern kmem_zone_t *xfs_log_item_desc_zone; diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index a728735..21c6d7d 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -18,16 +18,15 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_trace.h" #include "xfs_error.h" -#include "xfs_log.h" #ifdef DEBUG /* @@ -659,13 +658,11 @@ xfs_trans_ail_update_bulk( if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0) continue; - trace_xfs_ail_move(lip, lip->li_lsn, lsn); xfs_ail_delete(ailp, lip); if (mlip == lip) mlip_changed = 1; } else { lip->li_flags |= XFS_LI_IN_AIL; - trace_xfs_ail_insert(lip, 0, lsn); } lip->li_lsn = lsn; list_add(&lip->li_ail, &tmp); @@ -734,7 +731,6 @@ xfs_trans_ail_delete_bulk( return; } - trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn); xfs_ail_delete(ailp, lip); lip->li_flags &= ~XFS_LI_IN_AIL; lip->li_lsn = 0; diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index c035d11..8c75b8f 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -17,15 +17,17 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" #include "xfs_error.h" diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index cd2a10e..54ee3c5 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -17,18 +17,23 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" #include "xfs_inode.h" +#include "xfs_itable.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" #include "xfs_error.h" -#include "xfs_trans.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" #include "xfs_trans_priv.h" -#include "xfs_quota.h" #include "xfs_qm.h" STATIC void xfs_trans_alloc_dqinfo(xfs_trans_t *); diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index 47978ba..8d71b16 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c @@ -17,13 +17,12 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_extfree_item.h" diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 50c3f56..53dfe46 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -17,15 +17,18 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_trans.h" +#include "xfs_btree.h" #include "xfs_trans_priv.h" #include "xfs_inode_item.h" #include "xfs_trace.h" @@ -111,14 +114,12 @@ xfs_trans_log_inode( /* * First time we log the inode in a transaction, bump the inode change - * counter if it is configured for this to occur. We don't use - * inode_inc_version() because there is no need for extra locking around - * i_version as we already hold the inode locked exclusively for - * metadata modification. + * counter if it is configured for this to occur. */ if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) && IS_I_VERSION(VFS_I(ip))) { - ip->i_d.di_changecount = ++VFS_I(ip)->i_version; + inode_inc_iversion(VFS_I(ip)); + ip->i_d.di_changecount = VFS_I(ip)->i_version; flags |= XFS_ILOG_CORE; } diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 12e86af..c52def0 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -27,6 +27,7 @@ struct xfs_log_vec; void xfs_trans_init(struct xfs_mount *); +int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); void xfs_trans_del_item(struct xfs_log_item *); void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c index 2fd59c0..a65a3cc4 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/xfs_trans_resv.c @@ -18,19 +18,27 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" +#include "xfs_log.h" #include "xfs_trans_resv.h" +#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_inode.h" +#include "xfs_error.h" +#include "xfs_da_btree.h" #include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_btree.h" #include "xfs_ialloc.h" +#include "xfs_alloc.h" +#include "xfs_extent_busy.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" #include "xfs_quota.h" -#include "xfs_trans.h" #include "xfs_qm.h" #include "xfs_trans_space.h" #include "xfs_trace.h" @@ -385,7 +393,8 @@ xfs_calc_ifree_reservation( xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + - max_t(uint, XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) + + MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), + XFS_INODE_CLUSTER_SIZE(mp)) + xfs_calc_buf_res(1, 0) + xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels, 0) + diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h index 3e8e797..db14d0c 100644 --- a/fs/xfs/xfs_vnode.h +++ b/fs/xfs/xfs_vnode.h @@ -25,6 +25,14 @@ struct xfs_inode; struct attrlist_cursor_kern; /* + * Return values for xfs_inactive. A return value of + * VN_INACTIVE_NOCACHE implies that the file system behavior + * has disassociated its state and bhv_desc_t from the vnode. + */ +#define VN_INACTIVE_CACHE 0 +#define VN_INACTIVE_NOCACHE 1 + +/* * Flags for read/write calls - same values as IRIX */ #define IO_ISDIRECT 0x00004 /* bypass page cache */ diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 9d47907..e01f35e 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -17,13 +17,9 @@ */ #include "xfs.h" -#include "xfs_format.h" #include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" #include "xfs_inode.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" |