From 009d851837ab26cab18adda6169a813f70b0b21b Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 8 Dec 2009 12:12:13 +0000 Subject: GFS2: Metadata address space clean up Since the start of GFS2, an "extra" inode has been used to store the metadata belonging to each inode. The only reason for using this inode was to have an extra address space; the other fields were unused. This means that the memory usage was rather inefficient. The reason for keeping each inode's metadata in a separate address space is that when glocks are requested on remote nodes, we need to be able to efficiently locate the data and metadata which relate to that glock (inode) in order to sync or sync and invalidate it (depending on the remotely requested lock mode). This patch adds a new type of glock, which, in addition to its normal fields, has an address space. This applies to all inode and rgrp glocks (but to no other glock types, which remain as before). As a result, we no longer need to have the second inode. This results in three major improvements: 1. A saving of approx 25% of memory used in caching inodes 2. A removal of the circular dependency between inodes and glocks 3. No confusion between "normal" and "metadata" inodes in super.c Although the first of these is the most immediately apparent, the second is just as important as it now enables a number of clean ups at umount time. Those will be the subject of future patches. 
Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 7b8da94..0c1d0b8 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1061,8 +1061,8 @@ out: int gfs2_releasepage(struct page *page, gfp_t gfp_mask) { - struct inode *aspace = page->mapping->host; - struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info; + struct address_space *mapping = page->mapping; + struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); struct buffer_head *bh, *head; struct gfs2_bufdata *bd; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f426633..dfb10a4 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -154,12 +154,14 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp, static void glock_free(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; - struct inode *aspace = gl->gl_aspace; + struct address_space *mapping = gfs2_glock2aspace(gl); + struct kmem_cache *cachep = gfs2_glock_cachep; - if (aspace) - gfs2_aspace_put(aspace); + GLOCK_BUG_ON(gl, mapping && mapping->nrpages); trace_gfs2_glock_put(gl); - sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl); + if (mapping) + cachep = gfs2_glock_aspace_cachep; + sdp->sd_lockstruct.ls_ops->lm_put_lock(cachep, gl); } /** @@ -750,10 +752,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp) { + struct super_block *s = sdp->sd_vfs; struct lm_lockname name = { .ln_number = number, .ln_type = glops->go_type }; struct gfs2_glock *gl, *tmp; unsigned int hash = gl_hash(sdp, &name); - int error; + struct address_space *mapping; read_lock(gl_lock_addr(hash)); gl = search_bucket(hash, sdp, &name); @@ -765,7 +768,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, if (!create) return -ENOENT; - gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL); + if (glops->go_flags & GLOF_ASPACE) + gl = kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_KERNEL); + else + gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL); if (!gl) 
return -ENOMEM; @@ -784,18 +790,18 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_tchange = jiffies; gl->gl_object = NULL; gl->gl_sbd = sdp; - gl->gl_aspace = NULL; INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); INIT_WORK(&gl->gl_delete, delete_work_func); - /* If this glock protects actual on-disk data or metadata blocks, - create a VFS inode to manage the pages/buffers holding them. */ - if (glops == &gfs2_inode_glops || glops == &gfs2_rgrp_glops) { - gl->gl_aspace = gfs2_aspace_get(sdp); - if (!gl->gl_aspace) { - error = -ENOMEM; - goto fail; - } + mapping = gfs2_glock2aspace(gl); + if (mapping) { + mapping->a_ops = &gfs2_meta_aops; + mapping->host = s->s_bdev->bd_inode; + mapping->flags = 0; + mapping_set_gfp_mask(mapping, GFP_NOFS); + mapping->assoc_mapping = NULL; + mapping->backing_dev_info = s->s_bdi; + mapping->writeback_index = 0; } write_lock(gl_lock_addr(hash)); @@ -812,10 +818,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, *glp = gl; return 0; - -fail: - kmem_cache_free(gfs2_glock_cachep, gl); - return error; } /** diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index c0262fa..2bda191 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -180,6 +180,13 @@ static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl) return gl->gl_state == LM_ST_SHARED; } +static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) +{ + if (gl->gl_ops->go_flags & GLOF_ASPACE) + return (struct address_space *)(gl + 1); + return NULL; +} + int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 78554ac..38e3749 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -87,7 +87,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) static void rgrp_go_sync(struct gfs2_glock *gl) { - struct address_space *metamapping = gl->gl_aspace->i_mapping; + struct address_space *metamapping = 
gfs2_glock2aspace(gl); int error; if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) @@ -113,7 +113,7 @@ static void rgrp_go_sync(struct gfs2_glock *gl) static void rgrp_go_inval(struct gfs2_glock *gl, int flags) { - struct address_space *mapping = gl->gl_aspace->i_mapping; + struct address_space *mapping = gfs2_glock2aspace(gl); BUG_ON(!(flags & DIO_METADATA)); gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); @@ -134,7 +134,7 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags) static void inode_go_sync(struct gfs2_glock *gl) { struct gfs2_inode *ip = gl->gl_object; - struct address_space *metamapping = gl->gl_aspace->i_mapping; + struct address_space *metamapping = gfs2_glock2aspace(gl); int error; if (ip && !S_ISREG(ip->i_inode.i_mode)) @@ -183,7 +183,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); if (flags & DIO_METADATA) { - struct address_space *mapping = gl->gl_aspace->i_mapping; + struct address_space *mapping = gfs2_glock2aspace(gl); truncate_inode_pages(mapping, 0); if (ip) { set_bit(GIF_INVALID, &ip->i_flags); @@ -282,7 +282,8 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) static int rgrp_go_demote_ok(const struct gfs2_glock *gl) { - return !gl->gl_aspace->i_mapping->nrpages; + const struct address_space *mapping = (const struct address_space *)(gl + 1); + return !mapping->nrpages; } /** @@ -387,8 +388,7 @@ static void iopen_go_callback(struct gfs2_glock *gl) struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object; if (gl->gl_demote_state == LM_ST_UNLOCKED && - gl->gl_state == LM_ST_SHARED && - ip && test_bit(GIF_USER, &ip->i_flags)) { + gl->gl_state == LM_ST_SHARED && ip) { gfs2_glock_hold(gl); if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) gfs2_glock_put_nolock(gl); @@ -407,6 +407,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_dump = inode_go_dump, .go_type = LM_TYPE_INODE, 
.go_min_hold_time = HZ / 5, + .go_flags = GLOF_ASPACE, }; const struct gfs2_glock_operations gfs2_rgrp_glops = { @@ -418,6 +419,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_dump = gfs2_rgrp_dump, .go_type = LM_TYPE_RGRP, .go_min_hold_time = HZ / 5, + .go_flags = GLOF_ASPACE, }; const struct gfs2_glock_operations gfs2_trans_glops = { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index bc0ad15..1de7e1b 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -162,6 +162,8 @@ struct gfs2_glock_operations { void (*go_callback) (struct gfs2_glock *gl); const int go_type; const unsigned long go_min_hold_time; + const unsigned long go_flags; +#define GLOF_ASPACE 1 }; enum { @@ -225,7 +227,6 @@ struct gfs2_glock { struct gfs2_sbd *gl_sbd; - struct inode *gl_aspace; struct list_head gl_ail_list; atomic_t gl_ail_count; struct delayed_work gl_work; @@ -258,7 +259,6 @@ enum { GIF_INVALID = 0, GIF_QD_LOCKED = 1, GIF_SW_PAGED = 3, - GIF_USER = 4, /* user inode, not metadata addr space */ }; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 6e220f4..b1bf269 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -45,7 +45,7 @@ static int iget_test(struct inode *inode, void *opaque) struct gfs2_inode *ip = GFS2_I(inode); u64 *no_addr = opaque; - if (ip->i_no_addr == *no_addr && test_bit(GIF_USER, &ip->i_flags)) + if (ip->i_no_addr == *no_addr) return 1; return 0; @@ -58,7 +58,6 @@ static int iget_set(struct inode *inode, void *opaque) inode->i_ino = (unsigned long)*no_addr; ip->i_no_addr = *no_addr; - set_bit(GIF_USER, &ip->i_flags); return 0; } @@ -84,7 +83,7 @@ static int iget_skip_test(struct inode *inode, void *opaque) struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_skip_data *data = opaque; - if (ip->i_no_addr == data->no_addr && test_bit(GIF_USER, &ip->i_flags)){ + if (ip->i_no_addr == data->no_addr) { if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){ data->skipped = 1; return 0; @@ -103,7 +102,6 @@ static int iget_skip_set(struct inode *inode, 
void *opaque) return 1; inode->i_ino = (unsigned long)(data->no_addr); ip->i_no_addr = data->no_addr; - set_bit(GIF_USER, &ip->i_flags); return 0; } diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 0e5e0e7..569b462 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -30,7 +30,10 @@ static void gdlm_ast(void *arg) switch (gl->gl_lksb.sb_status) { case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ - kmem_cache_free(gfs2_glock_cachep, gl); + if (gl->gl_ops->go_flags & GLOF_ASPACE) + kmem_cache_free(gfs2_glock_aspace_cachep, gl); + else + kmem_cache_free(gfs2_glock_cachep, gl); if (atomic_dec_and_test(&sdp->sd_glock_disposal)) wake_up(&sdp->sd_glock_wait); return; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 5b31f77..a88fadc 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -52,6 +52,22 @@ static void gfs2_init_glock_once(void *foo) atomic_set(&gl->gl_ail_count, 0); } +static void gfs2_init_gl_aspace_once(void *foo) +{ + struct gfs2_glock *gl = foo; + struct address_space *mapping = (struct address_space *)(gl + 1); + + gfs2_init_glock_once(gl); + memset(mapping, 0, sizeof(*mapping)); + INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); + spin_lock_init(&mapping->tree_lock); + spin_lock_init(&mapping->i_mmap_lock); + INIT_LIST_HEAD(&mapping->private_list); + spin_lock_init(&mapping->private_lock); + INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); + INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); +} + /** * init_gfs2_fs - Register GFS2 as a filesystem * @@ -78,6 +94,14 @@ static int __init init_gfs2_fs(void) if (!gfs2_glock_cachep) goto fail; + gfs2_glock_aspace_cachep = kmem_cache_create("gfs2_glock (aspace)", + sizeof(struct gfs2_glock) + + sizeof(struct address_space), + 0, 0, gfs2_init_gl_aspace_once); + + if (!gfs2_glock_aspace_cachep) + goto fail; + gfs2_inode_cachep = kmem_cache_create("gfs2_inode", sizeof(struct gfs2_inode), 0, SLAB_RECLAIM_ACCOUNT| @@ -144,6 +168,9 @@ fail: if (gfs2_inode_cachep) kmem_cache_destroy(gfs2_inode_cachep); + if 
(gfs2_glock_aspace_cachep) + kmem_cache_destroy(gfs2_glock_aspace_cachep); + if (gfs2_glock_cachep) kmem_cache_destroy(gfs2_glock_cachep); @@ -169,6 +196,7 @@ static void __exit exit_gfs2_fs(void) kmem_cache_destroy(gfs2_rgrpd_cachep); kmem_cache_destroy(gfs2_bufdata_cachep); kmem_cache_destroy(gfs2_inode_cachep); + kmem_cache_destroy(gfs2_glock_aspace_cachep); kmem_cache_destroy(gfs2_glock_cachep); gfs2_sys_uninit(); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 6f68a5f..0bb12c8 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -93,49 +93,13 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb return err; } -static const struct address_space_operations aspace_aops = { +const struct address_space_operations gfs2_meta_aops = { .writepage = gfs2_aspace_writepage, .releasepage = gfs2_releasepage, .sync_page = block_sync_page, }; /** - * gfs2_aspace_get - Create and initialize a struct inode structure - * @sdp: the filesystem the aspace is in - * - * Right now a struct inode is just a struct inode. Maybe Linux - * will supply a more lightweight address space construct (that works) - * in the future. - * - * Make sure pages/buffers in this aspace aren't in high memory. 
- * - * Returns: the aspace - */ - -struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp) -{ - struct inode *aspace; - struct gfs2_inode *ip; - - aspace = new_inode(sdp->sd_vfs); - if (aspace) { - mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS); - aspace->i_mapping->a_ops = &aspace_aops; - aspace->i_size = MAX_LFS_FILESIZE; - ip = GFS2_I(aspace); - clear_bit(GIF_USER, &ip->i_flags); - insert_inode_hash(aspace); - } - return aspace; -} - -void gfs2_aspace_put(struct inode *aspace) -{ - remove_inode_hash(aspace); - iput(aspace); -} - -/** * gfs2_meta_sync - Sync all buffers associated with a glock * @gl: The glock * @@ -143,7 +107,7 @@ void gfs2_aspace_put(struct inode *aspace) void gfs2_meta_sync(struct gfs2_glock *gl) { - struct address_space *mapping = gl->gl_aspace->i_mapping; + struct address_space *mapping = gfs2_glock2aspace(gl); int error; filemap_fdatawrite(mapping); @@ -164,7 +128,7 @@ void gfs2_meta_sync(struct gfs2_glock *gl) struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) { - struct address_space *mapping = gl->gl_aspace->i_mapping; + struct address_space *mapping = gfs2_glock2aspace(gl); struct gfs2_sbd *sdp = gl->gl_sbd; struct page *page; struct buffer_head *bh; @@ -344,8 +308,10 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta) { - struct gfs2_sbd *sdp = GFS2_SB(bh->b_page->mapping->host); + struct address_space *mapping = bh->b_page->mapping; + struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); struct gfs2_bufdata *bd = bh->b_private; + if (test_clear_buffer_pinned(bh)) { list_del_init(&bd->bd_le.le_list); if (meta) { diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index de270c2..6a1d9ba 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -37,8 +37,16 @@ static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh, 0, from_head - to_head); } -struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp); 
-void gfs2_aspace_put(struct inode *aspace); +extern const struct address_space_operations gfs2_meta_aops; + +static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) +{ + struct inode *inode = mapping->host; + if (mapping->a_ops == &gfs2_meta_aops) + return (((struct gfs2_glock *)mapping) - 1)->gl_sbd; + else + return inode->i_sb->s_fs_info; +} void gfs2_meta_sync(struct gfs2_glock *gl); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b9dd3da..ad7bc2d 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -722,8 +722,7 @@ static int gfs2_write_inode(struct inode *inode, int sync) int ret = 0; /* Check this is a "normal" inode, etc */ - if (!test_bit(GIF_USER, &ip->i_flags) || - (current->flags & PF_MEMALLOC)) + if (current->flags & PF_MEMALLOC) return 0; ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); if (ret) @@ -1194,7 +1193,7 @@ static void gfs2_drop_inode(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); - if (test_bit(GIF_USER, &ip->i_flags) && inode->i_nlink) { + if (inode->i_nlink) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) clear_nlink(inode); @@ -1212,18 +1211,12 @@ static void gfs2_clear_inode(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); - /* This tells us its a "real" inode and not one which only - * serves to contain an address space (see rgrp.c, meta_io.c) - * which therefore doesn't have its own glocks. 
- */ - if (test_bit(GIF_USER, &ip->i_flags)) { - ip->i_gl->gl_object = NULL; - gfs2_glock_put(ip->i_gl); - ip->i_gl = NULL; - if (ip->i_iopen_gh.gh_gl) { - ip->i_iopen_gh.gh_gl->gl_object = NULL; - gfs2_glock_dq_uninit(&ip->i_iopen_gh); - } + ip->i_gl->gl_object = NULL; + gfs2_glock_put(ip->i_gl); + ip->i_gl = NULL; + if (ip->i_iopen_gh.gh_gl) { + ip->i_iopen_gh.gh_gl->gl_object = NULL; + gfs2_glock_dq_uninit(&ip->i_iopen_gh); } } @@ -1358,9 +1351,6 @@ static void gfs2_delete_inode(struct inode *inode) struct gfs2_holder gh; int error; - if (!test_bit(GIF_USER, &ip->i_flags)) - goto out; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); if (unlikely(error)) { gfs2_glock_dq_uninit(&ip->i_iopen_gh); diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index f6a7efa..226f2bf 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -21,6 +21,7 @@ #include "util.h" struct kmem_cache *gfs2_glock_cachep __read_mostly; +struct kmem_cache *gfs2_glock_aspace_cachep __read_mostly; struct kmem_cache *gfs2_inode_cachep __read_mostly; struct kmem_cache *gfs2_bufdata_cachep __read_mostly; struct kmem_cache *gfs2_rgrpd_cachep __read_mostly; diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 33e96b0..b432e04 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -145,6 +145,7 @@ gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__); extern struct kmem_cache *gfs2_glock_cachep; +extern struct kmem_cache *gfs2_glock_aspace_cachep; extern struct kmem_cache *gfs2_inode_cachep; extern struct kmem_cache *gfs2_bufdata_cachep; extern struct kmem_cache *gfs2_rgrpd_cachep; -- cgit v0.10.2 From c1184f8ab7ea26681f3cab18284a870aad678b0f Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 8 Jan 2010 16:14:29 +0000 Subject: GFS2: Remove loopy umount code As a consequence of the previous patch, we can now remove the loop which used to be required due to the circular dependency between the inodes and glocks. 
Instead we can just invalidate the inodes, and then clear up any glocks which are left. Also we no longer need the rwsem since there is no longer any danger of the inode invalidation calling back into the glock code (and from there back into the inode code). Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index dfb10a4..4773f90 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -60,7 +59,6 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); -static DECLARE_RWSEM(gfs2_umount_flush_sem); static struct dentry *gfs2_root; static struct workqueue_struct *glock_workqueue; struct workqueue_struct *gfs2_delete_workqueue; @@ -714,7 +712,6 @@ static void glock_work_func(struct work_struct *work) finish_xmote(gl, gl->gl_reply); drop_ref = 1; } - down_read(&gfs2_umount_flush_sem); spin_lock(&gl->gl_spin); if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && gl->gl_state != LM_ST_UNLOCKED && @@ -727,7 +724,6 @@ static void glock_work_func(struct work_struct *work) } run_queue(gl, 0); spin_unlock(&gl->gl_spin); - up_read(&gfs2_umount_flush_sem); if (!delay || queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); @@ -1512,35 +1508,10 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp) void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) { - unsigned long t; unsigned int x; - int cont; - t = jiffies; - - for (;;) { - cont = 0; - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) { - if (examine_bucket(clear_glock, sdp, x)) - cont = 1; - } - - if (!cont) - break; - - if (time_after_eq(jiffies, - t + gfs2_tune_get(sdp, gt_stall_secs) * HZ)) { - fs_warn(sdp, "Unmount seems to be stalled. 
" - "Dumping lock state...\n"); - gfs2_dump_lockstate(sdp); - t = jiffies; - } - - down_write(&gfs2_umount_flush_sem); - invalidate_inodes(sdp->sd_vfs); - up_write(&gfs2_umount_flush_sem); - msleep(10); - } + for (x = 0; x < GFS2_GL_HASH_SIZE; x++) + examine_bucket(clear_glock, sdp, x); flush_workqueue(glock_workqueue); wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); gfs2_dump_lockstate(sdp); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 1de7e1b..b8025e5 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -451,7 +451,6 @@ struct gfs2_tune { unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ unsigned int gt_new_files_jdata; unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ - unsigned int gt_stall_secs; /* Detects trouble! */ unsigned int gt_complain_secs; unsigned int gt_statfs_quantum; unsigned int gt_statfs_slow; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index a86ed63..a054b52 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -65,7 +65,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt) gt->gt_quota_scale_den = 1; gt->gt_new_files_jdata = 0; gt->gt_max_readahead = 1 << 18; - gt->gt_stall_secs = 600; gt->gt_complain_secs = 10; } @@ -1241,10 +1240,9 @@ fail_sb: fail_locking: init_locking(sdp, &mount_gh, UNDO); fail_lm: + invalidate_inodes(sb); gfs2_gl_hash_clear(sdp); gfs2_lm_unmount(sdp); - while (invalidate_inodes(sb)) - yield(); fail_sys: gfs2_sys_fs_del(sdp); fail: diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ad7bc2d..e5e2262 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -859,6 +859,7 @@ restart: gfs2_clear_rgrpd(sdp); gfs2_jindex_free(sdp); /* Take apart glock structures and buffer lists */ + invalidate_inodes(sdp->sd_vfs); gfs2_gl_hash_clear(sdp); /* Unmount the locking protocol */ gfs2_lm_unmount(sdp); diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 0dc3462..a0db1c9 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -478,7 +478,6 @@ 
TUNE_ATTR(complain_secs, 0); TUNE_ATTR(statfs_slow, 0); TUNE_ATTR(new_files_jdata, 0); TUNE_ATTR(quota_simul_sync, 1); -TUNE_ATTR(stall_secs, 1); TUNE_ATTR(statfs_quantum, 1); TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); @@ -491,7 +490,6 @@ static struct attribute *tune_attrs[] = { &tune_attr_complain_secs.attr, &tune_attr_statfs_slow.attr, &tune_attr_quota_simul_sync.attr, - &tune_attr_stall_secs.attr, &tune_attr_statfs_quantum.attr, &tune_attr_quota_scale.attr, &tune_attr_new_files_jdata.attr, -- cgit v0.10.2 From 1ccaba3056796ab1f933736d763ffcd1958866cd Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Thu, 10 Dec 2009 18:52:54 -0500 Subject: GFS2: Remove old, unused linked list code from quota This is the kernel portion of the patch-set for upstream gfs2, to remove the quota-linked-list stuff and replace it with fiemap-based traversal of the quota file. The corresponding userland fixes have been pushed to STABLE3 and master branches of cluster.git and gfs2-utils.git respectively (Refer Red Hat bug #536902). Signed-off-by: Abhi Das Signed-off-by: Steven Whitehouse diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 81f90a5..4f44629 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -180,33 +180,6 @@ struct gfs2_rgrp { }; /* - * quota linked list: user quotas and group quotas form two separate - * singly linked lists. ll_next stores uids or gids of next quotas in the - * linked list. 
- -Given the uid/gid, how to calculate the quota file offsets for the corresponding -gfs2_quota structures on disk: - -for user quotas, given uid, -offset = uid * sizeof(struct gfs2_quota); - -for group quotas, given gid, -offset = (gid * sizeof(struct gfs2_quota)) + sizeof(struct gfs2_quota); - - - uid:0 gid:0 uid:12 gid:12 uid:17 gid:17 uid:5142 gid:5142 -+-------+-------+ +-------+-------+ +-------+- - - -+ +- - - -+-------+ -| valid | valid | :: | valid | valid | :: | valid | inval | :: | inval | valid | -+-------+-------+ +-------+-------+ +-------+- - - -+ +- - - -+-------+ -next:12 next:12 next:17 next:5142 next:NULL next:NULL - | | | | |<-- user quota list | - \______|___________/ \______|___________/ group quota list -->| - | | | - \__________________/ \_______________________________________/ - -*/ - -/* * quota structure */ @@ -214,8 +187,7 @@ struct gfs2_quota { __be64 qu_limit; __be64 qu_warn; __be64 qu_value; - __be32 qu_ll_next; /* location of next quota in list */ - __u8 qu_reserved[60]; + __u8 qu_reserved[64]; }; /* -- cgit v0.10.2 From e5884636da3a128617032747654284ae7badc7ff Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 5 Feb 2010 16:45:25 +1100 Subject: GFS2: ordered writes are backwards When we queue data buffers for ordered write, the buffers are added to the head of the ordered write list. When the log needs to push these buffers to disk, it also walks the list from the head. The result is that the ordered buffers are submitted to disk in reverse order. For large writes, this means that whenever the log flushes, large streams of reverse sequential order buffers are pushed down into the block layers. The elevators don't handle this particularly well, so IO rates tend to be significantly lower than if the IO was issued in ascending block order. Queue new ordered buffers to the tail of the ordered buffer list to ensure that IO is dispatched in the order it was submitted. 
This should significantly improve large sequential write speeds. On a disk capable of 85MB/s, speeds increase from 50MB/s to 65MB/s for noop and from 38MB/s to 50MB/s for cfq. Signed-off-by: Dave Chinner Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index de97632..adc260f 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -528,9 +528,9 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) gfs2_pin(sdp, bd->bd_bh); tr->tr_num_databuf_new++; sdp->sd_log_num_databuf++; - list_add(&le->le_list, &sdp->sd_log_le_databuf); + list_add_tail(&le->le_list, &sdp->sd_log_le_databuf); } else { - list_add(&le->le_list, &sdp->sd_log_le_ordered); + list_add_tail(&le->le_list, &sdp->sd_log_le_ordered); } out: gfs2_log_unlock(sdp); -- cgit v0.10.2 From 4818972efb105730f007e5efc05e203a065fc318 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 23 Feb 2010 12:20:00 -0500 Subject: GFS2: print glock numbers in hex This patch changes glock numbers from printing in decimal to hex. Since DLM prints corresponding resource IDs in hex, it makes debugging easier. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 4773f90..454d4b4 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1658,7 +1658,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) dtime *= 1000000/HZ; /* demote time in uSec */ if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) dtime = 0; - gfs2_print_dbg(seq, "G: s:%s n:%u/%llu f:%s t:%s d:%s/%llu a:%d r:%d\n", + gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n", state2str(gl->gl_state), gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, -- cgit v0.10.2