From 07c8f67587724b417f60bffb32c448dd94647b54 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 16:11:59 +1100 Subject: [XFS] Make use of the init-once slab optimisation. To avoid having to initialise some fields of the XFS inode on every allocation, we can use the slab init-once feature to initialise them. All we have to guarantee is that when we free the inode, all it's entries are in the initial state. Add asserts where possible to ensure debug kernels check this initial state before freeing and after allocation. SGI-PV: 981498 SGI-Modid: xfs-linux-melb:xfs-kern:31925a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 37ebe36..d1c4dec 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -887,6 +887,41 @@ xfs_fs_inode_init_once( inode_init_once((struct inode *)vnode); } + +/* + * Slab object creation initialisation for the XFS inode. + * This covers only the idempotent fields in the XFS inode; + * all other fields need to be initialised on allocation + * from the slab. This avoids the need to repeatedly intialise + * fields in the xfs inode that left in the initialise state + * when freeing the inode. + */ +void +xfs_inode_init_once( + kmem_zone_t *zone, + void *inode) +{ + struct xfs_inode *ip = inode; + + memset(ip, 0, sizeof(struct xfs_inode)); + atomic_set(&ip->i_iocount, 0); + atomic_set(&ip->i_pincount, 0); + spin_lock_init(&ip->i_flags_lock); + INIT_LIST_HEAD(&ip->i_reclaim); + init_waitqueue_head(&ip->i_ipin_wait); + /* + * Because we want to use a counting completion, complete + * the flush completion once to allow a single access to + * the flush completion without blocking. + */ + init_completion(&ip->i_flush); + complete(&ip->i_flush); + + mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, + "xfsino", ip->i_ino); + mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); +} + /* * Attempt to flush the inode, this will actually fail * if the inode is pinned, but we dirty the inode again @@ -2018,7 +2053,7 @@ xfs_init_zones(void) xfs_inode_zone = kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | - KM_ZONE_SPREAD, NULL); + KM_ZONE_SPREAD, xfs_inode_init_once); if (!xfs_inode_zone) goto out_destroy_efi_zone; diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index e229e9e..5be89d7 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -210,21 +210,6 @@ finish_inode: xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); - - mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, - "xfsino", ip->i_ino); - mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); - init_waitqueue_head(&ip->i_ipin_wait); - atomic_set(&ip->i_pincount, 0); - - /* - * Because we want to use a counting completion, complete - * the flush completion once to allow a single access to - * the flush completion without blocking. - */ - init_completion(&ip->i_flush); - complete(&ip->i_flush); - if (lock_flags) xfs_ilock(ip, lock_flags); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index dbd9cef..b0c604e 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -788,6 +788,70 @@ xfs_dic2xflags( } /* + * Allocate and initialise an xfs_inode. + */ +struct xfs_inode * +xfs_inode_alloc( + struct xfs_mount *mp, + xfs_ino_t ino) +{ + struct xfs_inode *ip; + + /* + * if this didn't occur in transactions, we could use + * KM_MAYFAIL and return NULL here on ENOMEM. 
Set the + * code up to do this anyway. + */ + ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); + if (!ip) + return NULL; + + ASSERT(atomic_read(&ip->i_iocount) == 0); + ASSERT(atomic_read(&ip->i_pincount) == 0); + ASSERT(!spin_is_locked(&ip->i_flags_lock)); + ASSERT(list_empty(&ip->i_reclaim)); + + ip->i_ino = ino; + ip->i_mount = mp; + ip->i_blkno = 0; + ip->i_len = 0; + ip->i_boffset =0; + ip->i_afp = NULL; + memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); + ip->i_flags = 0; + ip->i_update_core = 0; + ip->i_update_size = 0; + ip->i_delayed_blks = 0; + memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); + ip->i_size = 0; + ip->i_new_size = 0; + + /* + * Initialize inode's trace buffers. + */ +#ifdef XFS_INODE_TRACE + ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_BMAP_TRACE + ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_BMBT_TRACE + ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_RW_TRACE + ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_ILOCK_TRACE + ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_DIR2_TRACE + ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); +#endif + + return ip; +} + +/* * Given a mount structure and an inode number, return a pointer * to a newly allocated in-core inode corresponding to the given * inode number. @@ -809,13 +873,9 @@ xfs_iread( xfs_inode_t *ip; int error; - ASSERT(xfs_inode_zone != NULL); - - ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP); - ip->i_ino = ino; - ip->i_mount = mp; - atomic_set(&ip->i_iocount, 0); - spin_lock_init(&ip->i_flags_lock); + ip = xfs_inode_alloc(mp, ino); + if (!ip) + return ENOMEM; /* * Get pointer's to the on-disk inode and the buffer containing it. @@ -831,34 +891,11 @@ xfs_iread( } /* - * Initialize inode's trace buffers. - * Do this before xfs_iformat in case it adds entries. - */ -#ifdef XFS_INODE_TRACE - ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_BMAP_TRACE - ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_BMBT_TRACE - ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_RW_TRACE - ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_ILOCK_TRACE - ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_DIR2_TRACE - ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); -#endif - - /* * If we got something that isn't an inode it means someone * (nfs or dmi) has a stale handle. */ if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) { - kmem_zone_free(xfs_inode_zone, ip); + xfs_idestroy(ip); xfs_trans_brelse(tp, bp); #ifdef DEBUG xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " @@ -881,7 +918,7 @@ xfs_iread( xfs_dinode_from_disk(&ip->i_d, &dip->di_core); error = xfs_iformat(ip, dip); if (error) { - kmem_zone_free(xfs_inode_zone, ip); + xfs_idestroy(ip); xfs_trans_brelse(tp, bp); #ifdef DEBUG xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " @@ -911,8 +948,6 @@ xfs_iread( XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); } - INIT_LIST_HEAD(&ip->i_reclaim); - /* * The inode format changed when we moved the link count and * made it 32 bits long. 
If this is an old format inode, @@ -2631,8 +2666,6 @@ xfs_idestroy( } if (ip->i_afp) xfs_idestroy_fork(ip, XFS_ATTR_FORK); - mrfree(&ip->i_lock); - mrfree(&ip->i_iolock); #ifdef XFS_INODE_TRACE ktrace_free(ip->i_trace); @@ -2671,7 +2704,13 @@ xfs_idestroy( spin_unlock(&mp->m_ail_lock); } xfs_inode_item_destroy(ip); + ip->i_itemp = NULL; } + /* asserts to verify all state is correct here */ + ASSERT(atomic_read(&ip->i_iocount) == 0); + ASSERT(atomic_read(&ip->i_pincount) == 0); + ASSERT(!spin_is_locked(&ip->i_flags_lock)); + ASSERT(list_empty(&ip->i_reclaim)); kmem_zone_free(xfs_inode_zone, ip); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 1420c49..3af1f6d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -513,6 +513,7 @@ int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, xfs_fsize_t, int, int); int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); +struct xfs_inode * xfs_inode_alloc(struct xfs_mount *, xfs_ino_t); void xfs_idestroy_fork(xfs_inode_t *, int); void xfs_idestroy(xfs_inode_t *); void xfs_idata_realloc(xfs_inode_t *, int, int); diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index cf6754a..4f4c939 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -594,21 +594,21 @@ xfs_bulkstat( /* * Get the inode cluster buffer */ - ASSERT(xfs_inode_zone != NULL); - ip = kmem_zone_zalloc(xfs_inode_zone, - KM_SLEEP); - ip->i_ino = ino; - ip->i_mount = mp; - spin_lock_init(&ip->i_flags_lock); if (bp) xfs_buf_relse(bp); + ip = xfs_inode_alloc(mp, ino); + if (!ip) { + bp = NULL; + rval = ENOMEM; + break; + } error = xfs_itobp(mp, NULL, ip, &dip, &bp, bno, XFS_IMAP_BULKSTAT, XFS_BUF_LOCK); if (!error) clustidx = ip->i_boffset / mp->m_sb.sb_inodesize; - kmem_zone_free(xfs_inode_zone, ip); + xfs_idestroy(ip); if (XFS_TEST_ERROR(error != 0, mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK, XFS_RANDOM_BULKSTAT_READ_CHUNK)) { -- cgit v0.10.2 From be8b78a626dd9bc92c12e9ac34f3bc3db1204d25 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Oct 2008 16:42:34 +1100 Subject: [XFS] Remove kmem_zone_t argument from xfs_inode_init_once() kmem cache constructor no longer takes a kmem_zone_t argument. SGI-PV: 957103 SGI-Modid: xfs-linux-melb:xfs-kern:32254a Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index d1c4dec..9bfb260 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -898,7 +898,6 @@ xfs_fs_inode_init_once( */ void xfs_inode_init_once( - kmem_zone_t *zone, void *inode) { struct xfs_inode *ip = inode; -- cgit v0.10.2 From d07c60e54fb7647d8247ae392f128e8ee8f3e5f3 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Oct 2008 16:50:35 +1100 Subject: [XFS] Use xfs_idestroy() to cleanup an inode. 
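The idea behind this change, sketched informally: xfs_inode_alloc() now does more than a zeroed slab allocation (it also sets up trace buffers and other per-inode state), so every failure path has to unwind through the matching xfs_idestroy() rather than handing the raw object back to the slab. A minimal, self-contained illustration of that pairing, using invented names rather than the real XFS helpers:

    #include <stdlib.h>

    /* Invented names: obj_alloc() must always be paired with obj_destroy(). */
    struct trace_buf { int entries; };
    struct obj       { struct trace_buf *trace; };

    static struct obj *obj_alloc(void)
    {
        struct obj *o = calloc(1, sizeof(*o));

        if (!o)
            return NULL;
        o->trace = calloc(1, sizeof(*o->trace));   /* extra per-object resource */
        return o;
    }

    static void obj_destroy(struct obj *o)
    {
        free(o->trace);    /* only the destructor knows about this */
        free(o);
    }

    int main(void)
    {
        struct obj *o = obj_alloc();

        if (!o)
            return 1;
        /* On any error path: obj_destroy(o), never a bare free(o). */
        obj_destroy(o);
        return 0;
    }

The hunk that follows applies this to the xfs_itobp() failure path in xfs_iread().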
SGI-PV: 981498 SGI-Modid: xfs-linux-melb:xfs-kern:31927a Signed-off-by: Lachlan McIlroy Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b0c604e..2a158a2 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -886,7 +886,7 @@ xfs_iread( */ error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK); if (error) { - kmem_zone_free(xfs_inode_zone, ip); + xfs_idestroy(ip); return error; } -- cgit v0.10.2 From 46039928c9abe466ed1bc0da20c2e596b1d41236 Mon Sep 17 00:00:00 2001 From: Barry Naujok Date: Thu, 30 Oct 2008 16:52:35 +1100 Subject: [XFS] Remove final remnants of dirv1 macros and other stuff SGI-PV: 981498 SGI-Modid: xfs-linux-melb:xfs-kern:32002a Signed-off-by: Barry Naujok Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 8be0b00..599e270 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -72,27 +72,7 @@ typedef struct xfs_da_intnode { typedef struct xfs_da_node_hdr xfs_da_node_hdr_t; typedef struct xfs_da_node_entry xfs_da_node_entry_t; -#define XFS_DA_MAXHASH ((xfs_dahash_t)-1) /* largest valid hash value */ - #define XFS_LBSIZE(mp) (mp)->m_sb.sb_blocksize -#define XFS_LBLOG(mp) (mp)->m_sb.sb_blocklog - -#define XFS_DA_MAKE_BNOENTRY(mp,bno,entry) \ - (((bno) << (mp)->m_dircook_elog) | (entry)) -#define XFS_DA_MAKE_COOKIE(mp,bno,entry,hash) \ - (((xfs_off_t)XFS_DA_MAKE_BNOENTRY(mp, bno, entry) << 32) | (hash)) -#define XFS_DA_COOKIE_HASH(mp,cookie) ((xfs_dahash_t)cookie) -#define XFS_DA_COOKIE_BNO(mp,cookie) \ - ((((xfs_off_t)(cookie) >> 31) == -1LL ? \ - (xfs_dablk_t)0 : \ - (xfs_dablk_t)((xfs_off_t)(cookie) >> \ - ((mp)->m_dircook_elog + 32)))) -#define XFS_DA_COOKIE_ENTRY(mp,cookie) \ - ((((xfs_off_t)(cookie) >> 31) == -1LL ? \ - (xfs_dablk_t)0 : \ - (xfs_dablk_t)(((xfs_off_t)(cookie) >> 32) & \ - ((1 << (mp)->m_dircook_elog) - 1)))) - /*======================================================================== * Btree searching and modification structure definitions. diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index f3c1024..49d647e 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -267,7 +267,6 @@ typedef struct xfs_mount { xfs_buftarg_t *m_ddev_targp; /* saves taking the address */ xfs_buftarg_t *m_logdev_targp;/* ptr to log device */ xfs_buftarg_t *m_rtdev_targp; /* ptr to rt device */ - __uint8_t m_dircook_elog; /* log d-cookie entry bits */ __uint8_t m_blkbit_log; /* blocklog + NBBY */ __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ __uint8_t m_agno_log; /* log #ag's */ -- cgit v0.10.2 From a357a1215602f79182abdde27aaddc7166dbd709 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Oct 2008 16:53:25 +1100 Subject: [XFS] Fix use-after-free with log and quotas Destroying the quota stuff on unmount can access the log - ie XFS_QM_DONE() ends up in xfs_dqunlock() which calls xfs_trans_unlocked_item() and then xfs_log_move_tail(). By this time the log has already been destroyed. Just move the cleanup of the quota code earlier in xfs_unmountfs() before the call to xfs_log_unmount(). Moving XFS_QM_DONE() up near XFS_QM_DQPURGEALL() seems like a good spot. 
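Put generally (a hedged sketch with invented names, not the actual xfs_unmountfs() code): when tearing down subsystem A can still call into subsystem B, B must outlive A, so A has to be shut down first. The ordering bug and its fix look like this in miniature:

    #include <stdio.h>
    #include <stdlib.h>

    /* Invented stand-ins; nothing here is the real XFS API. */
    struct xlog  { int tail; };
    struct quota { struct xlog *log; };

    /* Quota teardown still pokes the log (cf. xfs_dqunlock -> xfs_log_move_tail). */
    static void quota_done(struct quota *q)
    {
        q->log->tail++;        /* use-after-free if the log were already gone */
        free(q);
    }

    static void log_unmount(struct xlog **logp)
    {
        free(*logp);
        *logp = NULL;
    }

    int main(void)
    {
        struct xlog  *log = calloc(1, sizeof(*log));
        struct quota *q   = calloc(1, sizeof(*q));

        if (!log || !q)
            return 1;
        q->log = log;

        quota_done(q);         /* must run first ...            */
        log_unmount(&log);     /* ... before the log goes away  */

        printf("teardown ordered correctly\n");
        return 0;
    }
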
SGI-PV: 987086 SGI-Modid: xfs-linux-melb:xfs-kern:32148a Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig Signed-off-by: Peter Leckie diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index a4503f5..15f5dd2 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1245,6 +1245,9 @@ xfs_unmountfs( XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); + if (mp->m_quotainfo) + XFS_QM_DONE(mp); + /* * Flush out the log synchronously so that we know for sure * that nothing is pinned. This is important because bflush() @@ -1297,8 +1300,6 @@ xfs_unmountfs( xfs_errortag_clearall(mp, 0); #endif xfs_free_perag(mp); - if (mp->m_quotainfo) - XFS_QM_DONE(mp); } STATIC void -- cgit v0.10.2 From f338f9036400e453ab553b16639a9cc838b02d44 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Oct 2008 16:53:38 +1100 Subject: [XFS] Unlock inode before calling xfs_idestroy() Lock debugging reported the ilock was being destroyed without being unlocked. We don't need to lock the inode until we are going to insert it into the radix tree. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32159a Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 5be89d7..4c92d19 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -210,9 +210,6 @@ finish_inode: xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); - if (lock_flags) - xfs_ilock(ip, lock_flags); - if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { xfs_idestroy(ip); xfs_put_perag(mp, pag); @@ -228,6 +225,10 @@ finish_inode: delay(1); goto again; } + + if (lock_flags) + xfs_ilock(ip, lock_flags); + mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); first_index = agino & mask; write_lock(&pag->pag_ici_lock); @@ -239,6 +240,8 @@ finish_inode: BUG_ON(error != -EEXIST); write_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); + if (lock_flags) + xfs_iunlock(ip, lock_flags); xfs_idestroy(ip); XFS_STATS_INC(xs_ig_dup); goto again; -- cgit v0.10.2 From f2277f06e626d694e61bb356524ff536ced24acf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:53:47 +1100 Subject: [XFS] kill struct xfs_btree_hdr This type is only embedded in struct xfs_btree_block and never used directly. By moving the fields directly into struct xfs_btree_block a lot of the macros for struct xfs_btree_sblock and struct xfs_btree_lblock can be used for struct xfs_btree_block too now which helps greatly with some of the migrations during implementing the generic btree code. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32174a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index cc593a8..3100209 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -62,13 +62,13 @@ xfs_btree_maxrecs( case XFS_BTNUM_BNO: case XFS_BTNUM_CNT: return (int)XFS_ALLOC_BLOCK_MAXRECS( - be16_to_cpu(block->bb_h.bb_level), cur); + be16_to_cpu(block->bb_level), cur); case XFS_BTNUM_BMAP: return (int)XFS_BMAP_BLOCK_IMAXRECS( - be16_to_cpu(block->bb_h.bb_level), cur); + be16_to_cpu(block->bb_level), cur); case XFS_BTNUM_INO: return (int)XFS_INOBT_BLOCK_MAXRECS( - be16_to_cpu(block->bb_h.bb_level), cur); + be16_to_cpu(block->bb_level), cur); default: ASSERT(0); return 0; @@ -634,7 +634,7 @@ xfs_btree_firstrec( /* * It's empty, there is no such record. 
*/ - if (!block->bb_h.bb_numrecs) + if (!block->bb_numrecs) return 0; /* * Set the ptr value to 1, that's the first record/key. @@ -663,12 +663,12 @@ xfs_btree_lastrec( /* * It's empty, there is no such record. */ - if (!block->bb_h.bb_numrecs) + if (!block->bb_numrecs) return 0; /* * Set the ptr value to numrecs, that's the last record/key. */ - cur->bc_ptrs[level] = be16_to_cpu(block->bb_h.bb_numrecs); + cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs); return 1; } diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 1f528a2..332b9f1 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -63,15 +63,10 @@ typedef struct xfs_btree_lblock { /* * Combined header and structure, used by common code. */ -typedef struct xfs_btree_hdr -{ +typedef struct xfs_btree_block { __be32 bb_magic; /* magic number for block type */ __be16 bb_level; /* 0 is a leaf */ __be16 bb_numrecs; /* current # of data records */ -} xfs_btree_hdr_t; - -typedef struct xfs_btree_block { - xfs_btree_hdr_t bb_h; /* header */ union { struct { __be32 bb_leftsib; -- cgit v0.10.2 From 561f7d17390d00444e6cd0b02b7516c91528082e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:53:59 +1100 Subject: [XFS] split up xfs_btree_init_cursor xfs_btree_init_cursor contains close to little shared code for the different btrees and will get even more non-common code in the future. Split it up into one routine per btree type. Because xfs_btree_dup_cursor needs to call the init routine for a generic btree cursor add a new btree operation vector that contains a dup_cursor method that initializes a new cursor based on an existing one. The btree operations vector is based on an idea and code from Dave Chinner and will grow more entries later during this series. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32176a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 1956f83..69833eb 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -640,8 +640,8 @@ xfs_alloc_ag_vextent_exact( /* * Allocate/initialize a cursor for the by-number freespace btree. */ - bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_BNO, NULL, 0); + bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_BNO); /* * Lookup bno and minlen in the btree (minlen is irrelevant, really). * Look for the closest free block <= bno, it must contain bno @@ -696,8 +696,8 @@ xfs_alloc_ag_vextent_exact( * We are allocating agbno for rlen [agbno .. end] * Allocate/initialize a cursor for the by-size btree. */ - cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_CNT, NULL, 0); + cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_CNT); ASSERT(args->agbno + args->len <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, @@ -759,8 +759,8 @@ xfs_alloc_ag_vextent_near( /* * Get a cursor for the by-size btree. */ - cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_CNT, NULL, 0); + cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_CNT); ltlen = 0; bno_cur_lt = bno_cur_gt = NULL; /* @@ -886,8 +886,8 @@ xfs_alloc_ag_vextent_near( /* * Set up a cursor for the by-bno tree. 
*/ - bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, - args->agbp, args->agno, XFS_BTNUM_BNO, NULL, 0); + bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, + args->agbp, args->agno, XFS_BTNUM_BNO); /* * Fix up the btree entries. */ @@ -914,8 +914,8 @@ xfs_alloc_ag_vextent_near( /* * Allocate and initialize the cursor for the leftward search. */ - bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_BNO, NULL, 0); + bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_BNO); /* * Lookup <= bno to find the leftward search's starting point. */ @@ -1267,8 +1267,8 @@ xfs_alloc_ag_vextent_size( /* * Allocate and initialize a cursor for the by-size btree. */ - cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_CNT, NULL, 0); + cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_CNT); bno_cur = NULL; /* * Look for an entry >= maxlen+alignment-1 blocks. @@ -1372,8 +1372,8 @@ xfs_alloc_ag_vextent_size( /* * Allocate and initialize a cursor for the by-block tree. */ - bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_BNO, NULL, 0); + bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_BNO); if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, rbno, rlen, XFSA_FIXUP_CNT_OK))) goto error0; @@ -1515,8 +1515,7 @@ xfs_free_ag_extent( /* * Allocate and initialize a cursor for the by-block btree. */ - bno_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO, NULL, - 0); + bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO); cnt_cur = NULL; /* * Look for a neighboring block on the left (lower block numbers) @@ -1575,8 +1574,7 @@ xfs_free_ag_extent( /* * Now allocate and initialize a cursor for the by-size tree. */ - cnt_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT, NULL, - 0); + cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT); /* * Have both left and right contiguous neighbors. * Merge all three into a single free block. diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 3ce2645..60c121f 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -2209,3 +2209,48 @@ xfs_alloc_update( } return 0; } + +STATIC struct xfs_btree_cur * +xfs_allocbt_dup_cursor( + struct xfs_btree_cur *cur) +{ + return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp, + cur->bc_private.a.agbp, cur->bc_private.a.agno, + cur->bc_btnum); +} + +static const struct xfs_btree_ops xfs_allocbt_ops = { + .dup_cursor = xfs_allocbt_dup_cursor, +}; + +/* + * Allocate a new allocation btree cursor. 
+ */ +struct xfs_btree_cur * /* new alloc btree cursor */ +xfs_allocbt_init_cursor( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_buf *agbp, /* buffer for agf structure */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_btnum_t btnum) /* btree identifier */ +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + struct xfs_btree_cur *cur; + + ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT); + + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + + cur->bc_tp = tp; + cur->bc_mp = mp; + cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]); + cur->bc_btnum = btnum; + cur->bc_blocklog = mp->m_sb.sb_blocklog; + + cur->bc_ops = &xfs_allocbt_ops; + + cur->bc_private.a.agbp = agbp; + cur->bc_private.a.agno = agno; + + return cur; +} diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 5bd1a2c..6073538 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -152,4 +152,9 @@ extern int xfs_alloc_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno, extern int xfs_alloc_update(struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len); + +extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, + struct xfs_trans *, struct xfs_buf *, + xfs_agnumber_t, xfs_btnum_t); + #endif /* __XFS_ALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index a1aab92..a84d0c3 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -422,8 +422,7 @@ xfs_bmap_add_attrfork_btree( if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip)) *flags |= XFS_ILOG_DBROOT; else { - cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip, - XFS_DATA_FORK); + cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); cur->bc_private.b.flist = flist; cur->bc_private.b.firstblock = *firstblock; if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) @@ -3441,8 +3440,7 @@ xfs_bmap_extents_to_btree( * Need a cursor. Can't allocate until bb_level is filled in. */ mp = ip->i_mount; - cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip, - whichfork); + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); cur->bc_private.b.firstblock = *firstblock; cur->bc_private.b.flist = flist; cur->bc_private.b.flags = wasdel ? 
XFS_BTCUR_BPRV_WASDEL : 0; @@ -5029,8 +5027,7 @@ xfs_bmapi( if (abno == NULLFSBLOCK) break; if ((ifp->if_flags & XFS_IFBROOT) && !cur) { - cur = xfs_btree_init_cursor(mp, - tp, NULL, 0, XFS_BTNUM_BMAP, + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); cur->bc_private.b.firstblock = *firstblock; @@ -5147,9 +5144,8 @@ xfs_bmapi( */ ASSERT(mval->br_blockcount <= len); if ((ifp->if_flags & XFS_IFBROOT) && !cur) { - cur = xfs_btree_init_cursor(mp, - tp, NULL, 0, XFS_BTNUM_BMAP, - ip, whichfork); + cur = xfs_bmbt_init_cursor(mp, + tp, ip, whichfork); cur->bc_private.b.firstblock = *firstblock; cur->bc_private.b.flist = flist; @@ -5440,8 +5436,7 @@ xfs_bunmapi( logflags = 0; if (ifp->if_flags & XFS_IFBROOT) { ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); - cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip, - whichfork); + cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); cur->bc_private.b.firstblock = *firstblock; cur->bc_private.b.flist = flist; cur->bc_private.b.flags = 0; diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 23efad2..cfbdd00 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -2608,3 +2608,62 @@ xfs_check_nostate_extents( } return 0; } + + +STATIC struct xfs_btree_cur * +xfs_bmbt_dup_cursor( + struct xfs_btree_cur *cur) +{ + struct xfs_btree_cur *new; + + new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp, + cur->bc_private.b.ip, cur->bc_private.b.whichfork); + + /* + * Copy the firstblock, flist, and flags values, + * since init cursor doesn't get them. + */ + new->bc_private.b.firstblock = cur->bc_private.b.firstblock; + new->bc_private.b.flist = cur->bc_private.b.flist; + new->bc_private.b.flags = cur->bc_private.b.flags; + + return new; +} + +static const struct xfs_btree_ops xfs_bmbt_ops = { + .dup_cursor = xfs_bmbt_dup_cursor, +}; + +/* + * Allocate a new bmap btree cursor. + */ +struct xfs_btree_cur * /* new bmap btree cursor */ +xfs_bmbt_init_cursor( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_inode *ip, /* inode owning the btree */ + int whichfork) /* data or attr fork */ +{ + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_btree_cur *cur; + + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + + cur->bc_tp = tp; + cur->bc_mp = mp; + cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; + cur->bc_btnum = XFS_BTNUM_BMAP; + cur->bc_blocklog = mp->m_sb.sb_blocklog; + + cur->bc_ops = &xfs_bmbt_ops; + + cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); + cur->bc_private.b.ip = ip; + cur->bc_private.b.firstblock = NULLFSBLOCK; + cur->bc_private.b.flist = NULL; + cur->bc_private.b.allocated = 0; + cur->bc_private.b.flags = 0; + cur->bc_private.b.whichfork = whichfork; + + return cur; +} diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index cd0d4b4..4f12fff 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -24,6 +24,7 @@ struct xfs_btree_cur; struct xfs_btree_lblock; struct xfs_mount; struct xfs_inode; +struct xfs_trans; /* * Bmap root header, on-disk form only. 
@@ -300,6 +301,9 @@ extern void xfs_bmbt_to_bmdr(xfs_bmbt_block_t *, int, xfs_bmdr_block_t *, int); extern int xfs_bmbt_update(struct xfs_btree_cur *, xfs_fileoff_t, xfs_fsblock_t, xfs_filblks_t, xfs_exntst_t); +extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, + struct xfs_trans *, struct xfs_inode *, int); + #endif /* __KERNEL__ */ #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 3100209..074f7f6 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -387,16 +387,17 @@ xfs_btree_dup_cursor( tp = cur->bc_tp; mp = cur->bc_mp; + /* * Allocate a new cursor like the old one. */ - new = xfs_btree_init_cursor(mp, tp, cur->bc_private.a.agbp, - cur->bc_private.a.agno, cur->bc_btnum, cur->bc_private.b.ip, - cur->bc_private.b.whichfork); + new = cur->bc_ops->dup_cursor(cur); + /* * Copy the record currently in the cursor. */ new->bc_rec = cur->bc_rec; + /* * For each level current, re-get the buffer and copy the ptr value. */ @@ -416,15 +417,6 @@ xfs_btree_dup_cursor( } else new->bc_bufs[i] = NULL; } - /* - * For bmap btrees, copy the firstblock, flist, and flags values, - * since init cursor doesn't get them. - */ - if (new->bc_btnum == XFS_BTNUM_BMAP) { - new->bc_private.b.firstblock = cur->bc_private.b.firstblock; - new->bc_private.b.flist = cur->bc_private.b.flist; - new->bc_private.b.flags = cur->bc_private.b.flags; - } *ncur = new; return 0; } @@ -505,97 +497,6 @@ xfs_btree_get_bufs( } /* - * Allocate a new btree cursor. - * The cursor is either for allocation (A) or bmap (B) or inodes (I). - */ -xfs_btree_cur_t * /* new btree cursor */ -xfs_btree_init_cursor( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* (A only) buffer for agf structure */ - /* (I only) buffer for agi structure */ - xfs_agnumber_t agno, /* (AI only) allocation group number */ - xfs_btnum_t btnum, /* btree identifier */ - xfs_inode_t *ip, /* (B only) inode owning the btree */ - int whichfork) /* (B only) data or attr fork */ -{ - xfs_agf_t *agf; /* (A) allocation group freespace */ - xfs_agi_t *agi; /* (I) allocation group inodespace */ - xfs_btree_cur_t *cur; /* return value */ - xfs_ifork_t *ifp; /* (I) inode fork pointer */ - int nlevels=0; /* number of levels in the btree */ - - ASSERT(xfs_btree_cur_zone != NULL); - /* - * Allocate a new cursor. - */ - cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); - /* - * Deduce the number of btree levels from the arguments. - */ - switch (btnum) { - case XFS_BTNUM_BNO: - case XFS_BTNUM_CNT: - agf = XFS_BUF_TO_AGF(agbp); - nlevels = be32_to_cpu(agf->agf_levels[btnum]); - break; - case XFS_BTNUM_BMAP: - ifp = XFS_IFORK_PTR(ip, whichfork); - nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; - break; - case XFS_BTNUM_INO: - agi = XFS_BUF_TO_AGI(agbp); - nlevels = be32_to_cpu(agi->agi_level); - break; - default: - ASSERT(0); - } - /* - * Fill in the common fields. - */ - cur->bc_tp = tp; - cur->bc_mp = mp; - cur->bc_nlevels = nlevels; - cur->bc_btnum = btnum; - cur->bc_blocklog = mp->m_sb.sb_blocklog; - /* - * Fill in private fields. - */ - switch (btnum) { - case XFS_BTNUM_BNO: - case XFS_BTNUM_CNT: - /* - * Allocation btree fields. - */ - cur->bc_private.a.agbp = agbp; - cur->bc_private.a.agno = agno; - break; - case XFS_BTNUM_INO: - /* - * Inode allocation btree fields. - */ - cur->bc_private.a.agbp = agbp; - cur->bc_private.a.agno = agno; - break; - case XFS_BTNUM_BMAP: - /* - * Bmap btree fields. 
- */ - cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); - cur->bc_private.b.ip = ip; - cur->bc_private.b.firstblock = NULLFSBLOCK; - cur->bc_private.b.flist = NULL; - cur->bc_private.b.allocated = 0; - cur->bc_private.b.flags = 0; - cur->bc_private.b.whichfork = whichfork; - break; - default: - ASSERT(0); - } - return cur; -} - -/* * Check for the cursor referring to the last block at the given level. */ int /* 1=is last block, 0=not last block */ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 332b9f1..d30ee74 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -131,6 +131,11 @@ extern const __uint32_t xfs_magics[]; #define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */ +struct xfs_btree_ops { + /* cursor operations */ + struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); +}; + /* * Btree cursor structure. * This collects all information needed by the btree code in one place. @@ -139,6 +144,7 @@ typedef struct xfs_btree_cur { struct xfs_trans *bc_tp; /* transaction we're in, if any */ struct xfs_mount *bc_mp; /* file system mount struct */ + const struct xfs_btree_ops *bc_ops; union { xfs_alloc_rec_incore_t a; xfs_bmbt_irec_t b; @@ -308,20 +314,6 @@ xfs_btree_get_bufs( uint lock); /* lock flags for get_buf */ /* - * Allocate a new btree cursor. - * The cursor is either for allocation (A) or bmap (B). - */ -xfs_btree_cur_t * /* new btree cursor */ -xfs_btree_init_cursor( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_buf *agbp, /* (A only) buffer for agf structure */ - xfs_agnumber_t agno, /* (A only) allocation group number */ - xfs_btnum_t btnum, /* btree identifier */ - struct xfs_inode *ip, /* (B only) inode owning the btree */ - int whichfork); /* (B only) data/attr fork */ - -/* * Check for the cursor referring to the last block at the given level. */ int /* 1=is last block, 0=not last block */ diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index aad8c5d..11bb169 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -335,8 +335,7 @@ xfs_ialloc_ag_alloc( /* * Insert records describing the new inode chunk into the btree. */ - cur = xfs_btree_init_cursor(args.mp, tp, agbp, agno, - XFS_BTNUM_INO, (xfs_inode_t *)0, 0); + cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno); for (thisino = newino; thisino < newino + newlen; thisino += XFS_INODES_PER_CHUNK) { @@ -676,8 +675,7 @@ nextag: */ agno = tagno; *IO_agbp = NULL; - cur = xfs_btree_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno), - XFS_BTNUM_INO, (xfs_inode_t *)0, 0); + cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); /* * If pagino is 0 (this is the root inode allocation) use newino. * This must work because we've just allocated some. @@ -1022,8 +1020,7 @@ xfs_difree( /* * Initialize the cursor. 
*/ - cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO, - (xfs_inode_t *)0, 0); + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); #ifdef DEBUG if (cur->bc_nlevels == 1) { int freecount = 0; @@ -1259,8 +1256,7 @@ xfs_dilocate( #endif /* DEBUG */ return error; } - cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO, - (xfs_inode_t *)0, 0); + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { #ifdef DEBUG xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 83502f3..8c0c474 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -2076,3 +2076,44 @@ xfs_inobt_update( } return 0; } + +STATIC struct xfs_btree_cur * +xfs_inobt_dup_cursor( + struct xfs_btree_cur *cur) +{ + return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, + cur->bc_private.a.agbp, cur->bc_private.a.agno); +} + +static const struct xfs_btree_ops xfs_inobt_ops = { + .dup_cursor = xfs_inobt_dup_cursor, +}; + +/* + * Allocate a new inode btree cursor. + */ +struct xfs_btree_cur * /* new inode btree cursor */ +xfs_inobt_init_cursor( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + struct xfs_buf *agbp, /* buffer for agi structure */ + xfs_agnumber_t agno) /* allocation group number */ +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + struct xfs_btree_cur *cur; + + cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP); + + cur->bc_tp = tp; + cur->bc_mp = mp; + cur->bc_nlevels = be32_to_cpu(agi->agi_level); + cur->bc_btnum = XFS_BTNUM_INO; + cur->bc_blocklog = mp->m_sb.sb_blocklog; + + cur->bc_ops = &xfs_inobt_ops; + + cur->bc_private.a.agbp = agbp; + cur->bc_private.a.agno = agno; + + return cur; +} diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 8efc4a5..eea4093 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -175,4 +175,8 @@ extern int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino, extern int xfs_inobt_update(struct xfs_btree_cur *cur, xfs_agino_t ino, __int32_t fcnt, xfs_inofree_t free); + +extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, + struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); + #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 4f4c939..a5f02f0 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -416,8 +416,7 @@ xfs_bulkstat( /* * Allocate and initialize a btree cursor for ialloc btree. */ - cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO, - (xfs_inode_t *)0, 0); + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); irbp = irbuf; irbufend = irbuf + nirbuf; end_of_ag = 0; @@ -842,8 +841,7 @@ xfs_inumbers( agino = 0; continue; } - cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, - XFS_BTNUM_INO, (xfs_inode_t *)0, 0); + cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp); if (error) { xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); -- cgit v0.10.2 From de227dd9604934d2a6d33cd332d1be431719c93e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:54:12 +1100 Subject: [XFS] add generic btree types Add generic union types for btree pointers, keys and records. The generic btree pointer contains either a 32 and 64bit big endian scalar for short and long form btrees, and the key and record contain the relevant type for each possible btree. 
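As a rough illustration of the pattern (field layouts are simplified and invented here, not the real xfs_*_key_t structures): the wrapper union lets the common btree code move keys and records around opaquely, while the btree-specific code interprets the one member it owns.

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified, invented key layouts -- not the real on-disk structures. */
    struct alloc_key { uint32_t startblock; };
    struct inobt_key { uint32_t startino;   };

    /* Analogue of union xfs_btree_key: one wrapper, interpreted per btree. */
    union btree_key {
        struct alloc_key alloc;
        struct inobt_key inobt;
    };

    /* Common code can move keys around without knowing which btree owns them... */
    static void copy_key(union btree_key *dst, const union btree_key *src)
    {
        *dst = *src;
    }

    /* ...while btree-specific code interprets the member it cares about. */
    static uint32_t alloc_key_start(const union btree_key *key)
    {
        return key->alloc.startblock;
    }

    int main(void)
    {
        union btree_key a = { .alloc = { .startblock = 7 } };
        union btree_key b;

        copy_key(&b, &a);
        printf("copied alloc key starts at block %u\n",
               (unsigned)alloc_key_start(&b));
        return 0;
    }

The same split applies to union xfs_btree_ptr, where generic code only needs to know whether to read the 32-bit or the 64-bit member.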
Split out from a bigger patch from Dave Chinner and simplified a little further. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32178a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index d30ee74..428e81f 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -80,6 +80,31 @@ typedef struct xfs_btree_block { } xfs_btree_block_t; /* + * Generic key, ptr and record wrapper structures. + * + * These are disk format structures, and are converted where necessary + * by the btree specific code that needs to interpret them. + */ +union xfs_btree_ptr { + __be32 s; /* short form ptr */ + __be64 l; /* long form ptr */ +}; + +union xfs_btree_key { + xfs_bmbt_key_t bmbt; + xfs_bmdr_key_t bmbr; /* bmbt root block */ + xfs_alloc_key_t alloc; + xfs_inobt_key_t inobt; +}; + +union xfs_btree_rec { + xfs_bmbt_rec_t bmbt; + xfs_bmdr_rec_t bmbr; /* bmbt root block */ + xfs_alloc_rec_t alloc; + xfs_inobt_rec_t inobt; +}; + +/* * For logging record fields. */ #define XFS_BB_MAGIC 0x01 -- cgit v0.10.2 From 8186e517fab1854554c48955cdbcbb6710e7baef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:54:22 +1100 Subject: [XFS] make btree root in inode support generic The bmap btree is rooted in the inode and not in a disk block. Make the support for this feature more generic by adding a btree flag to for this feature instead of relying on the XFS_BTNUM_BMAP btnum check. Also clean up xfs_btree_get_block where this new flag is used. Based upon a patch from Dave Chinner. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32180a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index cfbdd00..d9bbed6 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -2656,6 +2656,7 @@ xfs_bmbt_init_cursor( cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_ops = &xfs_bmbt_ops; + cur->bc_flags = XFS_BTREE_ROOT_IN_INODE; cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); cur->bc_private.b.ip = ip; diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 074f7f6..57e858f 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -422,32 +422,39 @@ xfs_btree_dup_cursor( } /* + * Get a the root block which is stored in the inode. + * + * For now this btree implementation assumes the btree root is always + * stored in the if_broot field of an inode fork. + */ +STATIC struct xfs_btree_block * +xfs_btree_get_iroot( + struct xfs_btree_cur *cur) +{ + struct xfs_ifork *ifp; + + ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork); + return (struct xfs_btree_block *)ifp->if_broot; +} + +/* * Retrieve the block pointer from the cursor at the given level. - * This may be a bmap btree root or from a buffer. + * This may be an inode btree root or from a buffer. 
*/ -STATIC xfs_btree_block_t * /* generic btree block pointer */ +STATIC struct xfs_btree_block * /* generic btree block pointer */ xfs_btree_get_block( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ int level, /* level in btree */ - xfs_buf_t **bpp) /* buffer containing the block */ + struct xfs_buf **bpp) /* buffer containing the block */ { - xfs_btree_block_t *block; /* return value */ - xfs_buf_t *bp; /* return buffer */ - xfs_ifork_t *ifp; /* inode fork pointer */ - int whichfork; /* data or attr fork */ - - if (cur->bc_btnum == XFS_BTNUM_BMAP && level == cur->bc_nlevels - 1) { - whichfork = cur->bc_private.b.whichfork; - ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, whichfork); - block = (xfs_btree_block_t *)ifp->if_broot; - bp = NULL; - } else { - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_BLOCK(bp); + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + (level == cur->bc_nlevels - 1)) { + *bpp = NULL; + return xfs_btree_get_iroot(cur); } - ASSERT(block != NULL); - *bpp = bp; - return block; + + *bpp = cur->bc_bufs[level]; + return XFS_BUF_TO_BLOCK(*bpp); } /* diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 428e81f..fefbc69 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -170,6 +170,7 @@ typedef struct xfs_btree_cur struct xfs_trans *bc_tp; /* transaction we're in, if any */ struct xfs_mount *bc_mp; /* file system mount struct */ const struct xfs_btree_ops *bc_ops; + uint bc_flags; /* btree features - below */ union { xfs_alloc_rec_incore_t a; xfs_bmbt_irec_t b; @@ -201,6 +202,10 @@ typedef struct xfs_btree_cur } bc_private; /* per-btree type data */ } xfs_btree_cur_t; +/* cursor flags */ +#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */ + + #define XFS_BTREE_NOERROR 0 #define XFS_BTREE_ERROR 1 -- cgit v0.10.2 From e99ab90d6a9e8ac92f05d2c31d44aa7feee15394 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:54:33 +1100 Subject: [XFS] add a long pointers flag to xfs_btree_cur Add a flag to the xfs btree cursor when using long (64bit) block pointers instead of checking btnum == XFS_BTNUM_BMAP. 
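In miniature (simplified names, and assuming a glibc-style <endian.h> for the byte-order helpers where the kernel would use be32_to_cpu/be64_to_cpu), the flag lets one generic accessor replace per-btnum special cases:

    #include <endian.h>
    #include <stdint.h>
    #include <stdio.h>

    union btree_ptr {
        uint32_t s;    /* short form ptr, big-endian on disk */
        uint64_t l;    /* long form ptr, big-endian on disk */
    };

    #define BTREE_LONG_PTRS (1 << 0)   /* stand-in for XFS_BTREE_LONG_PTRS */

    /* Generic code decodes whichever member the cursor flags select. */
    static uint64_t ptr_to_blockno(unsigned int flags, union btree_ptr p)
    {
        return (flags & BTREE_LONG_PTRS) ? be64toh(p.l) : be32toh(p.s);
    }

    int main(void)
    {
        union btree_ptr p = { .s = htobe32(42) };

        printf("short-form ptr -> block %llu\n",
               (unsigned long long)ptr_to_blockno(0, p));
        return 0;
    }
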
SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32181a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index d9bbed6..1ec494e 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -2656,7 +2656,7 @@ xfs_bmbt_init_cursor( cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_ops = &xfs_bmbt_ops; - cur->bc_flags = XFS_BTREE_ROOT_IN_INODE; + cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE; cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork); cur->bc_private.b.ip = ip; diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 57e858f..59796b4 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -90,7 +90,7 @@ xfs_btree_check_block( int level, /* level of the btree block */ xfs_buf_t *bp) /* buffer containing block, if any */ { - if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level, bp); else @@ -516,7 +516,7 @@ xfs_btree_islastblock( block = xfs_btree_get_block(cur, level, &bp); xfs_btree_check_block(cur, block, level, bp); - if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO; else return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK; @@ -808,7 +808,7 @@ xfs_btree_setbuf( if (!bp) return; b = XFS_BUF_TO_BLOCK(bp); - if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) { + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO) diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index fefbc69..dd93fd3 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -116,11 +116,6 @@ union xfs_btree_rec { #define XFS_BB_ALL_BITS ((1 << XFS_BB_NUM_BITS) - 1) /* - * Boolean to select which form of xfs_btree_block_t.bb_u to use. - */ -#define XFS_BTREE_LONG_PTRS(btnum) ((btnum) == XFS_BTNUM_BMAP) - -/* * Magic numbers for btree blocks. */ extern const __uint32_t xfs_magics[]; @@ -203,6 +198,7 @@ typedef struct xfs_btree_cur } xfs_btree_cur_t; /* cursor flags */ +#define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */ #define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */ -- cgit v0.10.2 From b524bfeee2152fa64b6210f28ced80489b9d2439 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:54:43 +1100 Subject: [XFS] refactor xfs_btree_readahead From: Dave Chinner Refactor xfs_btree_readahead to make it more readable: (a) remove the inline xfs_btree_readahead wrapper and move all checks out of line into the main routine. 
(b) factor out helpers for short/long form btrees (c) move check for root in inodes from the callers into xfs_btree_readahead [hch: split out from a big patch and minor cleanups] SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32182a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 1ec494e..519249e 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -1721,8 +1721,9 @@ xfs_bmbt_decrement( XFS_BMBT_TRACE_CURSOR(cur, ENTRY); XFS_BMBT_TRACE_ARGI(cur, level); ASSERT(level < cur->bc_nlevels); - if (level < cur->bc_nlevels - 1) - xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); + + xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); + if (--cur->bc_ptrs[level] > 0) { XFS_BMBT_TRACE_CURSOR(cur, EXIT); *stat = 1; @@ -1743,8 +1744,7 @@ xfs_bmbt_decrement( for (lev = level + 1; lev < cur->bc_nlevels; lev++) { if (--cur->bc_ptrs[lev] > 0) break; - if (lev < cur->bc_nlevels - 1) - xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); + xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); } if (lev == cur->bc_nlevels) { XFS_BMBT_TRACE_CURSOR(cur, EXIT); @@ -1995,8 +1995,8 @@ xfs_bmbt_increment( XFS_BMBT_TRACE_CURSOR(cur, ENTRY); XFS_BMBT_TRACE_ARGI(cur, level); ASSERT(level < cur->bc_nlevels); - if (level < cur->bc_nlevels - 1) - xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); + + xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); block = xfs_bmbt_get_block(cur, level, &bp); #ifdef DEBUG if ((error = xfs_btree_check_lblock(cur, block, level, bp))) { @@ -2024,8 +2024,7 @@ xfs_bmbt_increment( #endif if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs)) break; - if (lev < cur->bc_nlevels - 1) - xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); + xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); } if (lev == cur->bc_nlevels) { XFS_BMBT_TRACE_CURSOR(cur, EXIT); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 59796b4..4d793e4 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -725,66 +725,84 @@ xfs_btree_reada_bufs( xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); } +STATIC int +xfs_btree_readahead_lblock( + struct xfs_btree_cur *cur, + int lr, + struct xfs_btree_block *block) +{ + int rval = 0; + xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); + xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); + + if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) { + xfs_btree_reada_bufl(cur->bc_mp, left, 1); + rval++; + } + + if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) { + xfs_btree_reada_bufl(cur->bc_mp, right, 1); + rval++; + } + + return rval; +} + +STATIC int +xfs_btree_readahead_sblock( + struct xfs_btree_cur *cur, + int lr, + struct xfs_btree_block *block) +{ + int rval = 0; + xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib); + xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib); + + + if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, + left, 1); + rval++; + } + + if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, + right, 1); + rval++; + } + + return rval; +} + /* * Read-ahead btree blocks, at the given level. * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. 
*/ int -xfs_btree_readahead_core( - xfs_btree_cur_t *cur, /* btree cursor */ +xfs_btree_readahead( + struct xfs_btree_cur *cur, /* btree cursor */ int lev, /* level in btree */ int lr) /* left/right bits */ { - xfs_alloc_block_t *a; - xfs_bmbt_block_t *b; - xfs_inobt_block_t *i; - int rval = 0; + struct xfs_btree_block *block; + + /* + * No readahead needed if we are at the root level and the + * btree root is stored in the inode. + */ + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + (lev == cur->bc_nlevels - 1)) + return 0; + + if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev]) + return 0; - ASSERT(cur->bc_bufs[lev] != NULL); cur->bc_ra[lev] |= lr; - switch (cur->bc_btnum) { - case XFS_BTNUM_BNO: - case XFS_BTNUM_CNT: - a = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); - if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(a->bb_leftsib) != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, - be32_to_cpu(a->bb_leftsib), 1); - rval++; - } - if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(a->bb_rightsib) != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, - be32_to_cpu(a->bb_rightsib), 1); - rval++; - } - break; - case XFS_BTNUM_BMAP: - b = XFS_BUF_TO_BMBT_BLOCK(cur->bc_bufs[lev]); - if ((lr & XFS_BTCUR_LEFTRA) && be64_to_cpu(b->bb_leftsib) != NULLDFSBNO) { - xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_leftsib), 1); - rval++; - } - if ((lr & XFS_BTCUR_RIGHTRA) && be64_to_cpu(b->bb_rightsib) != NULLDFSBNO) { - xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_rightsib), 1); - rval++; - } - break; - case XFS_BTNUM_INO: - i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); - if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, - be32_to_cpu(i->bb_leftsib), 1); - rval++; - } - if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, - be32_to_cpu(i->bb_rightsib), 1); - rval++; - } - break; - default: - ASSERT(0); - } - return rval; + block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + return xfs_btree_readahead_lblock(cur, lr, block); + return xfs_btree_readahead_sblock(cur, lr, block); } /* diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index dd93fd3..8be838f 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -421,23 +421,10 @@ xfs_btree_reada_bufs( * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. */ int /* readahead block count */ -xfs_btree_readahead_core( - xfs_btree_cur_t *cur, /* btree cursor */ - int lev, /* level in btree */ - int lr); /* left/right bits */ - -static inline int /* readahead block count */ xfs_btree_readahead( xfs_btree_cur_t *cur, /* btree cursor */ int lev, /* level in btree */ - int lr) /* left/right bits */ -{ - if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev]) - return 0; - - return xfs_btree_readahead_core(cur, lev, lr); -} - + int lr); /* left/right bits */ /* * Set the buffer for level "lev" in the cursor to bp, releasing -- cgit v0.10.2 From a23f6ef8ce966abc0f6e24a81ceb6a74ed30693b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:54:53 +1100 Subject: [XFS] refactor btree validation helpers Move the various btree validation helpers around in xfs_btree.c so that they are close to each other and in common #ifdef DEBUG sections. Also add a new xfs_btree_check_ptr helper to check a btree ptr that can be either long or short form. 
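For orientation, the short-form check reduces to a few range tests against the allocation group geometry; restated outside the kernel with illustrative names (not the kernel helpers themselves):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NULLAGBLOCK ((uint32_t)-1)   /* stand-in for the real sentinel */

    /*
     * Illustrative restatement of the short-form pointer check: a block
     * pointer inside an AG is only plausible if it is non-null, non-zero
     * and below the AG size, and pointers only exist above the leaf level.
     */
    static bool short_ptr_ok(uint32_t bno, int level, uint32_t agblocks)
    {
        return level > 0 &&
               bno != NULLAGBLOCK &&
               bno != 0 &&
               bno < agblocks;
    }

    int main(void)
    {
        printf("valid: %d, invalid: %d\n",
               short_ptr_ok(10, 1, 100), short_ptr_ok(0, 1, 100));
        return 0;
    }
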
Split out from a bigger patch from Dave Chinner with various small changes applied by me. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32183a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 4d793e4..966d58d5 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -81,24 +81,6 @@ xfs_btree_maxrecs( #ifdef DEBUG /* - * Debug routine: check that block header is ok. - */ -void -xfs_btree_check_block( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_btree_block_t *block, /* generic btree block pointer */ - int level, /* level of the btree block */ - xfs_buf_t *bp) /* buffer containing block, if any */ -{ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level, - bp); - else - xfs_btree_check_sblock(cur, (xfs_btree_sblock_t *)block, level, - bp); -} - -/* * Debug routine: check that keys are in the right order. */ void @@ -150,66 +132,8 @@ xfs_btree_check_key( ASSERT(0); } } -#endif /* DEBUG */ - -/* - * Checking routine: check that long form block header is ok. - */ -/* ARGSUSED */ -int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_lblock( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_btree_lblock_t *block, /* btree long form block pointer */ - int level, /* level of the btree block */ - xfs_buf_t *bp) /* buffer for block, if any */ -{ - int lblock_ok; /* block passes checks */ - xfs_mount_t *mp; /* file system mount point */ - - mp = cur->bc_mp; - lblock_ok = - be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && - be16_to_cpu(block->bb_level) == level && - be16_to_cpu(block->bb_numrecs) <= - xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) && - block->bb_leftsib && - (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO || - XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) && - block->bb_rightsib && - (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO || - XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib))); - if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK, - XFS_RANDOM_BTREE_CHECK_LBLOCK))) { - if (bp) - xfs_buftrace("LBTREE ERROR", bp); - XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW, - mp); - return XFS_ERROR(EFSCORRUPTED); - } - return 0; -} /* - * Checking routine: check that (long) pointer is ok. - */ -int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_lptr( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_dfsbno_t ptr, /* btree block disk address */ - int level) /* btree block level */ -{ - xfs_mount_t *mp; /* file system mount point */ - - mp = cur->bc_mp; - XFS_WANT_CORRUPTED_RETURN( - level > 0 && - ptr != NULLDFSBNO && - XFS_FSB_SANITY_CHECK(mp, ptr)); - return 0; -} - -#ifdef DEBUG -/* * Debug routine: check that records are in the right order. */ void @@ -268,19 +192,49 @@ xfs_btree_check_rec( } #endif /* DEBUG */ -/* - * Checking routine: check that block header is ok. 
- */ -/* ARGSUSED */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_lblock( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_btree_lblock *block, /* btree long form block pointer */ + int level, /* level of the btree block */ + struct xfs_buf *bp) /* buffer for block, if any */ +{ + int lblock_ok; /* block passes checks */ + struct xfs_mount *mp; /* file system mount point */ + + mp = cur->bc_mp; + lblock_ok = + be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && + be16_to_cpu(block->bb_level) == level && + be16_to_cpu(block->bb_numrecs) <= + xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) && + block->bb_leftsib && + (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO || + XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) && + block->bb_rightsib && + (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO || + XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib))); + if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, + XFS_ERRTAG_BTREE_CHECK_LBLOCK, + XFS_RANDOM_BTREE_CHECK_LBLOCK))) { + if (bp) + xfs_buftrace("LBTREE ERROR", bp); + XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW, + mp); + return XFS_ERROR(EFSCORRUPTED); + } + return 0; +} + int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_sblock( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_btree_sblock_t *block, /* btree short form block pointer */ + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_btree_sblock *block, /* btree short form block pointer */ int level, /* level of the btree block */ - xfs_buf_t *bp) /* buffer containing block */ + struct xfs_buf *bp) /* buffer containing block */ { - xfs_buf_t *agbp; /* buffer for ag. freespace struct */ - xfs_agf_t *agf; /* ag. freespace structure */ + struct xfs_buf *agbp; /* buffer for ag. freespace struct */ + struct xfs_agf *agf; /* ag. freespace structure */ xfs_agblock_t agflen; /* native ag. freespace length */ int sblock_ok; /* block passes checks */ @@ -311,27 +265,79 @@ xfs_btree_check_sblock( } /* - * Checking routine: check that (short) pointer is ok. + * Debug routine: check that block header is ok. + */ +int +xfs_btree_check_block( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_btree_block *block, /* generic btree block pointer */ + int level, /* level of the btree block */ + struct xfs_buf *bp) /* buffer containing block, if any */ +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + return xfs_btree_check_lblock(cur, + (struct xfs_btree_lblock *)block, level, bp); + } else { + return xfs_btree_check_sblock(cur, + (struct xfs_btree_sblock *)block, level, bp); + } +} + +/* + * Check that (long) pointer is ok. + */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_lptr( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_dfsbno_t bno, /* btree block disk address */ + int level) /* btree block level */ +{ + XFS_WANT_CORRUPTED_RETURN( + level > 0 && + bno != NULLDFSBNO && + XFS_FSB_SANITY_CHECK(cur->bc_mp, bno)); + return 0; +} + +/* + * Check that (short) pointer is ok. */ int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_sptr( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agblock_t ptr, /* btree block disk address */ - int level) /* btree block level */ + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* btree block disk address */ + int level) /* btree block level */ { - xfs_buf_t *agbp; /* buffer for ag. freespace struct */ - xfs_agf_t *agf; /* ag. 
freespace structure */ + xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks; - agbp = cur->bc_private.a.agbp; - agf = XFS_BUF_TO_AGF(agbp); XFS_WANT_CORRUPTED_RETURN( level > 0 && - ptr != NULLAGBLOCK && ptr != 0 && - ptr < be32_to_cpu(agf->agf_length)); + bno != NULLAGBLOCK && + bno != 0 && + bno < agblocks); return 0; } /* + * Check that block ptr is ok. + */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_ptr( + struct xfs_btree_cur *cur, /* btree cursor */ + union xfs_btree_ptr *ptr, /* btree block disk address */ + int index, /* offset from ptr to check */ + int level) /* btree block level */ +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + return xfs_btree_check_lptr(cur, + be64_to_cpu((&ptr->l)[index]), level); + } else { + return xfs_btree_check_sptr(cur, + be32_to_cpu((&ptr->s)[index]), level); + } +} + +/* * Delete the btree cursor. */ void diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 8be838f..a579182 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -215,81 +215,92 @@ typedef struct xfs_btree_cur #ifdef __KERNEL__ -#ifdef DEBUG /* - * Debug routine: check that block header is ok. + * Check that long form block header is ok. */ -void -xfs_btree_check_block( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_btree_block_t *block, /* generic btree block pointer */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_lblock( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_btree_lblock *block, /* btree long form block pointer */ int level, /* level of the btree block */ struct xfs_buf *bp); /* buffer containing block, if any */ /* - * Debug routine: check that keys are in the right order. + * Check that short form block header is ok. */ -void -xfs_btree_check_key( - xfs_btnum_t btnum, /* btree identifier */ - void *ak1, /* pointer to left (lower) key */ - void *ak2); /* pointer to right (higher) key */ - -/* - * Debug routine: check that records are in the right order. - */ -void -xfs_btree_check_rec( - xfs_btnum_t btnum, /* btree identifier */ - void *ar1, /* pointer to left (lower) record */ - void *ar2); /* pointer to right (higher) record */ -#else -#define xfs_btree_check_block(a,b,c,d) -#define xfs_btree_check_key(a,b,c) -#define xfs_btree_check_rec(a,b,c) -#endif /* DEBUG */ +int /* error (0 or EFSCORRUPTED) */ +xfs_btree_check_sblock( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_btree_sblock *block, /* btree short form block pointer */ + int level, /* level of the btree block */ + struct xfs_buf *bp); /* buffer containing block */ /* - * Checking routine: check that long form block header is ok. + * Check that block header is ok. */ -int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_lblock( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_btree_lblock_t *block, /* btree long form block pointer */ +int +xfs_btree_check_block( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_btree_block *block, /* generic btree block pointer */ int level, /* level of the btree block */ struct xfs_buf *bp); /* buffer containing block, if any */ /* - * Checking routine: check that (long) pointer is ok. + * Check that (long) pointer is ok. 
*/ int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_lptr( - xfs_btree_cur_t *cur, /* btree cursor */ + struct xfs_btree_cur *cur, /* btree cursor */ xfs_dfsbno_t ptr, /* btree block disk address */ int level); /* btree block level */ #define xfs_btree_check_lptr_disk(cur, ptr, level) \ xfs_btree_check_lptr(cur, be64_to_cpu(ptr), level) + /* - * Checking routine: check that short form block header is ok. + * Check that (short) pointer is ok. */ int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_sblock( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_btree_sblock_t *block, /* btree short form block pointer */ - int level, /* level of the btree block */ - struct xfs_buf *bp); /* buffer containing block */ +xfs_btree_check_sptr( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t ptr, /* btree block disk address */ + int level); /* btree block level */ /* - * Checking routine: check that (short) pointer is ok. + * Check that (short) pointer is ok. */ int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_sptr( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agblock_t ptr, /* btree block disk address */ +xfs_btree_check_ptr( + struct xfs_btree_cur *cur, /* btree cursor */ + union xfs_btree_ptr *ptr, /* btree block disk address */ + int index, /* offset from ptr to check */ int level); /* btree block level */ +#ifdef DEBUG + +/* + * Debug routine: check that keys are in the right order. + */ +void +xfs_btree_check_key( + xfs_btnum_t btnum, /* btree identifier */ + void *ak1, /* pointer to left (lower) key */ + void *ak2); /* pointer to right (higher) key */ + +/* + * Debug routine: check that records are in the right order. + */ +void +xfs_btree_check_rec( + xfs_btnum_t btnum, /* btree identifier */ + void *ar1, /* pointer to left (lower) record */ + void *ar2); /* pointer to right (higher) record */ +#else +#define xfs_btree_check_key(a, b, c) +#define xfs_btree_check_rec(a, b, c) +#endif /* DEBUG */ + /* * Delete the btree cursor. */ -- cgit v0.10.2 From 854929f05831d3a290a802815ee955b96c740c61 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 16:55:03 +1100 Subject: [XFS] add new btree statistics From: Dave Chinner Introduce statistics coverage of all the btrees and cover all the btree operations, not just some. Invaluable for determining test code coverage of all the btree operations.... 
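As a rough usage sketch (not part of this patch; the function name and the nmoved argument are made up for illustration), a generic btree routine bumps the new per-type counters through the XFS_BTREE_STATS_INC and XFS_BTREE_STATS_ADD macros added below, which pick the matching xs_*_2_* field from the cursor's bc_btnum:

	/* sketch only: inside a generic btree operation */
	STATIC int
	xfs_btree_insrec_example(struct xfs_btree_cur *cur, int nmoved)
	{
		XFS_BTREE_STATS_INC(cur, insrec);	 /* xs_abtb_2_insrec, xs_bmbt_2_insrec, ... */
		XFS_BTREE_STATS_ADD(cur, moves, nmoved); /* records shifted during the insert */
		return 0;
	}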
SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32184a Signed-off-by: David Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c index 3d5b67c..64f4ec9 100644 --- a/fs/xfs/linux-2.6/xfs_stats.c +++ b/fs/xfs/linux-2.6/xfs_stats.c @@ -53,6 +53,10 @@ xfs_read_xfsstats( { "icluster", XFSSTAT_END_INODE_CLUSTER }, { "vnodes", XFSSTAT_END_VNODE_OPS }, { "buf", XFSSTAT_END_BUF }, + { "abtb2", XFSSTAT_END_ABTB_V2 }, + { "abtc2", XFSSTAT_END_ABTC_V2 }, + { "bmbt2", XFSSTAT_END_BMBT_V2 }, + { "ibt2", XFSSTAT_END_IBT_V2 }, }; /* Loop over all stats groups */ diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h index e83820f..736854b 100644 --- a/fs/xfs/linux-2.6/xfs_stats.h +++ b/fs/xfs/linux-2.6/xfs_stats.h @@ -118,6 +118,71 @@ struct xfsstats { __uint32_t xb_page_retries; __uint32_t xb_page_found; __uint32_t xb_get_read; +/* Version 2 btree counters */ +#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF+15) + __uint32_t xs_abtb_2_lookup; + __uint32_t xs_abtb_2_compare; + __uint32_t xs_abtb_2_insrec; + __uint32_t xs_abtb_2_delrec; + __uint32_t xs_abtb_2_newroot; + __uint32_t xs_abtb_2_killroot; + __uint32_t xs_abtb_2_increment; + __uint32_t xs_abtb_2_decrement; + __uint32_t xs_abtb_2_lshift; + __uint32_t xs_abtb_2_rshift; + __uint32_t xs_abtb_2_split; + __uint32_t xs_abtb_2_join; + __uint32_t xs_abtb_2_alloc; + __uint32_t xs_abtb_2_free; + __uint32_t xs_abtb_2_moves; +#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2+15) + __uint32_t xs_abtc_2_lookup; + __uint32_t xs_abtc_2_compare; + __uint32_t xs_abtc_2_insrec; + __uint32_t xs_abtc_2_delrec; + __uint32_t xs_abtc_2_newroot; + __uint32_t xs_abtc_2_killroot; + __uint32_t xs_abtc_2_increment; + __uint32_t xs_abtc_2_decrement; + __uint32_t xs_abtc_2_lshift; + __uint32_t xs_abtc_2_rshift; + __uint32_t xs_abtc_2_split; + __uint32_t xs_abtc_2_join; + __uint32_t xs_abtc_2_alloc; + __uint32_t xs_abtc_2_free; + __uint32_t xs_abtc_2_moves; +#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2+15) + __uint32_t xs_bmbt_2_lookup; + __uint32_t xs_bmbt_2_compare; + __uint32_t xs_bmbt_2_insrec; + __uint32_t xs_bmbt_2_delrec; + __uint32_t xs_bmbt_2_newroot; + __uint32_t xs_bmbt_2_killroot; + __uint32_t xs_bmbt_2_increment; + __uint32_t xs_bmbt_2_decrement; + __uint32_t xs_bmbt_2_lshift; + __uint32_t xs_bmbt_2_rshift; + __uint32_t xs_bmbt_2_split; + __uint32_t xs_bmbt_2_join; + __uint32_t xs_bmbt_2_alloc; + __uint32_t xs_bmbt_2_free; + __uint32_t xs_bmbt_2_moves; +#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2+15) + __uint32_t xs_ibt_2_lookup; + __uint32_t xs_ibt_2_compare; + __uint32_t xs_ibt_2_insrec; + __uint32_t xs_ibt_2_delrec; + __uint32_t xs_ibt_2_newroot; + __uint32_t xs_ibt_2_killroot; + __uint32_t xs_ibt_2_increment; + __uint32_t xs_ibt_2_decrement; + __uint32_t xs_ibt_2_lshift; + __uint32_t xs_ibt_2_rshift; + __uint32_t xs_ibt_2_split; + __uint32_t xs_ibt_2_join; + __uint32_t xs_ibt_2_alloc; + __uint32_t xs_ibt_2_free; + __uint32_t xs_ibt_2_moves; /* Extra precision counters */ __uint64_t xs_xstrat_bytes; __uint64_t xs_write_bytes; diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index a579182..57d3bd3 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -121,6 +121,34 @@ union xfs_btree_rec { extern const __uint32_t xfs_magics[]; /* + * Generic stats interface + */ +#define __XFS_BTREE_STATS_INC(type, stat) \ + XFS_STATS_INC(xs_ ## type ## _2_ ## stat) +#define XFS_BTREE_STATS_INC(cur, stat) \ +do { \ + switch 
(cur->bc_btnum) { \ + case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(abtb, stat); break; \ + case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ + case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ + case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ + case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ + } \ +} while (0) + +#define __XFS_BTREE_STATS_ADD(type, stat, val) \ + XFS_STATS_ADD(xs_ ## type ## _2_ ## stat, val) +#define XFS_BTREE_STATS_ADD(cur, stat, val) \ +do { \ + switch (cur->bc_btnum) { \ + case XFS_BTNUM_BNO: __XFS_BTREE_STATS_ADD(abtb, stat, val); break; \ + case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ + case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ + case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ + case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ + } \ +} while (0) +/* * Maximum and minimum records in a btree block. * Given block size, type prefix, and leaf flag (0 or 1). * The divisor below is equivalent to lf ? (e1) : (e2) but that produces -- cgit v0.10.2 From 8c4ed633e65d0bd0a25d45aad9b4646e3236cad7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:55:13 +1100 Subject: [XFS] make btree tracing generic Make the existing bmap btree tracing generic so that it applies to all btree types. Some fragments lifted from a patch by Dave Chinner. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32187a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 737c9a4..75b2be7 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -91,7 +91,8 @@ xfs-y += xfs_alloc.o \ xfs_dmops.o \ xfs_qmops.o -xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o +xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \ + xfs_dir2_trace.o # Objects in linux/ xfs-y += $(addprefix $(XFS_LINUX)/, \ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 9bfb260..0c71c21 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -36,6 +36,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_bmap.h" #include "xfs_rtalloc.h" @@ -1916,10 +1917,19 @@ xfs_alloc_trace_bufs(void) if (!xfs_bmap_trace_buf) goto out_free_alloc_trace; #endif -#ifdef XFS_BMBT_TRACE +#ifdef XFS_BTREE_TRACE + xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE, + KM_MAYFAIL); + if (!xfs_allocbt_trace_buf) + goto out_free_bmap_trace; + + xfs_inobt_trace_buf = ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL); + if (!xfs_inobt_trace_buf) + goto out_free_allocbt_trace; + xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL); if (!xfs_bmbt_trace_buf) - goto out_free_bmap_trace; + goto out_free_inobt_trace; #endif #ifdef XFS_ATTR_TRACE xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL); @@ -1941,8 +1951,12 @@ xfs_alloc_trace_bufs(void) ktrace_free(xfs_attr_trace_buf); out_free_bmbt_trace: #endif -#ifdef XFS_BMBT_TRACE +#ifdef XFS_BTREE_TRACE ktrace_free(xfs_bmbt_trace_buf); + out_free_inobt_trace: + ktrace_free(xfs_inobt_trace_buf); + out_free_allocbt_trace: + ktrace_free(xfs_allocbt_trace_buf); out_free_bmap_trace: #endif #ifdef XFS_BMAP_TRACE @@ -1965,8 +1979,10 @@ xfs_free_trace_bufs(void) #ifdef XFS_ATTR_TRACE ktrace_free(xfs_attr_trace_buf); #endif -#ifdef XFS_BMBT_TRACE +#ifdef XFS_BTREE_TRACE ktrace_free(xfs_bmbt_trace_buf); 
+ ktrace_free(xfs_inobt_trace_buf); + ktrace_free(xfs_allocbt_trace_buf); #endif #ifdef XFS_BMAP_TRACE ktrace_free(xfs_bmap_trace_buf); diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index 540e4c9..17254b5 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h @@ -30,7 +30,7 @@ #define XFS_ATTR_TRACE 1 #define XFS_BLI_TRACE 1 #define XFS_BMAP_TRACE 1 -#define XFS_BMBT_TRACE 1 +#define XFS_BTREE_TRACE 1 #define XFS_DIR2_TRACE 1 #define XFS_DQUOT_TRACE 1 #define XFS_ILOCK_TRACE 1 diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 60c121f..9c91dfc 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -2219,8 +2219,81 @@ xfs_allocbt_dup_cursor( cur->bc_btnum); } +#ifdef XFS_BTREE_TRACE +ktrace_t *xfs_allocbt_trace_buf; + +STATIC void +xfs_allocbt_trace_enter( + struct xfs_btree_cur *cur, + const char *func, + char *s, + int type, + int line, + __psunsigned_t a0, + __psunsigned_t a1, + __psunsigned_t a2, + __psunsigned_t a3, + __psunsigned_t a4, + __psunsigned_t a5, + __psunsigned_t a6, + __psunsigned_t a7, + __psunsigned_t a8, + __psunsigned_t a9, + __psunsigned_t a10) +{ + ktrace_enter(xfs_allocbt_trace_buf, (void *)(__psint_t)type, + (void *)func, (void *)s, NULL, (void *)cur, + (void *)a0, (void *)a1, (void *)a2, (void *)a3, + (void *)a4, (void *)a5, (void *)a6, (void *)a7, + (void *)a8, (void *)a9, (void *)a10); +} + +STATIC void +xfs_allocbt_trace_cursor( + struct xfs_btree_cur *cur, + __uint32_t *s0, + __uint64_t *l0, + __uint64_t *l1) +{ + *s0 = cur->bc_private.a.agno; + *l0 = cur->bc_rec.a.ar_startblock; + *l1 = cur->bc_rec.a.ar_blockcount; +} + +STATIC void +xfs_allocbt_trace_key( + struct xfs_btree_cur *cur, + union xfs_btree_key *key, + __uint64_t *l0, + __uint64_t *l1) +{ + *l0 = be32_to_cpu(key->alloc.ar_startblock); + *l1 = be32_to_cpu(key->alloc.ar_blockcount); +} + +STATIC void +xfs_allocbt_trace_record( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + __uint64_t *l0, + __uint64_t *l1, + __uint64_t *l2) +{ + *l0 = be32_to_cpu(rec->alloc.ar_startblock); + *l1 = be32_to_cpu(rec->alloc.ar_blockcount); + *l2 = 0; +} +#endif /* XFS_BTREE_TRACE */ + static const struct xfs_btree_ops xfs_allocbt_ops = { .dup_cursor = xfs_allocbt_dup_cursor, + +#ifdef XFS_BTREE_TRACE + .trace_enter = xfs_allocbt_trace_enter, + .trace_cursor = xfs_allocbt_trace_cursor, + .trace_key = xfs_allocbt_trace_key, + .trace_record = xfs_allocbt_trace_record, +#endif }; /* diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 519249e..16f2fde 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -37,16 +37,13 @@ #include "xfs_inode_item.h" #include "xfs_alloc.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_itable.h" #include "xfs_bmap.h" #include "xfs_error.h" #include "xfs_quota.h" -#if defined(XFS_BMBT_TRACE) -ktrace_t *xfs_bmbt_trace_buf; -#endif - /* * Prototypes for internal btree functions. */ @@ -61,245 +58,33 @@ STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *, __uint64_t *, xfs_btree_cur_t **, int *); STATIC int xfs_bmbt_updkey(xfs_btree_cur_t *, xfs_bmbt_key_t *, int); - -#if defined(XFS_BMBT_TRACE) - -static char ARGS[] = "args"; -static char ENTRY[] = "entry"; -static char ERROR[] = "error"; #undef EXIT -static char EXIT[] = "exit"; -/* - * Add a trace buffer entry for the arguments given to the routine, - * generic form. 
- */ -STATIC void -xfs_bmbt_trace_enter( - const char *func, - xfs_btree_cur_t *cur, - char *s, - int type, - int line, - __psunsigned_t a0, - __psunsigned_t a1, - __psunsigned_t a2, - __psunsigned_t a3, - __psunsigned_t a4, - __psunsigned_t a5, - __psunsigned_t a6, - __psunsigned_t a7, - __psunsigned_t a8, - __psunsigned_t a9, - __psunsigned_t a10) -{ - xfs_inode_t *ip; - int whichfork; +#define ENTRY XBT_ENTRY +#define ERROR XBT_ERROR +#define EXIT XBT_EXIT - ip = cur->bc_private.b.ip; - whichfork = cur->bc_private.b.whichfork; - ktrace_enter(xfs_bmbt_trace_buf, - (void *)((__psint_t)type | (whichfork << 8) | (line << 16)), - (void *)func, (void *)s, (void *)ip, (void *)cur, - (void *)a0, (void *)a1, (void *)a2, (void *)a3, - (void *)a4, (void *)a5, (void *)a6, (void *)a7, - (void *)a8, (void *)a9, (void *)a10); - ASSERT(ip->i_btrace); - ktrace_enter(ip->i_btrace, - (void *)((__psint_t)type | (whichfork << 8) | (line << 16)), - (void *)func, (void *)s, (void *)ip, (void *)cur, - (void *)a0, (void *)a1, (void *)a2, (void *)a3, - (void *)a4, (void *)a5, (void *)a6, (void *)a7, - (void *)a8, (void *)a9, (void *)a10); -} /* - * Add a trace buffer entry for arguments, for a buffer & 1 integer arg. + * Keep the XFS_BMBT_TRACE_ names around for now until all code using them + * is converted to be generic and thus switches to the XFS_BTREE_TRACE_ names. */ -STATIC void -xfs_bmbt_trace_argbi( - const char *func, - xfs_btree_cur_t *cur, - xfs_buf_t *b, - int i, - int line) -{ - xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGBI, line, - (__psunsigned_t)b, i, 0, 0, - 0, 0, 0, 0, - 0, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for a buffer & 2 integer args. - */ -STATIC void -xfs_bmbt_trace_argbii( - const char *func, - xfs_btree_cur_t *cur, - xfs_buf_t *b, - int i0, - int i1, - int line) -{ - xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGBII, line, - (__psunsigned_t)b, i0, i1, 0, - 0, 0, 0, 0, - 0, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for 3 block-length args - * and an integer arg. - */ -STATIC void -xfs_bmbt_trace_argfffi( - const char *func, - xfs_btree_cur_t *cur, - xfs_dfiloff_t o, - xfs_dfsbno_t b, - xfs_dfilblks_t i, - int j, - int line) -{ - xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGFFFI, line, - o >> 32, (int)o, b >> 32, (int)b, - i >> 32, (int)i, (int)j, 0, - 0, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for one integer arg. - */ -STATIC void -xfs_bmbt_trace_argi( - const char *func, - xfs_btree_cur_t *cur, - int i, - int line) -{ - xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGI, line, - i, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for int, fsblock, key. - */ -STATIC void -xfs_bmbt_trace_argifk( - const char *func, - xfs_btree_cur_t *cur, - int i, - xfs_fsblock_t f, - xfs_dfiloff_t o, - int line) -{ - xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line, - i, (xfs_dfsbno_t)f >> 32, (int)f, o >> 32, - (int)o, 0, 0, 0, - 0, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for int, fsblock, rec. 
- */ -STATIC void -xfs_bmbt_trace_argifr( - const char *func, - xfs_btree_cur_t *cur, - int i, - xfs_fsblock_t f, - xfs_bmbt_rec_t *r, - int line) -{ - xfs_dfsbno_t b; - xfs_dfilblks_t c; - xfs_dfsbno_t d; - xfs_dfiloff_t o; - xfs_bmbt_irec_t s; - - d = (xfs_dfsbno_t)f; - xfs_bmbt_disk_get_all(r, &s); - o = (xfs_dfiloff_t)s.br_startoff; - b = (xfs_dfsbno_t)s.br_startblock; - c = s.br_blockcount; - xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFR, line, - i, d >> 32, (int)d, o >> 32, - (int)o, b >> 32, (int)b, c >> 32, - (int)c, 0, 0); -} - -/* - * Add a trace buffer entry for arguments, for int, key. - */ -STATIC void -xfs_bmbt_trace_argik( - const char *func, - xfs_btree_cur_t *cur, - int i, - xfs_bmbt_key_t *k, - int line) -{ - xfs_dfiloff_t o; - - o = be64_to_cpu(k->br_startoff); - xfs_bmbt_trace_enter(func, cur, ARGS, XFS_BMBT_KTRACE_ARGIFK, line, - i, o >> 32, (int)o, 0, - 0, 0, 0, 0, - 0, 0, 0); -} - -/* - * Add a trace buffer entry for the cursor/operation. - */ -STATIC void -xfs_bmbt_trace_cursor( - const char *func, - xfs_btree_cur_t *cur, - char *s, - int line) -{ - xfs_bmbt_rec_host_t r; - - xfs_bmbt_set_all(&r, &cur->bc_rec.b); - xfs_bmbt_trace_enter(func, cur, s, XFS_BMBT_KTRACE_CUR, line, - (cur->bc_nlevels << 24) | (cur->bc_private.b.flags << 16) | - cur->bc_private.b.allocated, - r.l0 >> 32, (int)r.l0, - r.l1 >> 32, (int)r.l1, - (unsigned long)cur->bc_bufs[0], (unsigned long)cur->bc_bufs[1], - (unsigned long)cur->bc_bufs[2], (unsigned long)cur->bc_bufs[3], - (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1], - (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]); -} - -#define XFS_BMBT_TRACE_ARGBI(c,b,i) \ - xfs_bmbt_trace_argbi(__func__, c, b, i, __LINE__) -#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \ - xfs_bmbt_trace_argbii(__func__, c, b, i, j, __LINE__) -#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \ - xfs_bmbt_trace_argfffi(__func__, c, o, b, i, j, __LINE__) -#define XFS_BMBT_TRACE_ARGI(c,i) \ - xfs_bmbt_trace_argi(__func__, c, i, __LINE__) -#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \ - xfs_bmbt_trace_argifk(__func__, c, i, f, s, __LINE__) -#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ - xfs_bmbt_trace_argifr(__func__, c, i, f, r, __LINE__) -#define XFS_BMBT_TRACE_ARGIK(c,i,k) \ - xfs_bmbt_trace_argik(__func__, c, i, k, __LINE__) -#define XFS_BMBT_TRACE_CURSOR(c,s) \ - xfs_bmbt_trace_cursor(__func__, c, s, __LINE__) -#else -#define XFS_BMBT_TRACE_ARGBI(c,b,i) -#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) -#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) -#define XFS_BMBT_TRACE_ARGI(c,i) -#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) -#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) -#define XFS_BMBT_TRACE_ARGIK(c,i,k) -#define XFS_BMBT_TRACE_CURSOR(c,s) -#endif /* XFS_BMBT_TRACE */ +#define XFS_BMBT_TRACE_ARGBI(c,b,i) \ + XFS_BTREE_TRACE_ARGBI(c,b,i) +#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \ + XFS_BTREE_TRACE_ARGBII(c,b,i,j) +#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \ + XFS_BTREE_TRACE_ARGFFFI(c,o,b,i,j) +#define XFS_BMBT_TRACE_ARGI(c,i) \ + XFS_BTREE_TRACE_ARGI(c,i) +#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \ + XFS_BTREE_TRACE_ARGIPK(c,i,(union xfs_btree_ptr)f,s) +#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ + XFS_BTREE_TRACE_ARGIPR(c,i, \ + (union xfs_btree_ptr)f, (union xfs_btree_rec *)r) +#define XFS_BMBT_TRACE_ARGIK(c,i,k) \ + XFS_BTREE_TRACE_ARGIK(c,i,(union xfs_btree_key *)k) +#define XFS_BMBT_TRACE_CURSOR(c,s) \ + XFS_BTREE_TRACE_CURSOR(c,s) /* @@ -1485,7 +1270,8 @@ xfs_bmbt_split( xfs_bmbt_rec_t *rrp; /* right record pointer */ XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, 
*startoff); + // disable until merged into common code +// XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, *startoff); args.tp = cur->bc_tp; args.mp = cur->bc_mp; lbp = cur->bc_bufs[level]; @@ -2629,8 +2415,100 @@ xfs_bmbt_dup_cursor( return new; } +#ifdef XFS_BTREE_TRACE +ktrace_t *xfs_bmbt_trace_buf; + +STATIC void +xfs_bmbt_trace_enter( + struct xfs_btree_cur *cur, + const char *func, + char *s, + int type, + int line, + __psunsigned_t a0, + __psunsigned_t a1, + __psunsigned_t a2, + __psunsigned_t a3, + __psunsigned_t a4, + __psunsigned_t a5, + __psunsigned_t a6, + __psunsigned_t a7, + __psunsigned_t a8, + __psunsigned_t a9, + __psunsigned_t a10) +{ + struct xfs_inode *ip = cur->bc_private.b.ip; + int whichfork = cur->bc_private.b.whichfork; + + ktrace_enter(xfs_bmbt_trace_buf, + (void *)((__psint_t)type | (whichfork << 8) | (line << 16)), + (void *)func, (void *)s, (void *)ip, (void *)cur, + (void *)a0, (void *)a1, (void *)a2, (void *)a3, + (void *)a4, (void *)a5, (void *)a6, (void *)a7, + (void *)a8, (void *)a9, (void *)a10); + ktrace_enter(ip->i_btrace, + (void *)((__psint_t)type | (whichfork << 8) | (line << 16)), + (void *)func, (void *)s, (void *)ip, (void *)cur, + (void *)a0, (void *)a1, (void *)a2, (void *)a3, + (void *)a4, (void *)a5, (void *)a6, (void *)a7, + (void *)a8, (void *)a9, (void *)a10); +} + +STATIC void +xfs_bmbt_trace_cursor( + struct xfs_btree_cur *cur, + __uint32_t *s0, + __uint64_t *l0, + __uint64_t *l1) +{ + struct xfs_bmbt_rec_host r; + + xfs_bmbt_set_all(&r, &cur->bc_rec.b); + + *s0 = (cur->bc_nlevels << 24) | + (cur->bc_private.b.flags << 16) | + cur->bc_private.b.allocated; + *l0 = r.l0; + *l1 = r.l1; +} + +STATIC void +xfs_bmbt_trace_key( + struct xfs_btree_cur *cur, + union xfs_btree_key *key, + __uint64_t *l0, + __uint64_t *l1) +{ + *l0 = be64_to_cpu(key->bmbt.br_startoff); + *l1 = 0; +} + +STATIC void +xfs_bmbt_trace_record( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + __uint64_t *l0, + __uint64_t *l1, + __uint64_t *l2) +{ + struct xfs_bmbt_irec irec; + + xfs_bmbt_disk_get_all(&rec->bmbt, &irec); + *l0 = irec.br_startoff; + *l1 = irec.br_startblock; + *l2 = irec.br_blockcount; +} +#endif /* XFS_BTREE_TRACE */ + static const struct xfs_btree_ops xfs_bmbt_ops = { .dup_cursor = xfs_bmbt_dup_cursor, + +#ifdef XFS_BTREE_TRACE + .trace_enter = xfs_bmbt_trace_enter, + .trace_cursor = xfs_bmbt_trace_cursor, + .trace_key = xfs_bmbt_trace_key, + .trace_record = xfs_bmbt_trace_record, +#endif }; /* diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 4f12fff..5628d89 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -233,24 +233,6 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; #ifdef __KERNEL__ -#if defined(XFS_BMBT_TRACE) -/* - * Trace buffer entry types. - */ -#define XFS_BMBT_KTRACE_ARGBI 1 -#define XFS_BMBT_KTRACE_ARGBII 2 -#define XFS_BMBT_KTRACE_ARGFFFI 3 -#define XFS_BMBT_KTRACE_ARGI 4 -#define XFS_BMBT_KTRACE_ARGIFK 5 -#define XFS_BMBT_KTRACE_ARGIFR 6 -#define XFS_BMBT_KTRACE_ARGIK 7 -#define XFS_BMBT_KTRACE_CUR 8 - -#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */ -#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */ -extern ktrace_t *xfs_bmbt_trace_buf; -#endif - /* * Prototypes for xfs_bmap.c to call. 
*/ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 57d3bd3..0647a0e 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -182,6 +182,25 @@ do { \ struct xfs_btree_ops { /* cursor operations */ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); + + /* btree tracing */ +#ifdef XFS_BTREE_TRACE + void (*trace_enter)(struct xfs_btree_cur *, const char *, + char *, int, int, __psunsigned_t, + __psunsigned_t, __psunsigned_t, + __psunsigned_t, __psunsigned_t, + __psunsigned_t, __psunsigned_t, + __psunsigned_t, __psunsigned_t, + __psunsigned_t, __psunsigned_t); + void (*trace_cursor)(struct xfs_btree_cur *, __uint32_t *, + __uint64_t *, __uint64_t *); + void (*trace_key)(struct xfs_btree_cur *, + union xfs_btree_key *, __uint64_t *, + __uint64_t *); + void (*trace_record)(struct xfs_btree_cur *, + union xfs_btree_rec *, __uint64_t *, + __uint64_t *, __uint64_t *); +#endif }; /* diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 8c0c474..fc99524 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -2085,8 +2085,81 @@ xfs_inobt_dup_cursor( cur->bc_private.a.agbp, cur->bc_private.a.agno); } +#ifdef XFS_BTREE_TRACE +ktrace_t *xfs_inobt_trace_buf; + +STATIC void +xfs_inobt_trace_enter( + struct xfs_btree_cur *cur, + const char *func, + char *s, + int type, + int line, + __psunsigned_t a0, + __psunsigned_t a1, + __psunsigned_t a2, + __psunsigned_t a3, + __psunsigned_t a4, + __psunsigned_t a5, + __psunsigned_t a6, + __psunsigned_t a7, + __psunsigned_t a8, + __psunsigned_t a9, + __psunsigned_t a10) +{ + ktrace_enter(xfs_inobt_trace_buf, (void *)(__psint_t)type, + (void *)func, (void *)s, NULL, (void *)cur, + (void *)a0, (void *)a1, (void *)a2, (void *)a3, + (void *)a4, (void *)a5, (void *)a6, (void *)a7, + (void *)a8, (void *)a9, (void *)a10); +} + +STATIC void +xfs_inobt_trace_cursor( + struct xfs_btree_cur *cur, + __uint32_t *s0, + __uint64_t *l0, + __uint64_t *l1) +{ + *s0 = cur->bc_private.a.agno; + *l0 = cur->bc_rec.i.ir_startino; + *l1 = cur->bc_rec.i.ir_free; +} + +STATIC void +xfs_inobt_trace_key( + struct xfs_btree_cur *cur, + union xfs_btree_key *key, + __uint64_t *l0, + __uint64_t *l1) +{ + *l0 = be32_to_cpu(key->inobt.ir_startino); + *l1 = 0; +} + +STATIC void +xfs_inobt_trace_record( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + __uint64_t *l0, + __uint64_t *l1, + __uint64_t *l2) +{ + *l0 = be32_to_cpu(rec->inobt.ir_startino); + *l1 = be32_to_cpu(rec->inobt.ir_freecount); + *l2 = be64_to_cpu(rec->inobt.ir_free); +} +#endif /* XFS_BTREE_TRACE */ + static const struct xfs_btree_ops xfs_inobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, + +#ifdef XFS_BTREE_TRACE + .trace_enter = xfs_inobt_trace_enter, + .trace_cursor = xfs_inobt_trace_cursor, + .trace_key = xfs_inobt_trace_key, + .trace_record = xfs_inobt_trace_record, +#endif }; /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2a158a2..cc0474d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -41,6 +41,7 @@ #include "xfs_buf_item.h" #include "xfs_inode_item.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_bmap.h" @@ -835,7 +836,7 @@ xfs_inode_alloc( #ifdef XFS_BMAP_TRACE ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); #endif -#ifdef XFS_BMBT_TRACE +#ifdef XFS_BTREE_TRACE ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); #endif #ifdef XFS_RW_TRACE @@ -2673,7 +2674,7 @@ xfs_idestroy( #ifdef XFS_BMAP_TRACE ktrace_free(ip->i_xtrace); #endif -#ifdef 
XFS_BMBT_TRACE +#ifdef XFS_BTREE_TRACE ktrace_free(ip->i_btrace); #endif #ifdef XFS_RW_TRACE diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 3af1f6d..2a69a7d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -245,7 +245,7 @@ typedef struct xfs_inode { #ifdef XFS_BMAP_TRACE struct ktrace *i_xtrace; /* inode extent list trace */ #endif -#ifdef XFS_BMBT_TRACE +#ifdef XFS_BTREE_TRACE struct ktrace *i_btrace; /* inode bmap btree trace */ #endif #ifdef XFS_RW_TRACE -- cgit v0.10.2 From ce5e42db421a41b1ad0cfd68c6058566b963e14b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:55:23 +1100 Subject: [XFS] add get_maxrecs btree operation Factor xfs_btree_maxrecs into a per-btree operation. The get_maxrecs method is based on a patch from Dave Chinner. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32188a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 9c91dfc..1f268b6 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -2219,6 +2219,14 @@ xfs_allocbt_dup_cursor( cur->bc_btnum); } +STATIC int +xfs_allocbt_get_maxrecs( + struct xfs_btree_cur *cur, + int level) +{ + return cur->bc_mp->m_alloc_mxr[level != 0]; +} + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_allocbt_trace_buf; @@ -2287,6 +2295,7 @@ xfs_allocbt_trace_record( static const struct xfs_btree_ops xfs_allocbt_ops = { .dup_cursor = xfs_allocbt_dup_cursor, + .get_maxrecs = xfs_allocbt_get_maxrecs, #ifdef XFS_BTREE_TRACE .trace_enter = xfs_allocbt_trace_enter, diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 16f2fde..bdcfbea 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -2415,6 +2415,14 @@ xfs_bmbt_dup_cursor( return new; } +STATIC int +xfs_bmbt_get_maxrecs( + struct xfs_btree_cur *cur, + int level) +{ + return XFS_BMAP_BLOCK_IMAXRECS(level, cur); +} + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_bmbt_trace_buf; @@ -2502,6 +2510,7 @@ xfs_bmbt_trace_record( static const struct xfs_btree_ops xfs_bmbt_ops = { .dup_cursor = xfs_bmbt_dup_cursor, + .get_maxrecs = xfs_bmbt_get_maxrecs, #ifdef XFS_BTREE_TRACE .trace_enter = xfs_bmbt_trace_enter, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 966d58d5..893e86f 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -51,31 +51,6 @@ const __uint32_t xfs_magics[XFS_BTNUM_MAX] = { }; /* - * Checking routine: return maxrecs for the block. - */ -STATIC int /* number of records fitting in block */ -xfs_btree_maxrecs( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_btree_block_t *block) /* generic btree block pointer */ -{ - switch (cur->bc_btnum) { - case XFS_BTNUM_BNO: - case XFS_BTNUM_CNT: - return (int)XFS_ALLOC_BLOCK_MAXRECS( - be16_to_cpu(block->bb_level), cur); - case XFS_BTNUM_BMAP: - return (int)XFS_BMAP_BLOCK_IMAXRECS( - be16_to_cpu(block->bb_level), cur); - case XFS_BTNUM_INO: - return (int)XFS_INOBT_BLOCK_MAXRECS( - be16_to_cpu(block->bb_level), cur); - default: - ASSERT(0); - return 0; - } -} - -/* * External routines. 
*/ @@ -207,7 +182,7 @@ xfs_btree_check_lblock( be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= - xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) && + cur->bc_ops->get_maxrecs(cur, level) && block->bb_leftsib && (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO || XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) && @@ -245,7 +220,7 @@ xfs_btree_check_sblock( be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= - xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) && + cur->bc_ops->get_maxrecs(cur, level) && (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK || be32_to_cpu(block->bb_leftsib) < agflen) && block->bb_leftsib && diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 0647a0e..5398cd0 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -183,6 +183,9 @@ struct xfs_btree_ops { /* cursor operations */ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); + /* records in block/level */ + int (*get_maxrecs)(struct xfs_btree_cur *cur, int level); + /* btree tracing */ #ifdef XFS_BTREE_TRACE void (*trace_enter)(struct xfs_btree_cur *, const char *, diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index fc99524..18867f1 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -2085,6 +2085,14 @@ xfs_inobt_dup_cursor( cur->bc_private.a.agbp, cur->bc_private.a.agno); } +STATIC int +xfs_inobt_get_maxrecs( + struct xfs_btree_cur *cur, + int level) +{ + return cur->bc_mp->m_inobt_mxr[level != 0]; +} + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_inobt_trace_buf; @@ -2153,6 +2161,7 @@ xfs_inobt_trace_record( static const struct xfs_btree_ops xfs_inobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, + .get_maxrecs = xfs_inobt_get_maxrecs, #ifdef XFS_BTREE_TRACE .trace_enter = xfs_inobt_trace_enter, -- cgit v0.10.2 From 65f1eaeac0efc968797f3ac955b85ba3f5d4f9c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:55:34 +1100 Subject: [XFS] add helpers for addressing entities inside a btree block Add new helpers in xfs_btree.c to find the record, key and block pointer entries inside a btree block. To implement this genericly the ->get_maxrecs methods and two new xfs_btree_ops entries for the key and record sizes are used. Also add a big comment describing how the addressing inside a btree block works. Note that these helpers are unused until users are introduced in the next patches and this patch will thus cause some harmless compiler warnings. 
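Usage sketch (illustration only, not code added by this patch; cur, n and block stand for a cursor, an entry index and a block pointer inside some generic routine):

	union xfs_btree_key	*kp = xfs_btree_key_addr(cur, n, block);
	union xfs_btree_ptr	*pp = xfs_btree_ptr_addr(cur, n, block);

	/*
	 * kp sits at the block header plus (n - 1) * key_len; pp follows
	 * the full set of get_maxrecs(cur, level) keys for the block's
	 * level.  All indices start at one, not zero.
	 */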
SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32189a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 1f268b6..9e2421c 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -2294,6 +2294,9 @@ xfs_allocbt_trace_record( #endif /* XFS_BTREE_TRACE */ static const struct xfs_btree_ops xfs_allocbt_ops = { + .rec_len = sizeof(xfs_alloc_rec_t), + .key_len = sizeof(xfs_alloc_key_t), + .dup_cursor = xfs_allocbt_dup_cursor, .get_maxrecs = xfs_allocbt_get_maxrecs, diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index bdcfbea..a71010a 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -2509,6 +2509,9 @@ xfs_bmbt_trace_record( #endif /* XFS_BTREE_TRACE */ static const struct xfs_btree_ops xfs_bmbt_ops = { + .rec_len = sizeof(xfs_bmbt_rec_t), + .key_len = sizeof(xfs_bmbt_key_t), + .dup_cursor = xfs_bmbt_dup_cursor, .get_maxrecs = xfs_bmbt_get_maxrecs, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 893e86f..4aec7c7 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -403,6 +403,136 @@ xfs_btree_dup_cursor( } /* + * XFS btree block layout and addressing: + * + * There are two types of blocks in the btree: leaf and non-leaf blocks. + * + * The leaf record start with a header then followed by records containing + * the values. A non-leaf block also starts with the same header, and + * then first contains lookup keys followed by an equal number of pointers + * to the btree blocks at the previous level. + * + * +--------+-------+-------+-------+-------+-------+-------+ + * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N | + * +--------+-------+-------+-------+-------+-------+-------+ + * + * +--------+-------+-------+-------+-------+-------+-------+ + * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N | + * +--------+-------+-------+-------+-------+-------+-------+ + * + * The header is called struct xfs_btree_block for reasons better left unknown + * and comes in different versions for short (32bit) and long (64bit) block + * pointers. The record and key structures are defined by the btree instances + * and opaque to the btree core. The block pointers are simple disk endian + * integers, available in a short (32bit) and long (64bit) variant. + * + * The helpers below calculate the offset of a given record, key or pointer + * into a btree block (xfs_btree_*_offset) or return a pointer to the given + * record, key or pointer (xfs_btree_*_addr). Note that all addressing + * inside the btree block is done using indices starting at one, not zero! + */ + +/* + * Return size of the btree block header for this btree instance. + */ +static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) +{ + return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? + sizeof(struct xfs_btree_lblock) : + sizeof(struct xfs_btree_sblock); +} + +/* + * Return size of btree block pointers for this btree instance. + */ +static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur) +{ + return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? + sizeof(__be64) : sizeof(__be32); +} + +/* + * Calculate offset of the n-th record in a btree block. + */ +STATIC size_t +xfs_btree_rec_offset( + struct xfs_btree_cur *cur, + int n) +{ + return xfs_btree_block_len(cur) + + (n - 1) * cur->bc_ops->rec_len; +} + +/* + * Calculate offset of the n-th key in a btree block. 
+ */ +STATIC size_t +xfs_btree_key_offset( + struct xfs_btree_cur *cur, + int n) +{ + return xfs_btree_block_len(cur) + + (n - 1) * cur->bc_ops->key_len; +} + +/* + * Calculate offset of the n-th block pointer in a btree block. + */ +STATIC size_t +xfs_btree_ptr_offset( + struct xfs_btree_cur *cur, + int n, + int level) +{ + return xfs_btree_block_len(cur) + + cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len + + (n - 1) * xfs_btree_ptr_len(cur); +} + +/* + * Return a pointer to the n-th record in the btree block. + */ +STATIC union xfs_btree_rec * +xfs_btree_rec_addr( + struct xfs_btree_cur *cur, + int n, + struct xfs_btree_block *block) +{ + return (union xfs_btree_rec *) + ((char *)block + xfs_btree_rec_offset(cur, n)); +} + +/* + * Return a pointer to the n-th key in the btree block. + */ +STATIC union xfs_btree_key * +xfs_btree_key_addr( + struct xfs_btree_cur *cur, + int n, + struct xfs_btree_block *block) +{ + return (union xfs_btree_key *) + ((char *)block + xfs_btree_key_offset(cur, n)); +} + +/* + * Return a pointer to the n-th block pointer in the btree block. + */ +STATIC union xfs_btree_ptr * +xfs_btree_ptr_addr( + struct xfs_btree_cur *cur, + int n, + struct xfs_btree_block *block) +{ + int level = xfs_btree_get_level(block); + + ASSERT(block->bb_level != 0); + + return (union xfs_btree_ptr *) + ((char *)block + xfs_btree_ptr_offset(cur, n, level)); +} + +/* * Get a the root block which is stored in the inode. * * For now this btree implementation assumes the btree root is always diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 5398cd0..593f82b 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -180,6 +180,10 @@ do { \ #define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */ struct xfs_btree_ops { + /* size of the key and record structures */ + size_t key_len; + size_t rec_len; + /* cursor operations */ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); @@ -497,6 +501,15 @@ xfs_btree_setbuf( int lev, /* level in btree */ struct xfs_buf *bp); /* new buffer to set */ + +/* + * Helpers. + */ +static inline int xfs_btree_get_level(struct xfs_btree_block *block) +{ + return be16_to_cpu(block->bb_level); +} + #endif /* __KERNEL__ */ diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 18867f1..fc6db94 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -2160,6 +2160,9 @@ xfs_inobt_trace_record( #endif /* XFS_BTREE_TRACE */ static const struct xfs_btree_ops xfs_inobt_ops = { + .rec_len = sizeof(xfs_inobt_rec_t), + .key_len = sizeof(xfs_inobt_key_t), + .dup_cursor = xfs_inobt_dup_cursor, .get_maxrecs = xfs_inobt_get_maxrecs, -- cgit v0.10.2 From 637aa50f461b8ea6b1e8bf9877b0d13d00085043 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:55:45 +1100 Subject: [XFS] implement generic xfs_btree_increment From: Dave Chinner Because this is the first major generic btree routine this patch includes some infrastrucure, first a few routines to deal with a btree block that can be either in short or long form, second xfs_btree_read_buf_block, which is the new central routine to read a btree block given a cursor, and third the new xfs_btree_ptr_addr routine to calculate the address for a given btree pointer record. 
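Callers convert mechanically; a typical leaf walk now advances with the generic routine until it reports that the cursor fell off the right edge of the tree, roughly like this (sketch; error, stat and the error0 label are stand-ins for the caller's own locals):

	do {
		/* ... process the record the cursor currently points at ... */
		error = xfs_btree_increment(cur, 0, &stat);
		if (error)
			goto error0;
	} while (stat == 1);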
[hch: split out from bigger patch and minor adaptions] SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32190a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 69833eb..b8bb694 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -818,7 +818,7 @@ xfs_alloc_ag_vextent_near( XFS_WANT_CORRUPTED_GOTO(i == 1, error0); if (ltlen >= args->minlen) break; - if ((error = xfs_alloc_increment(cnt_cur, 0, &i))) + if ((error = xfs_btree_increment(cnt_cur, 0, &i))) goto error0; } while (i); ASSERT(ltlen >= args->minlen); @@ -828,7 +828,7 @@ xfs_alloc_ag_vextent_near( i = cnt_cur->bc_ptrs[0]; for (j = 1, blen = 0, bdiff = 0; !error && j && (blen < args->maxlen || bdiff > 0); - error = xfs_alloc_increment(cnt_cur, 0, &j)) { + error = xfs_btree_increment(cnt_cur, 0, &j)) { /* * For each entry, decide if it's better than * the previous best entry. @@ -938,7 +938,7 @@ xfs_alloc_ag_vextent_near( * Increment the cursor, so we will point at the entry just right * of the leftward entry if any, or to the leftmost entry. */ - if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) + if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) goto error0; if (!i) { /* @@ -977,7 +977,7 @@ xfs_alloc_ag_vextent_near( args->minlen, >bnoa, >lena); if (gtlena >= args->minlen) break; - if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) + if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) goto error0; if (!i) { xfs_btree_del_cursor(bno_cur_gt, @@ -1066,7 +1066,7 @@ xfs_alloc_ag_vextent_near( /* * Fell off the right end. */ - if ((error = xfs_alloc_increment( + if ((error = xfs_btree_increment( bno_cur_gt, 0, &i))) goto error0; if (!i) { @@ -1548,7 +1548,7 @@ xfs_free_ag_extent( * Look for a neighboring block on the right (higher block numbers) * that is contiguous with this space. */ - if ((error = xfs_alloc_increment(bno_cur, 0, &haveright))) + if ((error = xfs_btree_increment(bno_cur, 0, &haveright))) goto error0; if (haveright) { /* diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 9e2421c..febc2d5 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -303,7 +303,7 @@ xfs_alloc_delrec( */ i = xfs_btree_lastrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_increment(tcur, level, &i))) + if ((error = xfs_btree_increment(tcur, level, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); i = xfs_btree_lastrec(tcur, level); @@ -517,7 +517,7 @@ xfs_alloc_delrec( * us, increment the cursor at that level. */ else if (level + 1 < cur->bc_nlevels && - (error = xfs_alloc_increment(cur, level + 1, &i))) + (error = xfs_btree_increment(cur, level + 1, &i))) return error; /* * Fix up the number of records in the surviving block. @@ -1134,7 +1134,7 @@ xfs_alloc_lookup( int i; cur->bc_ptrs[0] = keyno; - if ((error = xfs_alloc_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); *stat = 1; @@ -1570,7 +1570,7 @@ xfs_alloc_rshift( return error; i = xfs_btree_lastrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_increment(tcur, level, &i)) || + if ((error = xfs_btree_increment(tcur, level, &i)) || (error = xfs_alloc_updkey(tcur, rkp, level + 1))) goto error0; xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); @@ -1943,97 +1943,6 @@ xfs_alloc_get_rec( } /* - * Increment cursor by one record at the level. 
- * For nonzero levels the leaf-ward information is untouched. - */ -int /* error */ -xfs_alloc_increment( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level in btree, 0 is leaf */ - int *stat) /* success/failure */ -{ - xfs_alloc_block_t *block; /* btree block */ - xfs_buf_t *bp; /* tree block buffer */ - int error; /* error return value */ - int lev; /* btree level */ - - ASSERT(level < cur->bc_nlevels); - /* - * Read-ahead to the right at this level. - */ - xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); - /* - * Get a pointer to the btree block. - */ - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_ALLOC_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, bp))) - return error; -#endif - /* - * Increment the ptr at this level. If we're still in the block - * then we're done. - */ - if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) { - *stat = 1; - return 0; - } - /* - * If we just went off the right edge of the tree, return failure. - */ - if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) { - *stat = 0; - return 0; - } - /* - * March up the tree incrementing pointers. - * Stop when we don't go off the right edge of a block. - */ - for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - bp = cur->bc_bufs[lev]; - block = XFS_BUF_TO_ALLOC_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, lev, bp))) - return error; -#endif - if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs)) - break; - /* - * Read-ahead the right block, we're going to read it - * in the next loop. - */ - xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); - } - /* - * If we went off the root then we are seriously confused. - */ - ASSERT(lev < cur->bc_nlevels); - /* - * Now walk back down the tree, fixing up the cursor's buffer - * pointers and key numbers. - */ - for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_ALLOC_BLOCK(bp); - lev > level; ) { - xfs_agblock_t agbno; /* block number of btree block */ - - agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, agbno, 0, &bp, - XFS_ALLOC_BTREE_REF))) - return error; - lev--; - xfs_btree_setbuf(cur, lev, bp); - block = XFS_BUF_TO_ALLOC_BLOCK(bp); - if ((error = xfs_btree_check_sblock(cur, block, lev, bp))) - return error; - cur->bc_ptrs[lev] = 1; - } - *stat = 1; - return 0; -} - -/* * Insert the current record at the point referenced by cur. * The cursor may be inconsistent on return if splits have been done. */ diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 6073538..643cfab 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -114,12 +114,6 @@ extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno, xfs_extlen_t *len, int *stat); /* - * Increment cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -extern int xfs_alloc_increment(struct xfs_btree_cur *cur, int level, int *stat); - -/* * Insert the current record at the point referenced by cur. * The cursor may be inconsistent on return if splits have been done. 
*/ diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index a84d0c3..4b1ec44 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -1646,7 +1646,7 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_blockcount - new->br_blockcount, oldext))) goto done; - if ((error = xfs_bmbt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) goto done; if ((error = xfs_bmbt_update(cur, new->br_startoff, new->br_startblock, @@ -3253,7 +3253,7 @@ xfs_bmap_del_extent( got.br_startblock, temp, got.br_state))) goto done; - if ((error = xfs_bmbt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) goto done; cur->bc_rec.b = new; error = xfs_bmbt_insert(cur, &i); diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index a71010a..2d29a49 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -254,7 +254,7 @@ xfs_bmbt_delrec( if (rbno != NULLFSBLOCK) { i = xfs_btree_lastrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_bmbt_increment(tcur, level, &i))) { + if ((error = xfs_btree_increment(tcur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -445,7 +445,7 @@ xfs_bmbt_delrec( cur->bc_bufs[level] = lbp; cur->bc_ptrs[level] += lrecs; cur->bc_ra[level] = 0; - } else if ((error = xfs_bmbt_increment(cur, level + 1, &i))) { + } else if ((error = xfs_btree_increment(cur, level + 1, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -929,7 +929,7 @@ xfs_bmbt_lookup( if (dir == XFS_LOOKUP_GE && keyno > be16_to_cpu(block->bb_numrecs) && be64_to_cpu(block->bb_rightsib) != NULLDFSBNO) { cur->bc_ptrs[0] = keyno; - if ((error = xfs_bmbt_increment(cur, 0, &i))) { + if ((error = xfs_btree_increment(cur, 0, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } @@ -1202,7 +1202,7 @@ xfs_bmbt_rshift( } i = xfs_btree_lastrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_bmbt_increment(tcur, level, &i))) { + if ((error = xfs_btree_increment(tcur, level, &i))) { XFS_BMBT_TRACE_CURSOR(tcur, ERROR); goto error1; } @@ -1761,86 +1761,6 @@ xfs_bmbt_disk_get_startoff( } /* - * Increment cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. 
- */ -int /* error */ -xfs_bmbt_increment( - xfs_btree_cur_t *cur, - int level, - int *stat) /* success/failure */ -{ - xfs_bmbt_block_t *block; - xfs_buf_t *bp; - int error; /* error return value */ - xfs_fsblock_t fsbno; - int lev; - xfs_mount_t *mp; - xfs_trans_t *tp; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGI(cur, level); - ASSERT(level < cur->bc_nlevels); - - xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); - block = xfs_bmbt_get_block(cur, level, &bp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, block, level, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - if (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - block = xfs_bmbt_get_block(cur, lev, &bp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs)) - break; - xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); - } - if (lev == cur->bc_nlevels) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - tp = cur->bc_tp; - mp = cur->bc_mp; - for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) { - fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur)); - if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp, - XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - lev--; - xfs_btree_setbuf(cur, lev, bp); - block = XFS_BUF_TO_BMBT_BLOCK(bp); - if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - cur->bc_ptrs[lev] = 1; - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; -} - -/* * Insert the current record at the point referenced by cur. 
* * A multi-level split of the tree on insert will invalidate the original diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 5628d89..a45be38 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -251,7 +251,6 @@ extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); -extern int xfs_bmbt_increment(struct xfs_btree_cur *, int, int *); extern int xfs_bmbt_insert(struct xfs_btree_cur *, int *); extern void xfs_bmbt_log_block(struct xfs_btree_cur *, struct xfs_buf *, int); extern void xfs_bmbt_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 4aec7c7..e9ab86b 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -35,6 +35,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_error.h" @@ -949,3 +950,224 @@ xfs_btree_setbuf( cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; } } + +STATIC int +xfs_btree_ptr_is_null( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + return be64_to_cpu(ptr->l) == NULLFSBLOCK; + else + return be32_to_cpu(ptr->s) == NULLAGBLOCK; +} + +/* + * Get/set/init sibling pointers + */ +STATIC void +xfs_btree_get_sibling( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + union xfs_btree_ptr *ptr, + int lr) +{ + ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + if (lr == XFS_BB_RIGHTSIB) + ptr->l = block->bb_u.l.bb_rightsib; + else + ptr->l = block->bb_u.l.bb_leftsib; + } else { + if (lr == XFS_BB_RIGHTSIB) + ptr->s = block->bb_u.s.bb_rightsib; + else + ptr->s = block->bb_u.s.bb_leftsib; + } +} + +STATIC xfs_daddr_t +xfs_btree_ptr_to_daddr( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + ASSERT(be64_to_cpu(ptr->l) != NULLFSBLOCK); + + return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); + } else { + ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); + ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK); + + return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, + be32_to_cpu(ptr->s)); + } +} + +STATIC void +xfs_btree_set_refs( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + switch (cur->bc_btnum) { + case XFS_BTNUM_BNO: + case XFS_BTNUM_CNT: + XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF); + break; + case XFS_BTNUM_INO: + XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF); + break; + case XFS_BTNUM_BMAP: + XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF); + break; + default: + ASSERT(0); + } +} + +/* + * Read in the buffer at the given ptr and return the buffer and + * the block pointer within the buffer. 
+ */ +STATIC int +xfs_btree_read_buf_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + int level, + int flags, + struct xfs_btree_block **block, + struct xfs_buf **bpp) +{ + struct xfs_mount *mp = cur->bc_mp; + xfs_daddr_t d; + int error; + + /* need to sort out how callers deal with failures first */ + ASSERT(!(flags & XFS_BUF_TRYLOCK)); + + d = xfs_btree_ptr_to_daddr(cur, ptr); + error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, + mp->m_bsize, flags, bpp); + if (error) + return error; + + ASSERT(*bpp != NULL); + ASSERT(!XFS_BUF_GETERROR(*bpp)); + + xfs_btree_set_refs(cur, *bpp); + *block = XFS_BUF_TO_BLOCK(*bpp); + + error = xfs_btree_check_block(cur, *block, level, *bpp); + if (error) + xfs_trans_brelse(cur->bc_tp, *bpp); + return error; +} + +/* + * Increment cursor by one record at the level. + * For nonzero levels the leaf-ward information is untouched. + */ +int /* error */ +xfs_btree_increment( + struct xfs_btree_cur *cur, + int level, + int *stat) /* success/failure */ +{ + struct xfs_btree_block *block; + union xfs_btree_ptr ptr; + struct xfs_buf *bp; + int error; /* error return value */ + int lev; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, level); + + ASSERT(level < cur->bc_nlevels); + + /* Read-ahead to the right at this level. */ + xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); + + /* Get a pointer to the btree block. */ + block = xfs_btree_get_block(cur, level, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) + goto error0; +#endif + + /* We're done if we remain in the block after the increment. */ + if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block)) + goto out1; + + /* Fail if we just went off the right edge of the tree. */ + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); + if (xfs_btree_ptr_is_null(cur, &ptr)) + goto out0; + + XFS_BTREE_STATS_INC(cur, increment); + + /* + * March up the tree incrementing pointers. + * Stop when we don't go off the right edge of a block. + */ + for (lev = level + 1; lev < cur->bc_nlevels; lev++) { + block = xfs_btree_get_block(cur, lev, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, lev, bp); + if (error) + goto error0; +#endif + + if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block)) + break; + + /* Read-ahead the right block for the next loop. */ + xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); + } + + /* + * If we went off the root then we are either seriously + * confused or have the tree root in an inode. + */ + if (lev == cur->bc_nlevels) { + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) + goto out0; + ASSERT(0); + error = EFSCORRUPTED; + goto error0; + } + ASSERT(lev < cur->bc_nlevels); + + /* + * Now walk back down the tree, fixing up the cursor's buffer + * pointers and key numbers. 
+ */ + for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { + union xfs_btree_ptr *ptrp; + + ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); + error = xfs_btree_read_buf_block(cur, ptrp, --lev, + 0, &block, &bp); + if (error) + goto error0; + + xfs_btree_setbuf(cur, lev, bp); + cur->bc_ptrs[lev] = 1; + } +out1: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 593f82b..f5a4b8e 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -503,8 +503,18 @@ xfs_btree_setbuf( /* + * Common btree core entry points. + */ +int xfs_btree_increment(struct xfs_btree_cur *, int, int *); + +/* * Helpers. */ +static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block) +{ + return be16_to_cpu(block->bb_numrecs); +} + static inline int xfs_btree_get_level(struct xfs_btree_block *block) { return be16_to_cpu(block->bb_level); diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 11bb169..3a8f067 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -695,7 +695,7 @@ nextag: goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); freecount += rec.ir_freecount; - if ((error = xfs_inobt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) goto error0; } while (i == 1); @@ -753,7 +753,7 @@ nextag: /* * Search right with cur, go forward 1 record. */ - if ((error = xfs_inobt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) goto error1; doneright = !i; if (!doneright) { @@ -835,7 +835,7 @@ nextag: * further right. */ else { - if ((error = xfs_inobt_increment(cur, 0, + if ((error = xfs_btree_increment(cur, 0, &i))) goto error1; doneright = !i; @@ -890,7 +890,7 @@ nextag: XFS_WANT_CORRUPTED_GOTO(i == 1, error0); if (rec.ir_freecount > 0) break; - if ((error = xfs_inobt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); } @@ -924,7 +924,7 @@ nextag: goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); freecount += rec.ir_freecount; - if ((error = xfs_inobt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) goto error0; } while (i == 1); ASSERT(freecount == be32_to_cpu(agi->agi_freecount) || @@ -1033,7 +1033,7 @@ xfs_difree( goto error0; if (i) { freecount += rec.ir_freecount; - if ((error = xfs_inobt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) goto error0; } } while (i == 1); @@ -1138,7 +1138,7 @@ xfs_difree( goto error0; if (i) { freecount += rec.ir_freecount; - if ((error = xfs_inobt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) goto error0; } } while (i == 1); diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index fc6db94..41717da 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -253,7 +253,7 @@ xfs_inobt_delrec( */ i = xfs_btree_lastrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_inobt_increment(tcur, level, &i))) + if ((error = xfs_btree_increment(tcur, level, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); i = xfs_btree_lastrec(tcur, level); @@ -463,7 +463,7 @@ xfs_inobt_delrec( * us, increment the cursor at that level. 
*/ else if (level + 1 < cur->bc_nlevels && - (error = xfs_alloc_increment(cur, level + 1, &i))) + (error = xfs_btree_increment(cur, level + 1, &i))) return error; /* * Fix up the number of records in the surviving block. @@ -1014,7 +1014,7 @@ xfs_inobt_lookup( int i; cur->bc_ptrs[0] = keyno; - if ((error = xfs_inobt_increment(cur, 0, &i))) + if ((error = xfs_btree_increment(cur, 0, &i))) return error; ASSERT(i == 1); *stat = 1; @@ -1443,7 +1443,7 @@ xfs_inobt_rshift( if ((error = xfs_btree_dup_cursor(cur, &tcur))) return error; xfs_btree_lastrec(tcur, level); - if ((error = xfs_inobt_increment(tcur, level, &i)) || + if ((error = xfs_btree_increment(tcur, level, &i)) || (error = xfs_inobt_updkey(tcur, rkp, level + 1))) { xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); return error; @@ -1821,97 +1821,6 @@ xfs_inobt_get_rec( } /* - * Increment cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -int /* error */ -xfs_inobt_increment( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level in btree, 0 is leaf */ - int *stat) /* success/failure */ -{ - xfs_inobt_block_t *block; /* btree block */ - xfs_buf_t *bp; /* buffer containing btree block */ - int error; /* error return value */ - int lev; /* btree level */ - - ASSERT(level < cur->bc_nlevels); - /* - * Read-ahead to the right at this level. - */ - xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); - /* - * Get a pointer to the btree block. - */ - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, bp))) - return error; -#endif - /* - * Increment the ptr at this level. If we're still in the block - * then we're done. - */ - if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) { - *stat = 1; - return 0; - } - /* - * If we just went off the right edge of the tree, return failure. - */ - if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) { - *stat = 0; - return 0; - } - /* - * March up the tree incrementing pointers. - * Stop when we don't go off the right edge of a block. - */ - for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - bp = cur->bc_bufs[lev]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, lev, bp))) - return error; -#endif - if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs)) - break; - /* - * Read-ahead the right block, we're going to read it - * in the next loop. - */ - xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); - } - /* - * If we went off the root then we are seriously confused. - */ - ASSERT(lev < cur->bc_nlevels); - /* - * Now walk back down the tree, fixing up the cursor's buffer - * pointers and key numbers. - */ - for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_INOBT_BLOCK(bp); - lev > level; ) { - xfs_agblock_t agbno; /* block number of btree block */ - - agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, agbno, 0, &bp, - XFS_INO_BTREE_REF))) - return error; - lev--; - xfs_btree_setbuf(cur, lev, bp); - block = XFS_BUF_TO_INOBT_BLOCK(bp); - if ((error = xfs_btree_check_sblock(cur, block, lev, bp))) - return error; - cur->bc_ptrs[lev] = 1; - } - *stat = 1; - return 0; -} - -/* * Insert the current record at the point referenced by cur. * The cursor may be inconsistent on return if splits have been done. 
*/ diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index eea4093..07fed62 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -136,12 +136,6 @@ extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, __int32_t *fcnt, xfs_inofree_t *free, int *stat); /* - * Increment cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -extern int xfs_inobt_increment(struct xfs_btree_cur *cur, int level, int *stat); - -/* * Insert the current record at the point referenced by cur. * The cursor may be inconsistent on return if splits have been done. */ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index a5f02f0..42a214b 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -471,7 +471,7 @@ xfs_bulkstat( * In any case, increment to the next record. */ if (!error) - error = xfs_inobt_increment(cur, 0, &tmp); + error = xfs_btree_increment(cur, 0, &tmp); } else { /* * Start of ag. Lookup the first inode chunk. @@ -538,7 +538,7 @@ xfs_bulkstat( * Set agino to after this chunk and bump the cursor. */ agino = gino + XFS_INODES_PER_CHUNK; - error = xfs_inobt_increment(cur, 0, &tmp); + error = xfs_btree_increment(cur, 0, &tmp); cond_resched(); } /* @@ -885,7 +885,7 @@ xfs_inumbers( bufidx = 0; } if (left) { - error = xfs_inobt_increment(cur, 0, &tmp); + error = xfs_btree_increment(cur, 0, &tmp); if (error) { xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); cur = NULL; -- cgit v0.10.2 From 8df4da4a0a642d3a016028c0d922bcb4d5a4a6d7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:55:58 +1100 Subject: [XFS] implement generic xfs_btree_decrement From: Dave Chinner [hch: split out from bigger patch and minor adaptions] SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32191a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index b8bb694..7ca6903 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -961,7 +961,7 @@ xfs_alloc_ag_vextent_near( args->minlen, <bnoa, <lena); if (ltlena >= args->minlen) break; - if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i))) + if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) goto error0; if (!i) { xfs_btree_del_cursor(bno_cur_lt, @@ -1162,7 +1162,7 @@ xfs_alloc_ag_vextent_near( /* * Fell off the left end. */ - if ((error = xfs_alloc_decrement( + if ((error = xfs_btree_decrement( bno_cur_lt, 0, &i))) goto error0; if (!i) { @@ -1321,7 +1321,7 @@ xfs_alloc_ag_vextent_size( bestflen = flen; bestfbno = fbno; for (;;) { - if ((error = xfs_alloc_decrement(cnt_cur, 0, &i))) + if ((error = xfs_btree_decrement(cnt_cur, 0, &i))) goto error0; if (i == 0) break; @@ -1416,7 +1416,7 @@ xfs_alloc_ag_vextent_small( xfs_extlen_t flen; int i; - if ((error = xfs_alloc_decrement(ccur, 0, &i))) + if ((error = xfs_btree_decrement(ccur, 0, &i))) goto error0; if (i) { if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) @@ -1607,7 +1607,7 @@ xfs_free_ag_extent( /* * Move the by-block cursor back to the left neighbor. */ - if ((error = xfs_alloc_decrement(bno_cur, 0, &i))) + if ((error = xfs_btree_decrement(bno_cur, 0, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); #ifdef DEBUG @@ -1653,7 +1653,7 @@ xfs_free_ag_extent( * Back up the by-block cursor to the left neighbor, and * update its length. 
*/ - if ((error = xfs_alloc_decrement(bno_cur, 0, &i))) + if ((error = xfs_btree_decrement(bno_cur, 0, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); nbno = ltbno; diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index febc2d5..6b45481 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -256,7 +256,7 @@ xfs_alloc_delrec( xfs_btree_setbuf(cur, level, NULL); cur->bc_nlevels--; } else if (level > 0 && - (error = xfs_alloc_decrement(cur, level, &i))) + (error = xfs_btree_decrement(cur, level, &i))) return error; *stat = 1; return 0; @@ -272,7 +272,7 @@ xfs_alloc_delrec( * the minimum, we're done. */ if (numrecs >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) { - if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i))) + if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) return error; *stat = 1; return 0; @@ -336,7 +336,7 @@ xfs_alloc_delrec( xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); if (level > 0 && - (error = xfs_alloc_decrement(cur, level, + (error = xfs_btree_decrement(cur, level, &i))) return error; *stat = 1; @@ -352,7 +352,7 @@ xfs_alloc_delrec( if (lbno != NULLAGBLOCK) { i = xfs_btree_firstrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_decrement(tcur, level, &i))) + if ((error = xfs_btree_decrement(tcur, level, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); } @@ -368,7 +368,7 @@ xfs_alloc_delrec( */ i = xfs_btree_firstrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_decrement(tcur, level, &i))) + if ((error = xfs_btree_decrement(tcur, level, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); xfs_btree_firstrec(tcur, level); @@ -468,7 +468,7 @@ xfs_alloc_delrec( * Just return. This is probably a logic error, but it's not fatal. */ else { - if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i))) + if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) return error; *stat = 1; return 0; @@ -1780,90 +1780,6 @@ xfs_alloc_updkey( */ /* - * Decrement cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -int /* error */ -xfs_alloc_decrement( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level in btree, 0 is leaf */ - int *stat) /* success/failure */ -{ - xfs_alloc_block_t *block; /* btree block */ - int error; /* error return value */ - int lev; /* btree level */ - - ASSERT(level < cur->bc_nlevels); - /* - * Read-ahead to the left at this level. - */ - xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); - /* - * Decrement the ptr at this level. If we're still in the block - * then we're done. - */ - if (--cur->bc_ptrs[level] > 0) { - *stat = 1; - return 0; - } - /* - * Get a pointer to the btree block. - */ - block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[level]); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, - cur->bc_bufs[level]))) - return error; -#endif - /* - * If we just went off the left edge of the tree, return failure. - */ - if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) { - *stat = 0; - return 0; - } - /* - * March up the tree decrementing pointers. - * Stop when we don't go off the left edge of a block. - */ - for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - if (--cur->bc_ptrs[lev] > 0) - break; - /* - * Read-ahead the left block, we're going to read it - * in the next loop. - */ - xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); - } - /* - * If we went off the root then we are seriously confused. 
- */ - ASSERT(lev < cur->bc_nlevels); - /* - * Now walk back down the tree, fixing up the cursor's buffer - * pointers and key numbers. - */ - for (block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); lev > level; ) { - xfs_agblock_t agbno; /* block number of btree block */ - xfs_buf_t *bp; /* buffer pointer for block */ - - agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, agbno, 0, &bp, - XFS_ALLOC_BTREE_REF))) - return error; - lev--; - xfs_btree_setbuf(cur, lev, bp); - block = XFS_BUF_TO_ALLOC_BLOCK(bp); - if ((error = xfs_btree_check_sblock(cur, block, lev, bp))) - return error; - cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs); - } - *stat = 1; - return 0; -} - -/* * Delete the record pointed to by cur. * The cursor refers to the place where the record was (could be inserted) * when the operation returns. @@ -1889,7 +1805,7 @@ xfs_alloc_delete( if (i == 0) { for (level = 1; level < cur->bc_nlevels; level++) { if (cur->bc_ptrs[level] == 0) { - if ((error = xfs_alloc_decrement(cur, level, &i))) + if ((error = xfs_btree_decrement(cur, level, &i))) return error; break; } diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 643cfab..b59d7fc 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -95,12 +95,6 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t; XFS_BTREE_PTR_ADDR(xfs_alloc, bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur)) /* - * Decrement cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -extern int xfs_alloc_decrement(struct xfs_btree_cur *cur, int level, int *stat); - -/* * Delete the record pointed to by cur. * The cursor refers to the place where the record was (could be inserted) * when the operation returns. 
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 4b1ec44..bdbab54 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -820,7 +820,7 @@ xfs_bmap_add_extent_delay_real( if ((error = xfs_bmbt_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_decrement(cur, 0, &i))) + if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, @@ -1381,13 +1381,13 @@ xfs_bmap_add_extent_unwritten_real( if ((error = xfs_bmbt_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_decrement(cur, 0, &i))) + if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_decrement(cur, 0, &i))) + if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, @@ -1430,7 +1430,7 @@ xfs_bmap_add_extent_unwritten_real( if ((error = xfs_bmbt_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_decrement(cur, 0, &i))) + if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, @@ -1473,7 +1473,7 @@ xfs_bmap_add_extent_unwritten_real( if ((error = xfs_bmbt_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_decrement(cur, 0, &i))) + if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, new->br_startoff, @@ -1556,7 +1556,7 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_blockcount - new->br_blockcount, oldext))) goto done; - if ((error = xfs_bmbt_decrement(cur, 0, &i))) + if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; if (xfs_bmbt_update(cur, LEFT.br_startoff, LEFT.br_startblock, @@ -2108,7 +2108,7 @@ xfs_bmap_add_extent_hole_real( if ((error = xfs_bmbt_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_decrement(cur, 0, &i))) + if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, left.br_startoff, diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 2d29a49..7b5181d 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -203,7 +203,7 @@ xfs_bmbt_delrec( XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } - if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) { + if (level > 0 && (error = xfs_btree_decrement(cur, level, &j))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -216,7 +216,7 @@ xfs_bmbt_delrec( goto error0; } if (numrecs >= XFS_BMAP_BLOCK_IMINRECS(level, cur)) { - if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &j))) { + if (level > 0 && (error = xfs_btree_decrement(cur, level, &j))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -237,7 +237,7 @@ xfs_bmbt_delrec( XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } - if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) { + if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -282,7 +282,7 @@ xfs_bmbt_delrec( xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); tcur = NULL; if (level > 0) { - if ((error = xfs_bmbt_decrement(cur, + if ((error = 
xfs_btree_decrement(cur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); @@ -298,7 +298,7 @@ xfs_bmbt_delrec( if (lbno != NULLFSBLOCK) { i = xfs_btree_firstrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_bmbt_decrement(tcur, level, &i))) { + if ((error = xfs_btree_decrement(tcur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -311,7 +311,7 @@ xfs_bmbt_delrec( /* * decrement to last in block */ - if ((error = xfs_bmbt_decrement(tcur, level, &i))) { + if ((error = xfs_btree_decrement(tcur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -383,7 +383,7 @@ xfs_bmbt_delrec( } lrecs = be16_to_cpu(left->bb_numrecs); } else { - if (level > 0 && (error = xfs_bmbt_decrement(cur, level, &i))) { + if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -1487,80 +1487,6 @@ xfs_bmdr_to_bmbt( } /* - * Decrement cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -int /* error */ -xfs_bmbt_decrement( - xfs_btree_cur_t *cur, - int level, - int *stat) /* success/failure */ -{ - xfs_bmbt_block_t *block; - xfs_buf_t *bp; - int error; /* error return value */ - xfs_fsblock_t fsbno; - int lev; - xfs_mount_t *mp; - xfs_trans_t *tp; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGI(cur, level); - ASSERT(level < cur->bc_nlevels); - - xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); - - if (--cur->bc_ptrs[level] > 0) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - block = xfs_bmbt_get_block(cur, level, &bp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, block, level, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - if (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - if (--cur->bc_ptrs[lev] > 0) - break; - xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); - } - if (lev == cur->bc_nlevels) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - tp = cur->bc_tp; - mp = cur->bc_mp; - for (block = xfs_bmbt_get_block(cur, lev, &bp); lev > level; ) { - fsbno = be64_to_cpu(*XFS_BMAP_PTR_IADDR(block, cur->bc_ptrs[lev], cur)); - if ((error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp, - XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - lev--; - xfs_btree_setbuf(cur, lev, bp); - block = XFS_BUF_TO_BMBT_BLOCK(bp); - if ((error = xfs_btree_check_lblock(cur, block, lev, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs); - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; -} - -/* * Delete the record pointed to by cur. */ int /* error */ @@ -1582,7 +1508,7 @@ xfs_bmbt_delete( if (i == 0) { for (level = 1; level < cur->bc_nlevels; level++) { if (cur->bc_ptrs[level] == 0) { - if ((error = xfs_bmbt_decrement(cur, level, + if ((error = xfs_btree_decrement(cur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index a45be38..1e0f1d1 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -237,7 +237,6 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; * Prototypes for xfs_bmap.c to call. 
*/ extern void xfs_bmdr_to_bmbt(xfs_bmdr_block_t *, int, xfs_bmbt_block_t *, int); -extern int xfs_bmbt_decrement(struct xfs_btree_cur *, int, int *); extern int xfs_bmbt_delete(struct xfs_btree_cur *, int *); extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); extern xfs_bmbt_block_t *xfs_bmbt_get_block(struct xfs_btree_cur *cur, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index e9ab86b..3d561f8 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -1171,3 +1171,102 @@ error0: XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } + +/* + * Decrement cursor by one record at the level. + * For nonzero levels the leaf-ward information is untouched. + */ +int /* error */ +xfs_btree_decrement( + struct xfs_btree_cur *cur, + int level, + int *stat) /* success/failure */ +{ + struct xfs_btree_block *block; + xfs_buf_t *bp; + int error; /* error return value */ + int lev; + union xfs_btree_ptr ptr; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, level); + + ASSERT(level < cur->bc_nlevels); + + /* Read-ahead to the left at this level. */ + xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); + + /* We're done if we remain in the block after the decrement. */ + if (--cur->bc_ptrs[level] > 0) + goto out1; + + /* Get a pointer to the btree block. */ + block = xfs_btree_get_block(cur, level, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) + goto error0; +#endif + + /* Fail if we just went off the left edge of the tree. */ + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); + if (xfs_btree_ptr_is_null(cur, &ptr)) + goto out0; + + XFS_BTREE_STATS_INC(cur, decrement); + + /* + * March up the tree decrementing pointers. + * Stop when we don't go off the left edge of a block. + */ + for (lev = level + 1; lev < cur->bc_nlevels; lev++) { + if (--cur->bc_ptrs[lev] > 0) + break; + /* Read-ahead the left block for the next loop. */ + xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); + } + + /* + * If we went off the root then we are seriously confused. + * or the root of the tree is in an inode. + */ + if (lev == cur->bc_nlevels) { + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) + goto out0; + ASSERT(0); + error = EFSCORRUPTED; + goto error0; + } + ASSERT(lev < cur->bc_nlevels); + + /* + * Now walk back down the tree, fixing up the cursor's buffer + * pointers and key numbers. + */ + for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { + union xfs_btree_ptr *ptrp; + + ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); + error = xfs_btree_read_buf_block(cur, ptrp, --lev, + 0, &block, &bp); + if (error) + goto error0; + xfs_btree_setbuf(cur, lev, bp); + cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block); + } +out1: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index f5a4b8e..52b2da6 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -506,6 +506,7 @@ xfs_btree_setbuf( * Common btree core entry points. */ int xfs_btree_increment(struct xfs_btree_cur *, int, int *); +int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); /* * Helpers. diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 3a8f067..d36b42b 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -739,7 +739,7 @@ nextag: /* * Search left with tcur, back up 1 record. 
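 		 * tcur walks towards lower inode numbers and cur walks
 		 * towards higher ones; the allocation then uses whichever
 		 * of the two finds a record with free inodes closer to
 		 * the parent inode.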
*/ - if ((error = xfs_inobt_decrement(tcur, 0, &i))) + if ((error = xfs_btree_decrement(tcur, 0, &i))) goto error1; doneleft = !i; if (!doneleft) { @@ -815,7 +815,7 @@ nextag: * further left. */ if (useleft) { - if ((error = xfs_inobt_decrement(tcur, 0, + if ((error = xfs_btree_decrement(tcur, 0, &i))) goto error1; doneleft = !i; diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 41717da..9099a32 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -205,7 +205,7 @@ xfs_inobt_delrec( cur->bc_bufs[level] = NULL; cur->bc_nlevels--; } else if (level > 0 && - (error = xfs_inobt_decrement(cur, level, &i))) + (error = xfs_btree_decrement(cur, level, &i))) return error; *stat = 1; return 0; @@ -222,7 +222,7 @@ xfs_inobt_delrec( */ if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) { if (level > 0 && - (error = xfs_inobt_decrement(cur, level, &i))) + (error = xfs_btree_decrement(cur, level, &i))) return error; *stat = 1; return 0; @@ -286,7 +286,7 @@ xfs_inobt_delrec( xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); if (level > 0 && - (error = xfs_inobt_decrement(cur, level, + (error = xfs_btree_decrement(cur, level, &i))) return error; *stat = 1; @@ -301,7 +301,7 @@ xfs_inobt_delrec( rrecs = be16_to_cpu(right->bb_numrecs); if (lbno != NULLAGBLOCK) { xfs_btree_firstrec(tcur, level); - if ((error = xfs_inobt_decrement(tcur, level, &i))) + if ((error = xfs_btree_decrement(tcur, level, &i))) goto error0; } } @@ -315,7 +315,7 @@ xfs_inobt_delrec( * previous block. */ xfs_btree_firstrec(tcur, level); - if ((error = xfs_inobt_decrement(tcur, level, &i))) + if ((error = xfs_btree_decrement(tcur, level, &i))) goto error0; xfs_btree_firstrec(tcur, level); /* @@ -414,7 +414,7 @@ xfs_inobt_delrec( * Just return. This is probably a logic error, but it's not fatal. */ else { - if (level > 0 && (error = xfs_inobt_decrement(cur, level, &i))) + if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) return error; *stat = 1; return 0; @@ -1656,90 +1656,6 @@ xfs_inobt_updkey( */ /* - * Decrement cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -int /* error */ -xfs_inobt_decrement( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level in btree, 0 is leaf */ - int *stat) /* success/failure */ -{ - xfs_inobt_block_t *block; /* btree block */ - int error; - int lev; /* btree level */ - - ASSERT(level < cur->bc_nlevels); - /* - * Read-ahead to the left at this level. - */ - xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); - /* - * Decrement the ptr at this level. If we're still in the block - * then we're done. - */ - if (--cur->bc_ptrs[level] > 0) { - *stat = 1; - return 0; - } - /* - * Get a pointer to the btree block. - */ - block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[level]); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, - cur->bc_bufs[level]))) - return error; -#endif - /* - * If we just went off the left edge of the tree, return failure. - */ - if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) { - *stat = 0; - return 0; - } - /* - * March up the tree decrementing pointers. - * Stop when we don't go off the left edge of a block. - */ - for (lev = level + 1; lev < cur->bc_nlevels; lev++) { - if (--cur->bc_ptrs[lev] > 0) - break; - /* - * Read-ahead the left block, we're going to read it - * in the next loop. - */ - xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); - } - /* - * If we went off the root then we are seriously confused. 
- */ - ASSERT(lev < cur->bc_nlevels); - /* - * Now walk back down the tree, fixing up the cursor's buffer - * pointers and key numbers. - */ - for (block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); lev > level; ) { - xfs_agblock_t agbno; /* block number of btree block */ - xfs_buf_t *bp; /* buffer containing btree block */ - - agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, agbno, 0, &bp, - XFS_INO_BTREE_REF))) - return error; - lev--; - xfs_btree_setbuf(cur, lev, bp); - block = XFS_BUF_TO_INOBT_BLOCK(bp); - if ((error = xfs_btree_check_sblock(cur, block, lev, bp))) - return error; - cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs); - } - *stat = 1; - return 0; -} - -/* * Delete the record pointed to by cur. * The cursor refers to the place where the record was (could be inserted) * when the operation returns. @@ -1765,7 +1681,7 @@ xfs_inobt_delete( if (i == 0) { for (level = 1; level < cur->bc_nlevels; level++) { if (cur->bc_ptrs[level] == 0) { - if ((error = xfs_inobt_decrement(cur, level, &i))) + if ((error = xfs_btree_decrement(cur, level, &i))) return error; break; } diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 07fed62..8455459 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -117,12 +117,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; i, XFS_INOBT_BLOCK_MAXRECS(1, cur))) /* - * Decrement cursor by one record at the level. - * For nonzero levels the leaf-ward information is untouched. - */ -extern int xfs_inobt_decrement(struct xfs_btree_cur *cur, int level, int *stat); - -/* * Delete the record pointed to by cur. * The cursor refers to the place where the record was (could be inserted) * when the operation returns. -- cgit v0.10.2 From fe033cc848489851f0c7de48f0b1bab5d744ad8a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:56:09 +1100 Subject: [XFS] implement generic xfs_btree_lookup From: Dave Chinner [hch: split out from bigger patch and minor adaptions] SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32192a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 7ca6903..6bda0ae 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -90,6 +90,54 @@ STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, */ /* + * Lookup the record equal to [bno, len] in the btree given by cur. + */ +STATIC int /* error */ +xfs_alloc_lookup_eq( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat) /* success/failure */ +{ + cur->bc_rec.a.ar_startblock = bno; + cur->bc_rec.a.ar_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); +} + +/* + * Lookup the first record greater than or equal to [bno, len] + * in the btree given by cur. + */ +STATIC int /* error */ +xfs_alloc_lookup_ge( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat) /* success/failure */ +{ + cur->bc_rec.a.ar_startblock = bno; + cur->bc_rec.a.ar_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); +} + +/* + * Lookup the first record less than or equal to [bno, len] + * in the btree given by cur. 
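+ *
+ * Like the _eq and _ge variants above, this only seeds the search key
+ * in cur->bc_rec.a; the tree walk itself is done by the generic
+ * xfs_btree_lookup().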
+ */ +STATIC int /* error */ +xfs_alloc_lookup_le( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat) /* success/failure */ +{ + cur->bc_rec.a.ar_startblock = bno; + cur->bc_rec.a.ar_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); +} + + +/* * Compute aligned version of the found extent. * Takes alignment and min length into account. */ diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 6b45481..b81fbf1 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -938,223 +938,6 @@ xfs_alloc_log_recs( } /* - * Lookup the record. The cursor is made to point to it, based on dir. - * Return 0 if can't find any such record, 1 for success. - */ -STATIC int /* error */ -xfs_alloc_lookup( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_lookup_t dir, /* <=, ==, or >= */ - int *stat) /* success/failure */ -{ - xfs_agblock_t agbno; /* a.g. relative btree block number */ - xfs_agnumber_t agno; /* allocation group number */ - xfs_alloc_block_t *block=NULL; /* current btree block */ - int diff; /* difference for the current key */ - int error; /* error return value */ - int keyno=0; /* current key number */ - int level; /* level in the btree */ - xfs_mount_t *mp; /* file system mount point */ - - XFS_STATS_INC(xs_abt_lookup); - /* - * Get the allocation group header, and the root block number. - */ - mp = cur->bc_mp; - - { - xfs_agf_t *agf; /* a.g. freespace header */ - - agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); - agno = be32_to_cpu(agf->agf_seqno); - agbno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]); - } - /* - * Iterate over each level in the btree, starting at the root. - * For each level above the leaves, find the key we need, based - * on the lookup record, then follow the corresponding block - * pointer down to the next level. - */ - for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { - xfs_buf_t *bp; /* buffer pointer for btree block */ - xfs_daddr_t d; /* disk address of btree block */ - - /* - * Get the disk address we're looking for. - */ - d = XFS_AGB_TO_DADDR(mp, agno, agbno); - /* - * If the old buffer at this level is for a different block, - * throw it away, otherwise just use it. - */ - bp = cur->bc_bufs[level]; - if (bp && XFS_BUF_ADDR(bp) != d) - bp = NULL; - if (!bp) { - /* - * Need to get a new buffer. Read it, then - * set it in the cursor, releasing the old one. - */ - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, agno, - agbno, 0, &bp, XFS_ALLOC_BTREE_REF))) - return error; - xfs_btree_setbuf(cur, level, bp); - /* - * Point to the btree block, now that we have the buffer - */ - block = XFS_BUF_TO_ALLOC_BLOCK(bp); - if ((error = xfs_btree_check_sblock(cur, block, level, - bp))) - return error; - } else - block = XFS_BUF_TO_ALLOC_BLOCK(bp); - /* - * If we already had a key match at a higher level, we know - * we need to use the first entry in this block. - */ - if (diff == 0) - keyno = 1; - /* - * Otherwise we need to search this block. Do a binary search. - */ - else { - int high; /* high entry number */ - xfs_alloc_key_t *kkbase=NULL;/* base of keys in block */ - xfs_alloc_rec_t *krbase=NULL;/* base of records in block */ - int low; /* low entry number */ - - /* - * Get a pointer to keys or records. - */ - if (level > 0) - kkbase = XFS_ALLOC_KEY_ADDR(block, 1, cur); - else - krbase = XFS_ALLOC_REC_ADDR(block, 1, cur); - /* - * Set low and high entry numbers, 1-based. 
- */ - low = 1; - if (!(high = be16_to_cpu(block->bb_numrecs))) { - /* - * If the block is empty, the tree must - * be an empty leaf. - */ - ASSERT(level == 0 && cur->bc_nlevels == 1); - cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; - *stat = 0; - return 0; - } - /* - * Binary search the block. - */ - while (low <= high) { - xfs_extlen_t blockcount; /* key value */ - xfs_agblock_t startblock; /* key value */ - - XFS_STATS_INC(xs_abt_compare); - /* - * keyno is average of low and high. - */ - keyno = (low + high) >> 1; - /* - * Get startblock & blockcount. - */ - if (level > 0) { - xfs_alloc_key_t *kkp; - - kkp = kkbase + keyno - 1; - startblock = be32_to_cpu(kkp->ar_startblock); - blockcount = be32_to_cpu(kkp->ar_blockcount); - } else { - xfs_alloc_rec_t *krp; - - krp = krbase + keyno - 1; - startblock = be32_to_cpu(krp->ar_startblock); - blockcount = be32_to_cpu(krp->ar_blockcount); - } - /* - * Compute difference to get next direction. - */ - if (cur->bc_btnum == XFS_BTNUM_BNO) - diff = (int)startblock - - (int)cur->bc_rec.a.ar_startblock; - else if (!(diff = (int)blockcount - - (int)cur->bc_rec.a.ar_blockcount)) - diff = (int)startblock - - (int)cur->bc_rec.a.ar_startblock; - /* - * Less than, move right. - */ - if (diff < 0) - low = keyno + 1; - /* - * Greater than, move left. - */ - else if (diff > 0) - high = keyno - 1; - /* - * Equal, we're done. - */ - else - break; - } - } - /* - * If there are more levels, set up for the next level - * by getting the block number and filling in the cursor. - */ - if (level > 0) { - /* - * If we moved left, need the previous key number, - * unless there isn't one. - */ - if (diff > 0 && --keyno < 1) - keyno = 1; - agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, keyno, cur)); -#ifdef DEBUG - if ((error = xfs_btree_check_sptr(cur, agbno, level))) - return error; -#endif - cur->bc_ptrs[level] = keyno; - } - } - /* - * Done with the search. - * See if we need to adjust the results. - */ - if (dir != XFS_LOOKUP_LE && diff < 0) { - keyno++; - /* - * If ge search and we went off the end of the block, but it's - * not the last block, we're in the wrong block. - */ - if (dir == XFS_LOOKUP_GE && - keyno > be16_to_cpu(block->bb_numrecs) && - be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) { - int i; - - cur->bc_ptrs[0] = keyno; - if ((error = xfs_btree_increment(cur, 0, &i))) - return error; - XFS_WANT_CORRUPTED_RETURN(i == 1); - *stat = 1; - return 0; - } - } - else if (dir == XFS_LOOKUP_LE && diff > 0) - keyno--; - cur->bc_ptrs[0] = keyno; - /* - * Return if we succeeded or not. - */ - if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs)) - *stat = 0; - else - *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0)); - return 0; -} - -/* * Move 1 record left from cur/level if possible. * Update cur to reflect the new path. */ @@ -1919,53 +1702,6 @@ xfs_alloc_insert( } /* - * Lookup the record equal to [bno, len] in the btree given by cur. - */ -int /* error */ -xfs_alloc_lookup_eq( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agblock_t bno, /* starting block of extent */ - xfs_extlen_t len, /* length of extent */ - int *stat) /* success/failure */ -{ - cur->bc_rec.a.ar_startblock = bno; - cur->bc_rec.a.ar_blockcount = len; - return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, stat); -} - -/* - * Lookup the first record greater than or equal to [bno, len] - * in the btree given by cur. 
- */ -int /* error */ -xfs_alloc_lookup_ge( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agblock_t bno, /* starting block of extent */ - xfs_extlen_t len, /* length of extent */ - int *stat) /* success/failure */ -{ - cur->bc_rec.a.ar_startblock = bno; - cur->bc_rec.a.ar_blockcount = len; - return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, stat); -} - -/* - * Lookup the first record less than or equal to [bno, len] - * in the btree given by cur. - */ -int /* error */ -xfs_alloc_lookup_le( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agblock_t bno, /* starting block of extent */ - xfs_extlen_t len, /* length of extent */ - int *stat) /* success/failure */ -{ - cur->bc_rec.a.ar_startblock = bno; - cur->bc_rec.a.ar_blockcount = len; - return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, stat); -} - -/* * Update the record referred to by cur, to the value given by [bno, len]. * This either works (return 0) or gets an EFSCORRUPTED error. */ @@ -2052,6 +1788,51 @@ xfs_allocbt_get_maxrecs( return cur->bc_mp->m_alloc_mxr[level != 0]; } +STATIC void +xfs_allocbt_init_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + ASSERT(rec->alloc.ar_startblock != 0); + + key->alloc.ar_startblock = rec->alloc.ar_startblock; + key->alloc.ar_blockcount = rec->alloc.ar_blockcount; +} + +STATIC void +xfs_allocbt_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + + ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno)); + ASSERT(agf->agf_roots[cur->bc_btnum] != 0); + + ptr->s = agf->agf_roots[cur->bc_btnum]; +} + +STATIC __int64_t +xfs_allocbt_key_diff( + struct xfs_btree_cur *cur, + union xfs_btree_key *key) +{ + xfs_alloc_rec_incore_t *rec = &cur->bc_rec.a; + xfs_alloc_key_t *kp = &key->alloc; + __int64_t diff; + + if (cur->bc_btnum == XFS_BTNUM_BNO) { + return (__int64_t)be32_to_cpu(kp->ar_startblock) - + rec->ar_startblock; + } + + diff = (__int64_t)be32_to_cpu(kp->ar_blockcount) - rec->ar_blockcount; + if (diff) + return diff; + + return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; +} + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_allocbt_trace_buf; @@ -2124,6 +1905,9 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .dup_cursor = xfs_allocbt_dup_cursor, .get_maxrecs = xfs_allocbt_get_maxrecs, + .init_key_from_rec = xfs_allocbt_init_key_from_rec, + .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, + .key_diff = xfs_allocbt_key_diff, #ifdef XFS_BTREE_TRACE .trace_enter = xfs_allocbt_trace_enter, diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index b59d7fc..aa110ff 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -114,26 +114,6 @@ extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno, extern int xfs_alloc_insert(struct xfs_btree_cur *cur, int *stat); /* - * Lookup the record equal to [bno, len] in the btree given by cur. - */ -extern int xfs_alloc_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno, - xfs_extlen_t len, int *stat); - -/* - * Lookup the first record greater than or equal to [bno, len] - * in the btree given by cur. - */ -extern int xfs_alloc_lookup_ge(struct xfs_btree_cur *cur, xfs_agblock_t bno, - xfs_extlen_t len, int *stat); - -/* - * Lookup the first record less than or equal to [bno, len] - * in the btree given by cur. 
- */ -extern int xfs_alloc_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno, - xfs_extlen_t len, int *stat); - -/* * Update the record referred to by cur, to the value given by [bno, len]. * This either works (return 0) or gets an EFSCORRUPTED error. */ diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index bdbab54..1296b41 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -402,6 +402,35 @@ xfs_bmap_disk_count_leaves( * Bmap internal routines. */ +STATIC int /* error */ +xfs_bmbt_lookup_eq( + struct xfs_btree_cur *cur, + xfs_fileoff_t off, + xfs_fsblock_t bno, + xfs_filblks_t len, + int *stat) /* success/failure */ +{ + cur->bc_rec.b.br_startoff = off; + cur->bc_rec.b.br_startblock = bno; + cur->bc_rec.b.br_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); +} + +STATIC int /* error */ +xfs_bmbt_lookup_ge( + struct xfs_btree_cur *cur, + xfs_fileoff_t off, + xfs_fsblock_t bno, + xfs_filblks_t len, + int *stat) /* success/failure */ +{ + cur->bc_rec.b.br_startoff = off; + cur->bc_rec.b.br_startblock = bno; + cur->bc_rec.b.br_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); +} + + /* * Called from xfs_bmap_add_attrfork to handle btree format files. */ diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 7b5181d..8403d15 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -813,146 +813,6 @@ xfs_bmbt_log_ptrs( } /* - * Lookup the record. The cursor is made to point to it, based on dir. - */ -STATIC int /* error */ -xfs_bmbt_lookup( - xfs_btree_cur_t *cur, - xfs_lookup_t dir, - int *stat) /* success/failure */ -{ - xfs_bmbt_block_t *block=NULL; - xfs_buf_t *bp; - xfs_daddr_t d; - xfs_sfiloff_t diff; - int error; /* error return value */ - xfs_fsblock_t fsbno=0; - int high; - int i; - int keyno=0; - xfs_bmbt_key_t *kkbase=NULL; - xfs_bmbt_key_t *kkp; - xfs_bmbt_rec_t *krbase=NULL; - xfs_bmbt_rec_t *krp; - int level; - int low; - xfs_mount_t *mp; - xfs_bmbt_ptr_t *pp; - xfs_bmbt_irec_t *rp; - xfs_fileoff_t startoff; - xfs_trans_t *tp; - - XFS_STATS_INC(xs_bmbt_lookup); - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGI(cur, (int)dir); - tp = cur->bc_tp; - mp = cur->bc_mp; - rp = &cur->bc_rec.b; - for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { - if (level < cur->bc_nlevels - 1) { - d = XFS_FSB_TO_DADDR(mp, fsbno); - bp = cur->bc_bufs[level]; - if (bp && XFS_BUF_ADDR(bp) != d) - bp = NULL; - if (!bp) { - if ((error = xfs_btree_read_bufl(mp, tp, fsbno, - 0, &bp, XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - xfs_btree_setbuf(cur, level, bp); - block = XFS_BUF_TO_BMBT_BLOCK(bp); - if ((error = xfs_btree_check_lblock(cur, block, - level, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - } else - block = XFS_BUF_TO_BMBT_BLOCK(bp); - } else - block = xfs_bmbt_get_block(cur, level, &bp); - if (diff == 0) - keyno = 1; - else { - if (level > 0) - kkbase = XFS_BMAP_KEY_IADDR(block, 1, cur); - else - krbase = XFS_BMAP_REC_IADDR(block, 1, cur); - low = 1; - if (!(high = be16_to_cpu(block->bb_numrecs))) { - ASSERT(level == 0); - cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - while (low <= high) { - XFS_STATS_INC(xs_bmbt_compare); - keyno = (low + high) >> 1; - if (level > 0) { - kkp = kkbase + keyno - 1; - startoff = be64_to_cpu(kkp->br_startoff); - } else { - krp = krbase + keyno - 1; - startoff = xfs_bmbt_disk_get_startoff(krp); - } - diff = (xfs_sfiloff_t) - (startoff - 
rp->br_startoff); - if (diff < 0) - low = keyno + 1; - else if (diff > 0) - high = keyno - 1; - else - break; - } - } - if (level > 0) { - if (diff > 0 && --keyno < 1) - keyno = 1; - pp = XFS_BMAP_PTR_IADDR(block, keyno, cur); - fsbno = be64_to_cpu(*pp); -#ifdef DEBUG - if ((error = xfs_btree_check_lptr(cur, fsbno, level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - cur->bc_ptrs[level] = keyno; - } - } - if (dir != XFS_LOOKUP_LE && diff < 0) { - keyno++; - /* - * If ge search and we went off the end of the block, but it's - * not the last block, we're in the wrong block. - */ - if (dir == XFS_LOOKUP_GE && keyno > be16_to_cpu(block->bb_numrecs) && - be64_to_cpu(block->bb_rightsib) != NULLDFSBNO) { - cur->bc_ptrs[0] = keyno; - if ((error = xfs_btree_increment(cur, 0, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - XFS_WANT_CORRUPTED_RETURN(i == 1); - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - } - else if (dir == XFS_LOOKUP_LE && diff > 0) - keyno--; - cur->bc_ptrs[0] = keyno; - if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs)) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - } else { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0)); - } - return 0; -} - -/* * Move 1 record left from cur/level if possible. * Update cur to reflect the new path. */ @@ -1809,34 +1669,6 @@ xfs_bmbt_log_recs( XFS_BMBT_TRACE_CURSOR(cur, EXIT); } -int /* error */ -xfs_bmbt_lookup_eq( - xfs_btree_cur_t *cur, - xfs_fileoff_t off, - xfs_fsblock_t bno, - xfs_filblks_t len, - int *stat) /* success/failure */ -{ - cur->bc_rec.b.br_startoff = off; - cur->bc_rec.b.br_startblock = bno; - cur->bc_rec.b.br_blockcount = len; - return xfs_bmbt_lookup(cur, XFS_LOOKUP_EQ, stat); -} - -int /* error */ -xfs_bmbt_lookup_ge( - xfs_btree_cur_t *cur, - xfs_fileoff_t off, - xfs_fsblock_t bno, - xfs_filblks_t len, - int *stat) /* success/failure */ -{ - cur->bc_rec.b.br_startoff = off; - cur->bc_rec.b.br_startblock = bno; - cur->bc_rec.b.br_blockcount = len; - return xfs_bmbt_lookup(cur, XFS_LOOKUP_GE, stat); -} - /* * Give the bmap btree a new root block. Copy the old broot contents * down into a real block and make the broot point to it. 
@@ -2269,6 +2101,32 @@ xfs_bmbt_get_maxrecs( return XFS_BMAP_BLOCK_IMAXRECS(level, cur); } +STATIC void +xfs_bmbt_init_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + key->bmbt.br_startoff = + cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec->bmbt)); +} + +STATIC void +xfs_bmbt_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + ptr->l = 0; +} + +STATIC __int64_t +xfs_bmbt_key_diff( + struct xfs_btree_cur *cur, + union xfs_btree_key *key) +{ + return (__int64_t)be64_to_cpu(key->bmbt.br_startoff) - + cur->bc_rec.b.br_startoff; +} + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_bmbt_trace_buf; @@ -2360,6 +2218,9 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .dup_cursor = xfs_bmbt_dup_cursor, .get_maxrecs = xfs_bmbt_get_maxrecs, + .init_key_from_rec = xfs_bmbt_init_key_from_rec, + .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, + .key_diff = xfs_bmbt_key_diff, #ifdef XFS_BTREE_TRACE .trace_enter = xfs_bmbt_trace_enter, diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 1e0f1d1..d04198c 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -254,10 +254,6 @@ extern int xfs_bmbt_insert(struct xfs_btree_cur *, int *); extern void xfs_bmbt_log_block(struct xfs_btree_cur *, struct xfs_buf *, int); extern void xfs_bmbt_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int); -extern int xfs_bmbt_lookup_eq(struct xfs_btree_cur *, xfs_fileoff_t, - xfs_fsblock_t, xfs_filblks_t, int *); -extern int xfs_bmbt_lookup_ge(struct xfs_btree_cur *, xfs_fileoff_t, - xfs_fsblock_t, xfs_filblks_t, int *); /* * Give the bmap btree a new root block. Copy the old broot contents diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 3d561f8..41912a0 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -1270,3 +1270,222 @@ error0: return error; } + +STATIC int +xfs_btree_lookup_get_block( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* level in the btree */ + union xfs_btree_ptr *pp, /* ptr to btree block */ + struct xfs_btree_block **blkp) /* return btree block */ +{ + struct xfs_buf *bp; /* buffer pointer for btree block */ + int error = 0; + + /* special case the root block if in an inode */ + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + (level == cur->bc_nlevels - 1)) { + *blkp = xfs_btree_get_iroot(cur); + return 0; + } + + /* + * If the old buffer at this level for the disk address we are + * looking for re-use it. + * + * Otherwise throw it away and get a new one. + */ + bp = cur->bc_bufs[level]; + if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) { + *blkp = XFS_BUF_TO_BLOCK(bp); + return 0; + } + + error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp); + if (error) + return error; + + xfs_btree_setbuf(cur, level, bp); + return 0; +} + +/* + * Get current search key. For level 0 we don't actually have a key + * structure so we make one up from the record. For all other levels + * we just return the right key. + */ +STATIC union xfs_btree_key * +xfs_lookup_get_search_key( + struct xfs_btree_cur *cur, + int level, + int keyno, + struct xfs_btree_block *block, + union xfs_btree_key *kp) +{ + if (level == 0) { + cur->bc_ops->init_key_from_rec(kp, + xfs_btree_rec_addr(cur, keyno, block)); + return kp; + } + + return xfs_btree_key_addr(cur, keyno, block); +} + +/* + * Lookup the record. The cursor is made to point to it, based on dir. + * Return 0 if can't find any such record, 1 for success. 
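+ *
+ * The walk is driven by the per-btree ops vector: init_ptr_from_cur()
+ * supplies the root pointer (for a btree rooted in an inode the root
+ * block is instead taken straight from the cursor by
+ * xfs_btree_lookup_get_block()), key_diff() orders the binary search
+ * within each block, and init_key_from_rec() synthesises a search key
+ * from a record once we reach the leaf level.
+ *
+ * Callers normally come in through thin wrappers such as
+ * xfs_alloc_lookup_eq() that fill in cur->bc_rec first, e.g.:
+ *
+ *	cur->bc_rec.a.ar_startblock = bno;
+ *	cur->bc_rec.a.ar_blockcount = len;
+ *	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);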
+ */ +int /* error */ +xfs_btree_lookup( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_lookup_t dir, /* <=, ==, or >= */ + int *stat) /* success/failure */ +{ + struct xfs_btree_block *block; /* current btree block */ + __int64_t diff; /* difference for the current key */ + int error; /* error return value */ + int keyno; /* current key number */ + int level; /* level in the btree */ + union xfs_btree_ptr *pp; /* ptr to btree block */ + union xfs_btree_ptr ptr; /* ptr to btree block */ + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, dir); + + XFS_BTREE_STATS_INC(cur, lookup); + + block = NULL; + keyno = 0; + + /* initialise start pointer from cursor */ + cur->bc_ops->init_ptr_from_cur(cur, &ptr); + pp = &ptr; + + /* + * Iterate over each level in the btree, starting at the root. + * For each level above the leaves, find the key we need, based + * on the lookup record, then follow the corresponding block + * pointer down to the next level. + */ + for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { + /* Get the block we need to do the lookup on. */ + error = xfs_btree_lookup_get_block(cur, level, pp, &block); + if (error) + goto error0; + + if (diff == 0) { + /* + * If we already had a key match at a higher level, we + * know we need to use the first entry in this block. + */ + keyno = 1; + } else { + /* Otherwise search this block. Do a binary search. */ + + int high; /* high entry number */ + int low; /* low entry number */ + + /* Set low and high entry numbers, 1-based. */ + low = 1; + high = xfs_btree_get_numrecs(block); + if (!high) { + /* Block is empty, must be an empty leaf. */ + ASSERT(level == 0 && cur->bc_nlevels == 1); + + cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + + /* Binary search the block. */ + while (low <= high) { + union xfs_btree_key key; + union xfs_btree_key *kp; + + XFS_BTREE_STATS_INC(cur, compare); + + /* keyno is average of low and high. */ + keyno = (low + high) >> 1; + + /* Get current search key */ + kp = xfs_lookup_get_search_key(cur, level, + keyno, block, &key); + + /* + * Compute difference to get next direction: + * - less than, move right + * - greater than, move left + * - equal, we're done + */ + diff = cur->bc_ops->key_diff(cur, kp); + if (diff < 0) + low = keyno + 1; + else if (diff > 0) + high = keyno - 1; + else + break; + } + } + + /* + * If there are more levels, set up for the next level + * by getting the block number and filling in the cursor. + */ + if (level > 0) { + /* + * If we moved left, need the previous key number, + * unless there isn't one. + */ + if (diff > 0 && --keyno < 1) + keyno = 1; + pp = xfs_btree_ptr_addr(cur, keyno, block); + +#ifdef DEBUG + error = xfs_btree_check_ptr(cur, pp, 0, level); + if (error) + goto error0; +#endif + cur->bc_ptrs[level] = keyno; + } + } + + /* Done with the search. See if we need to adjust the results. */ + if (dir != XFS_LOOKUP_LE && diff < 0) { + keyno++; + /* + * If ge search and we went off the end of the block, but it's + * not the last block, we're in the wrong block. 
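+		 * Point the cursor past the end of this block and let
+		 * xfs_btree_increment() move it to the first record of
+		 * the right sibling, then return that as the match.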
+ */ + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); + if (dir == XFS_LOOKUP_GE && + keyno > xfs_btree_get_numrecs(block) && + !xfs_btree_ptr_is_null(cur, &ptr)) { + int i; + + cur->bc_ptrs[0] = keyno; + error = xfs_btree_increment(cur, 0, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_RETURN(i == 1); + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + } + } else if (dir == XFS_LOOKUP_LE && diff > 0) + keyno--; + cur->bc_ptrs[0] = keyno; + + /* Return if we succeeded or not. */ + if (keyno == 0 || keyno > xfs_btree_get_numrecs(block)) + *stat = 0; + else if (dir != XFS_LOOKUP_EQ || diff == 0) + *stat = 1; + else + *stat = 0; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 52b2da6..c151175 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -190,6 +190,16 @@ struct xfs_btree_ops { /* records in block/level */ int (*get_maxrecs)(struct xfs_btree_cur *cur, int level); + /* init values of btree structures */ + void (*init_key_from_rec)(union xfs_btree_key *key, + union xfs_btree_rec *rec); + void (*init_ptr_from_cur)(struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr); + + /* difference between key value and cursor value */ + __int64_t (*key_diff)(struct xfs_btree_cur *cur, + union xfs_btree_key *key); + /* btree tracing */ #ifdef XFS_BTREE_TRACE void (*trace_enter)(struct xfs_btree_cur *, const char *, @@ -507,6 +517,7 @@ xfs_btree_setbuf( */ int xfs_btree_increment(struct xfs_btree_cur *, int, int *); int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); +int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); /* * Helpers. diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index d36b42b..bbf537f 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -119,6 +119,59 @@ xfs_ialloc_cluster_alignment( } /* + * Lookup the record equal to ino in the btree given by cur. + */ +STATIC int /* error */ +xfs_inobt_lookup_eq( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + __int32_t fcnt, /* free inode count */ + xfs_inofree_t free, /* free inode mask */ + int *stat) /* success/failure */ +{ + cur->bc_rec.i.ir_startino = ino; + cur->bc_rec.i.ir_freecount = fcnt; + cur->bc_rec.i.ir_free = free; + return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); +} + +/* + * Lookup the first record greater than or equal to ino + * in the btree given by cur. + */ +int /* error */ +xfs_inobt_lookup_ge( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + __int32_t fcnt, /* free inode count */ + xfs_inofree_t free, /* free inode mask */ + int *stat) /* success/failure */ +{ + cur->bc_rec.i.ir_startino = ino; + cur->bc_rec.i.ir_freecount = fcnt; + cur->bc_rec.i.ir_free = free; + return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); +} + +/* + * Lookup the first record less than or equal to ino + * in the btree given by cur. + */ +int /* error */ +xfs_inobt_lookup_le( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + __int32_t fcnt, /* free inode count */ + xfs_inofree_t free, /* free inode mask */ + int *stat) /* success/failure */ +{ + cur->bc_rec.i.ir_startino = ino; + cur->bc_rec.i.ir_freecount = fcnt; + cur->bc_rec.i.ir_free = free; + return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); +} + +/* * Allocate new inodes in the allocation group specified by agbp. 
* Return 0 for success, else error code. */ diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index 4e30ec1..4026578 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -154,6 +154,21 @@ xfs_ialloc_pagi_init( struct xfs_trans *tp, /* transaction pointer */ xfs_agnumber_t agno); /* allocation group number */ +/* + * Lookup the first record greater than or equal to ino + * in the btree given by cur. + */ +int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino, + __int32_t fcnt, xfs_inofree_t free, int *stat); + +/* + * Lookup the first record less than or equal to ino + * in the btree given by cur. + */ +int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino, + __int32_t fcnt, xfs_inofree_t free, int *stat); + + #endif /* __KERNEL__ */ #endif /* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 9099a32..161c3b2 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -829,212 +829,6 @@ xfs_inobt_log_recs( } /* - * Lookup the record. The cursor is made to point to it, based on dir. - * Return 0 if can't find any such record, 1 for success. - */ -STATIC int /* error */ -xfs_inobt_lookup( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_lookup_t dir, /* <=, ==, or >= */ - int *stat) /* success/failure */ -{ - xfs_agblock_t agbno; /* a.g. relative btree block number */ - xfs_agnumber_t agno; /* allocation group number */ - xfs_inobt_block_t *block=NULL; /* current btree block */ - __int64_t diff; /* difference for the current key */ - int error; /* error return value */ - int keyno=0; /* current key number */ - int level; /* level in the btree */ - xfs_mount_t *mp; /* file system mount point */ - - /* - * Get the allocation group header, and the root block number. - */ - mp = cur->bc_mp; - { - xfs_agi_t *agi; /* a.g. inode header */ - - agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); - agno = be32_to_cpu(agi->agi_seqno); - agbno = be32_to_cpu(agi->agi_root); - } - /* - * Iterate over each level in the btree, starting at the root. - * For each level above the leaves, find the key we need, based - * on the lookup record, then follow the corresponding block - * pointer down to the next level. - */ - for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { - xfs_buf_t *bp; /* buffer pointer for btree block */ - xfs_daddr_t d; /* disk address of btree block */ - - /* - * Get the disk address we're looking for. - */ - d = XFS_AGB_TO_DADDR(mp, agno, agbno); - /* - * If the old buffer at this level is for a different block, - * throw it away, otherwise just use it. - */ - bp = cur->bc_bufs[level]; - if (bp && XFS_BUF_ADDR(bp) != d) - bp = NULL; - if (!bp) { - /* - * Need to get a new buffer. Read it, then - * set it in the cursor, releasing the old one. - */ - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - agno, agbno, 0, &bp, XFS_INO_BTREE_REF))) - return error; - xfs_btree_setbuf(cur, level, bp); - /* - * Point to the btree block, now that we have the buffer - */ - block = XFS_BUF_TO_INOBT_BLOCK(bp); - if ((error = xfs_btree_check_sblock(cur, block, level, - bp))) - return error; - } else - block = XFS_BUF_TO_INOBT_BLOCK(bp); - /* - * If we already had a key match at a higher level, we know - * we need to use the first entry in this block. - */ - if (diff == 0) - keyno = 1; - /* - * Otherwise we need to search this block. Do a binary search. 
- */ - else { - int high; /* high entry number */ - xfs_inobt_key_t *kkbase=NULL;/* base of keys in block */ - xfs_inobt_rec_t *krbase=NULL;/* base of records in block */ - int low; /* low entry number */ - - /* - * Get a pointer to keys or records. - */ - if (level > 0) - kkbase = XFS_INOBT_KEY_ADDR(block, 1, cur); - else - krbase = XFS_INOBT_REC_ADDR(block, 1, cur); - /* - * Set low and high entry numbers, 1-based. - */ - low = 1; - if (!(high = be16_to_cpu(block->bb_numrecs))) { - /* - * If the block is empty, the tree must - * be an empty leaf. - */ - ASSERT(level == 0 && cur->bc_nlevels == 1); - cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; - *stat = 0; - return 0; - } - /* - * Binary search the block. - */ - while (low <= high) { - xfs_agino_t startino; /* key value */ - - /* - * keyno is average of low and high. - */ - keyno = (low + high) >> 1; - /* - * Get startino. - */ - if (level > 0) { - xfs_inobt_key_t *kkp; - - kkp = kkbase + keyno - 1; - startino = be32_to_cpu(kkp->ir_startino); - } else { - xfs_inobt_rec_t *krp; - - krp = krbase + keyno - 1; - startino = be32_to_cpu(krp->ir_startino); - } - /* - * Compute difference to get next direction. - */ - diff = (__int64_t) - startino - cur->bc_rec.i.ir_startino; - /* - * Less than, move right. - */ - if (diff < 0) - low = keyno + 1; - /* - * Greater than, move left. - */ - else if (diff > 0) - high = keyno - 1; - /* - * Equal, we're done. - */ - else - break; - } - } - /* - * If there are more levels, set up for the next level - * by getting the block number and filling in the cursor. - */ - if (level > 0) { - /* - * If we moved left, need the previous key number, - * unless there isn't one. - */ - if (diff > 0 && --keyno < 1) - keyno = 1; - agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, keyno, cur)); -#ifdef DEBUG - if ((error = xfs_btree_check_sptr(cur, agbno, level))) - return error; -#endif - cur->bc_ptrs[level] = keyno; - } - } - /* - * Done with the search. - * See if we need to adjust the results. - */ - if (dir != XFS_LOOKUP_LE && diff < 0) { - keyno++; - /* - * If ge search and we went off the end of the block, but it's - * not the last block, we're in the wrong block. - */ - if (dir == XFS_LOOKUP_GE && - keyno > be16_to_cpu(block->bb_numrecs) && - be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) { - int i; - - cur->bc_ptrs[0] = keyno; - if ((error = xfs_btree_increment(cur, 0, &i))) - return error; - ASSERT(i == 1); - *stat = 1; - return 0; - } - } - else if (dir == XFS_LOOKUP_LE && diff > 0) - keyno--; - cur->bc_ptrs[0] = keyno; - /* - * Return if we succeeded or not. - */ - if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs)) - *stat = 0; - else - *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0)); - return 0; -} - -/* * Move 1 record left from cur/level if possible. * Update cur to reflect the new path. */ @@ -1798,59 +1592,6 @@ xfs_inobt_insert( } /* - * Lookup the record equal to ino in the btree given by cur. - */ -int /* error */ -xfs_inobt_lookup_eq( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agino_t ino, /* starting inode of chunk */ - __int32_t fcnt, /* free inode count */ - xfs_inofree_t free, /* free inode mask */ - int *stat) /* success/failure */ -{ - cur->bc_rec.i.ir_startino = ino; - cur->bc_rec.i.ir_freecount = fcnt; - cur->bc_rec.i.ir_free = free; - return xfs_inobt_lookup(cur, XFS_LOOKUP_EQ, stat); -} - -/* - * Lookup the first record greater than or equal to ino - * in the btree given by cur. 
- */ -int /* error */ -xfs_inobt_lookup_ge( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agino_t ino, /* starting inode of chunk */ - __int32_t fcnt, /* free inode count */ - xfs_inofree_t free, /* free inode mask */ - int *stat) /* success/failure */ -{ - cur->bc_rec.i.ir_startino = ino; - cur->bc_rec.i.ir_freecount = fcnt; - cur->bc_rec.i.ir_free = free; - return xfs_inobt_lookup(cur, XFS_LOOKUP_GE, stat); -} - -/* - * Lookup the first record less than or equal to ino - * in the btree given by cur. - */ -int /* error */ -xfs_inobt_lookup_le( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agino_t ino, /* starting inode of chunk */ - __int32_t fcnt, /* free inode count */ - xfs_inofree_t free, /* free inode mask */ - int *stat) /* success/failure */ -{ - cur->bc_rec.i.ir_startino = ino; - cur->bc_rec.i.ir_freecount = fcnt; - cur->bc_rec.i.ir_free = free; - return xfs_inobt_lookup(cur, XFS_LOOKUP_LE, stat); -} - -/* * Update the record referred to by cur, to the value given * by [ino, fcnt, free]. * This either works (return 0) or gets an EFSCORRUPTED error. @@ -1918,6 +1659,38 @@ xfs_inobt_get_maxrecs( return cur->bc_mp->m_inobt_mxr[level != 0]; } +STATIC void +xfs_inobt_init_key_from_rec( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + key->inobt.ir_startino = rec->inobt.ir_startino; +} + +/* + * intial value of ptr for lookup + */ +STATIC void +xfs_inobt_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); + + ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); + + ptr->s = agi->agi_root; +} + +STATIC __int64_t +xfs_inobt_key_diff( + struct xfs_btree_cur *cur, + union xfs_btree_key *key) +{ + return (__int64_t)be32_to_cpu(key->inobt.ir_startino) - + cur->bc_rec.i.ir_startino; +} + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_inobt_trace_buf; @@ -1990,6 +1763,9 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, .get_maxrecs = xfs_inobt_get_maxrecs, + .init_key_from_rec = xfs_inobt_init_key_from_rec, + .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, + .key_diff = xfs_inobt_key_diff, #ifdef XFS_BTREE_TRACE .trace_enter = xfs_inobt_trace_enter, diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 8455459..674b459 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -136,26 +136,6 @@ extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, extern int xfs_inobt_insert(struct xfs_btree_cur *cur, int *stat); /* - * Lookup the record equal to ino in the btree given by cur. - */ -extern int xfs_inobt_lookup_eq(struct xfs_btree_cur *cur, xfs_agino_t ino, - __int32_t fcnt, xfs_inofree_t free, int *stat); - -/* - * Lookup the first record greater than or equal to ino - * in the btree given by cur. - */ -extern int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino, - __int32_t fcnt, xfs_inofree_t free, int *stat); - -/* - * Lookup the first record less than or equal to ino - * in the btree given by cur. - */ -extern int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino, - __int32_t fcnt, xfs_inofree_t free, int *stat); - -/* * Update the record referred to by cur, to the value given * by [ino, fcnt, free]. * This either works (return 0) or gets an EFSCORRUPTED error. 
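
For reference, a minimal sketch of how the new lookup wrappers are meant to be called once an inode-btree cursor exists (assumed obtained from xfs_inobt_init_cursor()); the helper name, the ENOENT return and the abbreviated error handling below are illustrative assumptions and are not taken from the patches themselves:

STATIC int				/* error */
xfs_example_find_chunk(
	struct xfs_btree_cur	*cur,	/* inobt cursor, assumed valid */
	xfs_agino_t		agino)	/* inode number to search from */
{
	int			error;	/* error return value */
	int			i;	/* lookup success indicator */

	/*
	 * Seed cur->bc_rec.i with the search key and ask the generic
	 * code for the first record with ir_startino >= agino.  Only
	 * ir_startino matters for the comparison; the fcnt/free
	 * arguments merely fill in the rest of the cursor record.
	 */
	error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &i);
	if (error)
		return error;
	if (!i)
		return ENOENT;	/* no chunk at or after agino */
	return 0;
}

The old xfs_inobt_lookup() open-coded this binary-search comparison against ir_startino; the generic xfs_btree_lookup() gets the same comparison through the key_diff method supplied in xfs_inobt_ops, and init_ptr_from_cur supplies the root block in place of the old agi_root lookup.
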
-- cgit v0.10.2 From 38bb74237d2d94c1aced2ec626d7d0f317e360da Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:56:22 +1100 Subject: [XFS] implement generic xfs_btree_updkey From: Dave Chinner Note that there are many > 80 char lines introduced due to the xfs_btree_key casts. But the places where this happens is throw-away code once the whole btree code gets merged into a common implementation. The same is true for the temporary xfs_alloc_log_keys define to the new name. All old users will be gone after a few patches. [hch: split out from bigger patch and minor adaptions] SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32193a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index b81fbf1..28c6a69 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -52,7 +52,6 @@ STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *); STATIC int xfs_alloc_rshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *, xfs_alloc_key_t *, xfs_btree_cur_t **, int *); -STATIC int xfs_alloc_updkey(xfs_btree_cur_t *, xfs_alloc_key_t *, int); /* * Internal functions. @@ -265,7 +264,7 @@ xfs_alloc_delrec( * If we deleted the leftmost entry in the block, update the * key values above us in the tree. */ - if (ptr == 1 && (error = xfs_alloc_updkey(cur, lkp, level + 1))) + if (ptr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)lkp, level + 1))) return error; /* * If the number of records remaining in the block is at least @@ -798,7 +797,7 @@ xfs_alloc_insrec( /* * If we inserted at the start of a block, update the parents' keys. */ - if (optr == 1 && (error = xfs_alloc_updkey(cur, &key, level + 1))) + if (optr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, level + 1))) return error; /* * Look to see if the longest extent in the allocation group @@ -1068,7 +1067,7 @@ xfs_alloc_lshift( /* * Update the parent key values of right. */ - if ((error = xfs_alloc_updkey(cur, rkp, level + 1))) + if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)rkp, level + 1))) return error; /* * Slide the cursor value left one. @@ -1354,7 +1353,7 @@ xfs_alloc_rshift( i = xfs_btree_lastrec(tcur, level); XFS_WANT_CORRUPTED_GOTO(i == 1, error0); if ((error = xfs_btree_increment(tcur, level, &i)) || - (error = xfs_alloc_updkey(tcur, rkp, level + 1))) + (error = xfs_btree_updkey(tcur, (union xfs_btree_key *)rkp, level + 1))) goto error0; xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); *stat = 1; @@ -1520,45 +1519,6 @@ xfs_alloc_split( } /* - * Update keys at all levels from here to the root along the cursor's path. - */ -STATIC int /* error */ -xfs_alloc_updkey( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_alloc_key_t *keyp, /* new key value to update to */ - int level) /* starting level for update */ -{ - int ptr; /* index of key in block */ - - /* - * Go up the tree from this level toward the root. - * At each level, update the key value to the value input. - * Stop when we reach a level where the cursor isn't pointing - * at the first entry in the block. 
- */ - for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { - xfs_alloc_block_t *block; /* btree block */ - xfs_buf_t *bp; /* buffer for block */ -#ifdef DEBUG - int error; /* error return value */ -#endif - xfs_alloc_key_t *kp; /* ptr to btree block keys */ - - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_ALLOC_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, bp))) - return error; -#endif - ptr = cur->bc_ptrs[level]; - kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur); - *kp = *keyp; - xfs_alloc_log_keys(cur, bp, ptr, ptr); - } - return 0; -} - -/* * Externally visible routines. */ @@ -1765,7 +1725,7 @@ xfs_alloc_update( key.ar_startblock = cpu_to_be32(bno); key.ar_blockcount = cpu_to_be32(len); - if ((error = xfs_alloc_updkey(cur, &key, 1))) + if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, 1))) return error; } return 0; diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 8403d15..0a56257 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -56,7 +56,6 @@ STATIC int xfs_bmbt_lshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_bmbt_rshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *, __uint64_t *, xfs_btree_cur_t **, int *); -STATIC int xfs_bmbt_updkey(xfs_btree_cur_t *, xfs_bmbt_key_t *, int); #undef EXIT @@ -211,7 +210,7 @@ xfs_bmbt_delrec( *stat = 1; return 0; } - if (ptr == 1 && (error = xfs_bmbt_updkey(cur, kp, level + 1))) { + if (ptr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)kp, level + 1))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -635,7 +634,7 @@ xfs_bmbt_insrec( kp + ptr); } #endif - if (optr == 1 && (error = xfs_bmbt_updkey(cur, &key, level + 1))) { + if (optr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, level + 1))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } @@ -935,7 +934,7 @@ xfs_bmbt_lshift( key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp)); rkp = &key; } - if ((error = xfs_bmbt_updkey(cur, rkp, level + 1))) { + if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)rkp, level + 1))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } @@ -1067,7 +1066,7 @@ xfs_bmbt_rshift( goto error1; } XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_bmbt_updkey(tcur, rkp, level + 1))) { + if ((error = xfs_btree_updkey(tcur, (union xfs_btree_key *)rkp, level + 1))) { XFS_BMBT_TRACE_CURSOR(tcur, ERROR); goto error1; } @@ -1276,44 +1275,6 @@ xfs_bmbt_split( return 0; } - -/* - * Update keys for the record. - */ -STATIC int -xfs_bmbt_updkey( - xfs_btree_cur_t *cur, - xfs_bmbt_key_t *keyp, /* on-disk format */ - int level) -{ - xfs_bmbt_block_t *block; - xfs_buf_t *bp; -#ifdef DEBUG - int error; -#endif - xfs_bmbt_key_t *kp; - int ptr; - - ASSERT(level >= 1); - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGIK(cur, level, keyp); - for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { - block = xfs_bmbt_get_block(cur, level, &bp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, block, level, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - ptr = cur->bc_ptrs[level]; - kp = XFS_BMAP_KEY_IADDR(block, ptr, cur); - *kp = *keyp; - xfs_bmbt_log_keys(cur, bp, ptr, ptr); - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - return 0; -} - /* * Convert on-disk form of btree root to in-memory form. 
*/ @@ -2039,7 +2000,7 @@ xfs_bmbt_update( return 0; } key.br_startoff = cpu_to_be64(off); - if ((error = xfs_bmbt_updkey(cur, &key, 1))) { + if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, 1))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 41912a0..1459a2b 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -34,6 +34,7 @@ #include "xfs_attr_sf.h" #include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_inode_item.h" #include "xfs_btree.h" #include "xfs_btree_trace.h" #include "xfs_ialloc.h" @@ -1065,6 +1066,45 @@ xfs_btree_read_buf_block( } /* + * Copy keys from one btree block to another. + */ +STATIC void +xfs_btree_copy_keys( + struct xfs_btree_cur *cur, + union xfs_btree_key *dst_key, + union xfs_btree_key *src_key, + int numkeys) +{ + ASSERT(numkeys >= 0); + memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len); +} + +/* + * Log key values from the btree block. + */ +STATIC void +xfs_btree_log_keys( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + int first, + int last) +{ + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); + + if (bp) { + xfs_trans_log_buf(cur->bc_tp, bp, + xfs_btree_key_offset(cur, first), + xfs_btree_key_offset(cur, last + 1) - 1); + } else { + xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, + xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); +} + +/* * Increment cursor by one record at the level. * For nonzero levels the leaf-ward information is untouched. */ @@ -1489,3 +1529,50 @@ error0: XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } + +/* + * Update keys at all levels from here to the root along the cursor's path. + */ +int +xfs_btree_updkey( + struct xfs_btree_cur *cur, + union xfs_btree_key *keyp, + int level) +{ + struct xfs_btree_block *block; + struct xfs_buf *bp; + union xfs_btree_key *kp; + int ptr; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGIK(cur, level, keyp); + + ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1); + + /* + * Go up the tree from this level toward the root. + * At each level, update the key value to the value input. + * Stop when we reach a level where the cursor isn't pointing + * at the first entry in the block. + */ + for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { +#ifdef DEBUG + int error; +#endif + block = xfs_btree_get_block(cur, level, &bp); +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } +#endif + ptr = cur->bc_ptrs[level]; + kp = xfs_btree_key_addr(cur, ptr, block); + xfs_btree_copy_keys(cur, kp, keyp, 1); + xfs_btree_log_keys(cur, bp, ptr, ptr); + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index c151175..ac3f527 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -518,6 +518,7 @@ xfs_btree_setbuf( int xfs_btree_increment(struct xfs_btree_cur *, int, int *); int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); +int xfs_btree_updkey(struct xfs_btree_cur *, union xfs_btree_key *, int); /* * Helpers. 
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 161c3b2..cd8bb51 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -48,7 +48,6 @@ STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *); STATIC int xfs_inobt_rshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *, xfs_inobt_key_t *, xfs_btree_cur_t **, int *); -STATIC int xfs_inobt_updkey(xfs_btree_cur_t *, xfs_inobt_key_t *, int); /* * Single level of the xfs_inobt_delete record deletion routine. @@ -214,7 +213,7 @@ xfs_inobt_delrec( * If we deleted the leftmost entry in the block, update the * key values above us in the tree. */ - if (ptr == 1 && (error = xfs_inobt_updkey(cur, kp, level + 1))) + if (ptr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)kp, level + 1))) return error; /* * If the number of records remaining in the block is at least @@ -723,7 +722,7 @@ xfs_inobt_insrec( /* * If we inserted at the start of a block, update the parents' keys. */ - if (optr == 1 && (error = xfs_inobt_updkey(cur, &key, level + 1))) + if (optr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, level + 1))) return error; /* * Return the new block number, if any. @@ -960,7 +959,7 @@ xfs_inobt_lshift( /* * Update the parent key values of right. */ - if ((error = xfs_inobt_updkey(cur, rkp, level + 1))) + if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)rkp, level + 1))) return error; /* * Slide the cursor value left one. @@ -1238,7 +1237,7 @@ xfs_inobt_rshift( return error; xfs_btree_lastrec(tcur, level); if ((error = xfs_btree_increment(tcur, level, &i)) || - (error = xfs_inobt_updkey(tcur, rkp, level + 1))) { + (error = xfs_btree_updkey(tcur, (union xfs_btree_key *)rkp, level + 1))) { xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); return error; } @@ -1407,45 +1406,6 @@ xfs_inobt_split( } /* - * Update keys at all levels from here to the root along the cursor's path. - */ -STATIC int /* error */ -xfs_inobt_updkey( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_inobt_key_t *keyp, /* new key value to update to */ - int level) /* starting level for update */ -{ - int ptr; /* index of key in block */ - - /* - * Go up the tree from this level toward the root. - * At each level, update the key value to the value input. - * Stop when we reach a level where the cursor isn't pointing - * at the first entry in the block. - */ - for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { - xfs_buf_t *bp; /* buffer for block */ - xfs_inobt_block_t *block; /* btree block */ -#ifdef DEBUG - int error; /* error return value */ -#endif - xfs_inobt_key_t *kp; /* ptr to btree block keys */ - - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, bp))) - return error; -#endif - ptr = cur->bc_ptrs[level]; - kp = XFS_INOBT_KEY_ADDR(block, ptr, cur); - *kp = *keyp; - xfs_inobt_log_keys(cur, bp, ptr, ptr); - } - return 0; -} - -/* * Externally visible routines. 
*/ @@ -1637,7 +1597,7 @@ xfs_inobt_update( xfs_inobt_key_t key; /* key containing [ino] */ key.ir_startino = cpu_to_be32(ino); - if ((error = xfs_inobt_updkey(cur, &key, 1))) + if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, 1))) return error; } return 0; -- cgit v0.10.2 From 278d0ca14e889c3932a05d1a68675252a12b3466 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:56:32 +1100 Subject: [XFS] implement generic xfs_btree_update From: Dave Chinner The most complicated part here is the lastrec tracking for the alloc btree. Most logic is in the update_lastrec method which has to do some hopefully good enough dirty magic to maintain it. [hch: split out from bigger patch and a rework of the lastrec logic] SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32194a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 6bda0ae..875e1ba 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -136,6 +136,23 @@ xfs_alloc_lookup_le( return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); } +/* + * Update the record referred to by cur to the value given + * by [bno, len]. + * This either works (return 0) or gets an EFSCORRUPTED error. + */ +STATIC int /* error */ +xfs_alloc_update( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len) /* length of extent */ +{ + union xfs_btree_rec rec; + + rec.alloc.ar_startblock = cpu_to_be32(bno); + rec.alloc.ar_blockcount = cpu_to_be32(len); + return xfs_btree_update(cur, &rec); +} /* * Compute aligned version of the found extent. diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 28c6a69..c5c3299 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -1661,83 +1661,50 @@ xfs_alloc_insert( return 0; } +STATIC struct xfs_btree_cur * +xfs_allocbt_dup_cursor( + struct xfs_btree_cur *cur) +{ + return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp, + cur->bc_private.a.agbp, cur->bc_private.a.agno, + cur->bc_btnum); +} + /* - * Update the record referred to by cur, to the value given by [bno, len]. - * This either works (return 0) or gets an EFSCORRUPTED error. + * Update the longest extent in the AGF */ -int /* error */ -xfs_alloc_update( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agblock_t bno, /* starting block of extent */ - xfs_extlen_t len) /* length of extent */ +STATIC void +xfs_allocbt_update_lastrec( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + union xfs_btree_rec *rec, + int ptr, + int reason) { - xfs_alloc_block_t *block; /* btree block to update */ - int error; /* error return value */ - int ptr; /* current record number (updating) */ + struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); + xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); + __be32 len; - ASSERT(len > 0); - /* - * Pick up the a.g. freelist struct and the current block. - */ - block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0]))) - return error; -#endif - /* - * Get the address of the rec to be updated. - */ - ptr = cur->bc_ptrs[0]; - { - xfs_alloc_rec_t *rp; /* pointer to updated record */ + ASSERT(cur->bc_btnum == XFS_BTNUM_CNT); - rp = XFS_ALLOC_REC_ADDR(block, ptr, cur); + switch (reason) { + case LASTREC_UPDATE: /* - * Fill in the new contents and log them. 
+ * If this is the last leaf block and it's the last record, + * then update the size of the longest extent in the AG. */ - rp->ar_startblock = cpu_to_be32(bno); - rp->ar_blockcount = cpu_to_be32(len); - xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr); - } - /* - * If it's the by-size btree and it's the last leaf block and - * it's the last record... then update the size of the longest - * extent in the a.g., which we cache in the a.g. freelist header. - */ - if (cur->bc_btnum == XFS_BTNUM_CNT && - be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK && - ptr == be16_to_cpu(block->bb_numrecs)) { - xfs_agf_t *agf; /* a.g. freespace header */ - xfs_agnumber_t seqno; - - agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); - seqno = be32_to_cpu(agf->agf_seqno); - cur->bc_mp->m_perag[seqno].pagf_longest = len; - agf->agf_longest = cpu_to_be32(len); - xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, - XFS_AGF_LONGEST); - } - /* - * Updating first record in leaf. Pass new key value up to our parent. - */ - if (ptr == 1) { - xfs_alloc_key_t key; /* key containing [bno, len] */ - - key.ar_startblock = cpu_to_be32(bno); - key.ar_blockcount = cpu_to_be32(len); - if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, 1))) - return error; + if (ptr != xfs_btree_get_numrecs(block)) + return; + len = rec->alloc.ar_blockcount; + break; + default: + ASSERT(0); + return; } - return 0; -} -STATIC struct xfs_btree_cur * -xfs_allocbt_dup_cursor( - struct xfs_btree_cur *cur) -{ - return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agbp, cur->bc_private.a.agno, - cur->bc_btnum); + agf->agf_longest = len; + cur->bc_mp->m_perag[seqno].pagf_longest = be32_to_cpu(len); + xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, XFS_AGF_LONGEST); } STATIC int @@ -1864,6 +1831,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .key_len = sizeof(xfs_alloc_key_t), .dup_cursor = xfs_allocbt_dup_cursor, + .update_lastrec = xfs_allocbt_update_lastrec, .get_maxrecs = xfs_allocbt_get_maxrecs, .init_key_from_rec = xfs_allocbt_init_key_from_rec, .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, @@ -1902,6 +1870,8 @@ xfs_allocbt_init_cursor( cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_ops = &xfs_allocbt_ops; + if (btnum == XFS_BTNUM_CNT) + cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; cur->bc_private.a.agbp = agbp; cur->bc_private.a.agno = agno; diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index aa110ff..81e2f36 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -113,13 +113,6 @@ extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno, */ extern int xfs_alloc_insert(struct xfs_btree_cur *cur, int *stat); -/* - * Update the record referred to by cur, to the value given by [bno, len]. - * This either works (return 0) or gets an EFSCORRUPTED error. - */ -extern int xfs_alloc_update(struct xfs_btree_cur *cur, xfs_agblock_t bno, - xfs_extlen_t len); - extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 1296b41..7d6c4ac 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -430,6 +430,24 @@ xfs_bmbt_lookup_ge( return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); } +/* +* Update the record referred to by cur to the value given + * by [off, bno, len, state]. + * This either works (return 0) or gets an EFSCORRUPTED error. 
+ */ +STATIC int +xfs_bmbt_update( + struct xfs_btree_cur *cur, + xfs_fileoff_t off, + xfs_fsblock_t bno, + xfs_filblks_t len, + xfs_exntst_t state) +{ + union xfs_btree_rec rec; + + xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state); + return xfs_btree_update(cur, &rec); +} /* * Called from xfs_bmap_add_attrfork to handle btree format files. diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 0a56257..99200a98 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -1964,51 +1964,6 @@ xfs_bmbt_to_bmdr( } /* - * Update the record to the passed values. - */ -int -xfs_bmbt_update( - xfs_btree_cur_t *cur, - xfs_fileoff_t off, - xfs_fsblock_t bno, - xfs_filblks_t len, - xfs_exntst_t state) -{ - xfs_bmbt_block_t *block; - xfs_buf_t *bp; - int error; - xfs_bmbt_key_t key; - int ptr; - xfs_bmbt_rec_t *rp; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGFFFI(cur, (xfs_dfiloff_t)off, (xfs_dfsbno_t)bno, - (xfs_dfilblks_t)len, (int)state); - block = xfs_bmbt_get_block(cur, 0, &bp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, block, 0, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - ptr = cur->bc_ptrs[0]; - rp = XFS_BMAP_REC_IADDR(block, ptr, cur); - xfs_bmbt_disk_set_allf(rp, off, bno, len, state); - xfs_bmbt_log_recs(cur, bp, ptr, ptr); - if (ptr > 1) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - return 0; - } - key.br_startoff = cpu_to_be64(off); - if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, 1))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - return 0; -} - -/* * Check extent records, which have just been read, for * any bit in the extent flag field. ASSERT on debug * kernels, as this condition should not occur. diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index d04198c..6bfd62e 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -274,8 +274,6 @@ extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); extern void xfs_bmbt_to_bmdr(xfs_bmbt_block_t *, int, xfs_bmdr_block_t *, int); -extern int xfs_bmbt_update(struct xfs_btree_cur *, xfs_fileoff_t, - xfs_fsblock_t, xfs_filblks_t, xfs_exntst_t); extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 1459a2b..205272f 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -988,6 +988,30 @@ xfs_btree_get_sibling( } } +/* + * Return true if ptr is the last record in the btree and + * we need to track updateѕ to this record. The decision + * will be further refined in the update_lastrec method. + */ +STATIC int +xfs_btree_is_lastrec( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + int level) +{ + union xfs_btree_ptr ptr; + + if (level > 0) + return 0; + if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE)) + return 0; + + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); + if (!xfs_btree_ptr_is_null(cur, &ptr)) + return 0; + return 1; +} + STATIC xfs_daddr_t xfs_btree_ptr_to_daddr( struct xfs_btree_cur *cur, @@ -1080,6 +1104,20 @@ xfs_btree_copy_keys( } /* + * Copy records from one btree block to another. 
+ */ +STATIC void +xfs_btree_copy_recs( + struct xfs_btree_cur *cur, + union xfs_btree_rec *dst_rec, + union xfs_btree_rec *src_rec, + int numrecs) +{ + ASSERT(numrecs >= 0); + memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len); +} + +/* * Log key values from the btree block. */ STATIC void @@ -1105,6 +1143,26 @@ xfs_btree_log_keys( } /* + * Log record values from the btree block. + */ +STATIC void +xfs_btree_log_recs( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + int first, + int last) +{ + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); + + xfs_trans_log_buf(cur->bc_tp, bp, + xfs_btree_rec_offset(cur, first), + xfs_btree_rec_offset(cur, last + 1) - 1); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); +} + +/* * Increment cursor by one record at the level. * For nonzero levels the leaf-ward information is untouched. */ @@ -1576,3 +1634,66 @@ xfs_btree_updkey( XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return 0; } + +/* + * Update the record referred to by cur to the value in the + * given record. This either works (return 0) or gets an + * EFSCORRUPTED error. + */ +int +xfs_btree_update( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec) +{ + struct xfs_btree_block *block; + struct xfs_buf *bp; + int error; + int ptr; + union xfs_btree_rec *rp; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGR(cur, rec); + + /* Pick up the current block. */ + block = xfs_btree_get_block(cur, 0, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, 0, bp); + if (error) + goto error0; +#endif + /* Get the address of the rec to be updated. */ + ptr = cur->bc_ptrs[0]; + rp = xfs_btree_rec_addr(cur, ptr, block); + + /* Fill in the new contents and log them. */ + xfs_btree_copy_recs(cur, rp, rec, 1); + xfs_btree_log_recs(cur, bp, ptr, ptr); + + /* + * If we are tracking the last record in the tree and + * we are at the far right edge of the tree, update it. + */ + if (xfs_btree_is_lastrec(cur, block, 0)) { + cur->bc_ops->update_lastrec(cur, block, rec, + ptr, LASTREC_UPDATE); + } + + /* Updating first rec in leaf. Pass new key value up to our parent. */ + if (ptr == 1) { + union xfs_btree_key key; + + cur->bc_ops->init_key_from_rec(&key, rec); + error = xfs_btree_updkey(cur, &key, 1); + if (error) + goto error0; + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index ac3f527..c3bfa55 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -187,6 +187,12 @@ struct xfs_btree_ops { /* cursor operations */ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); + /* update last record information */ + void (*update_lastrec)(struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + union xfs_btree_rec *rec, + int ptr, int reason); + /* records in block/level */ int (*get_maxrecs)(struct xfs_btree_cur *cur, int level); @@ -221,6 +227,12 @@ struct xfs_btree_ops { }; /* + * Reasons for the update_lastrec method to be called. + */ +#define LASTREC_UPDATE 0 + + +/* * Btree cursor structure. * This collects all information needed by the btree code in one place. 
*/ @@ -264,6 +276,7 @@ typedef struct xfs_btree_cur /* cursor flags */ #define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */ #define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */ +#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */ #define XFS_BTREE_NOERROR 0 @@ -519,6 +532,7 @@ int xfs_btree_increment(struct xfs_btree_cur *, int, int *); int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); int xfs_btree_updkey(struct xfs_btree_cur *, union xfs_btree_key *, int); +int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); /* * Helpers. diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index bbf537f..138651a 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -172,6 +172,26 @@ xfs_inobt_lookup_le( } /* + * Update the record referred to by cur to the value given + * by [ino, fcnt, free]. + * This either works (return 0) or gets an EFSCORRUPTED error. + */ +STATIC int /* error */ +xfs_inobt_update( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t ino, /* starting inode of chunk */ + __int32_t fcnt, /* free inode count */ + xfs_inofree_t free) /* free inode mask */ +{ + union xfs_btree_rec rec; + + rec.inobt.ir_startino = cpu_to_be32(ino); + rec.inobt.ir_freecount = cpu_to_be32(fcnt); + rec.inobt.ir_free = cpu_to_be64(free); + return xfs_btree_update(cur, &rec); +} + +/* * Allocate new inodes in the allocation group specified by agbp. * Return 0 for success, else error code. */ diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index cd8bb51..d080a68 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -1551,58 +1551,6 @@ xfs_inobt_insert( return 0; } -/* - * Update the record referred to by cur, to the value given - * by [ino, fcnt, free]. - * This either works (return 0) or gets an EFSCORRUPTED error. - */ -int /* error */ -xfs_inobt_update( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agino_t ino, /* starting inode of chunk */ - __int32_t fcnt, /* free inode count */ - xfs_inofree_t free) /* free inode mask */ -{ - xfs_inobt_block_t *block; /* btree block to update */ - xfs_buf_t *bp; /* buffer containing btree block */ - int error; /* error return value */ - int ptr; /* current record number (updating) */ - xfs_inobt_rec_t *rp; /* pointer to updated record */ - - /* - * Pick up the current block. - */ - bp = cur->bc_bufs[0]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, 0, bp))) - return error; -#endif - /* - * Get the address of the rec to be updated. - */ - ptr = cur->bc_ptrs[0]; - rp = XFS_INOBT_REC_ADDR(block, ptr, cur); - /* - * Fill in the new contents and log them. - */ - rp->ir_startino = cpu_to_be32(ino); - rp->ir_freecount = cpu_to_be32(fcnt); - rp->ir_free = cpu_to_be64(free); - xfs_inobt_log_recs(cur, bp, ptr, ptr); - /* - * Updating first record in leaf. Pass new key value up to our parent. 
- */ - if (ptr == 1) { - xfs_inobt_key_t key; /* key containing [ino] */ - - key.ir_startino = cpu_to_be32(ino); - if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, 1))) - return error; - } - return 0; -} - STATIC struct xfs_btree_cur * xfs_inobt_dup_cursor( struct xfs_btree_cur *cur) diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 674b459..7f77549 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -135,14 +135,6 @@ extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, */ extern int xfs_inobt_insert(struct xfs_btree_cur *cur, int *stat); -/* - * Update the record referred to by cur, to the value given - * by [ino, fcnt, free]. - * This either works (return 0) or gets an EFSCORRUPTED error. - */ -extern int xfs_inobt_update(struct xfs_btree_cur *cur, xfs_agino_t ino, - __int32_t fcnt, xfs_inofree_t free); - extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); -- cgit v0.10.2 From 9eaead51bed957af0070a277d945744a76df0c8b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:56:43 +1100 Subject: [XFS] implement generic xfs_btree_rshift Make the btree right shift code generic. Based on a patch from David Chinner with lots of changes to follow the original btree implementations more closely. While this loses some of the generic helper routines for inserting/moving/removing records it also solves some of the one off bugs in the original code and makes it easier to verify. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32196a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index c5c3299..31e4289 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -49,7 +49,6 @@ STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC int xfs_alloc_lshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *); -STATIC int xfs_alloc_rshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *, xfs_alloc_key_t *, xfs_btree_cur_t **, int *); @@ -391,7 +390,7 @@ xfs_alloc_delrec( */ if (be16_to_cpu(left->bb_numrecs) - 1 >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) { - if ((error = xfs_alloc_rshift(tcur, level, &i))) + if ((error = xfs_btree_rshift(tcur, level, &i))) goto error0; if (i) { ASSERT(be16_to_cpu(block->bb_numrecs) >= @@ -683,7 +682,7 @@ xfs_alloc_insrec( /* * First, try shifting an entry to the right neighbor. */ - if ((error = xfs_alloc_rshift(cur, level, &i))) + if ((error = xfs_btree_rshift(cur, level, &i))) return error; if (i) { /* nothing */ @@ -1233,137 +1232,6 @@ xfs_alloc_newroot( } /* - * Move 1 record right from cur/level if possible. - * Update cur to reflect the new path. 
- */ -STATIC int /* error */ -xfs_alloc_rshift( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level to shift record on */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; /* loop index */ - xfs_alloc_key_t key; /* key value for leaf level upward */ - xfs_buf_t *lbp; /* buffer for left (current) block */ - xfs_alloc_block_t *left; /* left (current) btree block */ - xfs_buf_t *rbp; /* buffer for right neighbor block */ - xfs_alloc_block_t *right; /* right neighbor btree block */ - xfs_alloc_key_t *rkp; /* key pointer for right block */ - xfs_btree_cur_t *tcur; /* temporary cursor */ - - /* - * Set up variables for this block as "left". - */ - lbp = cur->bc_bufs[level]; - left = XFS_BUF_TO_ALLOC_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - return error; -#endif - /* - * If we've got no right sibling then we can't shift an entry right. - */ - if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) { - *stat = 0; - return 0; - } - /* - * If the cursor entry is the one that would be moved, don't - * do it... it's too complicated. - */ - if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) { - *stat = 0; - return 0; - } - /* - * Set up the right neighbor as "right". - */ - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), - 0, &rbp, XFS_ALLOC_BTREE_REF))) - return error; - right = XFS_BUF_TO_ALLOC_BLOCK(rbp); - if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) - return error; - /* - * If it's full, it can't take another entry. - */ - if (be16_to_cpu(right->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { - *stat = 0; - return 0; - } - /* - * Make a hole at the start of the right neighbor block, then - * copy the last left block entry to the hole. 
- */ - if (level > 0) { - xfs_alloc_key_t *lkp; /* key pointer for left block */ - xfs_alloc_ptr_t *lpp; /* address pointer for left block */ - xfs_alloc_ptr_t *rpp; /* address pointer for right block */ - - lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur); - lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur); - rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); - rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur); -#ifdef DEBUG - for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level))) - return error; - } -#endif - memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); - memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); -#ifdef DEBUG - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level))) - return error; -#endif - *rkp = *lkp; - *rpp = *lpp; - xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1); - } else { - xfs_alloc_rec_t *lrp; /* record pointer for left block */ - xfs_alloc_rec_t *rrp; /* record pointer for right block */ - - lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur); - rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); - memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); - *rrp = *lrp; - xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - key.ar_startblock = rrp->ar_startblock; - key.ar_blockcount = rrp->ar_blockcount; - rkp = &key; - xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1); - } - /* - * Decrement and log left's numrecs, bump and log right's numrecs. - */ - be16_add_cpu(&left->bb_numrecs, -1); - xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); - be16_add_cpu(&right->bb_numrecs, 1); - xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS); - /* - * Using a temporary cursor, update the parent key values of the - * block on the right. - */ - if ((error = xfs_btree_dup_cursor(cur, &tcur))) - return error; - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_increment(tcur, level, &i)) || - (error = xfs_btree_updkey(tcur, (union xfs_btree_key *)rkp, level + 1))) - goto error0; - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - *stat = 1; - return 0; -error0: - xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); - return error; -} - -/* * Split cur/level block in half. * Return new block number and its first record (to be inserted into parent). 
*/ diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 99200a98..9d18fa8 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -53,7 +53,6 @@ STATIC int xfs_bmbt_killroot(xfs_btree_cur_t *); STATIC void xfs_bmbt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC int xfs_bmbt_lshift(xfs_btree_cur_t *, int, int *); -STATIC int xfs_bmbt_rshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *, __uint64_t *, xfs_btree_cur_t **, int *); @@ -327,7 +326,7 @@ xfs_bmbt_delrec( bno = be64_to_cpu(left->bb_rightsib); if (be16_to_cpu(left->bb_numrecs) - 1 >= XFS_BMAP_BLOCK_IMINRECS(level, cur)) { - if ((error = xfs_bmbt_rshift(tcur, level, &i))) { + if ((error = xfs_btree_rshift(tcur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -538,7 +537,7 @@ xfs_bmbt_insrec( logflags); block = xfs_bmbt_get_block(cur, level, &bp); } else { - if ((error = xfs_bmbt_rshift(cur, level, &i))) { + if ((error = xfs_btree_rshift(cur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } @@ -945,143 +944,6 @@ xfs_bmbt_lshift( } /* - * Move 1 record right from cur/level if possible. - * Update cur to reflect the new path. - */ -STATIC int /* error */ -xfs_bmbt_rshift( - xfs_btree_cur_t *cur, - int level, - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; /* loop counter */ - xfs_bmbt_key_t key; /* bmap btree key */ - xfs_buf_t *lbp; /* left buffer pointer */ - xfs_bmbt_block_t *left; /* left btree block */ - xfs_bmbt_key_t *lkp; /* left btree key */ - xfs_bmbt_ptr_t *lpp; /* left address pointer */ - xfs_bmbt_rec_t *lrp; /* left record pointer */ - xfs_mount_t *mp; /* file system mount point */ - xfs_buf_t *rbp; /* right buffer pointer */ - xfs_bmbt_block_t *right; /* right btree block */ - xfs_bmbt_key_t *rkp; /* right btree key */ - xfs_bmbt_ptr_t *rpp; /* right address pointer */ - xfs_bmbt_rec_t *rrp=NULL; /* right record pointer */ - struct xfs_btree_cur *tcur; /* temporary btree cursor */ - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGI(cur, level); - if (level == cur->bc_nlevels - 1) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - lbp = cur->bc_bufs[level]; - left = XFS_BUF_TO_BMBT_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - if (be64_to_cpu(left->bb_rightsib) == NULLDFSBNO) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - mp = cur->bc_mp; - if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, be64_to_cpu(left->bb_rightsib), 0, - &rbp, XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - right = XFS_BUF_TO_BMBT_BLOCK(rbp); - if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - if (be16_to_cpu(right->bb_numrecs) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - if (level > 0) { - lkp = XFS_BMAP_KEY_IADDR(left, be16_to_cpu(left->bb_numrecs), cur); - lpp = XFS_BMAP_PTR_IADDR(left, be16_to_cpu(left->bb_numrecs), cur); - rkp = XFS_BMAP_KEY_IADDR(right, 1, cur); - rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); -#ifdef DEBUG - for (i = be16_to_cpu(right->bb_numrecs) 
- 1; i >= 0; i--) { - if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - } -#endif - memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); - memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); -#ifdef DEBUG - if ((error = xfs_btree_check_lptr_disk(cur, *lpp, level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - *rkp = *lkp; - *rpp = *lpp; - xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - } else { - lrp = XFS_BMAP_REC_IADDR(left, be16_to_cpu(left->bb_numrecs), cur); - rrp = XFS_BMAP_REC_IADDR(right, 1, cur); - memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); - *rrp = *lrp; - xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp)); - rkp = &key; - } - be16_add_cpu(&left->bb_numrecs, -1); - xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS); - be16_add_cpu(&right->bb_numrecs, 1); -#ifdef DEBUG - if (level > 0) - xfs_btree_check_key(XFS_BTNUM_BMAP, rkp, rkp + 1); - else - xfs_btree_check_rec(XFS_BTNUM_BMAP, rrp, rrp + 1); -#endif - xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS); - if ((error = xfs_btree_dup_cursor(cur, &tcur))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_increment(tcur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(tcur, ERROR); - goto error1; - } - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_updkey(tcur, (union xfs_btree_key *)rkp, level + 1))) { - XFS_BMBT_TRACE_CURSOR(tcur, ERROR); - goto error1; - } - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; -error0: - XFS_BMBT_TRACE_CURSOR(cur, ERROR); -error1: - xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); - return error; -} - -/* * Determine the extent state. */ /* ARGSUSED */ diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 205272f..e1a2137 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -1118,6 +1118,77 @@ xfs_btree_copy_recs( } /* + * Copy block pointers from one btree block to another. + */ +STATIC void +xfs_btree_copy_ptrs( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *dst_ptr, + union xfs_btree_ptr *src_ptr, + int numptrs) +{ + ASSERT(numptrs >= 0); + memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur)); +} + +/* + * Shift keys one index left/right inside a single btree block. + */ +STATIC void +xfs_btree_shift_keys( + struct xfs_btree_cur *cur, + union xfs_btree_key *key, + int dir, + int numkeys) +{ + char *dst_key; + + ASSERT(numkeys >= 0); + ASSERT(dir == 1 || dir == -1); + + dst_key = (char *)key + (dir * cur->bc_ops->key_len); + memmove(dst_key, key, numkeys * cur->bc_ops->key_len); +} + +/* + * Shift records one index left/right inside a single btree block. + */ +STATIC void +xfs_btree_shift_recs( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + int dir, + int numrecs) +{ + char *dst_rec; + + ASSERT(numrecs >= 0); + ASSERT(dir == 1 || dir == -1); + + dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len); + memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len); +} + +/* + * Shift block pointers one index left/right inside a single btree block. 
+ */ +STATIC void +xfs_btree_shift_ptrs( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + int dir, + int numptrs) +{ + char *dst_ptr; + + ASSERT(numptrs >= 0); + ASSERT(dir == 1 || dir == -1); + + dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur)); + memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur)); +} + +/* * Log key values from the btree block. */ STATIC void @@ -1163,6 +1234,79 @@ xfs_btree_log_recs( } /* + * Log block pointer fields from a btree block (nonleaf). + */ +STATIC void +xfs_btree_log_ptrs( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_buf *bp, /* buffer containing btree block */ + int first, /* index of first pointer to log */ + int last) /* index of last pointer to log */ +{ + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); + + if (bp) { + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + int level = xfs_btree_get_level(block); + + xfs_trans_log_buf(cur->bc_tp, bp, + xfs_btree_ptr_offset(cur, first, level), + xfs_btree_ptr_offset(cur, last + 1, level) - 1); + } else { + xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, + xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); +} + +/* + * Log fields from a btree block header. + */ +STATIC void +xfs_btree_log_block( + struct xfs_btree_cur *cur, /* btree cursor */ + struct xfs_buf *bp, /* buffer containing btree block */ + int fields) /* mask of fields: XFS_BB_... */ +{ + int first; /* first byte offset logged */ + int last; /* last byte offset logged */ + static const short soffsets[] = { /* table of offsets (short) */ + offsetof(struct xfs_btree_sblock, bb_magic), + offsetof(struct xfs_btree_sblock, bb_level), + offsetof(struct xfs_btree_sblock, bb_numrecs), + offsetof(struct xfs_btree_sblock, bb_leftsib), + offsetof(struct xfs_btree_sblock, bb_rightsib), + sizeof(struct xfs_btree_sblock) + }; + static const short loffsets[] = { /* table of offsets (long) */ + offsetof(struct xfs_btree_lblock, bb_magic), + offsetof(struct xfs_btree_lblock, bb_level), + offsetof(struct xfs_btree_lblock, bb_numrecs), + offsetof(struct xfs_btree_lblock, bb_leftsib), + offsetof(struct xfs_btree_lblock, bb_rightsib), + sizeof(struct xfs_btree_lblock) + }; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGBI(cur, bp, fields); + + if (bp) { + xfs_btree_offsets(fields, + (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? + loffsets : soffsets, + XFS_BB_NUM_BITS, &first, &last); + xfs_trans_log_buf(cur->bc_tp, bp, first, last); + } else { + xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, + xfs_ilog_fbroot(cur->bc_private.b.whichfork)); + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); +} + +/* * Increment cursor by one record at the level. * For nonzero levels the leaf-ward information is untouched. */ @@ -1368,7 +1512,6 @@ error0: return error; } - STATIC int xfs_btree_lookup_get_block( struct xfs_btree_cur *cur, /* btree cursor */ @@ -1697,3 +1840,177 @@ error0: return error; } +/* + * Move 1 record right from cur/level if possible. + * Update cur to reflect the new path. 
+ */ +int /* error */ +xfs_btree_rshift( + struct xfs_btree_cur *cur, + int level, + int *stat) /* success/failure */ +{ + union xfs_btree_key key; /* btree key */ + struct xfs_buf *lbp; /* left buffer pointer */ + struct xfs_btree_block *left; /* left btree block */ + struct xfs_buf *rbp; /* right buffer pointer */ + struct xfs_btree_block *right; /* right btree block */ + struct xfs_btree_cur *tcur; /* temporary btree cursor */ + union xfs_btree_ptr rptr; /* right block pointer */ + union xfs_btree_key *rkp; /* right btree key */ + int rrecs; /* right record count */ + int lrecs; /* left record count */ + int error; /* error return value */ + int i; /* loop counter */ + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, level); + + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + (level == cur->bc_nlevels - 1)) + goto out0; + + /* Set up variables for this block as "left". */ + left = xfs_btree_get_block(cur, level, &lbp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, left, level, lbp); + if (error) + goto error0; +#endif + + /* If we've got no right sibling then we can't shift an entry right. */ + xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); + if (xfs_btree_ptr_is_null(cur, &rptr)) + goto out0; + + /* + * If the cursor entry is the one that would be moved, don't + * do it... it's too complicated. + */ + lrecs = xfs_btree_get_numrecs(left); + if (cur->bc_ptrs[level] >= lrecs) + goto out0; + + /* Set up the right neighbor as "right". */ + error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp); + if (error) + goto error0; + + /* If it's full, it can't take another entry. */ + rrecs = xfs_btree_get_numrecs(right); + if (rrecs == cur->bc_ops->get_maxrecs(cur, level)) + goto out0; + + XFS_BTREE_STATS_INC(cur, rshift); + XFS_BTREE_STATS_ADD(cur, moves, rrecs); + + /* + * Make a hole at the start of the right neighbor block, then + * copy the last left block entry to the hole. + */ + if (level > 0) { + /* It's a nonleaf. make a hole in the keys and ptrs */ + union xfs_btree_key *lkp; + union xfs_btree_ptr *lpp; + union xfs_btree_ptr *rpp; + + lkp = xfs_btree_key_addr(cur, lrecs, left); + lpp = xfs_btree_ptr_addr(cur, lrecs, left); + rkp = xfs_btree_key_addr(cur, 1, right); + rpp = xfs_btree_ptr_addr(cur, 1, right); + +#ifdef DEBUG + for (i = rrecs - 1; i >= 0; i--) { + error = xfs_btree_check_ptr(cur, rpp, i, level); + if (error) + goto error0; + } +#endif + + xfs_btree_shift_keys(cur, rkp, 1, rrecs); + xfs_btree_shift_ptrs(cur, rpp, 1, rrecs); + +#ifdef DEBUG + error = xfs_btree_check_ptr(cur, lpp, 0, level); + if (error) + goto error0; +#endif + + /* Now put the new data in, and log it. */ + xfs_btree_copy_keys(cur, rkp, lkp, 1); + xfs_btree_copy_ptrs(cur, rpp, lpp, 1); + + xfs_btree_log_keys(cur, rbp, 1, rrecs + 1); + xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1); + + xfs_btree_check_key(cur->bc_btnum, rkp, + xfs_btree_key_addr(cur, 2, right)); + } else { + /* It's a leaf. make a hole in the records */ + union xfs_btree_rec *lrp; + union xfs_btree_rec *rrp; + + lrp = xfs_btree_rec_addr(cur, lrecs, left); + rrp = xfs_btree_rec_addr(cur, 1, right); + + xfs_btree_shift_recs(cur, rrp, 1, rrecs); + + /* Now put the new data in, and log it. */ + xfs_btree_copy_recs(cur, rrp, lrp, 1); + xfs_btree_log_recs(cur, rbp, 1, rrecs + 1); + + cur->bc_ops->init_key_from_rec(&key, rrp); + rkp = &key; + + xfs_btree_check_rec(cur->bc_btnum, rrp, + xfs_btree_rec_addr(cur, 2, right)); + } + + /* + * Decrement and log left's numrecs, bump and log right's numrecs. 
+ */ + xfs_btree_set_numrecs(left, --lrecs); + xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); + + xfs_btree_set_numrecs(right, ++rrecs); + xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); + + /* + * Using a temporary cursor, update the parent key values of the + * block on the right. + */ + error = xfs_btree_dup_cursor(cur, &tcur); + if (error) + goto error0; + i = xfs_btree_lastrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + error = xfs_btree_increment(tcur, level, &i); + if (error) + goto error1; + + error = xfs_btree_updkey(tcur, rkp, level + 1); + if (error) + goto error1; + + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + +error1: + XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR); + xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); + return error; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index c3bfa55..04311db 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -533,6 +533,7 @@ int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); int xfs_btree_updkey(struct xfs_btree_cur *, union xfs_btree_key *, int); int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); +int xfs_btree_rshift(struct xfs_btree_cur *, int, int *); /* * Helpers. @@ -542,6 +543,12 @@ static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block) return be16_to_cpu(block->bb_numrecs); } +static inline void xfs_btree_set_numrecs(struct xfs_btree_block *block, + __uint16_t numrecs) +{ + block->bb_numrecs = cpu_to_be16(numrecs); +} + static inline int xfs_btree_get_level(struct xfs_btree_block *block) { return be16_to_cpu(block->bb_level); diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index d080a68..457f88a 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -45,7 +45,6 @@ STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC int xfs_inobt_lshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *); -STATIC int xfs_inobt_rshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *, xfs_inobt_key_t *, xfs_btree_cur_t **, int *); @@ -337,7 +336,7 @@ xfs_inobt_delrec( */ if (be16_to_cpu(left->bb_numrecs) - 1 >= XFS_INOBT_BLOCK_MINRECS(level, cur)) { - if ((error = xfs_inobt_rshift(tcur, level, &i))) + if ((error = xfs_btree_rshift(tcur, level, &i))) goto error0; if (i) { ASSERT(be16_to_cpu(block->bb_numrecs) >= @@ -608,7 +607,7 @@ xfs_inobt_insrec( /* * First, try shifting an entry to the right neighbor. */ - if ((error = xfs_inobt_rshift(cur, level, &i))) + if ((error = xfs_btree_rshift(cur, level, &i))) return error; if (i) { /* nothing */ @@ -1117,136 +1116,6 @@ xfs_inobt_newroot( } /* - * Move 1 record right from cur/level if possible. - * Update cur to reflect the new path. 
- */ -STATIC int /* error */ -xfs_inobt_rshift( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level to shift record on */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; /* loop index */ - xfs_inobt_key_t key; /* key value for leaf level upward */ - xfs_buf_t *lbp; /* buffer for left (current) block */ - xfs_inobt_block_t *left; /* left (current) btree block */ - xfs_inobt_key_t *lkp; /* key pointer for left block */ - xfs_inobt_ptr_t *lpp; /* address pointer for left block */ - xfs_inobt_rec_t *lrp; /* record pointer for left block */ - xfs_buf_t *rbp; /* buffer for right neighbor block */ - xfs_inobt_block_t *right; /* right neighbor btree block */ - xfs_inobt_key_t *rkp; /* key pointer for right block */ - xfs_inobt_ptr_t *rpp; /* address pointer for right block */ - xfs_inobt_rec_t *rrp=NULL; /* record pointer for right block */ - xfs_btree_cur_t *tcur; /* temporary cursor */ - - /* - * Set up variables for this block as "left". - */ - lbp = cur->bc_bufs[level]; - left = XFS_BUF_TO_INOBT_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - return error; -#endif - /* - * If we've got no right sibling then we can't shift an entry right. - */ - if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) { - *stat = 0; - return 0; - } - /* - * If the cursor entry is the one that would be moved, don't - * do it... it's too complicated. - */ - if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) { - *stat = 0; - return 0; - } - /* - * Set up the right neighbor as "right". - */ - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), - 0, &rbp, XFS_INO_BTREE_REF))) - return error; - right = XFS_BUF_TO_INOBT_BLOCK(rbp); - if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) - return error; - /* - * If it's full, it can't take another entry. - */ - if (be16_to_cpu(right->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) { - *stat = 0; - return 0; - } - /* - * Make a hole at the start of the right neighbor block, then - * copy the last left block entry to the hole. - */ - if (level > 0) { - lkp = XFS_INOBT_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur); - lpp = XFS_INOBT_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur); - rkp = XFS_INOBT_KEY_ADDR(right, 1, cur); - rpp = XFS_INOBT_PTR_ADDR(right, 1, cur); -#ifdef DEBUG - for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level))) - return error; - } -#endif - memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); - memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); -#ifdef DEBUG - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level))) - return error; -#endif - *rkp = *lkp; - *rpp = *lpp; - xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - } else { - lrp = XFS_INOBT_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur); - rrp = XFS_INOBT_REC_ADDR(right, 1, cur); - memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); - *rrp = *lrp; - xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1); - key.ir_startino = rrp->ir_startino; - rkp = &key; - } - /* - * Decrement and log left's numrecs, bump and log right's numrecs. 
- */ - be16_add_cpu(&left->bb_numrecs, -1); - xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); - be16_add_cpu(&right->bb_numrecs, 1); -#ifdef DEBUG - if (level > 0) - xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1); - else - xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1); -#endif - xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS); - /* - * Using a temporary cursor, update the parent key values of the - * block on the right. - */ - if ((error = xfs_btree_dup_cursor(cur, &tcur))) - return error; - xfs_btree_lastrec(tcur, level); - if ((error = xfs_btree_increment(tcur, level, &i)) || - (error = xfs_btree_updkey(tcur, (union xfs_btree_key *)rkp, level + 1))) { - xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); - return error; - } - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - *stat = 1; - return 0; -} - -/* * Split cur/level block in half. * Return new block number and its first record (to be inserted into parent). */ -- cgit v0.10.2 From 687b890a184fef263ebb773926e1f4aa69240d01 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:56:53 +1100 Subject: [XFS] implement generic xfs_btree_lshift Make the btree left shift code generic. Based on a patch from David Chinner with lots of changes to follow the original btree implementations more closely. While this loses some of the generic helper routines for inserting/moving/removing records it also solves some of the one off bugs in the original code and makes it easier to verify. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32197a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 31e4289..974a412 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -47,7 +47,6 @@ STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int); STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC int xfs_alloc_lshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *); STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *, xfs_alloc_key_t *, xfs_btree_cur_t **, int *); @@ -326,7 +325,7 @@ xfs_alloc_delrec( */ if (be16_to_cpu(right->bb_numrecs) - 1 >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) { - if ((error = xfs_alloc_lshift(tcur, level, &i))) + if ((error = xfs_btree_lshift(tcur, level, &i))) goto error0; if (i) { ASSERT(be16_to_cpu(block->bb_numrecs) >= @@ -691,7 +690,7 @@ xfs_alloc_insrec( * Next, try shifting an entry to the left neighbor. */ else { - if ((error = xfs_alloc_lshift(cur, level, &i))) + if ((error = xfs_btree_lshift(cur, level, &i))) return error; if (i) optr = ptr = cur->bc_ptrs[level]; @@ -936,147 +935,6 @@ xfs_alloc_log_recs( } /* - * Move 1 record left from cur/level if possible. - * Update cur to reflect the new path. 
- */ -STATIC int /* error */ -xfs_alloc_lshift( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level to shift record on */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ -#ifdef DEBUG - int i; /* loop index */ -#endif - xfs_alloc_key_t key; /* key value for leaf level upward */ - xfs_buf_t *lbp; /* buffer for left neighbor block */ - xfs_alloc_block_t *left; /* left neighbor btree block */ - int nrec; /* new number of left block entries */ - xfs_buf_t *rbp; /* buffer for right (current) block */ - xfs_alloc_block_t *right; /* right (current) btree block */ - xfs_alloc_key_t *rkp=NULL; /* key pointer for right block */ - xfs_alloc_ptr_t *rpp=NULL; /* address pointer for right block */ - xfs_alloc_rec_t *rrp=NULL; /* record pointer for right block */ - - /* - * Set up variables for this block as "right". - */ - rbp = cur->bc_bufs[level]; - right = XFS_BUF_TO_ALLOC_BLOCK(rbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) - return error; -#endif - /* - * If we've got no left sibling then we can't shift an entry left. - */ - if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) { - *stat = 0; - return 0; - } - /* - * If the cursor entry is the one that would be moved, don't - * do it... it's too complicated. - */ - if (cur->bc_ptrs[level] <= 1) { - *stat = 0; - return 0; - } - /* - * Set up the left neighbor as "left". - */ - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib), - 0, &lbp, XFS_ALLOC_BTREE_REF))) - return error; - left = XFS_BUF_TO_ALLOC_BLOCK(lbp); - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - return error; - /* - * If it's full, it can't take another entry. - */ - if (be16_to_cpu(left->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { - *stat = 0; - return 0; - } - nrec = be16_to_cpu(left->bb_numrecs) + 1; - /* - * If non-leaf, copy a key and a ptr to the left block. - */ - if (level > 0) { - xfs_alloc_key_t *lkp; /* key pointer for left block */ - xfs_alloc_ptr_t *lpp; /* address pointer for left block */ - - lkp = XFS_ALLOC_KEY_ADDR(left, nrec, cur); - rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); - *lkp = *rkp; - xfs_alloc_log_keys(cur, lbp, nrec, nrec); - lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur); - rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur); -#ifdef DEBUG - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level))) - return error; -#endif - *lpp = *rpp; - xfs_alloc_log_ptrs(cur, lbp, nrec, nrec); - xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp); - } - /* - * If leaf, copy a record to the left block. - */ - else { - xfs_alloc_rec_t *lrp; /* record pointer for left block */ - - lrp = XFS_ALLOC_REC_ADDR(left, nrec, cur); - rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); - *lrp = *rrp; - xfs_alloc_log_recs(cur, lbp, nrec, nrec); - xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp); - } - /* - * Bump and log left's numrecs, decrement and log right's numrecs. - */ - be16_add_cpu(&left->bb_numrecs, 1); - xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); - be16_add_cpu(&right->bb_numrecs, -1); - xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS); - /* - * Slide the contents of right down one entry. 
- */ - if (level > 0) { -#ifdef DEBUG - for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]), - level))) - return error; - } -#endif - memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); - memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); - xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - } else { - memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); - xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - key.ar_startblock = rrp->ar_startblock; - key.ar_blockcount = rrp->ar_blockcount; - rkp = &key; - } - /* - * Update the parent key values of right. - */ - if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)rkp, level + 1))) - return error; - /* - * Slide the cursor value left one. - */ - cur->bc_ptrs[level]--; - *stat = 1; - return 0; -} - -/* * Allocate a new root block, fill it in. */ STATIC int /* error */ diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 9d18fa8..809bd6e 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -52,7 +52,6 @@ STATIC int xfs_bmbt_killroot(xfs_btree_cur_t *); STATIC void xfs_bmbt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC int xfs_bmbt_lshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *, __uint64_t *, xfs_btree_cur_t **, int *); @@ -270,7 +269,7 @@ xfs_bmbt_delrec( bno = be64_to_cpu(right->bb_leftsib); if (be16_to_cpu(right->bb_numrecs) - 1 >= XFS_BMAP_BLOCK_IMINRECS(level, cur)) { - if ((error = xfs_bmbt_lshift(tcur, level, &i))) { + if ((error = xfs_btree_lshift(tcur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -544,7 +543,7 @@ xfs_bmbt_insrec( if (i) { /* nothing */ } else { - if ((error = xfs_bmbt_lshift(cur, level, &i))) { + if ((error = xfs_btree_lshift(cur, level, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } @@ -811,139 +810,6 @@ xfs_bmbt_log_ptrs( } /* - * Move 1 record left from cur/level if possible. - * Update cur to reflect the new path. 
- */ -STATIC int /* error */ -xfs_bmbt_lshift( - xfs_btree_cur_t *cur, - int level, - int *stat) /* success/failure */ -{ - int error; /* error return value */ -#ifdef DEBUG - int i; /* loop counter */ -#endif - xfs_bmbt_key_t key; /* bmap btree key */ - xfs_buf_t *lbp; /* left buffer pointer */ - xfs_bmbt_block_t *left; /* left btree block */ - xfs_bmbt_key_t *lkp=NULL; /* left btree key */ - xfs_bmbt_ptr_t *lpp; /* left address pointer */ - int lrecs; /* left record count */ - xfs_bmbt_rec_t *lrp=NULL; /* left record pointer */ - xfs_mount_t *mp; /* file system mount point */ - xfs_buf_t *rbp; /* right buffer pointer */ - xfs_bmbt_block_t *right; /* right btree block */ - xfs_bmbt_key_t *rkp=NULL; /* right btree key */ - xfs_bmbt_ptr_t *rpp=NULL; /* right address pointer */ - xfs_bmbt_rec_t *rrp=NULL; /* right record pointer */ - int rrecs; /* right record count */ - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGI(cur, level); - if (level == cur->bc_nlevels - 1) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - rbp = cur->bc_bufs[level]; - right = XFS_BUF_TO_BMBT_BLOCK(rbp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - if (be64_to_cpu(right->bb_leftsib) == NULLDFSBNO) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - if (cur->bc_ptrs[level] <= 1) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - mp = cur->bc_mp; - if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, be64_to_cpu(right->bb_leftsib), 0, - &lbp, XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - left = XFS_BUF_TO_BMBT_BLOCK(lbp); - if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - if (be16_to_cpu(left->bb_numrecs) == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - lrecs = be16_to_cpu(left->bb_numrecs) + 1; - if (level > 0) { - lkp = XFS_BMAP_KEY_IADDR(left, lrecs, cur); - rkp = XFS_BMAP_KEY_IADDR(right, 1, cur); - *lkp = *rkp; - xfs_bmbt_log_keys(cur, lbp, lrecs, lrecs); - lpp = XFS_BMAP_PTR_IADDR(left, lrecs, cur); - rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); -#ifdef DEBUG - if ((error = xfs_btree_check_lptr_disk(cur, *rpp, level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - *lpp = *rpp; - xfs_bmbt_log_ptrs(cur, lbp, lrecs, lrecs); - } else { - lrp = XFS_BMAP_REC_IADDR(left, lrecs, cur); - rrp = XFS_BMAP_REC_IADDR(right, 1, cur); - *lrp = *rrp; - xfs_bmbt_log_recs(cur, lbp, lrecs, lrecs); - } - left->bb_numrecs = cpu_to_be16(lrecs); - xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS); -#ifdef DEBUG - if (level > 0) - xfs_btree_check_key(XFS_BTNUM_BMAP, lkp - 1, lkp); - else - xfs_btree_check_rec(XFS_BTNUM_BMAP, lrp - 1, lrp); -#endif - rrecs = be16_to_cpu(right->bb_numrecs) - 1; - right->bb_numrecs = cpu_to_be16(rrecs); - xfs_bmbt_log_block(cur, rbp, XFS_BB_NUMRECS); - if (level > 0) { -#ifdef DEBUG - for (i = 0; i < rrecs; i++) { - if ((error = xfs_btree_check_lptr_disk(cur, rpp[i + 1], - level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - } -#endif - memmove(rkp, rkp + 1, rrecs * sizeof(*rkp)); - memmove(rpp, rpp + 1, rrecs * sizeof(*rpp)); - xfs_bmbt_log_keys(cur, rbp, 1, rrecs); - xfs_bmbt_log_ptrs(cur, rbp, 1, rrecs); - } else { - memmove(rrp, rrp + 1, rrecs * sizeof(*rrp)); - xfs_bmbt_log_recs(cur, rbp, 1, rrecs); - key.br_startoff = 
cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp)); - rkp = &key; - } - if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)rkp, level + 1))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - cur->bc_ptrs[level]--; - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; -} - -/* * Determine the extent state. */ /* ARGSUSED */ diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index e1a2137..2b0d142 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -1841,6 +1841,191 @@ error0: } /* + * Move 1 record left from cur/level if possible. + * Update cur to reflect the new path. + */ +int /* error */ +xfs_btree_lshift( + struct xfs_btree_cur *cur, + int level, + int *stat) /* success/failure */ +{ + union xfs_btree_key key; /* btree key */ + struct xfs_buf *lbp; /* left buffer pointer */ + struct xfs_btree_block *left; /* left btree block */ + int lrecs; /* left record count */ + struct xfs_buf *rbp; /* right buffer pointer */ + struct xfs_btree_block *right; /* right btree block */ + int rrecs; /* right record count */ + union xfs_btree_ptr lptr; /* left btree pointer */ + union xfs_btree_key *rkp = NULL; /* right btree key */ + union xfs_btree_ptr *rpp = NULL; /* right address pointer */ + union xfs_btree_rec *rrp = NULL; /* right record pointer */ + int error; /* error return value */ + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, level); + + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + level == cur->bc_nlevels - 1) + goto out0; + + /* Set up variables for this block as "right". */ + right = xfs_btree_get_block(cur, level, &rbp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, right, level, rbp); + if (error) + goto error0; +#endif + + /* If we've got no left sibling then we can't shift an entry left. */ + xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); + if (xfs_btree_ptr_is_null(cur, &lptr)) + goto out0; + + /* + * If the cursor entry is the one that would be moved, don't + * do it... it's too complicated. + */ + if (cur->bc_ptrs[level] <= 1) + goto out0; + + /* Set up the left neighbor as "left". */ + error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp); + if (error) + goto error0; + + /* If it's full, it can't take another entry. */ + lrecs = xfs_btree_get_numrecs(left); + if (lrecs == cur->bc_ops->get_maxrecs(cur, level)) + goto out0; + + rrecs = xfs_btree_get_numrecs(right); + + /* + * We add one entry to the left side and remove one for the right side. + * Accout for it here, the changes will be updated on disk and logged + * later. + */ + lrecs++; + rrecs--; + + XFS_BTREE_STATS_INC(cur, lshift); + XFS_BTREE_STATS_ADD(cur, moves, 1); + + /* + * If non-leaf, copy a key and a ptr to the left block. + * Log the changes to the left block. + */ + if (level > 0) { + /* It's a non-leaf. Move keys and pointers. */ + union xfs_btree_key *lkp; /* left btree key */ + union xfs_btree_ptr *lpp; /* left address pointer */ + + lkp = xfs_btree_key_addr(cur, lrecs, left); + rkp = xfs_btree_key_addr(cur, 1, right); + + lpp = xfs_btree_ptr_addr(cur, lrecs, left); + rpp = xfs_btree_ptr_addr(cur, 1, right); +#ifdef DEBUG + error = xfs_btree_check_ptr(cur, rpp, 0, level); + if (error) + goto error0; +#endif + xfs_btree_copy_keys(cur, lkp, rkp, 1); + xfs_btree_copy_ptrs(cur, lpp, rpp, 1); + + xfs_btree_log_keys(cur, lbp, lrecs, lrecs); + xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs); + + xfs_btree_check_key(cur->bc_btnum, + xfs_btree_key_addr(cur, lrecs - 1, left), + lkp); + } else { + /* It's a leaf. Move records. 
*/ + union xfs_btree_rec *lrp; /* left record pointer */ + + lrp = xfs_btree_rec_addr(cur, lrecs, left); + rrp = xfs_btree_rec_addr(cur, 1, right); + + xfs_btree_copy_recs(cur, lrp, rrp, 1); + xfs_btree_log_recs(cur, lbp, lrecs, lrecs); + + xfs_btree_check_rec(cur->bc_btnum, + xfs_btree_rec_addr(cur, lrecs - 1, left), + lrp); + } + + xfs_btree_set_numrecs(left, lrecs); + xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); + + xfs_btree_set_numrecs(right, rrecs); + xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); + + /* + * Slide the contents of right down one entry. + */ + XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1); + if (level > 0) { + /* It's a nonleaf. operate on keys and ptrs */ +#ifdef DEBUG + int i; /* loop index */ + + for (i = 0; i < rrecs; i++) { + error = xfs_btree_check_ptr(cur, rpp, i + 1, level); + if (error) + goto error0; + } +#endif + xfs_btree_shift_keys(cur, + xfs_btree_key_addr(cur, 2, right), + -1, rrecs); + xfs_btree_shift_ptrs(cur, + xfs_btree_ptr_addr(cur, 2, right), + -1, rrecs); + + xfs_btree_log_keys(cur, rbp, 1, rrecs); + xfs_btree_log_ptrs(cur, rbp, 1, rrecs); + } else { + /* It's a leaf. operate on records */ + xfs_btree_shift_recs(cur, + xfs_btree_rec_addr(cur, 2, right), + -1, rrecs); + xfs_btree_log_recs(cur, rbp, 1, rrecs); + + /* + * If it's the first record in the block, we'll need a key + * structure to pass up to the next level (updkey). + */ + cur->bc_ops->init_key_from_rec(&key, + xfs_btree_rec_addr(cur, 1, right)); + rkp = &key; + } + + /* Update the parent key values of right. */ + error = xfs_btree_updkey(cur, rkp, level + 1); + if (error) + goto error0; + + /* Slide the cursor value left one. */ + cur->bc_ptrs[level]--; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +/* * Move 1 record right from cur/level if possible. * Update cur to reflect the new path. 
*/ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 04311db..7cde287b 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -533,6 +533,7 @@ int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); int xfs_btree_updkey(struct xfs_btree_cur *, union xfs_btree_key *, int); int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); +int xfs_btree_lshift(struct xfs_btree_cur *, int, int *); int xfs_btree_rshift(struct xfs_btree_cur *, int, int *); /* diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 457f88a..60f5db5 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -43,7 +43,6 @@ STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int); STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC int xfs_inobt_lshift(xfs_btree_cur_t *, int, int *); STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *); STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *, xfs_inobt_key_t *, xfs_btree_cur_t **, int *); @@ -276,7 +275,7 @@ xfs_inobt_delrec( */ if (be16_to_cpu(right->bb_numrecs) - 1 >= XFS_INOBT_BLOCK_MINRECS(level, cur)) { - if ((error = xfs_inobt_lshift(tcur, level, &i))) + if ((error = xfs_btree_lshift(tcur, level, &i))) goto error0; if (i) { ASSERT(be16_to_cpu(block->bb_numrecs) >= @@ -616,7 +615,7 @@ xfs_inobt_insrec( * Next, try shifting an entry to the left neighbor. */ else { - if ((error = xfs_inobt_lshift(cur, level, &i))) + if ((error = xfs_btree_lshift(cur, level, &i))) return error; if (i) { optr = ptr = cur->bc_ptrs[level]; @@ -827,148 +826,6 @@ xfs_inobt_log_recs( } /* - * Move 1 record left from cur/level if possible. - * Update cur to reflect the new path. - */ -STATIC int /* error */ -xfs_inobt_lshift( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level to shift record on */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ -#ifdef DEBUG - int i; /* loop index */ -#endif - xfs_inobt_key_t key; /* key value for leaf level upward */ - xfs_buf_t *lbp; /* buffer for left neighbor block */ - xfs_inobt_block_t *left; /* left neighbor btree block */ - xfs_inobt_key_t *lkp=NULL; /* key pointer for left block */ - xfs_inobt_ptr_t *lpp; /* address pointer for left block */ - xfs_inobt_rec_t *lrp=NULL; /* record pointer for left block */ - int nrec; /* new number of left block entries */ - xfs_buf_t *rbp; /* buffer for right (current) block */ - xfs_inobt_block_t *right; /* right (current) btree block */ - xfs_inobt_key_t *rkp=NULL; /* key pointer for right block */ - xfs_inobt_ptr_t *rpp=NULL; /* address pointer for right block */ - xfs_inobt_rec_t *rrp=NULL; /* record pointer for right block */ - - /* - * Set up variables for this block as "right". - */ - rbp = cur->bc_bufs[level]; - right = XFS_BUF_TO_INOBT_BLOCK(rbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) - return error; -#endif - /* - * If we've got no left sibling then we can't shift an entry left. - */ - if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) { - *stat = 0; - return 0; - } - /* - * If the cursor entry is the one that would be moved, don't - * do it... it's too complicated. - */ - if (cur->bc_ptrs[level] <= 1) { - *stat = 0; - return 0; - } - /* - * Set up the left neighbor as "left". 
- */ - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib), - 0, &lbp, XFS_INO_BTREE_REF))) - return error; - left = XFS_BUF_TO_INOBT_BLOCK(lbp); - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - return error; - /* - * If it's full, it can't take another entry. - */ - if (be16_to_cpu(left->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) { - *stat = 0; - return 0; - } - nrec = be16_to_cpu(left->bb_numrecs) + 1; - /* - * If non-leaf, copy a key and a ptr to the left block. - */ - if (level > 0) { - lkp = XFS_INOBT_KEY_ADDR(left, nrec, cur); - rkp = XFS_INOBT_KEY_ADDR(right, 1, cur); - *lkp = *rkp; - xfs_inobt_log_keys(cur, lbp, nrec, nrec); - lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur); - rpp = XFS_INOBT_PTR_ADDR(right, 1, cur); -#ifdef DEBUG - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level))) - return error; -#endif - *lpp = *rpp; - xfs_inobt_log_ptrs(cur, lbp, nrec, nrec); - } - /* - * If leaf, copy a record to the left block. - */ - else { - lrp = XFS_INOBT_REC_ADDR(left, nrec, cur); - rrp = XFS_INOBT_REC_ADDR(right, 1, cur); - *lrp = *rrp; - xfs_inobt_log_recs(cur, lbp, nrec, nrec); - } - /* - * Bump and log left's numrecs, decrement and log right's numrecs. - */ - be16_add_cpu(&left->bb_numrecs, 1); - xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); -#ifdef DEBUG - if (level > 0) - xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp); - else - xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp); -#endif - be16_add_cpu(&right->bb_numrecs, -1); - xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS); - /* - * Slide the contents of right down one entry. - */ - if (level > 0) { -#ifdef DEBUG - for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]), - level))) - return error; - } -#endif - memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); - memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); - xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - } else { - memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); - xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - key.ir_startino = rrp->ir_startino; - rkp = &key; - } - /* - * Update the parent key values of right. - */ - if ((error = xfs_btree_updkey(cur, (union xfs_btree_key *)rkp, level + 1))) - return error; - /* - * Slide the cursor value left one. - */ - cur->bc_ptrs[level]--; - *stat = 1; - return 0; -} - -/* * Allocate a new root block, fill it in. */ STATIC int /* error */ -- cgit v0.10.2 From f5eb8e7ca58bc1e92436614444006120d21668ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:57:03 +1100 Subject: [XFS] implement generic xfs_btree_split Make the btree split code generic. Based on a patch from David Chinner with lots of changes to follow the original btree implementations more closely. While this loses some of the generic helper routines for inserting/moving/removing records it also solves some of the one off bugs in the original code and makes it easier to verify. 
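The part that is easiest to get wrong when every btree duplicates it is the
record-count arithmetic that keeps the two halves balanced around the pending
insert. A minimal standalone sketch of that arithmetic, with names local to
the example rather than the kernel's, looks like this:

/* Illustrative sketch only; not part of the patch itself. */
#include <stdio.h>

int main(void)
{
	int lrecs = 7;		/* records in the block being split */
	int cur_ptr = 3;	/* 1-based index where the insert will land */
	int rrecs, src_index;

	/* Give half of the records to the new right block... */
	rrecs = lrecs / 2;
	/*
	 * ...and if the count is odd, give the spare record to the side
	 * that will not receive the pending insert, so both blocks hold
	 * the same number of records once it lands.
	 */
	if ((lrecs & 1) && cur_ptr <= rrecs + 1)
		rrecs++;
	src_index = lrecs - rrecs + 1;

	printf("move %d records starting at index %d; left keeps %d\n",
	       rrecs, src_index, lrecs - rrecs);
	return 0;
}

With seven records and the cursor at index 3 the insert stays in the left
block, so the right block takes records 4-7 and both sides end up holding
four records. The generic xfs_btree_split added below performs this same
lrecs/rrecs/src_index calculation once, for all btree types.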
SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32198a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 974a412..8a8d1ae 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -35,6 +35,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" @@ -48,8 +49,6 @@ STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *); -STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *, - xfs_alloc_key_t *, xfs_btree_cur_t **, int *); /* * Internal functions. @@ -695,15 +694,18 @@ xfs_alloc_insrec( if (i) optr = ptr = cur->bc_ptrs[level]; else { + union xfs_btree_ptr bno = { .s = cpu_to_be32(nbno) }; /* * Next, try splitting the current block in * half. If this works we have to re-set our * variables because we could be in a * different block now. */ - if ((error = xfs_alloc_split(cur, level, &nbno, - &nkey, &ncur, &i))) + if ((error = xfs_btree_split(cur, level, &bno, + (union xfs_btree_key *)&nkey, + &ncur, &i))) return error; + nbno = be32_to_cpu(bno.s); if (i) { bp = cur->bc_bufs[level]; block = XFS_BUF_TO_ALLOC_BLOCK(bp); @@ -1089,160 +1091,6 @@ xfs_alloc_newroot( return 0; } -/* - * Split cur/level block in half. - * Return new block number and its first record (to be inserted into parent). - */ -STATIC int /* error */ -xfs_alloc_split( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level to split */ - xfs_agblock_t *bnop, /* output: block number allocated */ - xfs_alloc_key_t *keyp, /* output: first key of new block */ - xfs_btree_cur_t **curp, /* output: new cursor */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; /* loop index/record number */ - xfs_agblock_t lbno; /* left (current) block number */ - xfs_buf_t *lbp; /* buffer for left block */ - xfs_alloc_block_t *left; /* left (current) btree block */ - xfs_agblock_t rbno; /* right (new) block number */ - xfs_buf_t *rbp; /* buffer for right block */ - xfs_alloc_block_t *right; /* right (new) btree block */ - - /* - * Allocate the new block from the freelist. - * If we can't do it, we're toast. Give up. - */ - error = xfs_alloc_get_freelist(cur->bc_tp, - cur->bc_private.a.agbp, &rbno, 1); - if (error) - return error; - if (rbno == NULLAGBLOCK) { - *stat = 0; - return 0; - } - xfs_trans_agbtree_delta(cur->bc_tp, 1); - rbp = xfs_btree_get_bufs(cur->bc_mp, cur->bc_tp, cur->bc_private.a.agno, - rbno, 0); - /* - * Set up the new block as "right". - */ - right = XFS_BUF_TO_ALLOC_BLOCK(rbp); - /* - * "Left" is the current (according to the cursor) block. - */ - lbp = cur->bc_bufs[level]; - left = XFS_BUF_TO_ALLOC_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - return error; -#endif - /* - * Fill in the btree header for the new block. - */ - right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]); - right->bb_level = left->bb_level; - right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2); - /* - * Make sure that if there's an odd number of entries now, that - * each new block will have the same number of entries. 
- */ - if ((be16_to_cpu(left->bb_numrecs) & 1) && - cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1) - be16_add_cpu(&right->bb_numrecs, 1); - i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1; - /* - * For non-leaf blocks, copy keys and addresses over to the new block. - */ - if (level > 0) { - xfs_alloc_key_t *lkp; /* left btree key pointer */ - xfs_alloc_ptr_t *lpp; /* left btree address pointer */ - xfs_alloc_key_t *rkp; /* right btree key pointer */ - xfs_alloc_ptr_t *rpp; /* right btree address pointer */ - - lkp = XFS_ALLOC_KEY_ADDR(left, i, cur); - lpp = XFS_ALLOC_PTR_ADDR(left, i, cur); - rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); - rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur); -#ifdef DEBUG - for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level))) - return error; - } -#endif - memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); - memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); - xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - *keyp = *rkp; - } - /* - * For leaf blocks, copy records over to the new block. - */ - else { - xfs_alloc_rec_t *lrp; /* left btree record pointer */ - xfs_alloc_rec_t *rrp; /* right btree record pointer */ - - lrp = XFS_ALLOC_REC_ADDR(left, i, cur); - rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); - memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); - xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - keyp->ar_startblock = rrp->ar_startblock; - keyp->ar_blockcount = rrp->ar_blockcount; - } - /* - * Find the left block number by looking in the buffer. - * Adjust numrecs, sibling pointers. - */ - lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp)); - be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); - right->bb_rightsib = left->bb_rightsib; - left->bb_rightsib = cpu_to_be32(rbno); - right->bb_leftsib = cpu_to_be32(lbno); - xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS); - xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); - /* - * If there's a block to the new block's right, make that block - * point back to right instead of to left. - */ - if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) { - xfs_alloc_block_t *rrblock; /* rr btree block */ - xfs_buf_t *rrbp; /* buffer for rrblock */ - - if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.a.agno, be32_to_cpu(right->bb_rightsib), 0, - &rrbp, XFS_ALLOC_BTREE_REF))) - return error; - rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp); - if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) - return error; - rrblock->bb_leftsib = cpu_to_be32(rbno); - xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB); - } - /* - * If the cursor is really in the right block, move it there. - * If it's just pointing past the last entry in left, then we'll - * insert there, so don't change anything in that case. - */ - if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) { - xfs_btree_setbuf(cur, level, rbp); - cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs); - } - /* - * If there are more levels, we'll need another cursor which refers to - * the right block, no matter where this cursor was. - */ - if (level + 1 < cur->bc_nlevels) { - if ((error = xfs_btree_dup_cursor(cur, curp))) - return error; - (*curp)->bc_ptrs[level + 1]++; - } - *bnop = rbno; - *stat = 1; - return 0; -} /* * Externally visible routines. 
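With the open-coded split above removed, the only piece this btree still has
to provide for the split path is where the new block comes from; the copying
and sibling relinking move into the generic code, which reaches the per-btree
allocation policy through the cursor's ops table. A standalone sketch of that
dispatch pattern, using simplified stand-in types rather than the kernel
structures, is shown here:

/* Simplified stand-in types; illustrative only, not the kernel definitions. */
#include <stdio.h>

struct cur;				/* stands in for struct xfs_btree_cur */

struct btree_ops {
	/* each btree type supplies its own block-allocation policy */
	int (*alloc_block)(struct cur *cur, int *new_bno, int *stat);
};

struct cur {
	const struct btree_ops *ops;
};

/* allocation-group btree: pretend to take a block from the AG freelist */
static int allocbt_alloc_block(struct cur *cur, int *new_bno, int *stat)
{
	(void)cur;
	*new_bno = 42;
	*stat = 1;
	return 0;
}

static const struct btree_ops allocbt_ops = {
	.alloc_block = allocbt_alloc_block,
};

/* the generic split path only ever calls through the ops table */
static int generic_split(struct cur *cur)
{
	int bno, stat, error;

	error = cur->ops->alloc_block(cur, &bno, &stat);
	if (error || !stat)
		return error;
	printf("new right block allocated at %d\n", bno);
	return 0;
}

int main(void)
{
	struct cur c = { .ops = &allocbt_ops };

	return generic_split(&c);
}

In the patch itself the equivalent call is
cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat) inside the generic
xfs_btree_split, with xfs_allocbt_alloc_block wired up through the
.alloc_block member of xfs_allocbt_ops in the hunks that follow.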
@@ -1396,6 +1244,41 @@ xfs_allocbt_dup_cursor( cur->bc_btnum); } +STATIC int +xfs_allocbt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int length, + int *stat) +{ + int error; + xfs_agblock_t bno; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + + /* Allocate the new block from the freelist. If we can't, give up. */ + error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp, + &bno, 1); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + + if (bno == NULLAGBLOCK) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + + xfs_trans_agbtree_delta(cur->bc_tp, 1); + new->s = cpu_to_be32(bno); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; +} + /* * Update the longest extent in the AGF */ @@ -1557,6 +1440,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .key_len = sizeof(xfs_alloc_key_t), .dup_cursor = xfs_allocbt_dup_cursor, + .alloc_block = xfs_allocbt_alloc_block, .update_lastrec = xfs_allocbt_update_lastrec, .get_maxrecs = xfs_allocbt_get_maxrecs, .init_key_from_rec = xfs_allocbt_init_key_from_rec, diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 809bd6e..e753926 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -52,8 +52,6 @@ STATIC int xfs_bmbt_killroot(xfs_btree_cur_t *); STATIC void xfs_bmbt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC int xfs_bmbt_split(xfs_btree_cur_t *, int, xfs_fsblock_t *, - __uint64_t *, xfs_btree_cur_t **, int *); #undef EXIT @@ -550,13 +548,17 @@ xfs_bmbt_insrec( if (i) { optr = ptr = cur->bc_ptrs[level]; } else { - if ((error = xfs_bmbt_split(cur, level, - &nbno, &startoff, &ncur, + union xfs_btree_ptr bno = { .l = cpu_to_be64(nbno) }; + union xfs_btree_key skey; + if ((error = xfs_btree_split(cur, level, + &bno, &skey, &ncur, &i))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } + nbno = be64_to_cpu(bno.l); + startoff = be64_to_cpu(skey.bmbt.br_startoff); if (i) { block = xfs_bmbt_get_block( cur, level, &bp); @@ -825,184 +827,6 @@ xfs_extent_state( return XFS_EXT_NORM; } - -/* - * Split cur/level block in half. - * Return new block number and its first record (to be inserted into parent). 
- */ -STATIC int /* error */ -xfs_bmbt_split( - xfs_btree_cur_t *cur, - int level, - xfs_fsblock_t *bnop, - __uint64_t *startoff, - xfs_btree_cur_t **curp, - int *stat) /* success/failure */ -{ - xfs_alloc_arg_t args; /* block allocation args */ - int error; /* error return value */ - int i; /* loop counter */ - xfs_fsblock_t lbno; /* left sibling block number */ - xfs_buf_t *lbp; /* left buffer pointer */ - xfs_bmbt_block_t *left; /* left btree block */ - xfs_bmbt_key_t *lkp; /* left btree key */ - xfs_bmbt_ptr_t *lpp; /* left address pointer */ - xfs_bmbt_rec_t *lrp; /* left record pointer */ - xfs_buf_t *rbp; /* right buffer pointer */ - xfs_bmbt_block_t *right; /* right btree block */ - xfs_bmbt_key_t *rkp; /* right btree key */ - xfs_bmbt_ptr_t *rpp; /* right address pointer */ - xfs_bmbt_block_t *rrblock; /* right-right btree block */ - xfs_buf_t *rrbp; /* right-right buffer pointer */ - xfs_bmbt_rec_t *rrp; /* right record pointer */ - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - // disable until merged into common code -// XFS_BMBT_TRACE_ARGIFK(cur, level, *bnop, *startoff); - args.tp = cur->bc_tp; - args.mp = cur->bc_mp; - lbp = cur->bc_bufs[level]; - lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp)); - left = XFS_BUF_TO_BMBT_BLOCK(lbp); - args.fsbno = cur->bc_private.b.firstblock; - args.firstblock = args.fsbno; - args.minleft = 0; - if (args.fsbno == NULLFSBLOCK) { - args.fsbno = lbno; - args.type = XFS_ALLOCTYPE_START_BNO; - /* - * Make sure there is sufficient room left in the AG to - * complete a full tree split for an extent insert. If - * we are converting the middle part of an extent then - * we may need space for two tree splits. - * - * We are relying on the caller to make the correct block - * reservation for this operation to succeed. If the - * reservation amount is insufficient then we may fail a - * block allocation here and corrupt the filesystem. - */ - args.minleft = xfs_trans_get_block_res(args.tp); - } else if (cur->bc_private.b.flist->xbf_low) - args.type = XFS_ALLOCTYPE_START_BNO; - else - args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.mod = args.alignment = args.total = args.isfl = - args.userdata = args.minalignslop = 0; - args.minlen = args.maxlen = args.prod = 1; - args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; - if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return XFS_ERROR(ENOSPC); - } - if ((error = xfs_alloc_vextent(&args))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - if (args.fsbno == NULLFSBLOCK && args.minleft) { - /* - * Could not find an AG with enough free space to satisfy - * a full btree split. Try again without minleft and if - * successful activate the lowspace algorithm. 
- */ - args.fsbno = 0; - args.type = XFS_ALLOCTYPE_FIRST_AG; - args.minleft = 0; - if ((error = xfs_alloc_vextent(&args))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - cur->bc_private.b.flist->xbf_low = 1; - } - if (args.fsbno == NULLFSBLOCK) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - ASSERT(args.len == 1); - cur->bc_private.b.firstblock = args.fsbno; - cur->bc_private.b.allocated++; - cur->bc_private.b.ip->i_d.di_nblocks++; - xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE); - XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip, - XFS_TRANS_DQ_BCOUNT, 1L); - rbp = xfs_btree_get_bufl(args.mp, args.tp, args.fsbno, 0); - right = XFS_BUF_TO_BMBT_BLOCK(rbp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, left, level, rbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - right->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); - right->bb_level = left->bb_level; - right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2); - if ((be16_to_cpu(left->bb_numrecs) & 1) && - cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1) - be16_add_cpu(&right->bb_numrecs, 1); - i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1; - if (level > 0) { - lkp = XFS_BMAP_KEY_IADDR(left, i, cur); - lpp = XFS_BMAP_PTR_IADDR(left, i, cur); - rkp = XFS_BMAP_KEY_IADDR(right, 1, cur); - rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); -#ifdef DEBUG - for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) { - if ((error = xfs_btree_check_lptr_disk(cur, lpp[i], level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - } -#endif - memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); - memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); - xfs_bmbt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - xfs_bmbt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - *startoff = be64_to_cpu(rkp->br_startoff); - } else { - lrp = XFS_BMAP_REC_IADDR(left, i, cur); - rrp = XFS_BMAP_REC_IADDR(right, 1, cur); - memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); - xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - *startoff = xfs_bmbt_disk_get_startoff(rrp); - } - be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); - right->bb_rightsib = left->bb_rightsib; - left->bb_rightsib = cpu_to_be64(args.fsbno); - right->bb_leftsib = cpu_to_be64(lbno); - xfs_bmbt_log_block(cur, rbp, XFS_BB_ALL_BITS); - xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); - if (be64_to_cpu(right->bb_rightsib) != NULLDFSBNO) { - if ((error = xfs_btree_read_bufl(args.mp, args.tp, - be64_to_cpu(right->bb_rightsib), 0, &rrbp, - XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp); - if ((error = xfs_btree_check_lblock(cur, rrblock, level, rrbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - rrblock->bb_leftsib = cpu_to_be64(args.fsbno); - xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB); - } - if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) { - xfs_btree_setbuf(cur, level, rbp); - cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs); - } - if (level + 1 < cur->bc_nlevels) { - if ((error = xfs_btree_dup_cursor(cur, curp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - (*curp)->bc_ptrs[level + 1]++; - } - *bnop = args.fsbno; - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; -} - /* * Convert on-disk form of btree root to in-memory form. 
*/ @@ -1738,6 +1562,92 @@ xfs_bmbt_dup_cursor( } STATIC int +xfs_bmbt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int length, + int *stat) +{ + xfs_alloc_arg_t args; /* block allocation args */ + int error; /* error return value */ + + memset(&args, 0, sizeof(args)); + args.tp = cur->bc_tp; + args.mp = cur->bc_mp; + args.fsbno = cur->bc_private.b.firstblock; + args.firstblock = args.fsbno; + + if (args.fsbno == NULLFSBLOCK) { + args.fsbno = be64_to_cpu(start->l); + args.type = XFS_ALLOCTYPE_START_BNO; + /* + * Make sure there is sufficient room left in the AG to + * complete a full tree split for an extent insert. If + * we are converting the middle part of an extent then + * we may need space for two tree splits. + * + * We are relying on the caller to make the correct block + * reservation for this operation to succeed. If the + * reservation amount is insufficient then we may fail a + * block allocation here and corrupt the filesystem. + */ + args.minleft = xfs_trans_get_block_res(args.tp); + } else if (cur->bc_private.b.flist->xbf_low) { + args.type = XFS_ALLOCTYPE_START_BNO; + } else { + args.type = XFS_ALLOCTYPE_NEAR_BNO; + } + + args.minlen = args.maxlen = args.prod = 1; + args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; + if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) { + error = XFS_ERROR(ENOSPC); + goto error0; + } + error = xfs_alloc_vextent(&args); + if (error) + goto error0; + + if (args.fsbno == NULLFSBLOCK && args.minleft) { + /* + * Could not find an AG with enough free space to satisfy + * a full btree split. Try again without minleft and if + * successful activate the lowspace algorithm. + */ + args.fsbno = 0; + args.type = XFS_ALLOCTYPE_FIRST_AG; + args.minleft = 0; + error = xfs_alloc_vextent(&args); + if (error) + goto error0; + cur->bc_private.b.flist->xbf_low = 1; + } + if (args.fsbno == NULLFSBLOCK) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + ASSERT(args.len == 1); + cur->bc_private.b.firstblock = args.fsbno; + cur->bc_private.b.allocated++; + cur->bc_private.b.ip->i_d.di_nblocks++; + xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE); + XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip, + XFS_TRANS_DQ_BCOUNT, 1L); + + new->l = cpu_to_be64(args.fsbno); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + + error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +STATIC int xfs_bmbt_get_maxrecs( struct xfs_btree_cur *cur, int level) @@ -1861,6 +1771,7 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .key_len = sizeof(xfs_bmbt_key_t), .dup_cursor = xfs_bmbt_dup_cursor, + .alloc_block = xfs_bmbt_alloc_block, .get_maxrecs = xfs_bmbt_get_maxrecs, .init_key_from_rec = xfs_bmbt_init_key_from_rec, .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 2b0d142..8057669 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -988,6 +988,48 @@ xfs_btree_get_sibling( } } +STATIC void +xfs_btree_set_sibling( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + union xfs_btree_ptr *ptr, + int lr) +{ + ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + if (lr == XFS_BB_RIGHTSIB) + block->bb_u.l.bb_rightsib = ptr->l; + else + block->bb_u.l.bb_leftsib = ptr->l; + } else { + if (lr == XFS_BB_RIGHTSIB) + block->bb_u.s.bb_rightsib = ptr->s; + else + block->bb_u.s.bb_leftsib = ptr->s; + } 
+} + +STATIC void +xfs_btree_init_block( + struct xfs_btree_cur *cur, + int level, + int numrecs, + struct xfs_btree_block *new) /* new block */ +{ + new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]); + new->bb_level = cpu_to_be16(level); + new->bb_numrecs = cpu_to_be16(numrecs); + + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + new->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK); + new->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK); + } else { + new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); + new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); + } +} + /* * Return true if ptr is the last record in the btree and * we need to track updateѕ to this record. The decision @@ -1012,6 +1054,21 @@ xfs_btree_is_lastrec( return 1; } +STATIC void +xfs_btree_buf_to_ptr( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + union xfs_btree_ptr *ptr) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp, + XFS_BUF_ADDR(bp))); + else { + ptr->s = cpu_to_be32(XFS_DADDR_TO_AGBNO(cur->bc_mp, + XFS_BUF_ADDR(bp))); + } +} + STATIC xfs_daddr_t xfs_btree_ptr_to_daddr( struct xfs_btree_cur *cur, @@ -1051,6 +1108,31 @@ xfs_btree_set_refs( } } +STATIC int +xfs_btree_get_buf_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + int flags, + struct xfs_btree_block **block, + struct xfs_buf **bpp) +{ + struct xfs_mount *mp = cur->bc_mp; + xfs_daddr_t d; + + /* need to sort out how callers deal with failures first */ + ASSERT(!(flags & XFS_BUF_TRYLOCK)); + + d = xfs_btree_ptr_to_daddr(cur, ptr); + *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, + mp->m_bsize, flags); + + ASSERT(*bpp); + ASSERT(!XFS_BUF_GETERROR(*bpp)); + + *block = XFS_BUF_TO_BLOCK(*bpp); + return 0; +} + /* * Read in the buffer at the given ptr and return the buffer and * the block pointer within the buffer. @@ -2199,3 +2281,189 @@ error1: xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); return error; } + +/* + * Split cur/level block in half. + * Return new block number and the key to its first + * record (to be inserted into parent). + */ +int /* error */ +xfs_btree_split( + struct xfs_btree_cur *cur, + int level, + union xfs_btree_ptr *ptrp, + union xfs_btree_key *key, + struct xfs_btree_cur **curp, + int *stat) /* success/failure */ +{ + union xfs_btree_ptr lptr; /* left sibling block ptr */ + struct xfs_buf *lbp; /* left buffer pointer */ + struct xfs_btree_block *left; /* left btree block */ + union xfs_btree_ptr rptr; /* right sibling block ptr */ + struct xfs_buf *rbp; /* right buffer pointer */ + struct xfs_btree_block *right; /* right btree block */ + union xfs_btree_ptr rrptr; /* right-right sibling ptr */ + struct xfs_buf *rrbp; /* right-right buffer pointer */ + struct xfs_btree_block *rrblock; /* right-right btree block */ + int lrecs; + int rrecs; + int src_index; + int error; /* error return value */ +#ifdef DEBUG + int i; +#endif + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key); + + XFS_BTREE_STATS_INC(cur, split); + + /* Set up left block (current one). */ + left = xfs_btree_get_block(cur, level, &lbp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, left, level, lbp); + if (error) + goto error0; +#endif + + xfs_btree_buf_to_ptr(cur, lbp, &lptr); + + /* Allocate the new block. If we can't do it, we're toast. Give up. */ + error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat); + if (error) + goto error0; + if (*stat == 0) + goto out0; + XFS_BTREE_STATS_INC(cur, alloc); + + /* Set up the new block as "right". 
*/ + error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp); + if (error) + goto error0; + + /* Fill in the btree header for the new right block. */ + xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right); + + /* + * Split the entries between the old and the new block evenly. + * Make sure that if there's an odd number of entries now, that + * each new block will have the same number of entries. + */ + lrecs = xfs_btree_get_numrecs(left); + rrecs = lrecs / 2; + if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1) + rrecs++; + src_index = (lrecs - rrecs + 1); + + XFS_BTREE_STATS_ADD(cur, moves, rrecs); + + /* + * Copy btree block entries from the left block over to the + * new block, the right. Update the right block and log the + * changes. + */ + if (level > 0) { + /* It's a non-leaf. Move keys and pointers. */ + union xfs_btree_key *lkp; /* left btree key */ + union xfs_btree_ptr *lpp; /* left address pointer */ + union xfs_btree_key *rkp; /* right btree key */ + union xfs_btree_ptr *rpp; /* right address pointer */ + + lkp = xfs_btree_key_addr(cur, src_index, left); + lpp = xfs_btree_ptr_addr(cur, src_index, left); + rkp = xfs_btree_key_addr(cur, 1, right); + rpp = xfs_btree_ptr_addr(cur, 1, right); + +#ifdef DEBUG + for (i = src_index; i < rrecs; i++) { + error = xfs_btree_check_ptr(cur, lpp, i, level); + if (error) + goto error0; + } +#endif + + xfs_btree_copy_keys(cur, rkp, lkp, rrecs); + xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs); + + xfs_btree_log_keys(cur, rbp, 1, rrecs); + xfs_btree_log_ptrs(cur, rbp, 1, rrecs); + + /* Grab the keys to the entries moved to the right block */ + xfs_btree_copy_keys(cur, key, rkp, 1); + } else { + /* It's a leaf. Move records. */ + union xfs_btree_rec *lrp; /* left record pointer */ + union xfs_btree_rec *rrp; /* right record pointer */ + + lrp = xfs_btree_rec_addr(cur, src_index, left); + rrp = xfs_btree_rec_addr(cur, 1, right); + + xfs_btree_copy_recs(cur, rrp, lrp, rrecs); + xfs_btree_log_recs(cur, rbp, 1, rrecs); + + cur->bc_ops->init_key_from_rec(key, + xfs_btree_rec_addr(cur, 1, right)); + } + + + /* + * Find the left block number by looking in the buffer. + * Adjust numrecs, sibling pointers. + */ + xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB); + xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB); + xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); + xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); + + lrecs -= rrecs; + xfs_btree_set_numrecs(left, lrecs); + xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs); + + xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS); + xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); + + /* + * If there's a block to the new block's right, make that block + * point back to right instead of to left. + */ + if (!xfs_btree_ptr_is_null(cur, &rrptr)) { + error = xfs_btree_read_buf_block(cur, &rrptr, level, + 0, &rrblock, &rrbp); + if (error) + goto error0; + xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB); + xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); + } + /* + * If the cursor is really in the right block, move it there. + * If it's just pointing past the last entry in left, then we'll + * insert there, so don't change anything in that case. + */ + if (cur->bc_ptrs[level] > lrecs + 1) { + xfs_btree_setbuf(cur, level, rbp); + cur->bc_ptrs[level] -= lrecs; + } + /* + * If there are more levels, we'll need another cursor which refers + * the right block, no matter where this cursor was. 
+ */ + if (level + 1 < cur->bc_nlevels) { + error = xfs_btree_dup_cursor(cur, curp); + if (error) + goto error0; + (*curp)->bc_ptrs[level + 1]++; + } + *ptrp = rptr; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 7cde287b..354a665 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -187,6 +187,12 @@ struct xfs_btree_ops { /* cursor operations */ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); + /* block allocation / freeing */ + int (*alloc_block)(struct xfs_btree_cur *cur, + union xfs_btree_ptr *start_bno, + union xfs_btree_ptr *new_bno, + int length, int *stat); + /* update last record information */ void (*update_lastrec)(struct xfs_btree_cur *cur, struct xfs_btree_block *block, @@ -535,6 +541,8 @@ int xfs_btree_updkey(struct xfs_btree_cur *, union xfs_btree_key *, int); int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); int xfs_btree_lshift(struct xfs_btree_cur *, int, int *); int xfs_btree_rshift(struct xfs_btree_cur *, int, int *); +int xfs_btree_split(struct xfs_btree_cur *, int, union xfs_btree_ptr *, + union xfs_btree_key *, struct xfs_btree_cur **, int *); /* * Helpers. diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 60f5db5..c76190a 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -35,6 +35,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" @@ -44,8 +45,6 @@ STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *); -STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *, - xfs_inobt_key_t *, xfs_btree_cur_t **, int *); /* * Single level of the xfs_inobt_delete record deletion routine. @@ -620,15 +619,18 @@ xfs_inobt_insrec( if (i) { optr = ptr = cur->bc_ptrs[level]; } else { + union xfs_btree_ptr bno = { .s = cpu_to_be32(nbno) }; /* * Next, try splitting the current block * in half. If this works we have to * re-set our variables because * we could be in a different block now. */ - if ((error = xfs_inobt_split(cur, level, &nbno, - &nkey, &ncur, &i))) + if ((error = xfs_btree_split(cur, level, &bno, + (union xfs_btree_key *)&nkey, + &ncur, &i))) return error; + nbno = be32_to_cpu(bno.s); if (i) { bp = cur->bc_bufs[level]; block = XFS_BUF_TO_INOBT_BLOCK(bp); @@ -973,165 +975,6 @@ xfs_inobt_newroot( } /* - * Split cur/level block in half. - * Return new block number and its first record (to be inserted into parent). 
- */ -STATIC int /* error */ -xfs_inobt_split( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level to split */ - xfs_agblock_t *bnop, /* output: block number allocated */ - xfs_inobt_key_t *keyp, /* output: first key of new block */ - xfs_btree_cur_t **curp, /* output: new cursor */ - int *stat) /* success/failure */ -{ - xfs_alloc_arg_t args; /* allocation argument structure */ - int error; /* error return value */ - int i; /* loop index/record number */ - xfs_agblock_t lbno; /* left (current) block number */ - xfs_buf_t *lbp; /* buffer for left block */ - xfs_inobt_block_t *left; /* left (current) btree block */ - xfs_inobt_key_t *lkp; /* left btree key pointer */ - xfs_inobt_ptr_t *lpp; /* left btree address pointer */ - xfs_inobt_rec_t *lrp; /* left btree record pointer */ - xfs_buf_t *rbp; /* buffer for right block */ - xfs_inobt_block_t *right; /* right (new) btree block */ - xfs_inobt_key_t *rkp; /* right btree key pointer */ - xfs_inobt_ptr_t *rpp; /* right btree address pointer */ - xfs_inobt_rec_t *rrp; /* right btree record pointer */ - - /* - * Set up left block (current one). - */ - lbp = cur->bc_bufs[level]; - args.tp = cur->bc_tp; - args.mp = cur->bc_mp; - lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp)); - /* - * Allocate the new block. - * If we can't do it, we're toast. Give up. - */ - args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, lbno); - args.mod = args.minleft = args.alignment = args.total = args.wasdel = - args.isfl = args.userdata = args.minalignslop = 0; - args.minlen = args.maxlen = args.prod = 1; - args.type = XFS_ALLOCTYPE_NEAR_BNO; - if ((error = xfs_alloc_vextent(&args))) - return error; - if (args.fsbno == NULLFSBLOCK) { - *stat = 0; - return 0; - } - ASSERT(args.len == 1); - rbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0); - /* - * Set up the new block as "right". - */ - right = XFS_BUF_TO_INOBT_BLOCK(rbp); - /* - * "Left" is the current (according to the cursor) block. - */ - left = XFS_BUF_TO_INOBT_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - return error; -#endif - /* - * Fill in the btree header for the new block. - */ - right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]); - right->bb_level = left->bb_level; - right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2); - /* - * Make sure that if there's an odd number of entries now, that - * each new block will have the same number of entries. - */ - if ((be16_to_cpu(left->bb_numrecs) & 1) && - cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1) - be16_add_cpu(&right->bb_numrecs, 1); - i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1; - /* - * For non-leaf blocks, copy keys and addresses over to the new block. - */ - if (level > 0) { - lkp = XFS_INOBT_KEY_ADDR(left, i, cur); - lpp = XFS_INOBT_PTR_ADDR(left, i, cur); - rkp = XFS_INOBT_KEY_ADDR(right, 1, cur); - rpp = XFS_INOBT_PTR_ADDR(right, 1, cur); -#ifdef DEBUG - for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level))) - return error; - } -#endif - memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp)); - memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp)); - xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - *keyp = *rkp; - } - /* - * For leaf blocks, copy records over to the new block. 
- */ - else { - lrp = XFS_INOBT_REC_ADDR(left, i, cur); - rrp = XFS_INOBT_REC_ADDR(right, 1, cur); - memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp)); - xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); - keyp->ir_startino = rrp->ir_startino; - } - /* - * Find the left block number by looking in the buffer. - * Adjust numrecs, sibling pointers. - */ - be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); - right->bb_rightsib = left->bb_rightsib; - left->bb_rightsib = cpu_to_be32(args.agbno); - right->bb_leftsib = cpu_to_be32(lbno); - xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS); - xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); - /* - * If there's a block to the new block's right, make that block - * point back to right instead of to left. - */ - if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) { - xfs_inobt_block_t *rrblock; /* rr btree block */ - xfs_buf_t *rrbp; /* buffer for rrblock */ - - if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno, - be32_to_cpu(right->bb_rightsib), 0, &rrbp, - XFS_INO_BTREE_REF))) - return error; - rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp); - if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) - return error; - rrblock->bb_leftsib = cpu_to_be32(args.agbno); - xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB); - } - /* - * If the cursor is really in the right block, move it there. - * If it's just pointing past the last entry in left, then we'll - * insert there, so don't change anything in that case. - */ - if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) { - xfs_btree_setbuf(cur, level, rbp); - cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs); - } - /* - * If there are more levels, we'll need another cursor which refers - * the right block, no matter where this cursor was. - */ - if (level + 1 < cur->bc_nlevels) { - if ((error = xfs_btree_dup_cursor(cur, curp))) - return error; - (*curp)->bc_ptrs[level + 1]++; - } - *bnop = args.agbno; - *stat = 1; - return 0; -} - -/* * Externally visible routines. 
*/ @@ -1286,6 +1129,48 @@ xfs_inobt_dup_cursor( } STATIC int +xfs_inobt_alloc_block( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int length, + int *stat) +{ + xfs_alloc_arg_t args; /* block allocation args */ + int error; /* error return value */ + xfs_agblock_t sbno = be32_to_cpu(start->s); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + + memset(&args, 0, sizeof(args)); + args.tp = cur->bc_tp; + args.mp = cur->bc_mp; + args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno); + args.minlen = 1; + args.maxlen = 1; + args.prod = 1; + args.type = XFS_ALLOCTYPE_NEAR_BNO; + + error = xfs_alloc_vextent(&args); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + if (args.fsbno == NULLFSBLOCK) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + ASSERT(args.len == 1); + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + + new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno)); + *stat = 1; + return 0; +} + + +STATIC int xfs_inobt_get_maxrecs( struct xfs_btree_cur *cur, int level) @@ -1396,6 +1281,7 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .key_len = sizeof(xfs_inobt_key_t), .dup_cursor = xfs_inobt_dup_cursor, + .alloc_block = xfs_inobt_alloc_block, .get_maxrecs = xfs_inobt_get_maxrecs, .init_key_from_rec = xfs_inobt_init_key_from_rec, .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, -- cgit v0.10.2 From 344207ce8474b79be331eb93e6df4cb5bdd48ab2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:57:16 +1100 Subject: [XFS] implement semi-generic xfs_btree_new_root From: Dave Chinner Add a xfs_btree_new_root helper for the alloc and ialloc btrees. The bmap btree needs it's own version and is not converted. [hch: split out from bigger patch and minor adaptions] SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32200a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 8a8d1ae..f21a3e9 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -48,7 +48,6 @@ STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int); STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *); /* * Internal functions. @@ -628,7 +627,7 @@ xfs_alloc_insrec( */ if (level >= cur->bc_nlevels) { XFS_STATS_INC(xs_abt_insrec); - if ((error = xfs_alloc_newroot(cur, &i))) + if ((error = xfs_btree_new_root(cur, &i))) return error; *bnop = NULLAGBLOCK; *stat = i; @@ -936,161 +935,6 @@ xfs_alloc_log_recs( xfs_trans_log_buf(cur->bc_tp, bp, first, last); } -/* - * Allocate a new root block, fill it in. 
- */ -STATIC int /* error */ -xfs_alloc_newroot( - xfs_btree_cur_t *cur, /* btree cursor */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ - xfs_agblock_t lbno; /* left block number */ - xfs_buf_t *lbp; /* left btree buffer */ - xfs_alloc_block_t *left; /* left btree block */ - xfs_mount_t *mp; /* mount structure */ - xfs_agblock_t nbno; /* new block number */ - xfs_buf_t *nbp; /* new (root) buffer */ - xfs_alloc_block_t *new; /* new (root) btree block */ - int nptr; /* new value for key index, 1 or 2 */ - xfs_agblock_t rbno; /* right block number */ - xfs_buf_t *rbp; /* right btree buffer */ - xfs_alloc_block_t *right; /* right btree block */ - - mp = cur->bc_mp; - - ASSERT(cur->bc_nlevels < XFS_AG_MAXLEVELS(mp)); - /* - * Get a buffer from the freelist blocks, for the new root. - */ - error = xfs_alloc_get_freelist(cur->bc_tp, - cur->bc_private.a.agbp, &nbno, 1); - if (error) - return error; - /* - * None available, we fail. - */ - if (nbno == NULLAGBLOCK) { - *stat = 0; - return 0; - } - xfs_trans_agbtree_delta(cur->bc_tp, 1); - nbp = xfs_btree_get_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, nbno, - 0); - new = XFS_BUF_TO_ALLOC_BLOCK(nbp); - /* - * Set the root data in the a.g. freespace structure. - */ - { - xfs_agf_t *agf; /* a.g. freespace header */ - xfs_agnumber_t seqno; - - agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); - agf->agf_roots[cur->bc_btnum] = cpu_to_be32(nbno); - be32_add_cpu(&agf->agf_levels[cur->bc_btnum], 1); - seqno = be32_to_cpu(agf->agf_seqno); - mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++; - xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, - XFS_AGF_ROOTS | XFS_AGF_LEVELS); - } - /* - * At the previous root level there are now two blocks: the old - * root, and the new block generated when it was split. - * We don't know which one the cursor is pointing at, so we - * set up variables "left" and "right" for each case. - */ - lbp = cur->bc_bufs[cur->bc_nlevels - 1]; - left = XFS_BUF_TO_ALLOC_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp))) - return error; -#endif - if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) { - /* - * Our block is left, pick up the right block. - */ - lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp)); - rbno = be32_to_cpu(left->bb_rightsib); - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.a.agno, rbno, 0, &rbp, - XFS_ALLOC_BTREE_REF))) - return error; - right = XFS_BUF_TO_ALLOC_BLOCK(rbp); - if ((error = xfs_btree_check_sblock(cur, right, - cur->bc_nlevels - 1, rbp))) - return error; - nptr = 1; - } else { - /* - * Our block is right, pick up the left block. - */ - rbp = lbp; - right = left; - rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp)); - lbno = be32_to_cpu(right->bb_leftsib); - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.a.agno, lbno, 0, &lbp, - XFS_ALLOC_BTREE_REF))) - return error; - left = XFS_BUF_TO_ALLOC_BLOCK(lbp); - if ((error = xfs_btree_check_sblock(cur, left, - cur->bc_nlevels - 1, lbp))) - return error; - nptr = 2; - } - /* - * Fill in the new block's btree header and log it. - */ - new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]); - new->bb_level = cpu_to_be16(cur->bc_nlevels); - new->bb_numrecs = cpu_to_be16(2); - new->bb_leftsib = cpu_to_be32(NULLAGBLOCK); - new->bb_rightsib = cpu_to_be32(NULLAGBLOCK); - xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS); - ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK); - /* - * Fill in the key data in the new root. 
- */ - { - xfs_alloc_key_t *kp; /* btree key pointer */ - - kp = XFS_ALLOC_KEY_ADDR(new, 1, cur); - if (be16_to_cpu(left->bb_level) > 0) { - kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur); - kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur); - } else { - xfs_alloc_rec_t *rp; /* btree record pointer */ - - rp = XFS_ALLOC_REC_ADDR(left, 1, cur); - kp[0].ar_startblock = rp->ar_startblock; - kp[0].ar_blockcount = rp->ar_blockcount; - rp = XFS_ALLOC_REC_ADDR(right, 1, cur); - kp[1].ar_startblock = rp->ar_startblock; - kp[1].ar_blockcount = rp->ar_blockcount; - } - } - xfs_alloc_log_keys(cur, nbp, 1, 2); - /* - * Fill in the pointer data in the new root. - */ - { - xfs_alloc_ptr_t *pp; /* btree address pointer */ - - pp = XFS_ALLOC_PTR_ADDR(new, 1, cur); - pp[0] = cpu_to_be32(lbno); - pp[1] = cpu_to_be32(rbno); - } - xfs_alloc_log_ptrs(cur, nbp, 1, 2); - /* - * Fix up the cursor. - */ - xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); - cur->bc_ptrs[cur->bc_nlevels] = nptr; - cur->bc_nlevels++; - *stat = 1; - return 0; -} - /* * Externally visible routines. @@ -1244,6 +1088,26 @@ xfs_allocbt_dup_cursor( cur->bc_btnum); } +STATIC void +xfs_allocbt_set_root( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + int inc) +{ + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); + int btnum = cur->bc_btnum; + + ASSERT(ptr->s != 0); + + agf->agf_roots[btnum] = ptr->s; + be32_add_cpu(&agf->agf_levels[btnum], inc); + cur->bc_mp->m_perag[seqno].pagf_levels[btnum] += inc; + + xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); +} + STATIC int xfs_allocbt_alloc_block( struct xfs_btree_cur *cur, @@ -1440,6 +1304,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .key_len = sizeof(xfs_alloc_key_t), .dup_cursor = xfs_allocbt_dup_cursor, + .set_root = xfs_allocbt_set_root, .alloc_block = xfs_allocbt_alloc_block, .update_lastrec = xfs_allocbt_update_lastrec, .get_maxrecs = xfs_allocbt_get_maxrecs, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 8057669..8de884c 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -2467,3 +2467,132 @@ error0: XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } + +/* + * Allocate a new root block, fill it in. + */ +int /* error */ +xfs_btree_new_root( + struct xfs_btree_cur *cur, /* btree cursor */ + int *stat) /* success/failure */ +{ + struct xfs_btree_block *block; /* one half of the old root block */ + struct xfs_buf *bp; /* buffer containing block */ + int error; /* error return value */ + struct xfs_buf *lbp; /* left buffer pointer */ + struct xfs_btree_block *left; /* left btree block */ + struct xfs_buf *nbp; /* new (root) buffer */ + struct xfs_btree_block *new; /* new (root) btree block */ + int nptr; /* new value for key index, 1 or 2 */ + struct xfs_buf *rbp; /* right buffer pointer */ + struct xfs_btree_block *right; /* right btree block */ + union xfs_btree_ptr rptr; + union xfs_btree_ptr lptr; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_STATS_INC(cur, newroot); + + /* initialise our start point from the cursor */ + cur->bc_ops->init_ptr_from_cur(cur, &rptr); + + /* Allocate the new block. If we can't do it, we're toast. Give up. */ + error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat); + if (error) + goto error0; + if (*stat == 0) + goto out0; + XFS_BTREE_STATS_INC(cur, alloc); + + /* Set up the new block. 
*/ + error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp); + if (error) + goto error0; + + /* Set the root in the holding structure increasing the level by 1. */ + cur->bc_ops->set_root(cur, &lptr, 1); + + /* + * At the previous root level there are now two blocks: the old root, + * and the new block generated when it was split. We don't know which + * one the cursor is pointing at, so we set up variables "left" and + * "right" for each case. + */ + block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp); + if (error) + goto error0; +#endif + + xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); + if (!xfs_btree_ptr_is_null(cur, &rptr)) { + /* Our block is left, pick up the right block. */ + lbp = bp; + xfs_btree_buf_to_ptr(cur, lbp, &lptr); + left = block; + error = xfs_btree_read_buf_block(cur, &rptr, + cur->bc_nlevels - 1, 0, &right, &rbp); + if (error) + goto error0; + bp = rbp; + nptr = 1; + } else { + /* Our block is right, pick up the left block. */ + rbp = bp; + xfs_btree_buf_to_ptr(cur, rbp, &rptr); + right = block; + xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); + error = xfs_btree_read_buf_block(cur, &lptr, + cur->bc_nlevels - 1, 0, &left, &lbp); + if (error) + goto error0; + bp = lbp; + nptr = 2; + } + /* Fill in the new block's btree header and log it. */ + xfs_btree_init_block(cur, cur->bc_nlevels, 2, new); + xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); + ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && + !xfs_btree_ptr_is_null(cur, &rptr)); + + /* Fill in the key data in the new root. */ + if (xfs_btree_get_level(left) > 0) { + xfs_btree_copy_keys(cur, + xfs_btree_key_addr(cur, 1, new), + xfs_btree_key_addr(cur, 1, left), 1); + xfs_btree_copy_keys(cur, + xfs_btree_key_addr(cur, 2, new), + xfs_btree_key_addr(cur, 1, right), 1); + } else { + cur->bc_ops->init_key_from_rec( + xfs_btree_key_addr(cur, 1, new), + xfs_btree_rec_addr(cur, 1, left)); + cur->bc_ops->init_key_from_rec( + xfs_btree_key_addr(cur, 2, new), + xfs_btree_rec_addr(cur, 1, right)); + } + xfs_btree_log_keys(cur, nbp, 1, 2); + + /* Fill in the pointer data in the new root. */ + xfs_btree_copy_ptrs(cur, + xfs_btree_ptr_addr(cur, 1, new), &lptr, 1); + xfs_btree_copy_ptrs(cur, + xfs_btree_ptr_addr(cur, 2, new), &rptr, 1); + xfs_btree_log_ptrs(cur, nbp, 1, 2); + + /* Fix up the cursor. 
*/ + xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); + cur->bc_ptrs[cur->bc_nlevels] = nptr; + cur->bc_nlevels++; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 354a665..1801539 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -187,6 +187,10 @@ struct xfs_btree_ops { /* cursor operations */ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); + /* update btree root pointer */ + void (*set_root)(struct xfs_btree_cur *cur, + union xfs_btree_ptr *nptr, int level_change); + /* block allocation / freeing */ int (*alloc_block)(struct xfs_btree_cur *cur, union xfs_btree_ptr *start_bno, @@ -543,6 +547,7 @@ int xfs_btree_lshift(struct xfs_btree_cur *, int, int *); int xfs_btree_rshift(struct xfs_btree_cur *, int, int *); int xfs_btree_split(struct xfs_btree_cur *, int, union xfs_btree_ptr *, union xfs_btree_key *, struct xfs_btree_cur **, int *); +int xfs_btree_new_root(struct xfs_btree_cur *, int *); /* * Helpers. diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index c76190a..7ba3c7b 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -44,7 +44,6 @@ STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int); STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *); /* * Single level of the xfs_inobt_delete record deletion routine. @@ -556,7 +555,7 @@ xfs_inobt_insrec( * and we're done. */ if (level >= cur->bc_nlevels) { - error = xfs_inobt_newroot(cur, &i); + error = xfs_btree_new_root(cur, &i); *bnop = NULLAGBLOCK; *stat = i; return error; @@ -827,152 +826,6 @@ xfs_inobt_log_recs( xfs_trans_log_buf(cur->bc_tp, bp, first, last); } -/* - * Allocate a new root block, fill it in. - */ -STATIC int /* error */ -xfs_inobt_newroot( - xfs_btree_cur_t *cur, /* btree cursor */ - int *stat) /* success/failure */ -{ - xfs_agi_t *agi; /* a.g. inode header */ - xfs_alloc_arg_t args; /* allocation argument structure */ - xfs_inobt_block_t *block; /* one half of the old root block */ - xfs_buf_t *bp; /* buffer containing block */ - int error; /* error return value */ - xfs_inobt_key_t *kp; /* btree key pointer */ - xfs_agblock_t lbno; /* left block number */ - xfs_buf_t *lbp; /* left buffer pointer */ - xfs_inobt_block_t *left; /* left btree block */ - xfs_buf_t *nbp; /* new (root) buffer */ - xfs_inobt_block_t *new; /* new (root) btree block */ - int nptr; /* new value for key index, 1 or 2 */ - xfs_inobt_ptr_t *pp; /* btree address pointer */ - xfs_agblock_t rbno; /* right block number */ - xfs_buf_t *rbp; /* right buffer pointer */ - xfs_inobt_block_t *right; /* right btree block */ - xfs_inobt_rec_t *rp; /* btree record pointer */ - - ASSERT(cur->bc_nlevels < XFS_IN_MAXLEVELS(cur->bc_mp)); - - /* - * Get a block & a buffer. 
- */ - agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); - args.tp = cur->bc_tp; - args.mp = cur->bc_mp; - args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, - be32_to_cpu(agi->agi_root)); - args.mod = args.minleft = args.alignment = args.total = args.wasdel = - args.isfl = args.userdata = args.minalignslop = 0; - args.minlen = args.maxlen = args.prod = 1; - args.type = XFS_ALLOCTYPE_NEAR_BNO; - if ((error = xfs_alloc_vextent(&args))) - return error; - /* - * None available, we fail. - */ - if (args.fsbno == NULLFSBLOCK) { - *stat = 0; - return 0; - } - ASSERT(args.len == 1); - nbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0); - new = XFS_BUF_TO_INOBT_BLOCK(nbp); - /* - * Set the root data in the a.g. inode structure. - */ - agi->agi_root = cpu_to_be32(args.agbno); - be32_add_cpu(&agi->agi_level, 1); - xfs_ialloc_log_agi(args.tp, cur->bc_private.a.agbp, - XFS_AGI_ROOT | XFS_AGI_LEVEL); - /* - * At the previous root level there are now two blocks: the old - * root, and the new block generated when it was split. - * We don't know which one the cursor is pointing at, so we - * set up variables "left" and "right" for each case. - */ - bp = cur->bc_bufs[cur->bc_nlevels - 1]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp))) - return error; -#endif - if (be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) { - /* - * Our block is left, pick up the right block. - */ - lbp = bp; - lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp)); - left = block; - rbno = be32_to_cpu(left->bb_rightsib); - if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno, - rbno, 0, &rbp, XFS_INO_BTREE_REF))) - return error; - bp = rbp; - right = XFS_BUF_TO_INOBT_BLOCK(rbp); - if ((error = xfs_btree_check_sblock(cur, right, - cur->bc_nlevels - 1, rbp))) - return error; - nptr = 1; - } else { - /* - * Our block is right, pick up the left block. - */ - rbp = bp; - rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp)); - right = block; - lbno = be32_to_cpu(right->bb_leftsib); - if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno, - lbno, 0, &lbp, XFS_INO_BTREE_REF))) - return error; - bp = lbp; - left = XFS_BUF_TO_INOBT_BLOCK(lbp); - if ((error = xfs_btree_check_sblock(cur, left, - cur->bc_nlevels - 1, lbp))) - return error; - nptr = 2; - } - /* - * Fill in the new block's btree header and log it. - */ - new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]); - new->bb_level = cpu_to_be16(cur->bc_nlevels); - new->bb_numrecs = cpu_to_be16(2); - new->bb_leftsib = cpu_to_be32(NULLAGBLOCK); - new->bb_rightsib = cpu_to_be32(NULLAGBLOCK); - xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS); - ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK); - /* - * Fill in the key data in the new root. - */ - kp = XFS_INOBT_KEY_ADDR(new, 1, cur); - if (be16_to_cpu(left->bb_level) > 0) { - kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur); - kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur); - } else { - rp = XFS_INOBT_REC_ADDR(left, 1, cur); - kp[0].ir_startino = rp->ir_startino; - rp = XFS_INOBT_REC_ADDR(right, 1, cur); - kp[1].ir_startino = rp->ir_startino; - } - xfs_inobt_log_keys(cur, nbp, 1, 2); - /* - * Fill in the pointer data in the new root. - */ - pp = XFS_INOBT_PTR_ADDR(new, 1, cur); - pp[0] = cpu_to_be32(lbno); - pp[1] = cpu_to_be32(rbno); - xfs_inobt_log_ptrs(cur, nbp, 1, 2); - /* - * Fix up the cursor. 
- */ - xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); - cur->bc_ptrs[cur->bc_nlevels] = nptr; - cur->bc_nlevels++; - *stat = 1; - return 0; -} /* * Externally visible routines. @@ -1128,6 +981,20 @@ xfs_inobt_dup_cursor( cur->bc_private.a.agbp, cur->bc_private.a.agno); } +STATIC void +xfs_inobt_set_root( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *nptr, + int inc) /* level change */ +{ + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); + + agi->agi_root = nptr->s; + be32_add_cpu(&agi->agi_level, inc); + xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); +} + STATIC int xfs_inobt_alloc_block( struct xfs_btree_cur *cur, @@ -1281,6 +1148,7 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .key_len = sizeof(xfs_inobt_key_t), .dup_cursor = xfs_inobt_dup_cursor, + .set_root = xfs_inobt_set_root, .alloc_block = xfs_inobt_alloc_block, .get_maxrecs = xfs_inobt_get_maxrecs, .init_key_from_rec = xfs_inobt_init_key_from_rec, -- cgit v0.10.2 From ea77b0a66e6c910ef265d9af522d6303ea6b3055 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:57:28 +1100 Subject: [XFS] move xfs_bmbt_newroot to common code xfs_bmbt_newroot is a mostly generic implementation of moving from an inode root to a real block based root. So move it to xfs_btree.c where it can use all the nice infrastructure there and make it pointer size agnostic The new name for it is xfs_btree_new_iroot, following the old naming but making it clear we're dealing with the root in inode case here, and to avoid confusion with xfs_btree_new_root which is used for the not inode rooted case. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32201a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 7d6c4ac..315bc29 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -476,7 +476,7 @@ xfs_bmap_add_attrfork_btree( goto error0; /* must be at least one entry */ XFS_WANT_CORRUPTED_GOTO(stat == 1, error0); - if ((error = xfs_bmbt_newroot(cur, flags, &stat))) + if ((error = xfs_btree_new_iroot(cur, flags, &stat))) goto error0; if (stat == 0) { xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index e753926..204f276 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -525,7 +525,7 @@ xfs_bmbt_insrec( cur->bc_private.b.whichfork); block = xfs_bmbt_get_block(cur, level, &bp); } else if (level == cur->bc_nlevels - 1) { - if ((error = xfs_bmbt_newroot(cur, &logflags, stat)) || + if ((error = xfs_btree_new_iroot(cur, &logflags, stat)) || *stat == 0) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; @@ -1183,117 +1183,6 @@ xfs_bmbt_log_recs( } /* - * Give the bmap btree a new root block. Copy the old broot contents - * down into a real block and make the broot point to it. 
- */ -int /* error */ -xfs_bmbt_newroot( - xfs_btree_cur_t *cur, /* btree cursor */ - int *logflags, /* logging flags for inode */ - int *stat) /* return status - 0 fail */ -{ - xfs_alloc_arg_t args; /* allocation arguments */ - xfs_bmbt_block_t *block; /* bmap btree block */ - xfs_buf_t *bp; /* buffer for block */ - xfs_bmbt_block_t *cblock; /* child btree block */ - xfs_bmbt_key_t *ckp; /* child key pointer */ - xfs_bmbt_ptr_t *cpp; /* child ptr pointer */ - int error; /* error return code */ -#ifdef DEBUG - int i; /* loop counter */ -#endif - xfs_bmbt_key_t *kp; /* pointer to bmap btree key */ - int level; /* btree level */ - xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */ - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - level = cur->bc_nlevels - 1; - block = xfs_bmbt_get_block(cur, level, &bp); - /* - * Copy the root into a real block. - */ - args.mp = cur->bc_mp; - pp = XFS_BMAP_PTR_IADDR(block, 1, cur); - args.tp = cur->bc_tp; - args.fsbno = cur->bc_private.b.firstblock; - args.mod = args.minleft = args.alignment = args.total = args.isfl = - args.userdata = args.minalignslop = 0; - args.minlen = args.maxlen = args.prod = 1; - args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; - args.firstblock = args.fsbno; - if (args.fsbno == NULLFSBLOCK) { -#ifdef DEBUG - if ((error = xfs_btree_check_lptr_disk(cur, *pp, level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - args.fsbno = be64_to_cpu(*pp); - args.type = XFS_ALLOCTYPE_START_BNO; - } else if (cur->bc_private.b.flist->xbf_low) - args.type = XFS_ALLOCTYPE_START_BNO; - else - args.type = XFS_ALLOCTYPE_NEAR_BNO; - if ((error = xfs_alloc_vextent(&args))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - if (args.fsbno == NULLFSBLOCK) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - ASSERT(args.len == 1); - cur->bc_private.b.firstblock = args.fsbno; - cur->bc_private.b.allocated++; - cur->bc_private.b.ip->i_d.di_nblocks++; - XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip, - XFS_TRANS_DQ_BCOUNT, 1L); - bp = xfs_btree_get_bufl(args.mp, cur->bc_tp, args.fsbno, 0); - cblock = XFS_BUF_TO_BMBT_BLOCK(bp); - *cblock = *block; - be16_add_cpu(&block->bb_level, 1); - block->bb_numrecs = cpu_to_be16(1); - cur->bc_nlevels++; - cur->bc_ptrs[level + 1] = 1; - kp = XFS_BMAP_KEY_IADDR(block, 1, cur); - ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur); - memcpy(ckp, kp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*kp)); - cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur); -#ifdef DEBUG - for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { - if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - } -#endif - memcpy(cpp, pp, be16_to_cpu(cblock->bb_numrecs) * sizeof(*pp)); -#ifdef DEBUG - if ((error = xfs_btree_check_lptr(cur, args.fsbno, level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - *pp = cpu_to_be64(args.fsbno); - xfs_iroot_realloc(cur->bc_private.b.ip, 1 - be16_to_cpu(cblock->bb_numrecs), - cur->bc_private.b.whichfork); - xfs_btree_setbuf(cur, level, bp); - /* - * Do all this logging at the end so that - * the root is at the right level. 
- */ - xfs_bmbt_log_block(cur, bp, XFS_BB_ALL_BITS); - xfs_bmbt_log_keys(cur, bp, 1, be16_to_cpu(cblock->bb_numrecs)); - xfs_bmbt_log_ptrs(cur, bp, 1, be16_to_cpu(cblock->bb_numrecs)); - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *logflags |= - XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork); - *stat = 1; - return 0; -} - -/* * Set all the fields in a bmap extent record from the arguments. */ void diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 6bfd62e..26fd8ace 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -255,12 +255,6 @@ extern void xfs_bmbt_log_block(struct xfs_btree_cur *, struct xfs_buf *, int); extern void xfs_bmbt_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int); -/* - * Give the bmap btree a new root block. Copy the old broot contents - * down into a real block and make the broot point to it. - */ -extern int xfs_bmbt_newroot(struct xfs_btree_cur *cur, int *lflags, int *stat); - extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o, xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 8de884c..3b6e01d 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -2469,6 +2469,107 @@ error0: } /* + * Copy the old inode root contents into a real block and make the + * broot point to it. + */ +int /* error */ +xfs_btree_new_iroot( + struct xfs_btree_cur *cur, /* btree cursor */ + int *logflags, /* logging flags for inode */ + int *stat) /* return status - 0 fail */ +{ + struct xfs_buf *cbp; /* buffer for cblock */ + struct xfs_btree_block *block; /* btree block */ + struct xfs_btree_block *cblock; /* child btree block */ + union xfs_btree_key *ckp; /* child key pointer */ + union xfs_btree_ptr *cpp; /* child ptr pointer */ + union xfs_btree_key *kp; /* pointer to btree key */ + union xfs_btree_ptr *pp; /* pointer to block addr */ + union xfs_btree_ptr nptr; /* new block addr */ + int level; /* btree level */ + int error; /* error return code */ +#ifdef DEBUG + int i; /* loop counter */ +#endif + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_STATS_INC(cur, newroot); + + ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + + level = cur->bc_nlevels - 1; + + block = xfs_btree_get_iroot(cur); + pp = xfs_btree_ptr_addr(cur, 1, block); + + /* Allocate the new block. If we can't do it, we're toast. Give up. */ + error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat); + if (error) + goto error0; + if (*stat == 0) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; + } + XFS_BTREE_STATS_INC(cur, alloc); + + /* Copy the root into a real block. 
*/ + error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp); + if (error) + goto error0; + + memcpy(cblock, block, xfs_btree_block_len(cur)); + + be16_add_cpu(&block->bb_level, 1); + xfs_btree_set_numrecs(block, 1); + cur->bc_nlevels++; + cur->bc_ptrs[level + 1] = 1; + + kp = xfs_btree_key_addr(cur, 1, block); + ckp = xfs_btree_key_addr(cur, 1, cblock); + xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock)); + + cpp = xfs_btree_ptr_addr(cur, 1, cblock); +#ifdef DEBUG + for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { + error = xfs_btree_check_ptr(cur, pp, i, level); + if (error) + goto error0; + } +#endif + xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock)); + +#ifdef DEBUG + error = xfs_btree_check_ptr(cur, &nptr, 0, level); + if (error) + goto error0; +#endif + xfs_btree_copy_ptrs(cur, pp, &nptr, 1); + + xfs_iroot_realloc(cur->bc_private.b.ip, + 1 - xfs_btree_get_numrecs(cblock), + cur->bc_private.b.whichfork); + + xfs_btree_setbuf(cur, level, cbp); + + /* + * Do all this logging at the end so that + * the root is at the right level. + */ + xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS); + xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); + xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); + + *logflags |= + XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork); + *stat = 1; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +/* * Allocate a new root block, fill it in. */ int /* error */ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 1801539..21eec86 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -548,6 +548,7 @@ int xfs_btree_rshift(struct xfs_btree_cur *, int, int *); int xfs_btree_split(struct xfs_btree_cur *, int, union xfs_btree_ptr *, union xfs_btree_key *, struct xfs_btree_cur **, int *); int xfs_btree_new_root(struct xfs_btree_cur *, int *); +int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); /* * Helpers. -- cgit v0.10.2 From 4b22a57188d87e873346b73c227607715be96399 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:57:40 +1100 Subject: [XFS] implement generic xfs_btree_insert/insrec Make the btree insert code generic. Based on a patch from David Chinner with lots of changes to follow the original btree implementations more closely. While this loses some of the generic helper routines for inserting/moving/removing records it also solves some of the one off bugs in the original code and makes it easier to verify. 
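[Editor's aside; not part of the patch.] The paragraph above describes folding the per-btree insert loops (xfs_alloc_insert and xfs_bmbt_insert, removed in the diffs that follow) into a single generic routine. The common shape of those loops is: insert the record at the leaf; if the block at that level split, take the new key/pointer handed back and insert it one level up, repeating until no further split occurs. The toy program below is only a sketch of that control flow under invented names (struct cur, insrec_fn, toy_insrec); it is not the code this patch adds and none of these identifiers exist in XFS.

	/*
	 * Illustrative sketch of the level-by-level insert loop shared by the
	 * old per-btree insert routines.  Standalone, toy types only.
	 */
	#include <stdbool.h>
	#include <stdio.h>

	struct cur { int nlevels; };

	/*
	 * One level's worth of insertion.  Returns true if the block at this
	 * level split, in which case *newkey must be inserted at level + 1.
	 */
	typedef bool (*insrec_fn)(struct cur *cur, int level, int key, int *newkey);

	static bool toy_insrec(struct cur *cur, int level, int key, int *newkey)
	{
		/* pretend the two lowest levels split and push a separator up */
		if (level < 2) {
			*newkey = key + 100 * (level + 1);
			return true;
		}
		return false;
	}

	/* Walk up the tree until an insertion no longer causes a split. */
	static void toy_insert(struct cur *cur, insrec_fn insrec, int key)
	{
		int level = 0;
		int newkey;

		do {
			printf("insert key %d at level %d\n", key, level);
			if (!insrec(cur, level++, key, &newkey))
				break;
			key = newkey;	/* separator produced by the split */
		} while (level < cur->nlevels);
	}

	int main(void)
	{
		struct cur cur = { .nlevels = 4 };

		toy_insert(&cur, toy_insrec, 42);
		return 0;
	}

In the generic version, the per-level step presumably goes through the cur->bc_ops callbacks this series has been adding (alloc_block, init_rec_from_cur, and so on), so each btree type only supplies small helpers instead of its own copy of the loop.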
SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32202a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 875e1ba..a983824 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -408,7 +408,7 @@ xfs_alloc_fixup_trees( if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 0); - if ((error = xfs_alloc_insert(cnt_cur, &i))) + if ((error = xfs_btree_insert(cnt_cur, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); } @@ -416,7 +416,7 @@ xfs_alloc_fixup_trees( if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 0); - if ((error = xfs_alloc_insert(cnt_cur, &i))) + if ((error = xfs_btree_insert(cnt_cur, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); } @@ -444,7 +444,7 @@ xfs_alloc_fixup_trees( if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 0); - if ((error = xfs_alloc_insert(bno_cur, &i))) + if ((error = xfs_btree_insert(bno_cur, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); } @@ -1756,7 +1756,7 @@ xfs_free_ag_extent( else { nbno = bno; nlen = len; - if ((error = xfs_alloc_insert(bno_cur, &i))) + if ((error = xfs_btree_insert(bno_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); } @@ -1768,7 +1768,7 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 0, error0); - if ((error = xfs_alloc_insert(cnt_cur, &i))) + if ((error = xfs_btree_insert(cnt_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index f21a3e9..818adca 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -583,256 +583,6 @@ error0: } /* - * Insert one record/level. Return information to the caller - * allowing the next level up to proceed if necessary. - */ -STATIC int /* error */ -xfs_alloc_insrec( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level to insert record at */ - xfs_agblock_t *bnop, /* i/o: block number inserted */ - xfs_alloc_rec_t *recp, /* i/o: record data inserted */ - xfs_btree_cur_t **curp, /* output: new cursor replacing cur */ - int *stat) /* output: success/failure */ -{ - xfs_agf_t *agf; /* allocation group freelist header */ - xfs_alloc_block_t *block; /* btree block record/key lives in */ - xfs_buf_t *bp; /* buffer for block */ - int error; /* error return value */ - int i; /* loop index */ - xfs_alloc_key_t key; /* key value being inserted */ - xfs_alloc_key_t *kp; /* pointer to btree keys */ - xfs_agblock_t nbno; /* block number of allocated block */ - xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */ - xfs_alloc_key_t nkey; /* new key value, from split */ - xfs_alloc_rec_t nrec; /* new record value, for caller */ - int numrecs; - int optr; /* old ptr value */ - xfs_alloc_ptr_t *pp; /* pointer to btree addresses */ - int ptr; /* index in btree block for this rec */ - xfs_alloc_rec_t *rp; /* pointer to btree records */ - - ASSERT(be32_to_cpu(recp->ar_blockcount) > 0); - - /* - * GCC doesn't understand the (arguably complex) control flow in - * this function and complains about uninitialized structure fields - * without this. 
- */ - memset(&nrec, 0, sizeof(nrec)); - - /* - * If we made it to the root level, allocate a new root block - * and we're done. - */ - if (level >= cur->bc_nlevels) { - XFS_STATS_INC(xs_abt_insrec); - if ((error = xfs_btree_new_root(cur, &i))) - return error; - *bnop = NULLAGBLOCK; - *stat = i; - return 0; - } - /* - * Make a key out of the record data to be inserted, and save it. - */ - key.ar_startblock = recp->ar_startblock; - key.ar_blockcount = recp->ar_blockcount; - optr = ptr = cur->bc_ptrs[level]; - /* - * If we're off the left edge, return failure. - */ - if (ptr == 0) { - *stat = 0; - return 0; - } - XFS_STATS_INC(xs_abt_insrec); - /* - * Get pointers to the btree buffer and block. - */ - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_ALLOC_BLOCK(bp); - numrecs = be16_to_cpu(block->bb_numrecs); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, bp))) - return error; - /* - * Check that the new entry is being inserted in the right place. - */ - if (ptr <= numrecs) { - if (level == 0) { - rp = XFS_ALLOC_REC_ADDR(block, ptr, cur); - xfs_btree_check_rec(cur->bc_btnum, recp, rp); - } else { - kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur); - xfs_btree_check_key(cur->bc_btnum, &key, kp); - } - } -#endif - nbno = NULLAGBLOCK; - ncur = NULL; - /* - * If the block is full, we can't insert the new entry until we - * make the block un-full. - */ - if (numrecs == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { - /* - * First, try shifting an entry to the right neighbor. - */ - if ((error = xfs_btree_rshift(cur, level, &i))) - return error; - if (i) { - /* nothing */ - } - /* - * Next, try shifting an entry to the left neighbor. - */ - else { - if ((error = xfs_btree_lshift(cur, level, &i))) - return error; - if (i) - optr = ptr = cur->bc_ptrs[level]; - else { - union xfs_btree_ptr bno = { .s = cpu_to_be32(nbno) }; - /* - * Next, try splitting the current block in - * half. If this works we have to re-set our - * variables because we could be in a - * different block now. - */ - if ((error = xfs_btree_split(cur, level, &bno, - (union xfs_btree_key *)&nkey, - &ncur, &i))) - return error; - nbno = be32_to_cpu(bno.s); - if (i) { - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_ALLOC_BLOCK(bp); -#ifdef DEBUG - if ((error = - xfs_btree_check_sblock(cur, - block, level, bp))) - return error; -#endif - ptr = cur->bc_ptrs[level]; - nrec.ar_startblock = nkey.ar_startblock; - nrec.ar_blockcount = nkey.ar_blockcount; - } - /* - * Otherwise the insert fails. - */ - else { - *stat = 0; - return 0; - } - } - } - } - /* - * At this point we know there's room for our new entry in the block - * we're pointing at. - */ - numrecs = be16_to_cpu(block->bb_numrecs); - if (level > 0) { - /* - * It's a non-leaf entry. Make a hole for the new data - * in the key and ptr regions of the block. - */ - kp = XFS_ALLOC_KEY_ADDR(block, 1, cur); - pp = XFS_ALLOC_PTR_ADDR(block, 1, cur); -#ifdef DEBUG - for (i = numrecs; i >= ptr; i--) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level))) - return error; - } -#endif - memmove(&kp[ptr], &kp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*kp)); - memmove(&pp[ptr], &pp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*pp)); -#ifdef DEBUG - if ((error = xfs_btree_check_sptr(cur, *bnop, level))) - return error; -#endif - /* - * Now stuff the new data in, bump numrecs and log the new data. 
- */ - kp[ptr - 1] = key; - pp[ptr - 1] = cpu_to_be32(*bnop); - numrecs++; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_alloc_log_keys(cur, bp, ptr, numrecs); - xfs_alloc_log_ptrs(cur, bp, ptr, numrecs); -#ifdef DEBUG - if (ptr < numrecs) - xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1, - kp + ptr); -#endif - } else { - /* - * It's a leaf entry. Make a hole for the new record. - */ - rp = XFS_ALLOC_REC_ADDR(block, 1, cur); - memmove(&rp[ptr], &rp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*rp)); - /* - * Now stuff the new record in, bump numrecs - * and log the new data. - */ - rp[ptr - 1] = *recp; - numrecs++; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_alloc_log_recs(cur, bp, ptr, numrecs); -#ifdef DEBUG - if (ptr < numrecs) - xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1, - rp + ptr); -#endif - } - /* - * Log the new number of records in the btree header. - */ - xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS); - /* - * If we inserted at the start of a block, update the parents' keys. - */ - if (optr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, level + 1))) - return error; - /* - * Look to see if the longest extent in the allocation group - * needs to be updated. - */ - - agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); - if (level == 0 && - cur->bc_btnum == XFS_BTNUM_CNT && - be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK && - be32_to_cpu(recp->ar_blockcount) > be32_to_cpu(agf->agf_longest)) { - /* - * If this is a leaf in the by-size btree and there - * is no right sibling block and this block is bigger - * than the previous longest block, update it. - */ - agf->agf_longest = recp->ar_blockcount; - cur->bc_mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest - = be32_to_cpu(recp->ar_blockcount); - xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, - XFS_AGF_LONGEST); - } - /* - * Return the new block number, if any. - * If there is one, give back a record value and a cursor too. - */ - *bnop = nbno; - if (nbno != NULLAGBLOCK) { - *recp = nrec; - *curp = ncur; - } - *stat = 1; - return 0; -} - -/* * Log header fields from a btree block. */ STATIC void @@ -1019,65 +769,6 @@ xfs_alloc_get_rec( return 0; } -/* - * Insert the current record at the point referenced by cur. - * The cursor may be inconsistent on return if splits have been done. - */ -int /* error */ -xfs_alloc_insert( - xfs_btree_cur_t *cur, /* btree cursor */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; /* result value, 0 for failure */ - int level; /* current level number in btree */ - xfs_agblock_t nbno; /* new block number (split result) */ - xfs_btree_cur_t *ncur; /* new cursor (split result) */ - xfs_alloc_rec_t nrec; /* record being inserted this level */ - xfs_btree_cur_t *pcur; /* previous level's cursor */ - - level = 0; - nbno = NULLAGBLOCK; - nrec.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock); - nrec.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount); - ncur = NULL; - pcur = cur; - /* - * Loop going up the tree, starting at the leaf level. - * Stop when we don't get a split block, that must mean that - * the insert is finished with this level. - */ - do { - /* - * Insert nrec/nbno into this level of the tree. - * Note if we fail, nbno will be null. - */ - if ((error = xfs_alloc_insrec(pcur, level++, &nbno, &nrec, &ncur, - &i))) { - if (pcur != cur) - xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); - return error; - } - /* - * See if the cursor we just used is trash. 
- * Can't trash the caller's cursor, but otherwise we should - * if ncur is a new cursor or we're about to be done. - */ - if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) { - cur->bc_nlevels = pcur->bc_nlevels; - xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); - } - /* - * If we got a new cursor, switch to it. - */ - if (ncur) { - pcur = ncur; - ncur = NULL; - } - } while (nbno != NULLAGBLOCK); - *stat = i; - return 0; -} STATIC struct xfs_btree_cur * xfs_allocbt_dup_cursor( @@ -1170,6 +861,12 @@ xfs_allocbt_update_lastrec( return; len = rec->alloc.ar_blockcount; break; + case LASTREC_INSREC: + if (be32_to_cpu(rec->alloc.ar_blockcount) <= + be32_to_cpu(agf->agf_longest)) + return; + len = rec->alloc.ar_blockcount; + break; default: ASSERT(0); return; @@ -1200,6 +897,28 @@ xfs_allocbt_init_key_from_rec( } STATIC void +xfs_allocbt_init_rec_from_key( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + ASSERT(key->alloc.ar_startblock != 0); + + rec->alloc.ar_startblock = key->alloc.ar_startblock; + rec->alloc.ar_blockcount = key->alloc.ar_blockcount; +} + +STATIC void +xfs_allocbt_init_rec_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec) +{ + ASSERT(cur->bc_rec.a.ar_startblock != 0); + + rec->alloc.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock); + rec->alloc.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount); +} + +STATIC void xfs_allocbt_init_ptr_from_cur( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) @@ -1309,6 +1028,8 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .update_lastrec = xfs_allocbt_update_lastrec, .get_maxrecs = xfs_allocbt_get_maxrecs, .init_key_from_rec = xfs_allocbt_init_key_from_rec, + .init_rec_from_key = xfs_allocbt_init_rec_from_key, + .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, .key_diff = xfs_allocbt_key_diff, diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 81e2f36..2e340ef 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -107,12 +107,6 @@ extern int xfs_alloc_delete(struct xfs_btree_cur *cur, int *stat); extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno, xfs_extlen_t *len, int *stat); -/* - * Insert the current record at the point referenced by cur. - * The cursor may be inconsistent on return if splits have been done. 
- */ -extern int xfs_alloc_insert(struct xfs_btree_cur *cur, int *stat); - extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 315bc29..85e2e8b 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -977,7 +977,7 @@ xfs_bmap_add_extent_delay_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = XFS_EXT_NORM; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } @@ -1053,7 +1053,7 @@ xfs_bmap_add_extent_delay_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = XFS_EXT_NORM; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } @@ -1143,7 +1143,7 @@ xfs_bmap_add_extent_delay_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = XFS_EXT_NORM; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } @@ -1198,7 +1198,7 @@ xfs_bmap_add_extent_delay_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = XFS_EXT_NORM; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } @@ -1651,7 +1651,7 @@ xfs_bmap_add_extent_unwritten_real( oldext))) goto done; cur->bc_rec.b = *new; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } @@ -1741,7 +1741,7 @@ xfs_bmap_add_extent_unwritten_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = XFS_EXT_NORM; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } @@ -1789,7 +1789,7 @@ xfs_bmap_add_extent_unwritten_real( cur->bc_rec.b = PREV; cur->bc_rec.b.br_blockcount = new->br_startoff - PREV.br_startoff; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); /* @@ -1804,7 +1804,7 @@ xfs_bmap_add_extent_unwritten_real( XFS_WANT_CORRUPTED_GOTO(i == 0, done); /* new middle extent - newext */ cur->bc_rec.b.br_state = new->br_state; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } @@ -2264,7 +2264,7 @@ xfs_bmap_add_extent_hole_real( goto done; XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = new->br_state; - if ((error = xfs_bmbt_insert(cur, &i))) + if ((error = xfs_btree_insert(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); } @@ -3303,7 +3303,7 @@ xfs_bmap_del_extent( if ((error = xfs_btree_increment(cur, 0, &i))) goto done; cur->bc_rec.b = new; - error = xfs_bmbt_insert(cur, &i); + error = xfs_btree_insert(cur, &i); if (error && error != ENOSPC) goto done; /* diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 204f276..2b15df3 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -456,198 +456,6 @@ error0: return error; } -/* - * Insert one record/level. Return information to the caller - * allowing the next level up to proceed if necessary. 
- */ -STATIC int /* error */ -xfs_bmbt_insrec( - xfs_btree_cur_t *cur, - int level, - xfs_fsblock_t *bnop, - xfs_bmbt_rec_t *recp, - xfs_btree_cur_t **curp, - int *stat) /* no-go/done/continue */ -{ - xfs_bmbt_block_t *block; /* bmap btree block */ - xfs_buf_t *bp; /* buffer for block */ - int error; /* error return value */ - int i; /* loop index */ - xfs_bmbt_key_t key; /* bmap btree key */ - xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */ - int logflags; /* inode logging flags */ - xfs_fsblock_t nbno; /* new block number */ - struct xfs_btree_cur *ncur; /* new btree cursor */ - __uint64_t startoff; /* new btree key value */ - xfs_bmbt_rec_t nrec; /* new record count */ - int optr; /* old key/record index */ - xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */ - int ptr; /* key/record index */ - xfs_bmbt_rec_t *rp=NULL; /* pointer to bmap btree rec */ - int numrecs; - - ASSERT(level < cur->bc_nlevels); - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGIFR(cur, level, *bnop, recp); - ncur = NULL; - key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(recp)); - optr = ptr = cur->bc_ptrs[level]; - if (ptr == 0) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - XFS_STATS_INC(xs_bmbt_insrec); - block = xfs_bmbt_get_block(cur, level, &bp); - numrecs = be16_to_cpu(block->bb_numrecs); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, block, level, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - if (ptr <= numrecs) { - if (level == 0) { - rp = XFS_BMAP_REC_IADDR(block, ptr, cur); - xfs_btree_check_rec(XFS_BTNUM_BMAP, recp, rp); - } else { - kp = XFS_BMAP_KEY_IADDR(block, ptr, cur); - xfs_btree_check_key(XFS_BTNUM_BMAP, &key, kp); - } - } -#endif - nbno = NULLFSBLOCK; - if (numrecs == XFS_BMAP_BLOCK_IMAXRECS(level, cur)) { - if (numrecs < XFS_BMAP_BLOCK_DMAXRECS(level, cur)) { - /* - * A root block, that can be made bigger. 
- */ - xfs_iroot_realloc(cur->bc_private.b.ip, 1, - cur->bc_private.b.whichfork); - block = xfs_bmbt_get_block(cur, level, &bp); - } else if (level == cur->bc_nlevels - 1) { - if ((error = xfs_btree_new_iroot(cur, &logflags, stat)) || - *stat == 0) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, - logflags); - block = xfs_bmbt_get_block(cur, level, &bp); - } else { - if ((error = xfs_btree_rshift(cur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - if (i) { - /* nothing */ - } else { - if ((error = xfs_btree_lshift(cur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - if (i) { - optr = ptr = cur->bc_ptrs[level]; - } else { - union xfs_btree_ptr bno = { .l = cpu_to_be64(nbno) }; - union xfs_btree_key skey; - if ((error = xfs_btree_split(cur, level, - &bno, &skey, &ncur, - &i))) { - XFS_BMBT_TRACE_CURSOR(cur, - ERROR); - return error; - } - nbno = be64_to_cpu(bno.l); - startoff = be64_to_cpu(skey.bmbt.br_startoff); - if (i) { - block = xfs_bmbt_get_block( - cur, level, &bp); -#ifdef DEBUG - if ((error = - xfs_btree_check_lblock(cur, - block, level, bp))) { - XFS_BMBT_TRACE_CURSOR( - cur, ERROR); - return error; - } -#endif - ptr = cur->bc_ptrs[level]; - xfs_bmbt_disk_set_allf(&nrec, - startoff, 0, 0, - XFS_EXT_NORM); - } else { - XFS_BMBT_TRACE_CURSOR(cur, - EXIT); - *stat = 0; - return 0; - } - } - } - } - } - numrecs = be16_to_cpu(block->bb_numrecs); - if (level > 0) { - kp = XFS_BMAP_KEY_IADDR(block, 1, cur); - pp = XFS_BMAP_PTR_IADDR(block, 1, cur); -#ifdef DEBUG - for (i = numrecs; i >= ptr; i--) { - if ((error = xfs_btree_check_lptr_disk(cur, pp[i - 1], - level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - } -#endif - memmove(&kp[ptr], &kp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*kp)); - memmove(&pp[ptr], &pp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*pp)); -#ifdef DEBUG - if ((error = xfs_btree_check_lptr(cur, *bnop, level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } -#endif - kp[ptr - 1] = key; - pp[ptr - 1] = cpu_to_be64(*bnop); - numrecs++; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_bmbt_log_keys(cur, bp, ptr, numrecs); - xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs); - } else { - rp = XFS_BMAP_REC_IADDR(block, 1, cur); - memmove(&rp[ptr], &rp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*rp)); - rp[ptr - 1] = *recp; - numrecs++; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_bmbt_log_recs(cur, bp, ptr, numrecs); - } - xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS); -#ifdef DEBUG - if (ptr < numrecs) { - if (level == 0) - xfs_btree_check_rec(XFS_BTNUM_BMAP, rp + ptr - 1, - rp + ptr); - else - xfs_btree_check_key(XFS_BTNUM_BMAP, kp + ptr - 1, - kp + ptr); - } -#endif - if (optr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, level + 1))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - *bnop = nbno; - if (nbno != NULLFSBLOCK) { - *recp = nrec; - *curp = ncur; - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; -} - STATIC int xfs_bmbt_killroot( xfs_btree_cur_t *cur) @@ -1060,67 +868,6 @@ xfs_bmbt_disk_get_startoff( } /* - * Insert the current record at the point referenced by cur. - * - * A multi-level split of the tree on insert will invalidate the original - * cursor. All callers of this function should assume that the cursor is - * no longer valid and revalidate it. 
- */ -int /* error */ -xfs_bmbt_insert( - xfs_btree_cur_t *cur, - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; - int level; - xfs_fsblock_t nbno; - xfs_btree_cur_t *ncur; - xfs_bmbt_rec_t nrec; - xfs_btree_cur_t *pcur; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - level = 0; - nbno = NULLFSBLOCK; - xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b); - ncur = NULL; - pcur = cur; - do { - if ((error = xfs_bmbt_insrec(pcur, level++, &nbno, &nrec, &ncur, - &i))) { - if (pcur != cur) - xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) { - cur->bc_nlevels = pcur->bc_nlevels; - cur->bc_private.b.allocated += - pcur->bc_private.b.allocated; - pcur->bc_private.b.allocated = 0; - ASSERT((cur->bc_private.b.firstblock != NULLFSBLOCK) || - XFS_IS_REALTIME_INODE(cur->bc_private.b.ip)); - cur->bc_private.b.firstblock = - pcur->bc_private.b.firstblock; - ASSERT(cur->bc_private.b.flist == - pcur->bc_private.b.flist); - xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); - } - if (ncur) { - pcur = ncur; - ncur = NULL; - } - } while (nbno != NULLFSBLOCK); - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = i; - return 0; -error0: - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; -} - -/* * Log fields from the btree block header. */ void @@ -1450,6 +1197,21 @@ xfs_bmbt_dup_cursor( return new; } +STATIC void +xfs_bmbt_update_cursor( + struct xfs_btree_cur *src, + struct xfs_btree_cur *dst) +{ + ASSERT((dst->bc_private.b.firstblock != NULLFSBLOCK) || + (dst->bc_private.b.ip->i_d.di_flags & XFS_DIFLAG_REALTIME)); + ASSERT(dst->bc_private.b.flist == src->bc_private.b.flist); + + dst->bc_private.b.allocated += src->bc_private.b.allocated; + dst->bc_private.b.firstblock = src->bc_private.b.firstblock; + + src->bc_private.b.allocated = 0; +} + STATIC int xfs_bmbt_alloc_block( struct xfs_btree_cur *cur, @@ -1544,6 +1306,23 @@ xfs_bmbt_get_maxrecs( return XFS_BMAP_BLOCK_IMAXRECS(level, cur); } +/* + * Get the maximum records we could store in the on-disk format. + * + * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but + * for the root node this checks the available space in the dinode fork + * so that we can resize the in-memory buffer to match it. After a + * resize to the maximum size this function returns the same value + * as xfs_bmbt_get_maxrecs for the root node, too. 
+ */ +STATIC int +xfs_bmbt_get_dmaxrecs( + struct xfs_btree_cur *cur, + int level) +{ + return XFS_BMAP_BLOCK_DMAXRECS(level, cur); +} + STATIC void xfs_bmbt_init_key_from_rec( union xfs_btree_key *key, @@ -1554,6 +1333,25 @@ xfs_bmbt_init_key_from_rec( } STATIC void +xfs_bmbt_init_rec_from_key( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + ASSERT(key->bmbt.br_startoff != 0); + + xfs_bmbt_disk_set_allf(&rec->bmbt, be64_to_cpu(key->bmbt.br_startoff), + 0, 0, XFS_EXT_NORM); +} + +STATIC void +xfs_bmbt_init_rec_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec) +{ + xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b); +} + +STATIC void xfs_bmbt_init_ptr_from_cur( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) @@ -1660,9 +1458,13 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .key_len = sizeof(xfs_bmbt_key_t), .dup_cursor = xfs_bmbt_dup_cursor, + .update_cursor = xfs_bmbt_update_cursor, .alloc_block = xfs_bmbt_alloc_block, .get_maxrecs = xfs_bmbt_get_maxrecs, + .get_dmaxrecs = xfs_bmbt_get_dmaxrecs, .init_key_from_rec = xfs_bmbt_init_key_from_rec, + .init_rec_from_key = xfs_bmbt_init_rec_from_key, + .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, .key_diff = xfs_bmbt_key_diff, diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 26fd8ace..703fe2e 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -250,7 +250,6 @@ extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); -extern int xfs_bmbt_insert(struct xfs_btree_cur *, int *); extern void xfs_bmbt_log_block(struct xfs_btree_cur *, struct xfs_buf *, int); extern void xfs_bmbt_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 3b6e01d..36477aa 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -963,6 +963,17 @@ xfs_btree_ptr_is_null( return be32_to_cpu(ptr->s) == NULLAGBLOCK; } +STATIC void +xfs_btree_set_ptr_null( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + ptr->l = cpu_to_be64(NULLFSBLOCK); + else + ptr->s = cpu_to_be32(NULLAGBLOCK); +} + /* * Get/set/init sibling pointers */ @@ -2697,3 +2708,354 @@ out0: *stat = 0; return 0; } + +STATIC int +xfs_btree_make_block_unfull( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* btree level */ + int numrecs,/* # of recs in block */ + int *oindex,/* old tree index */ + int *index, /* new tree index */ + union xfs_btree_ptr *nptr, /* new btree ptr */ + struct xfs_btree_cur **ncur, /* new btree cursor */ + union xfs_btree_rec *nrec, /* new record */ + int *stat) +{ + union xfs_btree_key key; /* new btree key value */ + int error = 0; + + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + level == cur->bc_nlevels - 1) { + struct xfs_inode *ip = cur->bc_private.b.ip; + + if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { + /* A root block that can be made bigger. */ + + xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork); + } else { + /* A root block that needs replacing */ + int logflags = 0; + + error = xfs_btree_new_iroot(cur, &logflags, stat); + if (error || *stat == 0) + return error; + + xfs_trans_log_inode(cur->bc_tp, ip, logflags); + } + + return 0; + } + + /* First, try shifting an entry to the right neighbor. 
*/ + error = xfs_btree_rshift(cur, level, stat); + if (error || *stat) + return error; + + /* Next, try shifting an entry to the left neighbor. */ + error = xfs_btree_lshift(cur, level, stat); + if (error) + return error; + + if (*stat) { + *oindex = *index = cur->bc_ptrs[level]; + return 0; + } + + /* + * Next, try splitting the current block in half. + * + * If this works we have to re-set our variables because we + * could be in a different block now. + */ + error = xfs_btree_split(cur, level, nptr, &key, ncur, stat); + if (error || *stat == 0) + return error; + + + *index = cur->bc_ptrs[level]; + cur->bc_ops->init_rec_from_key(&key, nrec); + return 0; +} + +/* + * Insert one record/level. Return information to the caller + * allowing the next level up to proceed if necessary. + */ +STATIC int +xfs_btree_insrec( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* level to insert record at */ + union xfs_btree_ptr *ptrp, /* i/o: block number inserted */ + union xfs_btree_rec *recp, /* i/o: record data inserted */ + struct xfs_btree_cur **curp, /* output: new cursor replacing cur */ + int *stat) /* success/failure */ +{ + struct xfs_btree_block *block; /* btree block */ + struct xfs_buf *bp; /* buffer for block */ + union xfs_btree_key key; /* btree key */ + union xfs_btree_ptr nptr; /* new block ptr */ + struct xfs_btree_cur *ncur; /* new btree cursor */ + union xfs_btree_rec nrec; /* new record count */ + int optr; /* old key/record index */ + int ptr; /* key/record index */ + int numrecs;/* number of records */ + int error; /* error return value */ +#ifdef DEBUG + int i; +#endif + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp); + + ncur = NULL; + + /* + * If we have an external root pointer, and we've made it to the + * root level, allocate a new root block and we're done. + */ + if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + (level >= cur->bc_nlevels)) { + error = xfs_btree_new_root(cur, stat); + xfs_btree_set_ptr_null(cur, ptrp); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return error; + } + + /* If we're off the left edge, return failure. */ + ptr = cur->bc_ptrs[level]; + if (ptr == 0) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + + /* Make a key out of the record data to be inserted, and save it. */ + cur->bc_ops->init_key_from_rec(&key, recp); + + optr = ptr; + + XFS_BTREE_STATS_INC(cur, insrec); + + /* Get pointers to the btree buffer and block. */ + block = xfs_btree_get_block(cur, level, &bp); + numrecs = xfs_btree_get_numrecs(block); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) + goto error0; + + /* Check that the new entry is being inserted in the right place. */ + if (ptr <= numrecs) { + if (level == 0) { + xfs_btree_check_rec(cur->bc_btnum, recp, + xfs_btree_rec_addr(cur, ptr, block)); + } else { + xfs_btree_check_key(cur->bc_btnum, &key, + xfs_btree_key_addr(cur, ptr, block)); + } + } +#endif + + /* + * If the block is full, we can't insert the new entry until we + * make the block un-full. + */ + xfs_btree_set_ptr_null(cur, &nptr); + if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) { + error = xfs_btree_make_block_unfull(cur, level, numrecs, + &optr, &ptr, &nptr, &ncur, &nrec, stat); + if (error || *stat == 0) + goto error0; + } + + /* + * The current block may have changed if the block was + * previously full and we have just made space in it. 
+ */ + block = xfs_btree_get_block(cur, level, &bp); + numrecs = xfs_btree_get_numrecs(block); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) + return error; +#endif + + /* + * At this point we know there's room for our new entry in the block + * we're pointing at. + */ + XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1); + + if (level > 0) { + /* It's a nonleaf. make a hole in the keys and ptrs */ + union xfs_btree_key *kp; + union xfs_btree_ptr *pp; + + kp = xfs_btree_key_addr(cur, ptr, block); + pp = xfs_btree_ptr_addr(cur, ptr, block); + +#ifdef DEBUG + for (i = numrecs - ptr; i >= 0; i--) { + error = xfs_btree_check_ptr(cur, pp, i, level); + if (error) + return error; + } +#endif + + xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1); + xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1); + +#ifdef DEBUG + error = xfs_btree_check_ptr(cur, ptrp, 0, level); + if (error) + goto error0; +#endif + + /* Now put the new data in, bump numrecs and log it. */ + xfs_btree_copy_keys(cur, kp, &key, 1); + xfs_btree_copy_ptrs(cur, pp, ptrp, 1); + numrecs++; + xfs_btree_set_numrecs(block, numrecs); + xfs_btree_log_ptrs(cur, bp, ptr, numrecs); + xfs_btree_log_keys(cur, bp, ptr, numrecs); +#ifdef DEBUG + if (ptr < numrecs) { + xfs_btree_check_key(cur->bc_btnum, kp, + xfs_btree_key_addr(cur, ptr + 1, block)); + } +#endif + } else { + /* It's a leaf. make a hole in the records */ + union xfs_btree_rec *rp; + + rp = xfs_btree_rec_addr(cur, ptr, block); + + xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1); + + /* Now put the new data in, bump numrecs and log it. */ + xfs_btree_copy_recs(cur, rp, recp, 1); + xfs_btree_set_numrecs(block, ++numrecs); + xfs_btree_log_recs(cur, bp, ptr, numrecs); +#ifdef DEBUG + if (ptr < numrecs) { + xfs_btree_check_rec(cur->bc_btnum, rp, + xfs_btree_rec_addr(cur, ptr + 1, block)); + } +#endif + } + + /* Log the new number of records in the btree header. */ + xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); + + /* If we inserted at the start of a block, update the parents' keys. */ + if (optr == 1) { + error = xfs_btree_updkey(cur, &key, level + 1); + if (error) + goto error0; + } + + /* + * If we are tracking the last record in the tree and + * we are at the far right edge of the tree, update it. + */ + if (xfs_btree_is_lastrec(cur, block, level)) { + cur->bc_ops->update_lastrec(cur, block, recp, + ptr, LASTREC_INSREC); + } + + /* + * Return the new block number, if any. + * If there is one, give back a record value and a cursor too. + */ + *ptrp = nptr; + if (!xfs_btree_ptr_is_null(cur, &nptr)) { + *recp = nrec; + *curp = ncur; + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} + +/* + * Insert the record at the point referenced by cur. + * + * A multi-level split of the tree on insert will invalidate the original + * cursor. All callers of this function should assume that the cursor is + * no longer valid and revalidate it. 
+ */ +int +xfs_btree_insert( + struct xfs_btree_cur *cur, + int *stat) +{ + int error; /* error return value */ + int i; /* result value, 0 for failure */ + int level; /* current level number in btree */ + union xfs_btree_ptr nptr; /* new block number (split result) */ + struct xfs_btree_cur *ncur; /* new cursor (split result) */ + struct xfs_btree_cur *pcur; /* previous level's cursor */ + union xfs_btree_rec rec; /* record to insert */ + + level = 0; + ncur = NULL; + pcur = cur; + + xfs_btree_set_ptr_null(cur, &nptr); + cur->bc_ops->init_rec_from_cur(cur, &rec); + + /* + * Loop going up the tree, starting at the leaf level. + * Stop when we don't get a split block, that must mean that + * the insert is finished with this level. + */ + do { + /* + * Insert nrec/nptr into this level of the tree. + * Note if we fail, nptr will be null. + */ + error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i); + if (error) { + if (pcur != cur) + xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); + goto error0; + } + + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + level++; + + /* + * See if the cursor we just used is trash. + * Can't trash the caller's cursor, but otherwise we should + * if ncur is a new cursor or we're about to be done. + */ + if (pcur != cur && + (ncur || xfs_btree_ptr_is_null(cur, &nptr))) { + /* Save the state from the cursor before we trash it */ + if (cur->bc_ops->update_cursor) + cur->bc_ops->update_cursor(pcur, cur); + cur->bc_nlevels = pcur->bc_nlevels; + xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); + } + /* If we got a new cursor, switch to it. */ + if (ncur) { + pcur = ncur; + ncur = NULL; + } + } while (!xfs_btree_ptr_is_null(cur, &nptr)); + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = i; + return 0; +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 21eec86..6f03871 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -186,6 +186,8 @@ struct xfs_btree_ops { /* cursor operations */ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); + void (*update_cursor)(struct xfs_btree_cur *src, + struct xfs_btree_cur *dst); /* update btree root pointer */ void (*set_root)(struct xfs_btree_cur *cur, @@ -206,9 +208,16 @@ struct xfs_btree_ops { /* records in block/level */ int (*get_maxrecs)(struct xfs_btree_cur *cur, int level); + /* records on disk. Matter for the root in inode case. */ + int (*get_dmaxrecs)(struct xfs_btree_cur *cur, int level); + /* init values of btree structures */ void (*init_key_from_rec)(union xfs_btree_key *key, union xfs_btree_rec *rec); + void (*init_rec_from_key)(union xfs_btree_key *key, + union xfs_btree_rec *rec); + void (*init_rec_from_cur)(struct xfs_btree_cur *cur, + union xfs_btree_rec *rec); void (*init_ptr_from_cur)(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr); @@ -240,6 +249,7 @@ struct xfs_btree_ops { * Reasons for the update_lastrec method to be called. */ #define LASTREC_UPDATE 0 +#define LASTREC_INSREC 1 /* @@ -549,6 +559,7 @@ int xfs_btree_split(struct xfs_btree_cur *, int, union xfs_btree_ptr *, union xfs_btree_key *, struct xfs_btree_cur **, int *); int xfs_btree_new_root(struct xfs_btree_cur *, int *); int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); +int xfs_btree_insert(struct xfs_btree_cur *, int *); /* * Helpers. 
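
The hunks above replace three per-btree insert paths with one generic xfs_btree_insert() that is driven entirely by the callbacks added to struct xfs_btree_ops in this commit (init_rec_from_cur, init_rec_from_key, update_cursor, get_dmaxrecs). The toy program below is only a sketch of that vtable-plus-climb-the-levels shape, not kernel code: every toy_* and demo_* identifier is invented for illustration, the "split" decision is faked with a counter instead of real block accounting, and the cursor swap (ncur) and init_rec_from_key re-seeding that the real loop performs are omitted.

/*
 * Illustrative sketch only -- not XFS code.  Every identifier here
 * (toy_*, demo_*) is invented; the real logic lives in xfs_btree.c
 * and is driven by the callbacks in struct xfs_btree_ops.
 */
#include <stdio.h>
#include <stdbool.h>

struct toy_cur;

struct toy_rec {
	long	startoff;	/* stand-in for union xfs_btree_rec */
};

/* Per-btree callbacks, analogous to init_rec_from_cur/get_maxrecs. */
struct toy_ops {
	void	(*init_rec_from_cur)(struct toy_cur *cur, struct toy_rec *rec);
	bool	(*level_is_full)(struct toy_cur *cur, int level);
};

struct toy_cur {
	const struct toy_ops	*ops;
	long			next_startoff;	/* like cur->bc_rec */
	int			full_levels;	/* levels that will "split" */
};

/* One level of insertion: a full level "splits" and pushes the insert up. */
static int toy_insrec(struct toy_cur *cur, int level, struct toy_rec *rec,
		      bool *split)
{
	*split = cur->ops->level_is_full(cur, level);
	printf("level %d: insert rec %ld%s\n", level, rec->startoff,
	       *split ? " (block full: split, continue one level up)" : "");
	return 0;
}

/* Generic driver: build the record once, then climb until a level absorbs it. */
static int toy_insert(struct toy_cur *cur)
{
	struct toy_rec	rec;
	bool		split;
	int		level = 0;

	cur->ops->init_rec_from_cur(cur, &rec);
	do {
		int error = toy_insrec(cur, level++, &rec, &split);
		if (error)
			return error;
	} while (split);
	return 0;
}

/* A "client" btree only has to supply the callbacks. */
static void demo_init_rec(struct toy_cur *cur, struct toy_rec *rec)
{
	rec->startoff = cur->next_startoff;
}

static bool demo_level_is_full(struct toy_cur *cur, int level)
{
	return level < cur->full_levels;
}

static const struct toy_ops demo_ops = {
	.init_rec_from_cur	= demo_init_rec,
	.level_is_full		= demo_level_is_full,
};

int main(void)
{
	struct toy_cur	cur = {
		.ops		= &demo_ops,
		.next_startoff	= 42,
		.full_levels	= 2,	/* leaf and one node level "split" */
	};

	return toy_insert(&cur);
}

Run with full_levels = 2 it prints one line per level, mirroring how a leaf split propagates the insert into the node level and then the root; that is exactly the do/while loop in xfs_btree_insert() above, with the allocation-group and bmap btrees differing only in the callbacks they plug into the ops table.
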
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 138651a..b68e73b 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -418,7 +418,7 @@ xfs_ialloc_ag_alloc( return error; } ASSERT(i == 0); - if ((error = xfs_inobt_insert(cur, &i))) { + if ((error = xfs_btree_insert(cur, &i))) { xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); return error; } diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 7ba3c7b..8f66e27 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -515,228 +515,6 @@ error0: } /* - * Insert one record/level. Return information to the caller - * allowing the next level up to proceed if necessary. - */ -STATIC int /* error */ -xfs_inobt_insrec( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level to insert record at */ - xfs_agblock_t *bnop, /* i/o: block number inserted */ - xfs_inobt_rec_t *recp, /* i/o: record data inserted */ - xfs_btree_cur_t **curp, /* output: new cursor replacing cur */ - int *stat) /* success/failure */ -{ - xfs_inobt_block_t *block; /* btree block record/key lives in */ - xfs_buf_t *bp; /* buffer for block */ - int error; /* error return value */ - int i; /* loop index */ - xfs_inobt_key_t key; /* key value being inserted */ - xfs_inobt_key_t *kp=NULL; /* pointer to btree keys */ - xfs_agblock_t nbno; /* block number of allocated block */ - xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */ - xfs_inobt_key_t nkey; /* new key value, from split */ - xfs_inobt_rec_t nrec; /* new record value, for caller */ - int numrecs; - int optr; /* old ptr value */ - xfs_inobt_ptr_t *pp; /* pointer to btree addresses */ - int ptr; /* index in btree block for this rec */ - xfs_inobt_rec_t *rp=NULL; /* pointer to btree records */ - - /* - * GCC doesn't understand the (arguably complex) control flow in - * this function and complains about uninitialized structure fields - * without this. - */ - memset(&nrec, 0, sizeof(nrec)); - - /* - * If we made it to the root level, allocate a new root block - * and we're done. - */ - if (level >= cur->bc_nlevels) { - error = xfs_btree_new_root(cur, &i); - *bnop = NULLAGBLOCK; - *stat = i; - return error; - } - /* - * Make a key out of the record data to be inserted, and save it. - */ - key.ir_startino = recp->ir_startino; - optr = ptr = cur->bc_ptrs[level]; - /* - * If we're off the left edge, return failure. - */ - if (ptr == 0) { - *stat = 0; - return 0; - } - /* - * Get pointers to the btree buffer and block. - */ - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); - numrecs = be16_to_cpu(block->bb_numrecs); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, bp))) - return error; - /* - * Check that the new entry is being inserted in the right place. - */ - if (ptr <= numrecs) { - if (level == 0) { - rp = XFS_INOBT_REC_ADDR(block, ptr, cur); - xfs_btree_check_rec(cur->bc_btnum, recp, rp); - } else { - kp = XFS_INOBT_KEY_ADDR(block, ptr, cur); - xfs_btree_check_key(cur->bc_btnum, &key, kp); - } - } -#endif - nbno = NULLAGBLOCK; - ncur = NULL; - /* - * If the block is full, we can't insert the new entry until we - * make the block un-full. - */ - if (numrecs == XFS_INOBT_BLOCK_MAXRECS(level, cur)) { - /* - * First, try shifting an entry to the right neighbor. - */ - if ((error = xfs_btree_rshift(cur, level, &i))) - return error; - if (i) { - /* nothing */ - } - /* - * Next, try shifting an entry to the left neighbor. 
- */ - else { - if ((error = xfs_btree_lshift(cur, level, &i))) - return error; - if (i) { - optr = ptr = cur->bc_ptrs[level]; - } else { - union xfs_btree_ptr bno = { .s = cpu_to_be32(nbno) }; - /* - * Next, try splitting the current block - * in half. If this works we have to - * re-set our variables because - * we could be in a different block now. - */ - if ((error = xfs_btree_split(cur, level, &bno, - (union xfs_btree_key *)&nkey, - &ncur, &i))) - return error; - nbno = be32_to_cpu(bno.s); - if (i) { - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, - block, level, bp))) - return error; -#endif - ptr = cur->bc_ptrs[level]; - nrec.ir_startino = nkey.ir_startino; - } else { - /* - * Otherwise the insert fails. - */ - *stat = 0; - return 0; - } - } - } - } - /* - * At this point we know there's room for our new entry in the block - * we're pointing at. - */ - numrecs = be16_to_cpu(block->bb_numrecs); - if (level > 0) { - /* - * It's a non-leaf entry. Make a hole for the new data - * in the key and ptr regions of the block. - */ - kp = XFS_INOBT_KEY_ADDR(block, 1, cur); - pp = XFS_INOBT_PTR_ADDR(block, 1, cur); -#ifdef DEBUG - for (i = numrecs; i >= ptr; i--) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level))) - return error; - } -#endif - memmove(&kp[ptr], &kp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*kp)); - memmove(&pp[ptr], &pp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*pp)); - /* - * Now stuff the new data in, bump numrecs and log the new data. - */ -#ifdef DEBUG - if ((error = xfs_btree_check_sptr(cur, *bnop, level))) - return error; -#endif - kp[ptr - 1] = key; - pp[ptr - 1] = cpu_to_be32(*bnop); - numrecs++; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_inobt_log_keys(cur, bp, ptr, numrecs); - xfs_inobt_log_ptrs(cur, bp, ptr, numrecs); - } else { - /* - * It's a leaf entry. Make a hole for the new record. - */ - rp = XFS_INOBT_REC_ADDR(block, 1, cur); - memmove(&rp[ptr], &rp[ptr - 1], - (numrecs - ptr + 1) * sizeof(*rp)); - /* - * Now stuff the new record in, bump numrecs - * and log the new data. - */ - rp[ptr - 1] = *recp; - numrecs++; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_inobt_log_recs(cur, bp, ptr, numrecs); - } - /* - * Log the new number of records in the btree header. - */ - xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS); -#ifdef DEBUG - /* - * Check that the key/record is in the right place, now. - */ - if (ptr < numrecs) { - if (level == 0) - xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1, - rp + ptr); - else - xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1, - kp + ptr); - } -#endif - /* - * If we inserted at the start of a block, update the parents' keys. - */ - if (optr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, level + 1))) - return error; - /* - * Return the new block number, if any. - * If there is one, give back a record value and a cursor too. - */ - *bnop = nbno; - if (nbno != NULLAGBLOCK) { - *recp = nrec; - *curp = ncur; - } - *stat = 1; - return 0; -} - -/* * Log header fields from a btree block. */ STATIC void @@ -912,66 +690,6 @@ xfs_inobt_get_rec( return 0; } -/* - * Insert the current record at the point referenced by cur. - * The cursor may be inconsistent on return if splits have been done. 
- */ -int /* error */ -xfs_inobt_insert( - xfs_btree_cur_t *cur, /* btree cursor */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; /* result value, 0 for failure */ - int level; /* current level number in btree */ - xfs_agblock_t nbno; /* new block number (split result) */ - xfs_btree_cur_t *ncur; /* new cursor (split result) */ - xfs_inobt_rec_t nrec; /* record being inserted this level */ - xfs_btree_cur_t *pcur; /* previous level's cursor */ - - level = 0; - nbno = NULLAGBLOCK; - nrec.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino); - nrec.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount); - nrec.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free); - ncur = NULL; - pcur = cur; - /* - * Loop going up the tree, starting at the leaf level. - * Stop when we don't get a split block, that must mean that - * the insert is finished with this level. - */ - do { - /* - * Insert nrec/nbno into this level of the tree. - * Note if we fail, nbno will be null. - */ - if ((error = xfs_inobt_insrec(pcur, level++, &nbno, &nrec, &ncur, - &i))) { - if (pcur != cur) - xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); - return error; - } - /* - * See if the cursor we just used is trash. - * Can't trash the caller's cursor, but otherwise we should - * if ncur is a new cursor or we're about to be done. - */ - if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) { - cur->bc_nlevels = pcur->bc_nlevels; - xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); - } - /* - * If we got a new cursor, switch to it. - */ - if (ncur) { - pcur = ncur; - ncur = NULL; - } - } while (nbno != NULLAGBLOCK); - *stat = i; - return 0; -} STATIC struct xfs_btree_cur * xfs_inobt_dup_cursor( @@ -1053,6 +771,24 @@ xfs_inobt_init_key_from_rec( key->inobt.ir_startino = rec->inobt.ir_startino; } +STATIC void +xfs_inobt_init_rec_from_key( + union xfs_btree_key *key, + union xfs_btree_rec *rec) +{ + rec->inobt.ir_startino = key->inobt.ir_startino; +} + +STATIC void +xfs_inobt_init_rec_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec) +{ + rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino); + rec->inobt.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount); + rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free); +} + /* * intial value of ptr for lookup */ @@ -1152,6 +888,8 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .alloc_block = xfs_inobt_alloc_block, .get_maxrecs = xfs_inobt_get_maxrecs, .init_key_from_rec = xfs_inobt_init_key_from_rec, + .init_rec_from_key = xfs_inobt_init_rec_from_key, + .init_rec_from_cur = xfs_inobt_init_rec_from_cur, .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, .key_diff = xfs_inobt_key_diff, diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 7f77549..c9cbc4f 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -129,12 +129,6 @@ extern int xfs_inobt_delete(struct xfs_btree_cur *cur, int *stat); extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, __int32_t *fcnt, xfs_inofree_t *free, int *stat); -/* - * Insert the current record at the point referenced by cur. - * The cursor may be inconsistent on return if splits have been done. 
- */ -extern int xfs_inobt_insert(struct xfs_btree_cur *cur, int *stat); - extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); -- cgit v0.10.2 From d4b3a4b7dd62f2e111d4d0afa9ef3f9b6cd955c0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:57:51 +1100 Subject: [XFS] move xfs_bmbt_killroot to common code xfs_bmbt_killroot is a mostly generic implementation of moving from a real block based root to an inode based root. So move it to xfs_btree.c where it can use all the nice infrastructure there and make it pointer size agnostic The new name for it is xfs_btree_kill_iroot, following the old naming but making it clear we're dealing with the root in inode case here, and to avoid confusion with xfs_btree_new_root which is used for the not inode rooted case. I've also added a comment describing what it does and why it's named the way it is. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32203a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 818adca..f124ddd 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -834,6 +834,37 @@ xfs_allocbt_alloc_block( return 0; } +STATIC int +xfs_allocbt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); + xfs_agblock_t bno; + int error; + + bno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(bp)); + error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1); + if (error) + return error; + + /* + * Since blocks move to the free list without the coordination used in + * xfs_bmap_finish, we can't allow block to be available for + * reallocation and non-transaction writing (user data) until we know + * that the transaction that moved it to the free list is permanently + * on disk. We track the blocks by declaring these blocks as "busy"; + * the busy list is maintained on a per-ag basis and each transaction + * records which entries should be removed when the iclog commits to + * disk. If a busy block is allocated, the iclog is pushed up to the + * LSN that freed the block. 
+ */ + xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); + xfs_trans_agbtree_delta(cur->bc_tp, -1); + return 0; +} + /* * Update the longest extent in the AGF */ @@ -1025,6 +1056,7 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .dup_cursor = xfs_allocbt_dup_cursor, .set_root = xfs_allocbt_set_root, .alloc_block = xfs_allocbt_alloc_block, + .free_block = xfs_allocbt_free_block, .update_lastrec = xfs_allocbt_update_lastrec, .get_maxrecs = xfs_allocbt_get_maxrecs, .init_key_from_rec = xfs_allocbt_init_key_from_rec, diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 2b15df3..6b7774e 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -49,7 +49,6 @@ */ -STATIC int xfs_bmbt_killroot(xfs_btree_cur_t *); STATIC void xfs_bmbt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); @@ -194,7 +193,7 @@ xfs_bmbt_delrec( if (level == cur->bc_nlevels - 1) { xfs_iroot_realloc(cur->bc_private.b.ip, -1, cur->bc_private.b.whichfork); - if ((error = xfs_bmbt_killroot(cur))) { + if ((error = xfs_btree_kill_iroot(cur))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -228,7 +227,7 @@ xfs_bmbt_delrec( */ if (lbno == NULLFSBLOCK && rbno == NULLFSBLOCK && level == cur->bc_nlevels - 2) { - if ((error = xfs_bmbt_killroot(cur))) { + if ((error = xfs_btree_kill_iroot(cur))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); goto error0; } @@ -456,97 +455,6 @@ error0: return error; } -STATIC int -xfs_bmbt_killroot( - xfs_btree_cur_t *cur) -{ - xfs_bmbt_block_t *block; - xfs_bmbt_block_t *cblock; - xfs_buf_t *cbp; - xfs_bmbt_key_t *ckp; - xfs_bmbt_ptr_t *cpp; -#ifdef DEBUG - int error; -#endif - int i; - xfs_bmbt_key_t *kp; - xfs_inode_t *ip; - xfs_ifork_t *ifp; - int level; - xfs_bmbt_ptr_t *pp; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - level = cur->bc_nlevels - 1; - ASSERT(level >= 1); - /* - * Don't deal with the root block needs to be a leaf case. - * We're just going to turn the thing back into extents anyway. - */ - if (level == 1) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - return 0; - } - block = xfs_bmbt_get_block(cur, level, &cbp); - /* - * Give up if the root has multiple children. - */ - if (be16_to_cpu(block->bb_numrecs) != 1) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - return 0; - } - /* - * Only do this if the next level will fit. - * Then the data must be copied up to the inode, - * instead of freeing the root you free the next level. 
- */ - cbp = cur->bc_bufs[level - 1]; - cblock = XFS_BUF_TO_BMBT_BLOCK(cbp); - if (be16_to_cpu(cblock->bb_numrecs) > XFS_BMAP_BLOCK_DMAXRECS(level, cur)) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - return 0; - } - ASSERT(be64_to_cpu(cblock->bb_leftsib) == NULLDFSBNO); - ASSERT(be64_to_cpu(cblock->bb_rightsib) == NULLDFSBNO); - ip = cur->bc_private.b.ip; - ifp = XFS_IFORK_PTR(ip, cur->bc_private.b.whichfork); - ASSERT(XFS_BMAP_BLOCK_IMAXRECS(level, cur) == - XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes)); - i = (int)(be16_to_cpu(cblock->bb_numrecs) - XFS_BMAP_BLOCK_IMAXRECS(level, cur)); - if (i) { - xfs_iroot_realloc(ip, i, cur->bc_private.b.whichfork); - block = ifp->if_broot; - } - be16_add_cpu(&block->bb_numrecs, i); - ASSERT(block->bb_numrecs == cblock->bb_numrecs); - kp = XFS_BMAP_KEY_IADDR(block, 1, cur); - ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur); - memcpy(kp, ckp, be16_to_cpu(block->bb_numrecs) * sizeof(*kp)); - pp = XFS_BMAP_PTR_IADDR(block, 1, cur); - cpp = XFS_BMAP_PTR_IADDR(cblock, 1, cur); -#ifdef DEBUG - for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { - if ((error = xfs_btree_check_lptr_disk(cur, cpp[i], level - 1))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - } -#endif - memcpy(pp, cpp, be16_to_cpu(block->bb_numrecs) * sizeof(*pp)); - xfs_bmap_add_free(XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(cbp)), 1, - cur->bc_private.b.flist, cur->bc_mp); - ip->i_d.di_nblocks--; - XFS_TRANS_MOD_DQUOT_BYINO(cur->bc_mp, cur->bc_tp, ip, - XFS_TRANS_DQ_BCOUNT, -1L); - xfs_trans_binval(cur->bc_tp, cbp); - cur->bc_bufs[level - 1] = NULL; - be16_add_cpu(&block->bb_level, -1); - xfs_trans_log_inode(cur->bc_tp, ip, - XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork)); - cur->bc_nlevels--; - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - return 0; -} - /* * Log key values from the btree block. */ @@ -1299,6 +1207,25 @@ xfs_bmbt_alloc_block( } STATIC int +xfs_bmbt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_inode *ip = cur->bc_private.b.ip; + struct xfs_trans *tp = cur->bc_tp; + xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); + + xfs_bmap_add_free(fsbno, 1, cur->bc_private.b.flist, mp); + ip->i_d.di_nblocks--; + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); + xfs_trans_binval(tp, bp); + return 0; +} + +STATIC int xfs_bmbt_get_maxrecs( struct xfs_btree_cur *cur, int level) @@ -1460,6 +1387,7 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .dup_cursor = xfs_bmbt_dup_cursor, .update_cursor = xfs_bmbt_update_cursor, .alloc_block = xfs_bmbt_alloc_block, + .free_block = xfs_bmbt_free_block, .get_maxrecs = xfs_bmbt_get_maxrecs, .get_dmaxrecs = xfs_bmbt_get_dmaxrecs, .init_key_from_rec = xfs_bmbt_init_key_from_rec, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 36477aa..75a8a7b 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -3059,3 +3059,115 @@ error0: XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } + +/* + * Try to merge a non-leaf block back into the inode root. + * + * Note: the killroot names comes from the fact that we're effectively + * killing the old root block. But because we can't just delete the + * inode we have to copy the single block it was pointing to into the + * inode. 
+ */ +int +xfs_btree_kill_iroot( + struct xfs_btree_cur *cur) +{ + int whichfork = cur->bc_private.b.whichfork; + struct xfs_inode *ip = cur->bc_private.b.ip; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_btree_block *block; + struct xfs_btree_block *cblock; + union xfs_btree_key *kp; + union xfs_btree_key *ckp; + union xfs_btree_ptr *pp; + union xfs_btree_ptr *cpp; + struct xfs_buf *cbp; + int level; + int index; + int numrecs; +#ifdef DEBUG + union xfs_btree_ptr ptr; + int i; +#endif + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + + ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + ASSERT(cur->bc_nlevels > 1); + + /* + * Don't deal with the root block needs to be a leaf case. + * We're just going to turn the thing back into extents anyway. + */ + level = cur->bc_nlevels - 1; + if (level == 1) + goto out0; + + /* + * Give up if the root has multiple children. + */ + block = xfs_btree_get_iroot(cur); + if (xfs_btree_get_numrecs(block) != 1) + goto out0; + + cblock = xfs_btree_get_block(cur, level - 1, &cbp); + numrecs = xfs_btree_get_numrecs(cblock); + + /* + * Only do this if the next level will fit. + * Then the data must be copied up to the inode, + * instead of freeing the root you free the next level. + */ + if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level)) + goto out0; + + XFS_BTREE_STATS_INC(cur, killroot); + +#ifdef DEBUG + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); + ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); + xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); + ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); +#endif + + index = numrecs - cur->bc_ops->get_maxrecs(cur, level); + if (index) { + xfs_iroot_realloc(cur->bc_private.b.ip, index, + cur->bc_private.b.whichfork); + block = (struct xfs_btree_block *)ifp->if_broot; + } + + be16_add_cpu(&block->bb_numrecs, index); + ASSERT(block->bb_numrecs == cblock->bb_numrecs); + + kp = xfs_btree_key_addr(cur, 1, block); + ckp = xfs_btree_key_addr(cur, 1, cblock); + xfs_btree_copy_keys(cur, kp, ckp, numrecs); + + pp = xfs_btree_ptr_addr(cur, 1, block); + cpp = xfs_btree_ptr_addr(cur, 1, cblock); +#ifdef DEBUG + for (i = 0; i < numrecs; i++) { + int error; + + error = xfs_btree_check_ptr(cur, cpp, i, level - 1); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + } +#endif + xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); + + cur->bc_ops->free_block(cur, cbp); + XFS_BTREE_STATS_INC(cur, free); + + cur->bc_bufs[level - 1] = NULL; + be16_add_cpu(&block->bb_level, -1); + xfs_trans_log_inode(cur->bc_tp, ip, + XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork)); + cur->bc_nlevels--; +out0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 6f03871..ff2552f 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -198,6 +198,7 @@ struct xfs_btree_ops { union xfs_btree_ptr *start_bno, union xfs_btree_ptr *new_bno, int length, int *stat); + int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp); /* update last record information */ void (*update_lastrec)(struct xfs_btree_cur *cur, @@ -559,6 +560,7 @@ int xfs_btree_split(struct xfs_btree_cur *, int, union xfs_btree_ptr *, union xfs_btree_key *, struct xfs_btree_cur **, int *); int xfs_btree_new_root(struct xfs_btree_cur *, int *); int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); +int xfs_btree_kill_iroot(struct xfs_btree_cur *); int xfs_btree_insert(struct xfs_btree_cur *, int *); /* diff --git a/fs/xfs/xfs_ialloc_btree.c 
b/fs/xfs/xfs_ialloc_btree.c index 8f66e27..90f1d4e 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -754,6 +754,22 @@ xfs_inobt_alloc_block( return 0; } +STATIC int +xfs_inobt_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + xfs_fsblock_t fsbno; + int error; + + fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)); + error = xfs_free_extent(cur->bc_tp, fsbno, 1); + if (error) + return error; + + xfs_trans_binval(cur->bc_tp, bp); + return error; +} STATIC int xfs_inobt_get_maxrecs( @@ -886,6 +902,7 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, .set_root = xfs_inobt_set_root, .alloc_block = xfs_inobt_alloc_block, + .free_block = xfs_inobt_free_block, .get_maxrecs = xfs_inobt_get_maxrecs, .init_key_from_rec = xfs_inobt_init_key_from_rec, .init_rec_from_key = xfs_inobt_init_rec_from_key, -- cgit v0.10.2 From 91cca5df9bc85efdabfa645f51d54259ed09f4bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:58:01 +1100 Subject: [XFS] implement generic xfs_btree_delete/delrec Make the btree delete code generic. Based on a patch from David Chinner with lots of changes to follow the original btree implementations more closely. While this loses some of the generic helper routines for inserting/moving/removing records it also solves some of the one off bugs in the original code and makes it easier to verify. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32205a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index a983824..e9c7024 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -398,7 +398,7 @@ xfs_alloc_fixup_trees( /* * Delete the entry from the by-size btree. */ - if ((error = xfs_alloc_delete(cnt_cur, &i))) + if ((error = xfs_btree_delete(cnt_cur, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); /* @@ -427,7 +427,7 @@ xfs_alloc_fixup_trees( /* * No remaining freespace, just delete the by-block tree entry. */ - if ((error = xfs_alloc_delete(bno_cur, &i))) + if ((error = xfs_btree_delete(bno_cur, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); } else { @@ -1651,7 +1651,7 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_delete(cnt_cur, &i))) + if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); /* @@ -1660,13 +1660,13 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_delete(cnt_cur, &i))) + if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); /* * Delete the old by-block entry for the right block. 
*/ - if ((error = xfs_alloc_delete(bno_cur, &i))) + if ((error = xfs_btree_delete(bno_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); /* @@ -1711,7 +1711,7 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_delete(cnt_cur, &i))) + if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); /* @@ -1737,7 +1737,7 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_delete(cnt_cur, &i))) + if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); /* diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index f124ddd..d256b51 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -40,691 +40,6 @@ #include "xfs_alloc.h" #include "xfs_error.h" -/* - * Prototypes for internal functions. - */ - -STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int); -STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); - -/* - * Internal functions. - */ - -/* - * Single level of the xfs_alloc_delete record deletion routine. - * Delete record pointed to by cur/level. - * Remove the record from its block then rebalance the tree. - * Return 0 for error, 1 for done, 2 to go on to the next level. - */ -STATIC int /* error */ -xfs_alloc_delrec( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level removing record from */ - int *stat) /* fail/done/go-on */ -{ - xfs_agf_t *agf; /* allocation group freelist header */ - xfs_alloc_block_t *block; /* btree block record/key lives in */ - xfs_agblock_t bno; /* btree block number */ - xfs_buf_t *bp; /* buffer for block */ - int error; /* error return value */ - int i; /* loop index */ - xfs_alloc_key_t key; /* kp points here if block is level 0 */ - xfs_agblock_t lbno; /* left block's block number */ - xfs_buf_t *lbp; /* left block's buffer pointer */ - xfs_alloc_block_t *left; /* left btree block */ - xfs_alloc_key_t *lkp=NULL; /* left block key pointer */ - xfs_alloc_ptr_t *lpp=NULL; /* left block address pointer */ - int lrecs=0; /* number of records in left block */ - xfs_alloc_rec_t *lrp; /* left block record pointer */ - xfs_mount_t *mp; /* mount structure */ - int ptr; /* index in btree block for this rec */ - xfs_agblock_t rbno; /* right block's block number */ - xfs_buf_t *rbp; /* right block's buffer pointer */ - xfs_alloc_block_t *right; /* right btree block */ - xfs_alloc_key_t *rkp; /* right block key pointer */ - xfs_alloc_ptr_t *rpp; /* right block address pointer */ - int rrecs=0; /* number of records in right block */ - int numrecs; - xfs_alloc_rec_t *rrp; /* right block record pointer */ - xfs_btree_cur_t *tcur; /* temporary btree cursor */ - - /* - * Get the index of the entry being deleted, check for nothing there. - */ - ptr = cur->bc_ptrs[level]; - if (ptr == 0) { - *stat = 0; - return 0; - } - /* - * Get the buffer & block containing the record or key/ptr. - */ - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_ALLOC_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, bp))) - return error; -#endif - /* - * Fail if we're off the end of the block. 
- */ - numrecs = be16_to_cpu(block->bb_numrecs); - if (ptr > numrecs) { - *stat = 0; - return 0; - } - XFS_STATS_INC(xs_abt_delrec); - /* - * It's a nonleaf. Excise the key and ptr being deleted, by - * sliding the entries past them down one. - * Log the changed areas of the block. - */ - if (level > 0) { - lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur); - lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur); -#ifdef DEBUG - for (i = ptr; i < numrecs; i++) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level))) - return error; - } -#endif - if (ptr < numrecs) { - memmove(&lkp[ptr - 1], &lkp[ptr], - (numrecs - ptr) * sizeof(*lkp)); - memmove(&lpp[ptr - 1], &lpp[ptr], - (numrecs - ptr) * sizeof(*lpp)); - xfs_alloc_log_ptrs(cur, bp, ptr, numrecs - 1); - xfs_alloc_log_keys(cur, bp, ptr, numrecs - 1); - } - } - /* - * It's a leaf. Excise the record being deleted, by sliding the - * entries past it down one. Log the changed areas of the block. - */ - else { - lrp = XFS_ALLOC_REC_ADDR(block, 1, cur); - if (ptr < numrecs) { - memmove(&lrp[ptr - 1], &lrp[ptr], - (numrecs - ptr) * sizeof(*lrp)); - xfs_alloc_log_recs(cur, bp, ptr, numrecs - 1); - } - /* - * If it's the first record in the block, we'll need a key - * structure to pass up to the next level (updkey). - */ - if (ptr == 1) { - key.ar_startblock = lrp->ar_startblock; - key.ar_blockcount = lrp->ar_blockcount; - lkp = &key; - } - } - /* - * Decrement and log the number of entries in the block. - */ - numrecs--; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS); - /* - * See if the longest free extent in the allocation group was - * changed by this operation. True if it's the by-size btree, and - * this is the leaf level, and there is no right sibling block, - * and this was the last record. - */ - agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); - mp = cur->bc_mp; - - if (level == 0 && - cur->bc_btnum == XFS_BTNUM_CNT && - be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK && - ptr > numrecs) { - ASSERT(ptr == numrecs + 1); - /* - * There are still records in the block. Grab the size - * from the last one. - */ - if (numrecs) { - rrp = XFS_ALLOC_REC_ADDR(block, numrecs, cur); - agf->agf_longest = rrp->ar_blockcount; - } - /* - * No free extents left. - */ - else - agf->agf_longest = 0; - mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest = - be32_to_cpu(agf->agf_longest); - xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, - XFS_AGF_LONGEST); - } - /* - * Is this the root level? If so, we're almost done. - */ - if (level == cur->bc_nlevels - 1) { - /* - * If this is the root level, - * and there's only one entry left, - * and it's NOT the leaf level, - * then we can get rid of this level. - */ - if (numrecs == 1 && level > 0) { - /* - * lpp is still set to the first pointer in the block. - * Make it the new root of the btree. - */ - bno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]); - agf->agf_roots[cur->bc_btnum] = *lpp; - be32_add_cpu(&agf->agf_levels[cur->bc_btnum], -1); - mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_levels[cur->bc_btnum]--; - /* - * Put this buffer/block on the ag's freelist. 
- */ - error = xfs_alloc_put_freelist(cur->bc_tp, - cur->bc_private.a.agbp, NULL, bno, 1); - if (error) - return error; - /* - * Since blocks move to the free list without the - * coordination used in xfs_bmap_finish, we can't allow - * block to be available for reallocation and - * non-transaction writing (user data) until we know - * that the transaction that moved it to the free list - * is permanently on disk. We track the blocks by - * declaring these blocks as "busy"; the busy list is - * maintained on a per-ag basis and each transaction - * records which entries should be removed when the - * iclog commits to disk. If a busy block is - * allocated, the iclog is pushed up to the LSN - * that freed the block. - */ - xfs_alloc_mark_busy(cur->bc_tp, - be32_to_cpu(agf->agf_seqno), bno, 1); - - xfs_trans_agbtree_delta(cur->bc_tp, -1); - xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, - XFS_AGF_ROOTS | XFS_AGF_LEVELS); - /* - * Update the cursor so there's one fewer level. - */ - xfs_btree_setbuf(cur, level, NULL); - cur->bc_nlevels--; - } else if (level > 0 && - (error = xfs_btree_decrement(cur, level, &i))) - return error; - *stat = 1; - return 0; - } - /* - * If we deleted the leftmost entry in the block, update the - * key values above us in the tree. - */ - if (ptr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)lkp, level + 1))) - return error; - /* - * If the number of records remaining in the block is at least - * the minimum, we're done. - */ - if (numrecs >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) { - if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) - return error; - *stat = 1; - return 0; - } - /* - * Otherwise, we have to move some records around to keep the - * tree balanced. Look at the left and right sibling blocks to - * see if we can re-balance by moving only one record. - */ - rbno = be32_to_cpu(block->bb_rightsib); - lbno = be32_to_cpu(block->bb_leftsib); - bno = NULLAGBLOCK; - ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK); - /* - * Duplicate the cursor so our btree manipulations here won't - * disrupt the next level up. - */ - if ((error = xfs_btree_dup_cursor(cur, &tcur))) - return error; - /* - * If there's a right sibling, see if it's ok to shift an entry - * out of it. - */ - if (rbno != NULLAGBLOCK) { - /* - * Move the temp cursor to the last entry in the next block. - * Actually any entry but the first would suffice. - */ - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_increment(tcur, level, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - /* - * Grab a pointer to the block. - */ - rbp = tcur->bc_bufs[level]; - right = XFS_BUF_TO_ALLOC_BLOCK(rbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) - goto error0; -#endif - /* - * Grab the current block number, for future use. - */ - bno = be32_to_cpu(right->bb_leftsib); - /* - * If right block is full enough so that removing one entry - * won't make it too empty, and left-shifting an entry out - * of right to us works, we're done. 
- */ - if (be16_to_cpu(right->bb_numrecs) - 1 >= - XFS_ALLOC_BLOCK_MINRECS(level, cur)) { - if ((error = xfs_btree_lshift(tcur, level, &i))) - goto error0; - if (i) { - ASSERT(be16_to_cpu(block->bb_numrecs) >= - XFS_ALLOC_BLOCK_MINRECS(level, cur)); - xfs_btree_del_cursor(tcur, - XFS_BTREE_NOERROR); - if (level > 0 && - (error = xfs_btree_decrement(cur, level, - &i))) - return error; - *stat = 1; - return 0; - } - } - /* - * Otherwise, grab the number of records in right for - * future reference, and fix up the temp cursor to point - * to our block again (last record). - */ - rrecs = be16_to_cpu(right->bb_numrecs); - if (lbno != NULLAGBLOCK) { - i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_decrement(tcur, level, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - } - } - /* - * If there's a left sibling, see if it's ok to shift an entry - * out of it. - */ - if (lbno != NULLAGBLOCK) { - /* - * Move the temp cursor to the first entry in the - * previous block. - */ - i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_decrement(tcur, level, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_btree_firstrec(tcur, level); - /* - * Grab a pointer to the block. - */ - lbp = tcur->bc_bufs[level]; - left = XFS_BUF_TO_ALLOC_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - goto error0; -#endif - /* - * Grab the current block number, for future use. - */ - bno = be32_to_cpu(left->bb_rightsib); - /* - * If left block is full enough so that removing one entry - * won't make it too empty, and right-shifting an entry out - * of left to us works, we're done. - */ - if (be16_to_cpu(left->bb_numrecs) - 1 >= - XFS_ALLOC_BLOCK_MINRECS(level, cur)) { - if ((error = xfs_btree_rshift(tcur, level, &i))) - goto error0; - if (i) { - ASSERT(be16_to_cpu(block->bb_numrecs) >= - XFS_ALLOC_BLOCK_MINRECS(level, cur)); - xfs_btree_del_cursor(tcur, - XFS_BTREE_NOERROR); - if (level == 0) - cur->bc_ptrs[0]++; - *stat = 1; - return 0; - } - } - /* - * Otherwise, grab the number of records in right for - * future reference. - */ - lrecs = be16_to_cpu(left->bb_numrecs); - } - /* - * Delete the temp cursor, we're done with it. - */ - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - /* - * If here, we need to do a join to keep the tree balanced. - */ - ASSERT(bno != NULLAGBLOCK); - /* - * See if we can join with the left neighbor block. - */ - if (lbno != NULLAGBLOCK && - lrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { - /* - * Set "right" to be the starting block, - * "left" to be the left neighbor. - */ - rbno = bno; - right = block; - rrecs = be16_to_cpu(right->bb_numrecs); - rbp = bp; - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.a.agno, lbno, 0, &lbp, - XFS_ALLOC_BTREE_REF))) - return error; - left = XFS_BUF_TO_ALLOC_BLOCK(lbp); - lrecs = be16_to_cpu(left->bb_numrecs); - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - return error; - } - /* - * If that won't work, see if we can join with the right neighbor block. - */ - else if (rbno != NULLAGBLOCK && - rrecs + numrecs <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) { - /* - * Set "left" to be the starting block, - * "right" to be the right neighbor. 
- */ - lbno = bno; - left = block; - lrecs = be16_to_cpu(left->bb_numrecs); - lbp = bp; - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.a.agno, rbno, 0, &rbp, - XFS_ALLOC_BTREE_REF))) - return error; - right = XFS_BUF_TO_ALLOC_BLOCK(rbp); - rrecs = be16_to_cpu(right->bb_numrecs); - if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) - return error; - } - /* - * Otherwise, we can't fix the imbalance. - * Just return. This is probably a logic error, but it's not fatal. - */ - else { - if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) - return error; - *stat = 1; - return 0; - } - /* - * We're now going to join "left" and "right" by moving all the stuff - * in "right" to "left" and deleting "right". - */ - if (level > 0) { - /* - * It's a non-leaf. Move keys and pointers. - */ - lkp = XFS_ALLOC_KEY_ADDR(left, lrecs + 1, cur); - lpp = XFS_ALLOC_PTR_ADDR(left, lrecs + 1, cur); - rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur); - rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur); -#ifdef DEBUG - for (i = 0; i < rrecs; i++) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level))) - return error; - } -#endif - memcpy(lkp, rkp, rrecs * sizeof(*lkp)); - memcpy(lpp, rpp, rrecs * sizeof(*lpp)); - xfs_alloc_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); - xfs_alloc_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); - } else { - /* - * It's a leaf. Move records. - */ - lrp = XFS_ALLOC_REC_ADDR(left, lrecs + 1, cur); - rrp = XFS_ALLOC_REC_ADDR(right, 1, cur); - memcpy(lrp, rrp, rrecs * sizeof(*lrp)); - xfs_alloc_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); - } - /* - * If we joined with the left neighbor, set the buffer in the - * cursor to the left block, and fix up the index. - */ - if (bp != lbp) { - xfs_btree_setbuf(cur, level, lbp); - cur->bc_ptrs[level] += lrecs; - } - /* - * If we joined with the right neighbor and there's a level above - * us, increment the cursor at that level. - */ - else if (level + 1 < cur->bc_nlevels && - (error = xfs_btree_increment(cur, level + 1, &i))) - return error; - /* - * Fix up the number of records in the surviving block. - */ - lrecs += rrecs; - left->bb_numrecs = cpu_to_be16(lrecs); - /* - * Fix up the right block pointer in the surviving block, and log it. - */ - left->bb_rightsib = right->bb_rightsib; - xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); - /* - * If there is a right sibling now, make it point to the - * remaining block. - */ - if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) { - xfs_alloc_block_t *rrblock; - xfs_buf_t *rrbp; - - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0, - &rrbp, XFS_ALLOC_BTREE_REF))) - return error; - rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp); - if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) - return error; - rrblock->bb_leftsib = cpu_to_be32(lbno); - xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB); - } - /* - * Free the deleting block by putting it on the freelist. - */ - error = xfs_alloc_put_freelist(cur->bc_tp, - cur->bc_private.a.agbp, NULL, rbno, 1); - if (error) - return error; - /* - * Since blocks move to the free list without the coordination - * used in xfs_bmap_finish, we can't allow block to be available - * for reallocation and non-transaction writing (user data) - * until we know that the transaction that moved it to the free - * list is permanently on disk. 
We track the blocks by declaring - * these blocks as "busy"; the busy list is maintained on a - * per-ag basis and each transaction records which entries - * should be removed when the iclog commits to disk. If a - * busy block is allocated, the iclog is pushed up to the - * LSN that freed the block. - */ - xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); - xfs_trans_agbtree_delta(cur->bc_tp, -1); - - /* - * Adjust the current level's cursor so that we're left referring - * to the right node, after we're done. - * If this leaves the ptr value 0 our caller will fix it up. - */ - if (level > 0) - cur->bc_ptrs[level]--; - /* - * Return value means the next level up has something to do. - */ - *stat = 2; - return 0; - -error0: - xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); - return error; -} - -/* - * Log header fields from a btree block. - */ -STATIC void -xfs_alloc_log_block( - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *bp, /* buffer containing btree block */ - int fields) /* mask of fields: XFS_BB_... */ -{ - int first; /* first byte offset logged */ - int last; /* last byte offset logged */ - static const short offsets[] = { /* table of offsets */ - offsetof(xfs_alloc_block_t, bb_magic), - offsetof(xfs_alloc_block_t, bb_level), - offsetof(xfs_alloc_block_t, bb_numrecs), - offsetof(xfs_alloc_block_t, bb_leftsib), - offsetof(xfs_alloc_block_t, bb_rightsib), - sizeof(xfs_alloc_block_t) - }; - - xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last); - xfs_trans_log_buf(tp, bp, first, last); -} - -/* - * Log keys from a btree block (nonleaf). - */ -STATIC void -xfs_alloc_log_keys( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_buf_t *bp, /* buffer containing btree block */ - int kfirst, /* index of first key to log */ - int klast) /* index of last key to log */ -{ - xfs_alloc_block_t *block; /* btree block to log from */ - int first; /* first byte offset logged */ - xfs_alloc_key_t *kp; /* key pointer in btree block */ - int last; /* last byte offset logged */ - - block = XFS_BUF_TO_ALLOC_BLOCK(bp); - kp = XFS_ALLOC_KEY_ADDR(block, 1, cur); - first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(cur->bc_tp, bp, first, last); -} - -/* - * Log block pointer fields from a btree block (nonleaf). - */ -STATIC void -xfs_alloc_log_ptrs( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_buf_t *bp, /* buffer containing btree block */ - int pfirst, /* index of first pointer to log */ - int plast) /* index of last pointer to log */ -{ - xfs_alloc_block_t *block; /* btree block to log from */ - int first; /* first byte offset logged */ - int last; /* last byte offset logged */ - xfs_alloc_ptr_t *pp; /* block-pointer pointer in btree blk */ - - block = XFS_BUF_TO_ALLOC_BLOCK(bp); - pp = XFS_ALLOC_PTR_ADDR(block, 1, cur); - first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(cur->bc_tp, bp, first, last); -} - -/* - * Log records from a btree block (leaf). 
- */ -STATIC void -xfs_alloc_log_recs( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_buf_t *bp, /* buffer containing btree block */ - int rfirst, /* index of first record to log */ - int rlast) /* index of last record to log */ -{ - xfs_alloc_block_t *block; /* btree block to log from */ - int first; /* first byte offset logged */ - int last; /* last byte offset logged */ - xfs_alloc_rec_t *rp; /* record pointer for btree block */ - - - block = XFS_BUF_TO_ALLOC_BLOCK(bp); - rp = XFS_ALLOC_REC_ADDR(block, 1, cur); -#ifdef DEBUG - { - xfs_agf_t *agf; - xfs_alloc_rec_t *p; - - agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); - for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++) - ASSERT(be32_to_cpu(p->ar_startblock) + - be32_to_cpu(p->ar_blockcount) <= - be32_to_cpu(agf->agf_length)); - } -#endif - first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(cur->bc_tp, bp, first, last); -} - - -/* - * Externally visible routines. - */ - -/* - * Delete the record pointed to by cur. - * The cursor refers to the place where the record was (could be inserted) - * when the operation returns. - */ -int /* error */ -xfs_alloc_delete( - xfs_btree_cur_t *cur, /* btree cursor */ - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; /* result code */ - int level; /* btree level */ - - /* - * Go up the tree, starting at leaf level. - * If 2 is returned then a join was done; go to the next level. - * Otherwise we are done. - */ - for (level = 0, i = 2; i == 2; level++) { - if ((error = xfs_alloc_delrec(cur, level, &i))) - return error; - } - if (i == 0) { - for (level = 1; level < cur->bc_nlevels; level++) { - if (cur->bc_ptrs[level] == 0) { - if ((error = xfs_btree_decrement(cur, level, &i))) - return error; - break; - } - } - } - *stat = i; - return 0; -} /* * Get the data from the pointed-to record. @@ -879,6 +194,7 @@ xfs_allocbt_update_lastrec( struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno); __be32 len; + int numrecs; ASSERT(cur->bc_btnum == XFS_BTNUM_CNT); @@ -898,6 +214,22 @@ xfs_allocbt_update_lastrec( return; len = rec->alloc.ar_blockcount; break; + case LASTREC_DELREC: + numrecs = xfs_btree_get_numrecs(block); + if (ptr <= numrecs) + return; + ASSERT(ptr == numrecs + 1); + + if (numrecs) { + xfs_alloc_rec_t *rrp; + + rrp = XFS_ALLOC_REC_ADDR(block, numrecs, cur); + len = rrp->ar_blockcount; + } else { + len = 0; + } + + break; default: ASSERT(0); return; @@ -909,6 +241,14 @@ xfs_allocbt_update_lastrec( } STATIC int +xfs_allocbt_get_minrecs( + struct xfs_btree_cur *cur, + int level) +{ + return cur->bc_mp->m_alloc_mnr[level != 0]; +} + +STATIC int xfs_allocbt_get_maxrecs( struct xfs_btree_cur *cur, int level) @@ -983,6 +323,38 @@ xfs_allocbt_key_diff( return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; } +STATIC int +xfs_allocbt_kill_root( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + int level, + union xfs_btree_ptr *newroot) +{ + int error; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_STATS_INC(cur, killroot); + + /* + * Update the root pointer, decreasing the level by 1 and then + * free the old root. 
+ */ + xfs_allocbt_set_root(cur, newroot, -1); + error = xfs_allocbt_free_block(cur, bp); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + + XFS_BTREE_STATS_INC(cur, free); + + xfs_btree_setbuf(cur, level, NULL); + cur->bc_nlevels--; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +} + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_allocbt_trace_buf; @@ -1055,9 +427,11 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .dup_cursor = xfs_allocbt_dup_cursor, .set_root = xfs_allocbt_set_root, + .kill_root = xfs_allocbt_kill_root, .alloc_block = xfs_allocbt_alloc_block, .free_block = xfs_allocbt_free_block, .update_lastrec = xfs_allocbt_update_lastrec, + .get_minrecs = xfs_allocbt_get_minrecs, .get_maxrecs = xfs_allocbt_get_maxrecs, .init_key_from_rec = xfs_allocbt_init_key_from_rec, .init_rec_from_key = xfs_allocbt_init_rec_from_key, diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 2e340ef..8d2e3ec 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -95,13 +95,6 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t; XFS_BTREE_PTR_ADDR(xfs_alloc, bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur)) /* - * Delete the record pointed to by cur. - * The cursor refers to the place where the record was (could be inserted) - * when the operation returns. - */ -extern int xfs_alloc_delete(struct xfs_btree_cur *cur, int *stat); - -/* * Get the data from the pointed-to record. */ extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno, diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 85e2e8b..74761ca 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -864,7 +864,7 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_blockcount, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_delete(cur, &i))) + if ((error = xfs_btree_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_btree_decrement(cur, 0, &i))) @@ -1425,13 +1425,13 @@ xfs_bmap_add_extent_unwritten_real( RIGHT.br_blockcount, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_delete(cur, &i))) + if ((error = xfs_btree_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_delete(cur, &i))) + if ((error = xfs_btree_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_btree_decrement(cur, 0, &i))) @@ -1474,7 +1474,7 @@ xfs_bmap_add_extent_unwritten_real( &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_delete(cur, &i))) + if ((error = xfs_btree_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_btree_decrement(cur, 0, &i))) @@ -1517,7 +1517,7 @@ xfs_bmap_add_extent_unwritten_real( RIGHT.br_blockcount, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_delete(cur, &i))) + if ((error = xfs_btree_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_btree_decrement(cur, 0, &i))) @@ -2152,7 +2152,7 @@ xfs_bmap_add_extent_hole_real( right.br_blockcount, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); - if ((error = xfs_bmbt_delete(cur, &i))) + if ((error = xfs_btree_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_btree_decrement(cur, 0, &i))) @@ -3216,7 +3216,7 @@ xfs_bmap_del_extent( flags |= XFS_ILOG_FEXT(whichfork); break; } - if ((error = 
xfs_bmbt_delete(cur, &i))) + if ((error = xfs_btree_delete(cur, &i))) goto done; XFS_WANT_CORRUPTED_GOTO(i == 1, done); break; diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 6b7774e..5b80305 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -44,14 +44,6 @@ #include "xfs_error.h" #include "xfs_quota.h" -/* - * Prototypes for internal btree functions. - */ - - -STATIC void xfs_bmbt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); - #undef EXIT #define ENTRY XBT_ENTRY @@ -80,453 +72,6 @@ STATIC void xfs_bmbt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); #define XFS_BMBT_TRACE_CURSOR(c,s) \ XFS_BTREE_TRACE_CURSOR(c,s) - -/* - * Internal functions. - */ - -/* - * Delete record pointed to by cur/level. - */ -STATIC int /* error */ -xfs_bmbt_delrec( - xfs_btree_cur_t *cur, - int level, - int *stat) /* success/failure */ -{ - xfs_bmbt_block_t *block; /* bmap btree block */ - xfs_fsblock_t bno; /* fs-relative block number */ - xfs_buf_t *bp; /* buffer for block */ - int error; /* error return value */ - int i; /* loop counter */ - int j; /* temp state */ - xfs_bmbt_key_t key; /* bmap btree key */ - xfs_bmbt_key_t *kp=NULL; /* pointer to bmap btree key */ - xfs_fsblock_t lbno; /* left sibling block number */ - xfs_buf_t *lbp; /* left buffer pointer */ - xfs_bmbt_block_t *left; /* left btree block */ - xfs_bmbt_key_t *lkp; /* left btree key */ - xfs_bmbt_ptr_t *lpp; /* left address pointer */ - int lrecs=0; /* left record count */ - xfs_bmbt_rec_t *lrp; /* left record pointer */ - xfs_mount_t *mp; /* file system mount point */ - xfs_bmbt_ptr_t *pp; /* pointer to bmap block addr */ - int ptr; /* key/record index */ - xfs_fsblock_t rbno; /* right sibling block number */ - xfs_buf_t *rbp; /* right buffer pointer */ - xfs_bmbt_block_t *right; /* right btree block */ - xfs_bmbt_key_t *rkp; /* right btree key */ - xfs_bmbt_rec_t *rp; /* pointer to bmap btree rec */ - xfs_bmbt_ptr_t *rpp; /* right address pointer */ - xfs_bmbt_block_t *rrblock; /* right-right btree block */ - xfs_buf_t *rrbp; /* right-right buffer pointer */ - int rrecs=0; /* right record count */ - xfs_bmbt_rec_t *rrp; /* right record pointer */ - xfs_btree_cur_t *tcur; /* temporary btree cursor */ - int numrecs; /* temporary numrec count */ - int numlrecs, numrrecs; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGI(cur, level); - ptr = cur->bc_ptrs[level]; - tcur = NULL; - if (ptr == 0) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - block = xfs_bmbt_get_block(cur, level, &bp); - numrecs = be16_to_cpu(block->bb_numrecs); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, block, level, bp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } -#endif - if (ptr > numrecs) { - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 0; - return 0; - } - XFS_STATS_INC(xs_bmbt_delrec); - if (level > 0) { - kp = XFS_BMAP_KEY_IADDR(block, 1, cur); - pp = XFS_BMAP_PTR_IADDR(block, 1, cur); -#ifdef DEBUG - for (i = ptr; i < numrecs; i++) { - if ((error = xfs_btree_check_lptr_disk(cur, pp[i], level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - } -#endif - if (ptr < numrecs) { - memmove(&kp[ptr - 1], &kp[ptr], - (numrecs - ptr) * sizeof(*kp)); - memmove(&pp[ptr - 1], &pp[ptr], - (numrecs - ptr) * sizeof(*pp)); - xfs_bmbt_log_ptrs(cur, bp, ptr, numrecs - 1); - xfs_bmbt_log_keys(cur, bp, ptr, numrecs - 1); - } - } else { - rp = XFS_BMAP_REC_IADDR(block, 1, cur); - if (ptr < 
numrecs) { - memmove(&rp[ptr - 1], &rp[ptr], - (numrecs - ptr) * sizeof(*rp)); - xfs_bmbt_log_recs(cur, bp, ptr, numrecs - 1); - } - if (ptr == 1) { - key.br_startoff = - cpu_to_be64(xfs_bmbt_disk_get_startoff(rp)); - kp = &key; - } - } - numrecs--; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_bmbt_log_block(cur, bp, XFS_BB_NUMRECS); - /* - * We're at the root level. - * First, shrink the root block in-memory. - * Try to get rid of the next level down. - * If we can't then there's nothing left to do. - */ - if (level == cur->bc_nlevels - 1) { - xfs_iroot_realloc(cur->bc_private.b.ip, -1, - cur->bc_private.b.whichfork); - if ((error = xfs_btree_kill_iroot(cur))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - if (level > 0 && (error = xfs_btree_decrement(cur, level, &j))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - if (ptr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)kp, level + 1))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - if (numrecs >= XFS_BMAP_BLOCK_IMINRECS(level, cur)) { - if (level > 0 && (error = xfs_btree_decrement(cur, level, &j))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - rbno = be64_to_cpu(block->bb_rightsib); - lbno = be64_to_cpu(block->bb_leftsib); - /* - * One child of root, need to get a chance to copy its contents - * into the root and delete it. Can't go up to next level, - * there's nothing to delete there. - */ - if (lbno == NULLFSBLOCK && rbno == NULLFSBLOCK && - level == cur->bc_nlevels - 2) { - if ((error = xfs_btree_kill_iroot(cur))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - ASSERT(rbno != NULLFSBLOCK || lbno != NULLFSBLOCK); - if ((error = xfs_btree_dup_cursor(cur, &tcur))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - bno = NULLFSBLOCK; - if (rbno != NULLFSBLOCK) { - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_increment(tcur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - rbp = tcur->bc_bufs[level]; - right = XFS_BUF_TO_BMBT_BLOCK(rbp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } -#endif - bno = be64_to_cpu(right->bb_leftsib); - if (be16_to_cpu(right->bb_numrecs) - 1 >= - XFS_BMAP_BLOCK_IMINRECS(level, cur)) { - if ((error = xfs_btree_lshift(tcur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - if (i) { - ASSERT(be16_to_cpu(block->bb_numrecs) >= - XFS_BMAP_BLOCK_IMINRECS(level, tcur)); - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - tcur = NULL; - if (level > 0) { - if ((error = xfs_btree_decrement(cur, - level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, - ERROR); - goto error0; - } - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - } - rrecs = be16_to_cpu(right->bb_numrecs); - if (lbno != NULLFSBLOCK) { - i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_decrement(tcur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - 
XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - } - } - if (lbno != NULLFSBLOCK) { - i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - /* - * decrement to last in block - */ - if ((error = xfs_btree_decrement(tcur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - i = xfs_btree_firstrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - lbp = tcur->bc_bufs[level]; - left = XFS_BUF_TO_BMBT_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } -#endif - bno = be64_to_cpu(left->bb_rightsib); - if (be16_to_cpu(left->bb_numrecs) - 1 >= - XFS_BMAP_BLOCK_IMINRECS(level, cur)) { - if ((error = xfs_btree_rshift(tcur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - if (i) { - ASSERT(be16_to_cpu(block->bb_numrecs) >= - XFS_BMAP_BLOCK_IMINRECS(level, tcur)); - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - tcur = NULL; - if (level == 0) - cur->bc_ptrs[0]++; - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - } - lrecs = be16_to_cpu(left->bb_numrecs); - } - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - tcur = NULL; - mp = cur->bc_mp; - ASSERT(bno != NULLFSBLOCK); - if (lbno != NULLFSBLOCK && - lrecs + be16_to_cpu(block->bb_numrecs) <= XFS_BMAP_BLOCK_IMAXRECS(level, cur)) { - rbno = bno; - right = block; - rbp = bp; - if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, lbno, 0, &lbp, - XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - left = XFS_BUF_TO_BMBT_BLOCK(lbp); - if ((error = xfs_btree_check_lblock(cur, left, level, lbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - } else if (rbno != NULLFSBLOCK && - rrecs + be16_to_cpu(block->bb_numrecs) <= - XFS_BMAP_BLOCK_IMAXRECS(level, cur)) { - lbno = bno; - left = block; - lbp = bp; - if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, rbno, 0, &rbp, - XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - right = XFS_BUF_TO_BMBT_BLOCK(rbp); - if ((error = xfs_btree_check_lblock(cur, right, level, rbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - lrecs = be16_to_cpu(left->bb_numrecs); - } else { - if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 1; - return 0; - } - numlrecs = be16_to_cpu(left->bb_numrecs); - numrrecs = be16_to_cpu(right->bb_numrecs); - if (level > 0) { - lkp = XFS_BMAP_KEY_IADDR(left, numlrecs + 1, cur); - lpp = XFS_BMAP_PTR_IADDR(left, numlrecs + 1, cur); - rkp = XFS_BMAP_KEY_IADDR(right, 1, cur); - rpp = XFS_BMAP_PTR_IADDR(right, 1, cur); -#ifdef DEBUG - for (i = 0; i < numrrecs; i++) { - if ((error = xfs_btree_check_lptr_disk(cur, rpp[i], level))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - } -#endif - memcpy(lkp, rkp, numrrecs * sizeof(*lkp)); - memcpy(lpp, rpp, numrrecs * sizeof(*lpp)); - xfs_bmbt_log_keys(cur, lbp, numlrecs + 1, numlrecs + numrrecs); - xfs_bmbt_log_ptrs(cur, lbp, numlrecs + 1, numlrecs + numrrecs); - } else { - lrp = XFS_BMAP_REC_IADDR(left, numlrecs + 1, cur); - rrp = XFS_BMAP_REC_IADDR(right, 1, cur); - memcpy(lrp, rrp, numrrecs * sizeof(*lrp)); - xfs_bmbt_log_recs(cur, lbp, numlrecs + 1, numlrecs + numrrecs); - } - be16_add_cpu(&left->bb_numrecs, numrrecs); - left->bb_rightsib = right->bb_rightsib; - xfs_bmbt_log_block(cur, lbp, XFS_BB_RIGHTSIB | XFS_BB_NUMRECS); - if (be64_to_cpu(left->bb_rightsib) != 
NULLDFSBNO) { - if ((error = xfs_btree_read_bufl(mp, cur->bc_tp, - be64_to_cpu(left->bb_rightsib), - 0, &rrbp, XFS_BMAP_BTREE_REF))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - rrblock = XFS_BUF_TO_BMBT_BLOCK(rrbp); - if ((error = xfs_btree_check_lblock(cur, rrblock, level, rrbp))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - rrblock->bb_leftsib = cpu_to_be64(lbno); - xfs_bmbt_log_block(cur, rrbp, XFS_BB_LEFTSIB); - } - xfs_bmap_add_free(XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(rbp)), 1, - cur->bc_private.b.flist, mp); - cur->bc_private.b.ip->i_d.di_nblocks--; - xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, XFS_ILOG_CORE); - XFS_TRANS_MOD_DQUOT_BYINO(mp, cur->bc_tp, cur->bc_private.b.ip, - XFS_TRANS_DQ_BCOUNT, -1L); - xfs_trans_binval(cur->bc_tp, rbp); - if (bp != lbp) { - cur->bc_bufs[level] = lbp; - cur->bc_ptrs[level] += lrecs; - cur->bc_ra[level] = 0; - } else if ((error = xfs_btree_increment(cur, level + 1, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - goto error0; - } - if (level > 0) - cur->bc_ptrs[level]--; - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = 2; - return 0; - -error0: - if (tcur) - xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); - return error; -} - -/* - * Log key values from the btree block. - */ -STATIC void -xfs_bmbt_log_keys( - xfs_btree_cur_t *cur, - xfs_buf_t *bp, - int kfirst, - int klast) -{ - xfs_trans_t *tp; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGBII(cur, bp, kfirst, klast); - tp = cur->bc_tp; - if (bp) { - xfs_bmbt_block_t *block; - int first; - xfs_bmbt_key_t *kp; - int last; - - block = XFS_BUF_TO_BMBT_BLOCK(bp); - kp = XFS_BMAP_KEY_DADDR(block, 1, cur); - first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(tp, bp, first, last); - } else { - xfs_inode_t *ip; - - ip = cur->bc_private.b.ip; - xfs_trans_log_inode(tp, ip, - XFS_ILOG_FBROOT(cur->bc_private.b.whichfork)); - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); -} - -/* - * Log pointer values from the btree block. - */ -STATIC void -xfs_bmbt_log_ptrs( - xfs_btree_cur_t *cur, - xfs_buf_t *bp, - int pfirst, - int plast) -{ - xfs_trans_t *tp; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGBII(cur, bp, pfirst, plast); - tp = cur->bc_tp; - if (bp) { - xfs_bmbt_block_t *block; - int first; - int last; - xfs_bmbt_ptr_t *pp; - - block = XFS_BUF_TO_BMBT_BLOCK(bp); - pp = XFS_BMAP_PTR_DADDR(block, 1, cur); - first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(tp, bp, first, last); - } else { - xfs_inode_t *ip; - - ip = cur->bc_private.b.ip; - xfs_trans_log_inode(tp, ip, - XFS_ILOG_FBROOT(cur->bc_private.b.whichfork)); - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); -} - /* * Determine the extent state. */ @@ -576,42 +121,6 @@ xfs_bmdr_to_bmbt( } /* - * Delete the record pointed to by cur. 
- */ -int /* error */ -xfs_bmbt_delete( - xfs_btree_cur_t *cur, - int *stat) /* success/failure */ -{ - int error; /* error return value */ - int i; - int level; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - for (level = 0, i = 2; i == 2; level++) { - if ((error = xfs_bmbt_delrec(cur, level, &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - } - if (i == 0) { - for (level = 1; level < cur->bc_nlevels; level++) { - if (cur->bc_ptrs[level] == 0) { - if ((error = xfs_btree_decrement(cur, level, - &i))) { - XFS_BMBT_TRACE_CURSOR(cur, ERROR); - return error; - } - break; - } - } - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); - *stat = i; - return 0; -} - -/* * Convert a compressed bmap extent record to an uncompressed form. * This code must be in sync with the routines xfs_bmbt_get_startoff, * xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state. @@ -665,31 +174,6 @@ xfs_bmbt_get_all( } /* - * Get the block pointer for the given level of the cursor. - * Fill in the buffer pointer, if applicable. - */ -xfs_bmbt_block_t * -xfs_bmbt_get_block( - xfs_btree_cur_t *cur, - int level, - xfs_buf_t **bpp) -{ - xfs_ifork_t *ifp; - xfs_bmbt_block_t *rval; - - if (level < cur->bc_nlevels - 1) { - *bpp = cur->bc_bufs[level]; - rval = XFS_BUF_TO_BMBT_BLOCK(*bpp); - } else { - *bpp = NULL; - ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, - cur->bc_private.b.whichfork); - rval = ifp->if_broot; - } - return rval; -} - -/* * Extract the blockcount field from an in memory bmap extent record. */ xfs_filblks_t @@ -1226,6 +710,14 @@ xfs_bmbt_free_block( } STATIC int +xfs_bmbt_get_minrecs( + struct xfs_btree_cur *cur, + int level) +{ + return XFS_BMAP_BLOCK_IMINRECS(level, cur); +} + +STATIC int xfs_bmbt_get_maxrecs( struct xfs_btree_cur *cur, int level) @@ -1389,6 +881,7 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .alloc_block = xfs_bmbt_alloc_block, .free_block = xfs_bmbt_free_block, .get_maxrecs = xfs_bmbt_get_maxrecs, + .get_minrecs = xfs_bmbt_get_minrecs, .get_dmaxrecs = xfs_bmbt_get_dmaxrecs, .init_key_from_rec = xfs_bmbt_init_key_from_rec, .init_rec_from_key = xfs_bmbt_init_rec_from_key, diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 703fe2e..952ab39 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -237,10 +237,7 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; * Prototypes for xfs_bmap.c to call. */ extern void xfs_bmdr_to_bmbt(xfs_bmdr_block_t *, int, xfs_bmbt_block_t *, int); -extern int xfs_bmbt_delete(struct xfs_btree_cur *, int *); extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); -extern xfs_bmbt_block_t *xfs_bmbt_get_block(struct xfs_btree_cur *cur, - int, struct xfs_buf **bpp); extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r); extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); extern xfs_fileoff_t xfs_bmbt_get_startoff(xfs_bmbt_rec_host_t *r); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 75a8a7b..28cc768 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -3171,3 +3171,596 @@ out0: XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); return 0; } + +STATIC int +xfs_btree_dec_cursor( + struct xfs_btree_cur *cur, + int level, + int *stat) +{ + int error; + int i; + + if (level > 0) { + error = xfs_btree_decrement(cur, level, &i); + if (error) + return error; + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; +} + +/* + * Single level of the btree record deletion routine. + * Delete record pointed to by cur/level. 
+ * Remove the record from its block then rebalance the tree. + * Return 0 for error, 1 for done, 2 to go on to the next level. + */ +STATIC int /* error */ +xfs_btree_delrec( + struct xfs_btree_cur *cur, /* btree cursor */ + int level, /* level removing record from */ + int *stat) /* fail/done/go-on */ +{ + struct xfs_btree_block *block; /* btree block */ + union xfs_btree_ptr cptr; /* current block ptr */ + struct xfs_buf *bp; /* buffer for block */ + int error; /* error return value */ + int i; /* loop counter */ + union xfs_btree_key key; /* storage for keyp */ + union xfs_btree_key *keyp = &key; /* passed to the next level */ + union xfs_btree_ptr lptr; /* left sibling block ptr */ + struct xfs_buf *lbp; /* left buffer pointer */ + struct xfs_btree_block *left; /* left btree block */ + int lrecs = 0; /* left record count */ + int ptr; /* key/record index */ + union xfs_btree_ptr rptr; /* right sibling block ptr */ + struct xfs_buf *rbp; /* right buffer pointer */ + struct xfs_btree_block *right; /* right btree block */ + struct xfs_btree_block *rrblock; /* right-right btree block */ + struct xfs_buf *rrbp; /* right-right buffer pointer */ + int rrecs = 0; /* right record count */ + struct xfs_btree_cur *tcur; /* temporary btree cursor */ + int numrecs; /* temporary numrec count */ + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_TRACE_ARGI(cur, level); + + tcur = NULL; + + /* Get the index of the entry being deleted, check for nothing there. */ + ptr = cur->bc_ptrs[level]; + if (ptr == 0) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + + /* Get the buffer & block containing the record or key/ptr. */ + block = xfs_btree_get_block(cur, level, &bp); + numrecs = xfs_btree_get_numrecs(block); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, level, bp); + if (error) + goto error0; +#endif + + /* Fail if we're off the end of the block. */ + if (ptr > numrecs) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 0; + return 0; + } + + XFS_BTREE_STATS_INC(cur, delrec); + XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr); + + /* Excise the entries being deleted. */ + if (level > 0) { + /* It's a nonleaf. operate on keys and ptrs */ + union xfs_btree_key *lkp; + union xfs_btree_ptr *lpp; + + lkp = xfs_btree_key_addr(cur, ptr + 1, block); + lpp = xfs_btree_ptr_addr(cur, ptr + 1, block); + +#ifdef DEBUG + for (i = 0; i < numrecs - ptr; i++) { + error = xfs_btree_check_ptr(cur, lpp, i, level); + if (error) + goto error0; + } +#endif + + if (ptr < numrecs) { + xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr); + xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr); + xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); + xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); + } + + /* + * If it's the first record in the block, we'll need to pass a + * key up to the next level (updkey). + */ + if (ptr == 1) + keyp = xfs_btree_key_addr(cur, 1, block); + } else { + /* It's a leaf. operate on records */ + if (ptr < numrecs) { + xfs_btree_shift_recs(cur, + xfs_btree_rec_addr(cur, ptr + 1, block), + -1, numrecs - ptr); + xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); + } + + /* + * If it's the first record in the block, we'll need a key + * structure to pass up to the next level (updkey). + */ + if (ptr == 1) { + cur->bc_ops->init_key_from_rec(&key, + xfs_btree_rec_addr(cur, 1, block)); + keyp = &key; + } + } + + /* + * Decrement and log the number of entries in the block. 
+ */ + xfs_btree_set_numrecs(block, --numrecs); + xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); + + /* + * If we are tracking the last record in the tree and + * we are at the far right edge of the tree, update it. + */ + if (xfs_btree_is_lastrec(cur, block, level)) { + cur->bc_ops->update_lastrec(cur, block, NULL, + ptr, LASTREC_DELREC); + } + + /* + * We're at the root level. First, shrink the root block in-memory. + * Try to get rid of the next level down. If we can't then there's + * nothing left to do. + */ + if (level == cur->bc_nlevels - 1) { + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { + xfs_iroot_realloc(cur->bc_private.b.ip, -1, + cur->bc_private.b.whichfork); + + error = xfs_btree_kill_iroot(cur); + if (error) + goto error0; + + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + *stat = 1; + return 0; + } + + /* + * If this is the root level, and there's only one entry left, + * and it's NOT the leaf level, then we can get rid of this + * level. + */ + if (numrecs == 1 && level > 0) { + union xfs_btree_ptr *pp; + /* + * pp is still set to the first pointer in the block. + * Make it the new root of the btree. + */ + pp = xfs_btree_ptr_addr(cur, 1, block); + error = cur->bc_ops->kill_root(cur, bp, level, pp); + if (error) + goto error0; + } else if (level > 0) { + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + } + *stat = 1; + return 0; + } + + /* + * If we deleted the leftmost entry in the block, update the + * key values above us in the tree. + */ + if (ptr == 1) { + error = xfs_btree_updkey(cur, keyp, level + 1); + if (error) + goto error0; + } + + /* + * If the number of records remaining in the block is at least + * the minimum, we're done. + */ + if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) { + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + return 0; + } + + /* + * Otherwise, we have to move some records around to keep the + * tree balanced. Look at the left and right sibling blocks to + * see if we can re-balance by moving only one record. + */ + xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); + xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB); + + if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { + /* + * One child of root, need to get a chance to copy its contents + * into the root and delete it. Can't go up to next level, + * there's nothing to delete there. + */ + if (xfs_btree_ptr_is_null(cur, &rptr) && + xfs_btree_ptr_is_null(cur, &lptr) && + level == cur->bc_nlevels - 2) { + error = xfs_btree_kill_iroot(cur); + if (!error) + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + return 0; + } + } + + ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) || + !xfs_btree_ptr_is_null(cur, &lptr)); + + /* + * Duplicate the cursor so our btree manipulations here won't + * disrupt the next level up. + */ + error = xfs_btree_dup_cursor(cur, &tcur); + if (error) + goto error0; + + /* + * If there's a right sibling, see if it's ok to shift an entry + * out of it. + */ + if (!xfs_btree_ptr_is_null(cur, &rptr)) { + /* + * Move the temp cursor to the last entry in the next block. + * Actually any entry but the first would suffice. + */ + i = xfs_btree_lastrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + error = xfs_btree_increment(tcur, level, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + i = xfs_btree_lastrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + /* Grab a pointer to the block. 
*/ + right = xfs_btree_get_block(tcur, level, &rbp); +#ifdef DEBUG + error = xfs_btree_check_block(tcur, right, level, rbp); + if (error) + goto error0; +#endif + /* Grab the current block number, for future use. */ + xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB); + + /* + * If right block is full enough so that removing one entry + * won't make it too empty, and left-shifting an entry out + * of right to us works, we're done. + */ + if (xfs_btree_get_numrecs(right) - 1 >= + cur->bc_ops->get_minrecs(tcur, level)) { + error = xfs_btree_lshift(tcur, level, &i); + if (error) + goto error0; + if (i) { + ASSERT(xfs_btree_get_numrecs(block) >= + cur->bc_ops->get_minrecs(tcur, level)); + + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + tcur = NULL; + + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + return 0; + } + } + + /* + * Otherwise, grab the number of records in right for + * future reference, and fix up the temp cursor to point + * to our block again (last record). + */ + rrecs = xfs_btree_get_numrecs(right); + if (!xfs_btree_ptr_is_null(cur, &lptr)) { + i = xfs_btree_firstrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + error = xfs_btree_decrement(tcur, level, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + } + } + + /* + * If there's a left sibling, see if it's ok to shift an entry + * out of it. + */ + if (!xfs_btree_ptr_is_null(cur, &lptr)) { + /* + * Move the temp cursor to the first entry in the + * previous block. + */ + i = xfs_btree_firstrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + error = xfs_btree_decrement(tcur, level, &i); + if (error) + goto error0; + i = xfs_btree_firstrec(tcur, level); + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + /* Grab a pointer to the block. */ + left = xfs_btree_get_block(tcur, level, &lbp); +#ifdef DEBUG + error = xfs_btree_check_block(cur, left, level, lbp); + if (error) + goto error0; +#endif + /* Grab the current block number, for future use. */ + xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB); + + /* + * If left block is full enough so that removing one entry + * won't make it too empty, and right-shifting an entry out + * of left to us works, we're done. + */ + if (xfs_btree_get_numrecs(left) - 1 >= + cur->bc_ops->get_minrecs(tcur, level)) { + error = xfs_btree_rshift(tcur, level, &i); + if (error) + goto error0; + if (i) { + ASSERT(xfs_btree_get_numrecs(block) >= + cur->bc_ops->get_minrecs(tcur, level)); + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + tcur = NULL; + if (level == 0) + cur->bc_ptrs[0]++; + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = 1; + return 0; + } + } + + /* + * Otherwise, grab the number of records in right for + * future reference. + */ + lrecs = xfs_btree_get_numrecs(left); + } + + /* Delete the temp cursor, we're done with it. */ + xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); + tcur = NULL; + + /* If here, we need to do a join to keep the tree balanced. */ + ASSERT(!xfs_btree_ptr_is_null(cur, &cptr)); + + if (!xfs_btree_ptr_is_null(cur, &lptr) && + lrecs + xfs_btree_get_numrecs(block) <= + cur->bc_ops->get_maxrecs(cur, level)) { + /* + * Set "right" to be the starting block, + * "left" to be the left neighbor. + */ + rptr = cptr; + right = block; + rbp = bp; + error = xfs_btree_read_buf_block(cur, &lptr, level, + 0, &left, &lbp); + if (error) + goto error0; + + /* + * If that won't work, see if we can join with the right neighbor block. 
+ */ + } else if (!xfs_btree_ptr_is_null(cur, &rptr) && + rrecs + xfs_btree_get_numrecs(block) <= + cur->bc_ops->get_maxrecs(cur, level)) { + /* + * Set "left" to be the starting block, + * "right" to be the right neighbor. + */ + lptr = cptr; + left = block; + lbp = bp; + error = xfs_btree_read_buf_block(cur, &rptr, level, + 0, &right, &rbp); + if (error) + goto error0; + + /* + * Otherwise, we can't fix the imbalance. + * Just return. This is probably a logic error, but it's not fatal. + */ + } else { + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + return 0; + } + + rrecs = xfs_btree_get_numrecs(right); + lrecs = xfs_btree_get_numrecs(left); + + /* + * We're now going to join "left" and "right" by moving all the stuff + * in "right" to "left" and deleting "right". + */ + XFS_BTREE_STATS_ADD(cur, moves, rrecs); + if (level > 0) { + /* It's a non-leaf. Move keys and pointers. */ + union xfs_btree_key *lkp; /* left btree key */ + union xfs_btree_ptr *lpp; /* left address pointer */ + union xfs_btree_key *rkp; /* right btree key */ + union xfs_btree_ptr *rpp; /* right address pointer */ + + lkp = xfs_btree_key_addr(cur, lrecs + 1, left); + lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left); + rkp = xfs_btree_key_addr(cur, 1, right); + rpp = xfs_btree_ptr_addr(cur, 1, right); +#ifdef DEBUG + for (i = 1; i < rrecs; i++) { + error = xfs_btree_check_ptr(cur, rpp, i, level); + if (error) + goto error0; + } +#endif + xfs_btree_copy_keys(cur, lkp, rkp, rrecs); + xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs); + + xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); + xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); + } else { + /* It's a leaf. Move records. */ + union xfs_btree_rec *lrp; /* left record pointer */ + union xfs_btree_rec *rrp; /* right record pointer */ + + lrp = xfs_btree_rec_addr(cur, lrecs + 1, left); + rrp = xfs_btree_rec_addr(cur, 1, right); + + xfs_btree_copy_recs(cur, lrp, rrp, rrecs); + xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); + } + + XFS_BTREE_STATS_INC(cur, join); + + /* + * Fix up the the number of records and right block pointer in the + * surviving block, and log it. + */ + xfs_btree_set_numrecs(left, lrecs + rrecs); + xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB), + xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); + xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); + + /* If there is a right sibling, point it to the remaining block. */ + xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); + if (!xfs_btree_ptr_is_null(cur, &cptr)) { + error = xfs_btree_read_buf_block(cur, &cptr, level, + 0, &rrblock, &rrbp); + if (error) + goto error0; + xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); + xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); + } + + /* Free the deleted block. */ + error = cur->bc_ops->free_block(cur, rbp); + if (error) + goto error0; + XFS_BTREE_STATS_INC(cur, free); + + /* + * If we joined with the left neighbor, set the buffer in the + * cursor to the left block, and fix up the index. + */ + if (bp != lbp) { + cur->bc_bufs[level] = lbp; + cur->bc_ptrs[level] += lrecs; + cur->bc_ra[level] = 0; + } + /* + * If we joined with the right neighbor and there's a level above + * us, increment the cursor at that level. 
+ */ + else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || + (level + 1 < cur->bc_nlevels)) { + error = xfs_btree_increment(cur, level + 1, &i); + if (error) + goto error0; + } + + /* + * Readjust the ptr at this level if it's not a leaf, since it's + * still pointing at the deletion point, which makes the cursor + * inconsistent. If this makes the ptr 0, the caller fixes it up. + * We can't use decrement because it would change the next level up. + */ + if (level > 0) + cur->bc_ptrs[level]--; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + /* Return value means the next level up has something to do. */ + *stat = 2; + return 0; + +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + if (tcur) + xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); + return error; +} + +/* + * Delete the record pointed to by cur. + * The cursor refers to the place where the record was (could be inserted) + * when the operation returns. + */ +int /* error */ +xfs_btree_delete( + struct xfs_btree_cur *cur, + int *stat) /* success/failure */ +{ + int error; /* error return value */ + int level; + int i; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + + /* + * Go up the tree, starting at leaf level. + * + * If 2 is returned then a join was done; go to the next level. + * Otherwise we are done. + */ + for (level = 0, i = 2; i == 2; level++) { + error = xfs_btree_delrec(cur, level, &i); + if (error) + goto error0; + } + + if (i == 0) { + for (level = 1; level < cur->bc_nlevels; level++) { + if (cur->bc_ptrs[level] == 0) { + error = xfs_btree_decrement(cur, level, &i); + if (error) + goto error0; + break; + } + } + } + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + *stat = i; + return 0; +error0: + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index ff2552f..06ef792 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -192,6 +192,8 @@ struct xfs_btree_ops { /* update btree root pointer */ void (*set_root)(struct xfs_btree_cur *cur, union xfs_btree_ptr *nptr, int level_change); + int (*kill_root)(struct xfs_btree_cur *cur, struct xfs_buf *bp, + int level, union xfs_btree_ptr *newroot); /* block allocation / freeing */ int (*alloc_block)(struct xfs_btree_cur *cur, @@ -207,6 +209,7 @@ struct xfs_btree_ops { int ptr, int reason); /* records in block/level */ + int (*get_minrecs)(struct xfs_btree_cur *cur, int level); int (*get_maxrecs)(struct xfs_btree_cur *cur, int level); /* records on disk. Matter for the root in inode case. */ @@ -251,6 +254,7 @@ struct xfs_btree_ops { */ #define LASTREC_UPDATE 0 #define LASTREC_INSREC 1 +#define LASTREC_DELREC 2 /* @@ -562,6 +566,7 @@ int xfs_btree_new_root(struct xfs_btree_cur *, int *); int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); int xfs_btree_kill_iroot(struct xfs_btree_cur *); int xfs_btree_insert(struct xfs_btree_cur *, int *); +int xfs_btree_delete(struct xfs_btree_cur *, int *); /* * Helpers. 
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index b68e73b..f13f59b 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1168,8 +1168,8 @@ xfs_difree( xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); - if ((error = xfs_inobt_delete(cur, &i))) { - cmn_err(CE_WARN, "xfs_difree: xfs_inobt_delete returned an error %d on %s.\n", + if ((error = xfs_btree_delete(cur, &i))) { + cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n", error, mp->m_fsname); goto error0; } diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 90f1d4e..6c0a07d 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -40,611 +40,6 @@ #include "xfs_alloc.h" #include "xfs_error.h" -STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int); -STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int); -STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int); - -/* - * Single level of the xfs_inobt_delete record deletion routine. - * Delete record pointed to by cur/level. - * Remove the record from its block then rebalance the tree. - * Return 0 for error, 1 for done, 2 to go on to the next level. - */ -STATIC int /* error */ -xfs_inobt_delrec( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level removing record from */ - int *stat) /* fail/done/go-on */ -{ - xfs_buf_t *agbp; /* buffer for a.g. inode header */ - xfs_mount_t *mp; /* mount structure */ - xfs_agi_t *agi; /* allocation group inode header */ - xfs_inobt_block_t *block; /* btree block record/key lives in */ - xfs_agblock_t bno; /* btree block number */ - xfs_buf_t *bp; /* buffer for block */ - int error; /* error return value */ - int i; /* loop index */ - xfs_inobt_key_t key; /* kp points here if block is level 0 */ - xfs_inobt_key_t *kp = NULL; /* pointer to btree keys */ - xfs_agblock_t lbno; /* left block's block number */ - xfs_buf_t *lbp; /* left block's buffer pointer */ - xfs_inobt_block_t *left; /* left btree block */ - xfs_inobt_key_t *lkp; /* left block key pointer */ - xfs_inobt_ptr_t *lpp; /* left block address pointer */ - int lrecs = 0; /* number of records in left block */ - xfs_inobt_rec_t *lrp; /* left block record pointer */ - xfs_inobt_ptr_t *pp = NULL; /* pointer to btree addresses */ - int ptr; /* index in btree block for this rec */ - xfs_agblock_t rbno; /* right block's block number */ - xfs_buf_t *rbp; /* right block's buffer pointer */ - xfs_inobt_block_t *right; /* right btree block */ - xfs_inobt_key_t *rkp; /* right block key pointer */ - xfs_inobt_rec_t *rp; /* pointer to btree records */ - xfs_inobt_ptr_t *rpp; /* right block address pointer */ - int rrecs = 0; /* number of records in right block */ - int numrecs; - xfs_inobt_rec_t *rrp; /* right block record pointer */ - xfs_btree_cur_t *tcur; /* temporary btree cursor */ - - mp = cur->bc_mp; - - /* - * Get the index of the entry being deleted, check for nothing there. - */ - ptr = cur->bc_ptrs[level]; - if (ptr == 0) { - *stat = 0; - return 0; - } - - /* - * Get the buffer & block containing the record or key/ptr. - */ - bp = cur->bc_bufs[level]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, level, bp))) - return error; -#endif - /* - * Fail if we're off the end of the block. 
- */ - - numrecs = be16_to_cpu(block->bb_numrecs); - if (ptr > numrecs) { - *stat = 0; - return 0; - } - /* - * It's a nonleaf. Excise the key and ptr being deleted, by - * sliding the entries past them down one. - * Log the changed areas of the block. - */ - if (level > 0) { - kp = XFS_INOBT_KEY_ADDR(block, 1, cur); - pp = XFS_INOBT_PTR_ADDR(block, 1, cur); -#ifdef DEBUG - for (i = ptr; i < numrecs; i++) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i]), level))) - return error; - } -#endif - if (ptr < numrecs) { - memmove(&kp[ptr - 1], &kp[ptr], - (numrecs - ptr) * sizeof(*kp)); - memmove(&pp[ptr - 1], &pp[ptr], - (numrecs - ptr) * sizeof(*kp)); - xfs_inobt_log_keys(cur, bp, ptr, numrecs - 1); - xfs_inobt_log_ptrs(cur, bp, ptr, numrecs - 1); - } - } - /* - * It's a leaf. Excise the record being deleted, by sliding the - * entries past it down one. Log the changed areas of the block. - */ - else { - rp = XFS_INOBT_REC_ADDR(block, 1, cur); - if (ptr < numrecs) { - memmove(&rp[ptr - 1], &rp[ptr], - (numrecs - ptr) * sizeof(*rp)); - xfs_inobt_log_recs(cur, bp, ptr, numrecs - 1); - } - /* - * If it's the first record in the block, we'll need a key - * structure to pass up to the next level (updkey). - */ - if (ptr == 1) { - key.ir_startino = rp->ir_startino; - kp = &key; - } - } - /* - * Decrement and log the number of entries in the block. - */ - numrecs--; - block->bb_numrecs = cpu_to_be16(numrecs); - xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS); - /* - * Is this the root level? If so, we're almost done. - */ - if (level == cur->bc_nlevels - 1) { - /* - * If this is the root level, - * and there's only one entry left, - * and it's NOT the leaf level, - * then we can get rid of this level. - */ - if (numrecs == 1 && level > 0) { - agbp = cur->bc_private.a.agbp; - agi = XFS_BUF_TO_AGI(agbp); - /* - * pp is still set to the first pointer in the block. - * Make it the new root of the btree. - */ - bno = be32_to_cpu(agi->agi_root); - agi->agi_root = *pp; - be32_add_cpu(&agi->agi_level, -1); - /* - * Free the block. - */ - if ((error = xfs_free_extent(cur->bc_tp, - XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, bno), 1))) - return error; - xfs_trans_binval(cur->bc_tp, bp); - xfs_ialloc_log_agi(cur->bc_tp, agbp, - XFS_AGI_ROOT | XFS_AGI_LEVEL); - /* - * Update the cursor so there's one fewer level. - */ - cur->bc_bufs[level] = NULL; - cur->bc_nlevels--; - } else if (level > 0 && - (error = xfs_btree_decrement(cur, level, &i))) - return error; - *stat = 1; - return 0; - } - /* - * If we deleted the leftmost entry in the block, update the - * key values above us in the tree. - */ - if (ptr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)kp, level + 1))) - return error; - /* - * If the number of records remaining in the block is at least - * the minimum, we're done. - */ - if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) { - if (level > 0 && - (error = xfs_btree_decrement(cur, level, &i))) - return error; - *stat = 1; - return 0; - } - /* - * Otherwise, we have to move some records around to keep the - * tree balanced. Look at the left and right sibling blocks to - * see if we can re-balance by moving only one record. - */ - rbno = be32_to_cpu(block->bb_rightsib); - lbno = be32_to_cpu(block->bb_leftsib); - bno = NULLAGBLOCK; - ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK); - /* - * Duplicate the cursor so our btree manipulations here won't - * disrupt the next level up. 
- */ - if ((error = xfs_btree_dup_cursor(cur, &tcur))) - return error; - /* - * If there's a right sibling, see if it's ok to shift an entry - * out of it. - */ - if (rbno != NULLAGBLOCK) { - /* - * Move the temp cursor to the last entry in the next block. - * Actually any entry but the first would suffice. - */ - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_btree_increment(tcur, level, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - i = xfs_btree_lastrec(tcur, level); - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - /* - * Grab a pointer to the block. - */ - rbp = tcur->bc_bufs[level]; - right = XFS_BUF_TO_INOBT_BLOCK(rbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) - goto error0; -#endif - /* - * Grab the current block number, for future use. - */ - bno = be32_to_cpu(right->bb_leftsib); - /* - * If right block is full enough so that removing one entry - * won't make it too empty, and left-shifting an entry out - * of right to us works, we're done. - */ - if (be16_to_cpu(right->bb_numrecs) - 1 >= - XFS_INOBT_BLOCK_MINRECS(level, cur)) { - if ((error = xfs_btree_lshift(tcur, level, &i))) - goto error0; - if (i) { - ASSERT(be16_to_cpu(block->bb_numrecs) >= - XFS_INOBT_BLOCK_MINRECS(level, cur)); - xfs_btree_del_cursor(tcur, - XFS_BTREE_NOERROR); - if (level > 0 && - (error = xfs_btree_decrement(cur, level, - &i))) - return error; - *stat = 1; - return 0; - } - } - /* - * Otherwise, grab the number of records in right for - * future reference, and fix up the temp cursor to point - * to our block again (last record). - */ - rrecs = be16_to_cpu(right->bb_numrecs); - if (lbno != NULLAGBLOCK) { - xfs_btree_firstrec(tcur, level); - if ((error = xfs_btree_decrement(tcur, level, &i))) - goto error0; - } - } - /* - * If there's a left sibling, see if it's ok to shift an entry - * out of it. - */ - if (lbno != NULLAGBLOCK) { - /* - * Move the temp cursor to the first entry in the - * previous block. - */ - xfs_btree_firstrec(tcur, level); - if ((error = xfs_btree_decrement(tcur, level, &i))) - goto error0; - xfs_btree_firstrec(tcur, level); - /* - * Grab a pointer to the block. - */ - lbp = tcur->bc_bufs[level]; - left = XFS_BUF_TO_INOBT_BLOCK(lbp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - goto error0; -#endif - /* - * Grab the current block number, for future use. - */ - bno = be32_to_cpu(left->bb_rightsib); - /* - * If left block is full enough so that removing one entry - * won't make it too empty, and right-shifting an entry out - * of left to us works, we're done. - */ - if (be16_to_cpu(left->bb_numrecs) - 1 >= - XFS_INOBT_BLOCK_MINRECS(level, cur)) { - if ((error = xfs_btree_rshift(tcur, level, &i))) - goto error0; - if (i) { - ASSERT(be16_to_cpu(block->bb_numrecs) >= - XFS_INOBT_BLOCK_MINRECS(level, cur)); - xfs_btree_del_cursor(tcur, - XFS_BTREE_NOERROR); - if (level == 0) - cur->bc_ptrs[0]++; - *stat = 1; - return 0; - } - } - /* - * Otherwise, grab the number of records in right for - * future reference. - */ - lrecs = be16_to_cpu(left->bb_numrecs); - } - /* - * Delete the temp cursor, we're done with it. - */ - xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); - /* - * If here, we need to do a join to keep the tree balanced. - */ - ASSERT(bno != NULLAGBLOCK); - /* - * See if we can join with the left neighbor block. 
- */ - if (lbno != NULLAGBLOCK && - lrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) { - /* - * Set "right" to be the starting block, - * "left" to be the left neighbor. - */ - rbno = bno; - right = block; - rrecs = be16_to_cpu(right->bb_numrecs); - rbp = bp; - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.a.agno, lbno, 0, &lbp, - XFS_INO_BTREE_REF))) - return error; - left = XFS_BUF_TO_INOBT_BLOCK(lbp); - lrecs = be16_to_cpu(left->bb_numrecs); - if ((error = xfs_btree_check_sblock(cur, left, level, lbp))) - return error; - } - /* - * If that won't work, see if we can join with the right neighbor block. - */ - else if (rbno != NULLAGBLOCK && - rrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) { - /* - * Set "left" to be the starting block, - * "right" to be the right neighbor. - */ - lbno = bno; - left = block; - lrecs = be16_to_cpu(left->bb_numrecs); - lbp = bp; - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.a.agno, rbno, 0, &rbp, - XFS_INO_BTREE_REF))) - return error; - right = XFS_BUF_TO_INOBT_BLOCK(rbp); - rrecs = be16_to_cpu(right->bb_numrecs); - if ((error = xfs_btree_check_sblock(cur, right, level, rbp))) - return error; - } - /* - * Otherwise, we can't fix the imbalance. - * Just return. This is probably a logic error, but it's not fatal. - */ - else { - if (level > 0 && (error = xfs_btree_decrement(cur, level, &i))) - return error; - *stat = 1; - return 0; - } - /* - * We're now going to join "left" and "right" by moving all the stuff - * in "right" to "left" and deleting "right". - */ - if (level > 0) { - /* - * It's a non-leaf. Move keys and pointers. - */ - lkp = XFS_INOBT_KEY_ADDR(left, lrecs + 1, cur); - lpp = XFS_INOBT_PTR_ADDR(left, lrecs + 1, cur); - rkp = XFS_INOBT_KEY_ADDR(right, 1, cur); - rpp = XFS_INOBT_PTR_ADDR(right, 1, cur); -#ifdef DEBUG - for (i = 0; i < rrecs; i++) { - if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level))) - return error; - } -#endif - memcpy(lkp, rkp, rrecs * sizeof(*lkp)); - memcpy(lpp, rpp, rrecs * sizeof(*lpp)); - xfs_inobt_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); - xfs_inobt_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); - } else { - /* - * It's a leaf. Move records. - */ - lrp = XFS_INOBT_REC_ADDR(left, lrecs + 1, cur); - rrp = XFS_INOBT_REC_ADDR(right, 1, cur); - memcpy(lrp, rrp, rrecs * sizeof(*lrp)); - xfs_inobt_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); - } - /* - * If we joined with the left neighbor, set the buffer in the - * cursor to the left block, and fix up the index. - */ - if (bp != lbp) { - xfs_btree_setbuf(cur, level, lbp); - cur->bc_ptrs[level] += lrecs; - } - /* - * If we joined with the right neighbor and there's a level above - * us, increment the cursor at that level. - */ - else if (level + 1 < cur->bc_nlevels && - (error = xfs_btree_increment(cur, level + 1, &i))) - return error; - /* - * Fix up the number of records in the surviving block. - */ - lrecs += rrecs; - left->bb_numrecs = cpu_to_be16(lrecs); - /* - * Fix up the right block pointer in the surviving block, and log it. - */ - left->bb_rightsib = right->bb_rightsib; - xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); - /* - * If there is a right sibling now, make it point to the - * remaining block. 
- */ - if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) { - xfs_inobt_block_t *rrblock; - xfs_buf_t *rrbp; - - if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0, - &rrbp, XFS_INO_BTREE_REF))) - return error; - rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp); - if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))) - return error; - rrblock->bb_leftsib = cpu_to_be32(lbno); - xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB); - } - /* - * Free the deleting block. - */ - if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp, - cur->bc_private.a.agno, rbno), 1))) - return error; - xfs_trans_binval(cur->bc_tp, rbp); - /* - * Readjust the ptr at this level if it's not a leaf, since it's - * still pointing at the deletion point, which makes the cursor - * inconsistent. If this makes the ptr 0, the caller fixes it up. - * We can't use decrement because it would change the next level up. - */ - if (level > 0) - cur->bc_ptrs[level]--; - /* - * Return value means the next level up has something to do. - */ - *stat = 2; - return 0; - -error0: - xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); - return error; -} - -/* - * Log header fields from a btree block. - */ -STATIC void -xfs_inobt_log_block( - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *bp, /* buffer containing btree block */ - int fields) /* mask of fields: XFS_BB_... */ -{ - int first; /* first byte offset logged */ - int last; /* last byte offset logged */ - static const short offsets[] = { /* table of offsets */ - offsetof(xfs_inobt_block_t, bb_magic), - offsetof(xfs_inobt_block_t, bb_level), - offsetof(xfs_inobt_block_t, bb_numrecs), - offsetof(xfs_inobt_block_t, bb_leftsib), - offsetof(xfs_inobt_block_t, bb_rightsib), - sizeof(xfs_inobt_block_t) - }; - - xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last); - xfs_trans_log_buf(tp, bp, first, last); -} - -/* - * Log keys from a btree block (nonleaf). - */ -STATIC void -xfs_inobt_log_keys( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_buf_t *bp, /* buffer containing btree block */ - int kfirst, /* index of first key to log */ - int klast) /* index of last key to log */ -{ - xfs_inobt_block_t *block; /* btree block to log from */ - int first; /* first byte offset logged */ - xfs_inobt_key_t *kp; /* key pointer in btree block */ - int last; /* last byte offset logged */ - - block = XFS_BUF_TO_INOBT_BLOCK(bp); - kp = XFS_INOBT_KEY_ADDR(block, 1, cur); - first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(cur->bc_tp, bp, first, last); -} - -/* - * Log block pointer fields from a btree block (nonleaf). - */ -STATIC void -xfs_inobt_log_ptrs( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_buf_t *bp, /* buffer containing btree block */ - int pfirst, /* index of first pointer to log */ - int plast) /* index of last pointer to log */ -{ - xfs_inobt_block_t *block; /* btree block to log from */ - int first; /* first byte offset logged */ - int last; /* last byte offset logged */ - xfs_inobt_ptr_t *pp; /* block-pointer pointer in btree blk */ - - block = XFS_BUF_TO_INOBT_BLOCK(bp); - pp = XFS_INOBT_PTR_ADDR(block, 1, cur); - first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(cur->bc_tp, bp, first, last); -} - -/* - * Log records from a btree block (leaf). 
- */ -STATIC void -xfs_inobt_log_recs( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_buf_t *bp, /* buffer containing btree block */ - int rfirst, /* index of first record to log */ - int rlast) /* index of last record to log */ -{ - xfs_inobt_block_t *block; /* btree block to log from */ - int first; /* first byte offset logged */ - int last; /* last byte offset logged */ - xfs_inobt_rec_t *rp; /* record pointer for btree block */ - - block = XFS_BUF_TO_INOBT_BLOCK(bp); - rp = XFS_INOBT_REC_ADDR(block, 1, cur); - first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(cur->bc_tp, bp, first, last); -} - - -/* - * Externally visible routines. - */ - -/* - * Delete the record pointed to by cur. - * The cursor refers to the place where the record was (could be inserted) - * when the operation returns. - */ -int /* error */ -xfs_inobt_delete( - xfs_btree_cur_t *cur, /* btree cursor */ - int *stat) /* success/failure */ -{ - int error; - int i; /* result code */ - int level; /* btree level */ - - /* - * Go up the tree, starting at leaf level. - * If 2 is returned then a join was done; go to the next level. - * Otherwise we are done. - */ - for (level = 0, i = 2; i == 2; level++) { - if ((error = xfs_inobt_delrec(cur, level, &i))) - return error; - } - if (i == 0) { - for (level = 1; level < cur->bc_nlevels; level++) { - if (cur->bc_ptrs[level] == 0) { - if ((error = xfs_btree_decrement(cur, level, &i))) - return error; - break; - } - } - } - *stat = i; - return 0; -} - /* * Get the data from the pointed-to record. @@ -690,6 +85,13 @@ xfs_inobt_get_rec( return 0; } +STATIC int +xfs_inobt_get_minrecs( + struct xfs_btree_cur *cur, + int level) +{ + return cur->bc_mp->m_inobt_mnr[level != 0]; +} STATIC struct xfs_btree_cur * xfs_inobt_dup_cursor( @@ -829,6 +231,38 @@ xfs_inobt_key_diff( cur->bc_rec.i.ir_startino; } +STATIC int +xfs_inobt_kill_root( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + int level, + union xfs_btree_ptr *newroot) +{ + int error; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_STATS_INC(cur, killroot); + + /* + * Update the root pointer, decreasing the level by 1 and then + * free the old root. + */ + xfs_inobt_set_root(cur, newroot, -1); + error = xfs_inobt_free_block(cur, bp); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + + XFS_BTREE_STATS_INC(cur, free); + + cur->bc_bufs[level] = NULL; + cur->bc_nlevels--; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +} + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_inobt_trace_buf; @@ -901,8 +335,10 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, .set_root = xfs_inobt_set_root, + .kill_root = xfs_inobt_kill_root, .alloc_block = xfs_inobt_alloc_block, .free_block = xfs_inobt_free_block, + .get_minrecs = xfs_inobt_get_minrecs, .get_maxrecs = xfs_inobt_get_maxrecs, .init_key_from_rec = xfs_inobt_init_key_from_rec, .init_rec_from_key = xfs_inobt_init_rec_from_key, diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index c9cbc4f..3eff3b6 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -117,13 +117,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; i, XFS_INOBT_BLOCK_MAXRECS(1, cur))) /* - * Delete the record pointed to by cur. - * The cursor refers to the place where the record was (could be inserted) - * when the operation returns. 
- */ -extern int xfs_inobt_delete(struct xfs_btree_cur *cur, int *stat); - -/* * Get the data from the pointed-to record. */ extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, -- cgit v0.10.2 From 8cc938fe4237e50bea4aa557ed53b06de2319d49 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:58:11 +1100 Subject: [XFS] implement generic xfs_btree_get_rec Not really much reason to make it generic given that it's so small, but this is the last non-method in xfs_alloc_btree.c and xfs_ialloc_btree.c, so it makes the whole btree implementation more structured. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32206a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index e9c7024..54fa69e 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -155,6 +155,27 @@ xfs_alloc_update( } /* + * Get the data from the pointed-to record. + */ +STATIC int /* error */ +xfs_alloc_get_rec( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t *bno, /* output: starting block of extent */ + xfs_extlen_t *len, /* output: length of extent */ + int *stat) /* output: success/failure */ +{ + union xfs_btree_rec *rec; + int error; + + error = xfs_btree_get_rec(cur, &rec, stat); + if (!error && *stat == 1) { + *bno = be32_to_cpu(rec->alloc.ar_startblock); + *len = be32_to_cpu(rec->alloc.ar_blockcount); + } + return error; +} + +/* * Compute aligned version of the found extent. * Takes alignment and min length into account. */ diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index d256b51..4d44f03 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -41,50 +41,6 @@ #include "xfs_error.h" -/* - * Get the data from the pointed-to record. - */ -int /* error */ -xfs_alloc_get_rec( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agblock_t *bno, /* output: starting block of extent */ - xfs_extlen_t *len, /* output: length of extent */ - int *stat) /* output: success/failure */ -{ - xfs_alloc_block_t *block; /* btree block */ -#ifdef DEBUG - int error; /* error return value */ -#endif - int ptr; /* record number */ - - ptr = cur->bc_ptrs[0]; - block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0]))) - return error; -#endif - /* - * Off the right end or left end, return failure. - */ - if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) { - *stat = 0; - return 0; - } - /* - * Point to the record and extract its data. - */ - { - xfs_alloc_rec_t *rec; /* record data */ - - rec = XFS_ALLOC_REC_ADDR(block, ptr, cur); - *bno = be32_to_cpu(rec->ar_startblock); - *len = be32_to_cpu(rec->ar_blockcount); - } - *stat = 1; - return 0; -} - - STATIC struct xfs_btree_cur * xfs_allocbt_dup_cursor( struct xfs_btree_cur *cur) diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 8d2e3ec..22f1d70 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -94,12 +94,6 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t; #define XFS_ALLOC_PTR_ADDR(bb,i,cur) \ XFS_BTREE_PTR_ADDR(xfs_alloc, bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur)) -/* - * Get the data from the pointed-to record. 
- */ -extern int xfs_alloc_get_rec(struct xfs_btree_cur *cur, xfs_agblock_t *bno, - xfs_extlen_t *len, int *stat); - extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 28cc768..8503ed5 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -3764,3 +3764,44 @@ error0: XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } + +/* + * Get the data from the pointed-to record. + */ +int /* error */ +xfs_btree_get_rec( + struct xfs_btree_cur *cur, /* btree cursor */ + union xfs_btree_rec **recp, /* output: btree record */ + int *stat) /* output: success/failure */ +{ + struct xfs_btree_block *block; /* btree block */ + struct xfs_buf *bp; /* buffer pointer */ + int ptr; /* record number */ +#ifdef DEBUG + int error; /* error return value */ +#endif + + ptr = cur->bc_ptrs[0]; + block = xfs_btree_get_block(cur, 0, &bp); + +#ifdef DEBUG + error = xfs_btree_check_block(cur, block, 0, bp); + if (error) + return error; +#endif + + /* + * Off the right end or left end, return failure. + */ + if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) { + *stat = 0; + return 0; + } + + /* + * Point to the record and extract its data. + */ + *recp = xfs_btree_rec_addr(cur, ptr, block); + *stat = 1; + return 0; +} diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 06ef792..cee3684 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -567,6 +567,7 @@ int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); int xfs_btree_kill_iroot(struct xfs_btree_cur *); int xfs_btree_insert(struct xfs_btree_cur *, int *); int xfs_btree_delete(struct xfs_btree_cur *, int *); +int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); /* * Helpers. diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index f13f59b..c8a56c52 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -192,6 +192,29 @@ xfs_inobt_update( } /* + * Get the data from the pointed-to record. + */ +int /* error */ +xfs_inobt_get_rec( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agino_t *ino, /* output: starting inode of chunk */ + __int32_t *fcnt, /* output: number of free inodes */ + xfs_inofree_t *free, /* output: free inode mask */ + int *stat) /* output: success/failure */ +{ + union xfs_btree_rec *rec; + int error; + + error = xfs_btree_get_rec(cur, &rec, stat); + if (!error && *stat == 1) { + *ino = be32_to_cpu(rec->inobt.ir_startino); + *fcnt = be32_to_cpu(rec->inobt.ir_freecount); + *free = be64_to_cpu(rec->inobt.ir_free); + } + return error; +} + +/* * Allocate new inodes in the allocation group specified by agbp. * Return 0 for success, else error code. */ diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index 4026578..c5745f6 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -168,6 +168,11 @@ int xfs_inobt_lookup_ge(struct xfs_btree_cur *cur, xfs_agino_t ino, int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino, __int32_t fcnt, xfs_inofree_t free, int *stat); +/* + * Get the data from the pointed-to record. + */ +extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, + __int32_t *fcnt, xfs_inofree_t *free, int *stat); #endif /* __KERNEL__ */ diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 6c0a07d..9f4e33c 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -41,50 +41,6 @@ #include "xfs_error.h" -/* - * Get the data from the pointed-to record. 
- */ -int /* error */ -xfs_inobt_get_rec( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_agino_t *ino, /* output: starting inode of chunk */ - __int32_t *fcnt, /* output: number of free inodes */ - xfs_inofree_t *free, /* output: free inode mask */ - int *stat) /* output: success/failure */ -{ - xfs_inobt_block_t *block; /* btree block */ - xfs_buf_t *bp; /* buffer containing btree block */ -#ifdef DEBUG - int error; /* error return value */ -#endif - int ptr; /* record number */ - xfs_inobt_rec_t *rec; /* record data */ - - bp = cur->bc_bufs[0]; - ptr = cur->bc_ptrs[0]; - block = XFS_BUF_TO_INOBT_BLOCK(bp); -#ifdef DEBUG - if ((error = xfs_btree_check_sblock(cur, block, 0, bp))) - return error; -#endif - /* - * Off the right end or left end, return failure. - */ - if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) { - *stat = 0; - return 0; - } - /* - * Point to the record and extract its data. - */ - rec = XFS_INOBT_REC_ADDR(block, ptr, cur); - *ino = be32_to_cpu(rec->ir_startino); - *fcnt = be32_to_cpu(rec->ir_freecount); - *free = be64_to_cpu(rec->ir_free); - *stat = 1; - return 0; -} - STATIC int xfs_inobt_get_minrecs( struct xfs_btree_cur *cur, diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 3eff3b6..ff7406b 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -116,13 +116,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; (XFS_BTREE_PTR_ADDR(xfs_inobt, bb, \ i, XFS_INOBT_BLOCK_MAXRECS(1, cur))) -/* - * Get the data from the pointed-to record. - */ -extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, - __int32_t *fcnt, xfs_inofree_t *free, int *stat); - - extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); -- cgit v0.10.2 From fd6bcc5b63051392ba709a8fd33173b263669e0a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:58:21 +1100 Subject: [XFS] kill xfs_bmbt_log_block and xfs_bmbt_log_recs These are equivalent to the xfs_btree_* versions, and the only remaining caller can be switched to the generic one after they are exported. Also remove some now dead infrastructure in xfs_bmap_btree.c. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32207a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 74761ca..5cceb8d 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -3563,8 +3563,8 @@ xfs_bmap_extents_to_btree( * Do all this logging at the end so that * the root is at the right level. */ - xfs_bmbt_log_block(cur, abp, XFS_BB_ALL_BITS); - xfs_bmbt_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs)); + xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS); + xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs)); ASSERT(*curp == NULL); *curp = cur; *logflagsp = XFS_ILOG_CORE | XFS_ILOG_FBROOT(whichfork); diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 5b80305..7a02d39 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -44,33 +44,6 @@ #include "xfs_error.h" #include "xfs_quota.h" -#undef EXIT - -#define ENTRY XBT_ENTRY -#define ERROR XBT_ERROR -#define EXIT XBT_EXIT - -/* - * Keep the XFS_BMBT_TRACE_ names around for now until all code using them - * is converted to be generic and thus switches to the XFS_BTREE_TRACE_ names. 
- */ -#define XFS_BMBT_TRACE_ARGBI(c,b,i) \ - XFS_BTREE_TRACE_ARGBI(c,b,i) -#define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \ - XFS_BTREE_TRACE_ARGBII(c,b,i,j) -#define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \ - XFS_BTREE_TRACE_ARGFFFI(c,o,b,i,j) -#define XFS_BMBT_TRACE_ARGI(c,i) \ - XFS_BTREE_TRACE_ARGI(c,i) -#define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \ - XFS_BTREE_TRACE_ARGIPK(c,i,(union xfs_btree_ptr)f,s) -#define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ - XFS_BTREE_TRACE_ARGIPR(c,i, \ - (union xfs_btree_ptr)f, (union xfs_btree_rec *)r) -#define XFS_BMBT_TRACE_ARGIK(c,i,k) \ - XFS_BTREE_TRACE_ARGIK(c,i,(union xfs_btree_key *)k) -#define XFS_BMBT_TRACE_CURSOR(c,s) \ - XFS_BTREE_TRACE_CURSOR(c,s) /* * Determine the extent state. @@ -259,67 +232,6 @@ xfs_bmbt_disk_get_startoff( XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; } -/* - * Log fields from the btree block header. - */ -void -xfs_bmbt_log_block( - xfs_btree_cur_t *cur, - xfs_buf_t *bp, - int fields) -{ - int first; - int last; - xfs_trans_t *tp; - static const short offsets[] = { - offsetof(xfs_bmbt_block_t, bb_magic), - offsetof(xfs_bmbt_block_t, bb_level), - offsetof(xfs_bmbt_block_t, bb_numrecs), - offsetof(xfs_bmbt_block_t, bb_leftsib), - offsetof(xfs_bmbt_block_t, bb_rightsib), - sizeof(xfs_bmbt_block_t) - }; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGBI(cur, bp, fields); - tp = cur->bc_tp; - if (bp) { - xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, - &last); - xfs_trans_log_buf(tp, bp, first, last); - } else - xfs_trans_log_inode(tp, cur->bc_private.b.ip, - XFS_ILOG_FBROOT(cur->bc_private.b.whichfork)); - XFS_BMBT_TRACE_CURSOR(cur, EXIT); -} - -/* - * Log record values from the btree block. - */ -void -xfs_bmbt_log_recs( - xfs_btree_cur_t *cur, - xfs_buf_t *bp, - int rfirst, - int rlast) -{ - xfs_bmbt_block_t *block; - int first; - int last; - xfs_bmbt_rec_t *rp; - xfs_trans_t *tp; - - XFS_BMBT_TRACE_CURSOR(cur, ENTRY); - XFS_BMBT_TRACE_ARGBII(cur, bp, rfirst, rlast); - ASSERT(bp); - tp = cur->bc_tp; - block = XFS_BUF_TO_BMBT_BLOCK(bp); - rp = XFS_BMAP_REC_DADDR(block, 1, cur); - first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block); - last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block); - xfs_trans_log_buf(tp, bp, first, last); - XFS_BMBT_TRACE_CURSOR(cur, EXIT); -} /* * Set all the fields in a bmap extent record from the arguments. diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 952ab39..6f38e25 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -247,10 +247,6 @@ extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r); extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r); -extern void xfs_bmbt_log_block(struct xfs_btree_cur *, struct xfs_buf *, int); -extern void xfs_bmbt_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, - int); - extern void xfs_bmbt_set_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); extern void xfs_bmbt_set_allf(xfs_bmbt_rec_host_t *r, xfs_fileoff_t o, xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 8503ed5..88eb00b 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -1309,7 +1309,7 @@ xfs_btree_log_keys( /* * Log record values from the btree block. */ -STATIC void +void xfs_btree_log_recs( struct xfs_btree_cur *cur, struct xfs_buf *bp, @@ -1357,7 +1357,7 @@ xfs_btree_log_ptrs( /* * Log fields from a btree block header. 
*/ -STATIC void +void xfs_btree_log_block( struct xfs_btree_cur *cur, /* btree cursor */ struct xfs_buf *bp, /* buffer containing btree block */ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index cee3684..d6cc71e 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -570,6 +570,12 @@ int xfs_btree_delete(struct xfs_btree_cur *, int *); int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *); /* + * Internal btree helpers also used by xfs_bmap.c. + */ +void xfs_btree_log_block(struct xfs_btree_cur *, struct xfs_buf *, int); +void xfs_btree_log_recs(struct xfs_btree_cur *, struct xfs_buf *, int, int); + +/* * Helpers. */ static inline int xfs_btree_get_numrecs(struct xfs_btree_block *block) -- cgit v0.10.2 From 4a26e66e7728112f0e1cd7eca3bcc430b3a221c9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:58:32 +1100 Subject: [XFS] add keys_inorder and recs_inorder btree methods Add methods to check whether two keys/records are in the righ order. This replaces the xfs_btree_check_key and xfs_btree_check_rec methods. For the callers from xfs_bmap.c just opencode the bmbt-specific asserts. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32208a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 4d44f03..9e63f8c 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -311,6 +311,45 @@ xfs_allocbt_kill_root( return 0; } +#ifdef DEBUG +STATIC int +xfs_allocbt_keys_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + if (cur->bc_btnum == XFS_BTNUM_BNO) { + return be32_to_cpu(k1->alloc.ar_startblock) < + be32_to_cpu(k2->alloc.ar_startblock); + } else { + return be32_to_cpu(k1->alloc.ar_blockcount) < + be32_to_cpu(k2->alloc.ar_blockcount) || + (k1->alloc.ar_blockcount == k2->alloc.ar_blockcount && + be32_to_cpu(k1->alloc.ar_startblock) < + be32_to_cpu(k2->alloc.ar_startblock)); + } +} + +STATIC int +xfs_allocbt_recs_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_rec *r1, + union xfs_btree_rec *r2) +{ + if (cur->bc_btnum == XFS_BTNUM_BNO) { + return be32_to_cpu(r1->alloc.ar_startblock) + + be32_to_cpu(r1->alloc.ar_blockcount) <= + be32_to_cpu(r2->alloc.ar_startblock); + } else { + return be32_to_cpu(r1->alloc.ar_blockcount) < + be32_to_cpu(r2->alloc.ar_blockcount) || + (r1->alloc.ar_blockcount == r2->alloc.ar_blockcount && + be32_to_cpu(r1->alloc.ar_startblock) < + be32_to_cpu(r2->alloc.ar_startblock)); + } +} +#endif /* DEBUG */ + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_allocbt_trace_buf; @@ -395,6 +434,11 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, .key_diff = xfs_allocbt_key_diff, +#ifdef DEBUG + .keys_inorder = xfs_allocbt_keys_inorder, + .recs_inorder = xfs_allocbt_recs_inorder, +#endif + #ifdef XFS_BTREE_TRACE .trace_enter = xfs_allocbt_trace_enter, .trace_cursor = xfs_allocbt_trace_cursor, diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 5cceb8d..b7f99d7 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -6195,7 +6195,8 @@ xfs_check_block( } if (prevp) { - xfs_btree_check_key(XFS_BTNUM_BMAP, prevp, keyp); + ASSERT(be64_to_cpu(prevp->br_startoff) < + be64_to_cpu(keyp->br_startoff)); } prevp = keyp; @@ -6338,11 +6339,15 @@ xfs_bmap_check_leaf_extents( ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1); if (i) { - xfs_btree_check_rec(XFS_BTNUM_BMAP, &last, ep); + 
ASSERT(xfs_bmbt_disk_get_startoff(&last) + + xfs_bmbt_disk_get_blockcount(&last) <= + xfs_bmbt_disk_get_startoff(ep)); } for (j = 1; j < num_recs; j++) { nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1); - xfs_btree_check_rec(XFS_BTNUM_BMAP, ep, nextp); + ASSERT(xfs_bmbt_disk_get_startoff(ep) + + xfs_bmbt_disk_get_blockcount(ep) <= + xfs_bmbt_disk_get_startoff(nextp)); ep = nextp; } diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 7a02d39..c5eeb32 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -699,6 +699,29 @@ xfs_bmbt_key_diff( cur->bc_rec.b.br_startoff; } +#ifdef DEBUG +STATIC int +xfs_bmbt_keys_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + return be64_to_cpu(k1->bmbt.br_startoff) < + be64_to_cpu(k2->bmbt.br_startoff); +} + +STATIC int +xfs_bmbt_recs_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_rec *r1, + union xfs_btree_rec *r2) +{ + return xfs_bmbt_disk_get_startoff(&r1->bmbt) + + xfs_bmbt_disk_get_blockcount(&r1->bmbt) <= + xfs_bmbt_disk_get_startoff(&r2->bmbt); +} +#endif /* DEBUG */ + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_bmbt_trace_buf; @@ -801,6 +824,11 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, .key_diff = xfs_bmbt_key_diff, +#ifdef DEBUG + .keys_inorder = xfs_bmbt_keys_inorder, + .recs_inorder = xfs_bmbt_recs_inorder, +#endif + #ifdef XFS_BTREE_TRACE .trace_enter = xfs_bmbt_trace_enter, .trace_cursor = xfs_bmbt_trace_cursor, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 88eb00b..d667d30 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -52,122 +52,6 @@ const __uint32_t xfs_magics[XFS_BTNUM_MAX] = { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC }; -/* - * External routines. - */ - -#ifdef DEBUG -/* - * Debug routine: check that keys are in the right order. - */ -void -xfs_btree_check_key( - xfs_btnum_t btnum, /* btree identifier */ - void *ak1, /* pointer to left (lower) key */ - void *ak2) /* pointer to right (higher) key */ -{ - switch (btnum) { - case XFS_BTNUM_BNO: { - xfs_alloc_key_t *k1; - xfs_alloc_key_t *k2; - - k1 = ak1; - k2 = ak2; - ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)); - break; - } - case XFS_BTNUM_CNT: { - xfs_alloc_key_t *k1; - xfs_alloc_key_t *k2; - - k1 = ak1; - k2 = ak2; - ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) || - (k1->ar_blockcount == k2->ar_blockcount && - be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock))); - break; - } - case XFS_BTNUM_BMAP: { - xfs_bmbt_key_t *k1; - xfs_bmbt_key_t *k2; - - k1 = ak1; - k2 = ak2; - ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff)); - break; - } - case XFS_BTNUM_INO: { - xfs_inobt_key_t *k1; - xfs_inobt_key_t *k2; - - k1 = ak1; - k2 = ak2; - ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino)); - break; - } - default: - ASSERT(0); - } -} - -/* - * Debug routine: check that records are in the right order. 
- */ -void -xfs_btree_check_rec( - xfs_btnum_t btnum, /* btree identifier */ - void *ar1, /* pointer to left (lower) record */ - void *ar2) /* pointer to right (higher) record */ -{ - switch (btnum) { - case XFS_BTNUM_BNO: { - xfs_alloc_rec_t *r1; - xfs_alloc_rec_t *r2; - - r1 = ar1; - r2 = ar2; - ASSERT(be32_to_cpu(r1->ar_startblock) + - be32_to_cpu(r1->ar_blockcount) <= - be32_to_cpu(r2->ar_startblock)); - break; - } - case XFS_BTNUM_CNT: { - xfs_alloc_rec_t *r1; - xfs_alloc_rec_t *r2; - - r1 = ar1; - r2 = ar2; - ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) || - (r1->ar_blockcount == r2->ar_blockcount && - be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock))); - break; - } - case XFS_BTNUM_BMAP: { - xfs_bmbt_rec_t *r1; - xfs_bmbt_rec_t *r2; - - r1 = ar1; - r2 = ar2; - ASSERT(xfs_bmbt_disk_get_startoff(r1) + - xfs_bmbt_disk_get_blockcount(r1) <= - xfs_bmbt_disk_get_startoff(r2)); - break; - } - case XFS_BTNUM_INO: { - xfs_inobt_rec_t *r1; - xfs_inobt_rec_t *r2; - - r1 = ar1; - r2 = ar2; - ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <= - be32_to_cpu(r2->ir_startino)); - break; - } - default: - ASSERT(0); - } -} -#endif /* DEBUG */ int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_lblock( @@ -2032,9 +1916,8 @@ xfs_btree_lshift( xfs_btree_log_keys(cur, lbp, lrecs, lrecs); xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs); - xfs_btree_check_key(cur->bc_btnum, - xfs_btree_key_addr(cur, lrecs - 1, left), - lkp); + ASSERT(cur->bc_ops->keys_inorder(cur, + xfs_btree_key_addr(cur, lrecs - 1, left), lkp)); } else { /* It's a leaf. Move records. */ union xfs_btree_rec *lrp; /* left record pointer */ @@ -2045,9 +1928,8 @@ xfs_btree_lshift( xfs_btree_copy_recs(cur, lrp, rrp, 1); xfs_btree_log_recs(cur, lbp, lrecs, lrecs); - xfs_btree_check_rec(cur->bc_btnum, - xfs_btree_rec_addr(cur, lrecs - 1, left), - lrp); + ASSERT(cur->bc_ops->recs_inorder(cur, + xfs_btree_rec_addr(cur, lrecs - 1, left), lrp)); } xfs_btree_set_numrecs(left, lrecs); @@ -2222,8 +2104,8 @@ xfs_btree_rshift( xfs_btree_log_keys(cur, rbp, 1, rrecs + 1); xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1); - xfs_btree_check_key(cur->bc_btnum, rkp, - xfs_btree_key_addr(cur, 2, right)); + ASSERT(cur->bc_ops->keys_inorder(cur, rkp, + xfs_btree_key_addr(cur, 2, right))); } else { /* It's a leaf. make a hole in the records */ union xfs_btree_rec *lrp; @@ -2241,8 +2123,8 @@ xfs_btree_rshift( cur->bc_ops->init_key_from_rec(&key, rrp); rkp = &key; - xfs_btree_check_rec(cur->bc_btnum, rrp, - xfs_btree_rec_addr(cur, 2, right)); + ASSERT(cur->bc_ops->recs_inorder(cur, rrp, + xfs_btree_rec_addr(cur, 2, right))); } /* @@ -2849,11 +2731,11 @@ xfs_btree_insrec( /* Check that the new entry is being inserted in the right place. 
*/ if (ptr <= numrecs) { if (level == 0) { - xfs_btree_check_rec(cur->bc_btnum, recp, - xfs_btree_rec_addr(cur, ptr, block)); + ASSERT(cur->bc_ops->recs_inorder(cur, recp, + xfs_btree_rec_addr(cur, ptr, block))); } else { - xfs_btree_check_key(cur->bc_btnum, &key, - xfs_btree_key_addr(cur, ptr, block)); + ASSERT(cur->bc_ops->keys_inorder(cur, &key, + xfs_btree_key_addr(cur, ptr, block))); } } #endif @@ -2923,8 +2805,8 @@ xfs_btree_insrec( xfs_btree_log_keys(cur, bp, ptr, numrecs); #ifdef DEBUG if (ptr < numrecs) { - xfs_btree_check_key(cur->bc_btnum, kp, - xfs_btree_key_addr(cur, ptr + 1, block)); + ASSERT(cur->bc_ops->keys_inorder(cur, kp, + xfs_btree_key_addr(cur, ptr + 1, block))); } #endif } else { @@ -2941,8 +2823,8 @@ xfs_btree_insrec( xfs_btree_log_recs(cur, bp, ptr, numrecs); #ifdef DEBUG if (ptr < numrecs) { - xfs_btree_check_rec(cur->bc_btnum, rp, - xfs_btree_rec_addr(cur, ptr + 1, block)); + ASSERT(cur->bc_ops->recs_inorder(cur, rp, + xfs_btree_rec_addr(cur, ptr + 1, block))); } #endif } diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index d6cc71e..25a488d 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -229,6 +229,18 @@ struct xfs_btree_ops { __int64_t (*key_diff)(struct xfs_btree_cur *cur, union xfs_btree_key *key); +#ifdef DEBUG + /* check that k1 is lower than k2 */ + int (*keys_inorder)(struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2); + + /* check that r1 is lower than r2 */ + int (*recs_inorder)(struct xfs_btree_cur *cur, + union xfs_btree_rec *r1, + union xfs_btree_rec *r2); +#endif + /* btree tracing */ #ifdef XFS_BTREE_TRACE void (*trace_enter)(struct xfs_btree_cur *, const char *, @@ -379,30 +391,6 @@ xfs_btree_check_ptr( int index, /* offset from ptr to check */ int level); /* btree block level */ -#ifdef DEBUG - -/* - * Debug routine: check that keys are in the right order. - */ -void -xfs_btree_check_key( - xfs_btnum_t btnum, /* btree identifier */ - void *ak1, /* pointer to left (lower) key */ - void *ak2); /* pointer to right (higher) key */ - -/* - * Debug routine: check that records are in the right order. - */ -void -xfs_btree_check_rec( - xfs_btnum_t btnum, /* btree identifier */ - void *ar1, /* pointer to left (lower) record */ - void *ar2); /* pointer to right (higher) record */ -#else -#define xfs_btree_check_key(a, b, c) -#define xfs_btree_check_rec(a, b, c) -#endif /* DEBUG */ - /* * Delete the btree cursor. 
*/ diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 9f4e33c..dcd4a95 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -219,6 +219,28 @@ xfs_inobt_kill_root( return 0; } +#ifdef DEBUG +STATIC int +xfs_inobt_keys_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_key *k1, + union xfs_btree_key *k2) +{ + return be32_to_cpu(k1->inobt.ir_startino) < + be32_to_cpu(k2->inobt.ir_startino); +} + +STATIC int +xfs_inobt_recs_inorder( + struct xfs_btree_cur *cur, + union xfs_btree_rec *r1, + union xfs_btree_rec *r2) +{ + return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <= + be32_to_cpu(r2->inobt.ir_startino); +} +#endif /* DEBUG */ + #ifdef XFS_BTREE_TRACE ktrace_t *xfs_inobt_trace_buf; @@ -302,6 +324,11 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, .key_diff = xfs_inobt_key_diff, +#ifdef DEBUG + .keys_inorder = xfs_inobt_keys_inorder, + .recs_inorder = xfs_inobt_recs_inorder, +#endif + #ifdef XFS_BTREE_TRACE .trace_enter = xfs_inobt_trace_enter, .trace_cursor = xfs_inobt_trace_cursor, -- cgit v0.10.2 From 3cc7524c8445e6244b055f3fa338529188c7c260 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:58:41 +1100 Subject: [XFS] mark various functions in xfs_btree.c static Lots of functionality in xfs_btree.c isn't needed by callers outside of this file anymore, so mark these functions static. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32209a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index d667d30..b735c72 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -87,7 +87,7 @@ xfs_btree_check_lblock( return 0; } -int /* error (0 or EFSCORRUPTED) */ +STATIC int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_sblock( struct xfs_btree_cur *cur, /* btree cursor */ struct xfs_btree_sblock *block, /* btree short form block pointer */ @@ -163,7 +163,7 @@ xfs_btree_check_lptr( /* * Check that (short) pointer is ok. */ -int /* error (0 or EFSCORRUPTED) */ +STATIC int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_sptr( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* btree block disk address */ @@ -182,7 +182,7 @@ xfs_btree_check_sptr( /* * Check that block ptr is ok. */ -int /* error (0 or EFSCORRUPTED) */ +STATIC int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_ptr( struct xfs_btree_cur *cur, /* btree cursor */ union xfs_btree_ptr *ptr, /* btree block disk address */ @@ -523,7 +523,7 @@ xfs_btree_islastblock( * Change the cursor to point to the first record at the given level. * Other levels are unaffected. */ -int /* success=1, failure=0 */ +STATIC int /* success=1, failure=0 */ xfs_btree_firstrec( xfs_btree_cur_t *cur, /* btree cursor */ int level) /* level to change */ @@ -552,7 +552,7 @@ xfs_btree_firstrec( * Change the cursor to point to the last record in the current block * at the given level. Other levels are unaffected. */ -int /* success=1, failure=0 */ +STATIC int /* success=1, failure=0 */ xfs_btree_lastrec( xfs_btree_cur_t *cur, /* btree cursor */ int level) /* level to change */ @@ -775,7 +775,7 @@ xfs_btree_readahead_sblock( * Read-ahead btree blocks, at the given level. * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. 
*/ -int +STATIC int xfs_btree_readahead( struct xfs_btree_cur *cur, /* btree cursor */ int lev, /* level in btree */ @@ -1711,7 +1711,7 @@ error0: /* * Update keys at all levels from here to the root along the cursor's path. */ -int +STATIC int xfs_btree_updkey( struct xfs_btree_cur *cur, union xfs_btree_key *keyp, @@ -1821,7 +1821,7 @@ error0: * Move 1 record left from cur/level if possible. * Update cur to reflect the new path. */ -int /* error */ +STATIC int /* error */ xfs_btree_lshift( struct xfs_btree_cur *cur, int level, @@ -2004,7 +2004,7 @@ error0: * Move 1 record right from cur/level if possible. * Update cur to reflect the new path. */ -int /* error */ +STATIC int /* error */ xfs_btree_rshift( struct xfs_btree_cur *cur, int level, @@ -2180,7 +2180,7 @@ error1: * Return new block number and the key to its first * record (to be inserted into parent). */ -int /* error */ +STATIC int /* error */ xfs_btree_split( struct xfs_btree_cur *cur, int level, @@ -2465,7 +2465,7 @@ error0: /* * Allocate a new root block, fill it in. */ -int /* error */ +STATIC int /* error */ xfs_btree_new_root( struct xfs_btree_cur *cur, /* btree cursor */ int *stat) /* success/failure */ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 25a488d..2a22c58 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -340,16 +340,6 @@ xfs_btree_check_lblock( struct xfs_buf *bp); /* buffer containing block, if any */ /* - * Check that short form block header is ok. - */ -int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_sblock( - struct xfs_btree_cur *cur, /* btree cursor */ - struct xfs_btree_sblock *block, /* btree short form block pointer */ - int level, /* level of the btree block */ - struct xfs_buf *bp); /* buffer containing block */ - -/* * Check that block header is ok. */ int @@ -368,29 +358,6 @@ xfs_btree_check_lptr( xfs_dfsbno_t ptr, /* btree block disk address */ int level); /* btree block level */ -#define xfs_btree_check_lptr_disk(cur, ptr, level) \ - xfs_btree_check_lptr(cur, be64_to_cpu(ptr), level) - - -/* - * Check that (short) pointer is ok. - */ -int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_sptr( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agblock_t ptr, /* btree block disk address */ - int level); /* btree block level */ - -/* - * Check that (short) pointer is ok. - */ -int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_ptr( - struct xfs_btree_cur *cur, /* btree cursor */ - union xfs_btree_ptr *ptr, /* btree block disk address */ - int index, /* offset from ptr to check */ - int level); /* btree block level */ - /* * Delete the btree cursor. */ @@ -409,15 +376,6 @@ xfs_btree_dup_cursor( xfs_btree_cur_t **ncur);/* output cursor */ /* - * Change the cursor to point to the first record in the current block - * at the given level. Other levels are unaffected. - */ -int /* success=1, failure=0 */ -xfs_btree_firstrec( - xfs_btree_cur_t *cur, /* btree cursor */ - int level); /* level to change */ - -/* * Get a buffer for the block, return it with no data read. * Long-form addressing. */ @@ -449,15 +407,6 @@ xfs_btree_islastblock( int level); /* level to check */ /* - * Change the cursor to point to the last record in the current block - * at the given level. Other levels are unaffected. - */ -int /* success=1, failure=0 */ -xfs_btree_lastrec( - xfs_btree_cur_t *cur, /* btree cursor */ - int level); /* level to change */ - -/* * Compute first and last byte offsets for the fields given. * Interprets the offsets table, which contains struct field offsets. 
*/ @@ -518,16 +467,6 @@ xfs_btree_reada_bufs( xfs_extlen_t count); /* count of filesystem blocks */ /* - * Read-ahead btree blocks, at the given level. - * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. - */ -int /* readahead block count */ -xfs_btree_readahead( - xfs_btree_cur_t *cur, /* btree cursor */ - int lev, /* level in btree */ - int lr); /* left/right bits */ - -/* * Set the buffer for level "lev" in the cursor to bp, releasing * any previous buffer. */ @@ -544,13 +483,7 @@ xfs_btree_setbuf( int xfs_btree_increment(struct xfs_btree_cur *, int, int *); int xfs_btree_decrement(struct xfs_btree_cur *, int, int *); int xfs_btree_lookup(struct xfs_btree_cur *, xfs_lookup_t, int *); -int xfs_btree_updkey(struct xfs_btree_cur *, union xfs_btree_key *, int); int xfs_btree_update(struct xfs_btree_cur *, union xfs_btree_rec *); -int xfs_btree_lshift(struct xfs_btree_cur *, int, int *); -int xfs_btree_rshift(struct xfs_btree_cur *, int, int *); -int xfs_btree_split(struct xfs_btree_cur *, int, union xfs_btree_ptr *, - union xfs_btree_key *, struct xfs_btree_cur **, int *); -int xfs_btree_new_root(struct xfs_btree_cur *, int *); int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *); int xfs_btree_kill_iroot(struct xfs_btree_cur *); int xfs_btree_insert(struct xfs_btree_cur *, int *); -- cgit v0.10.2 From 7f7c39ccb6045cf1fd5e7684a484c445291b44d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 16:58:50 +1100 Subject: [XFS] make btree tracing generic Make the existing bmap btree tracing generic so that it applies to all btree types. Some fragments lifted from a patch by Dave Chinner. This adds two files that were missed from the previous btree tracing checkin. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32210a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: Bill O'Donnell Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_btree_trace.c b/fs/xfs/xfs_btree_trace.c new file mode 100644 index 0000000..44ff942 --- /dev/null +++ b/fs/xfs/xfs_btree_trace.c @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_btree_trace.h" + +STATIC void +xfs_btree_trace_ptr( + struct xfs_btree_cur *cur, + union xfs_btree_ptr ptr, + __psunsigned_t *high, + __psunsigned_t *low) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + __u64 val = be64_to_cpu(ptr.l); + *high = val >> 32; + *low = (int)val; + } else { + *high = 0; + *low = be32_to_cpu(ptr.s); + } +} + +/* + * Add a trace buffer entry for arguments, for a buffer & 1 integer arg. 
+ */ +void +xfs_btree_trace_argbi( + const char *func, + struct xfs_btree_cur *cur, + struct xfs_buf *b, + int i, + int line) +{ + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBI, + line, (__psunsigned_t)b, i, 0, 0, 0, 0, 0, + 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for a buffer & 2 integer args. + */ +void +xfs_btree_trace_argbii( + const char *func, + struct xfs_btree_cur *cur, + struct xfs_buf *b, + int i0, + int i1, + int line) +{ + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBII, + line, (__psunsigned_t)b, i0, i1, 0, 0, 0, 0, + 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for 3 block-length args + * and an integer arg. + */ +void +xfs_btree_trace_argfffi( + const char *func, + struct xfs_btree_cur *cur, + xfs_dfiloff_t o, + xfs_dfsbno_t b, + xfs_dfilblks_t i, + int j, + int line) +{ + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGFFFI, + line, + o >> 32, (int)o, + b >> 32, (int)b, + i >> 32, (int)i, + (int)j, 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for one integer arg. + */ +void +xfs_btree_trace_argi( + const char *func, + struct xfs_btree_cur *cur, + int i, + int line) +{ + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGI, + line, i, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for int, fsblock, key. + */ +void +xfs_btree_trace_argipk( + const char *func, + struct xfs_btree_cur *cur, + int i, + union xfs_btree_ptr ptr, + union xfs_btree_key *key, + int line) +{ + __psunsigned_t high, low; + __uint64_t l0, l1; + + xfs_btree_trace_ptr(cur, ptr, &high, &low); + cur->bc_ops->trace_key(cur, key, &l0, &l1); + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPK, + line, i, high, low, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for int, fsblock, rec. + */ +void +xfs_btree_trace_argipr( + const char *func, + struct xfs_btree_cur *cur, + int i, + union xfs_btree_ptr ptr, + union xfs_btree_rec *rec, + int line) +{ + __psunsigned_t high, low; + __uint64_t l0, l1, l2; + + xfs_btree_trace_ptr(cur, ptr, &high, &low); + cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2); + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPR, + line, i, + high, low, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + l2 >> 32, (int)l2, + 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for int, key. + */ +void +xfs_btree_trace_argik( + const char *func, + struct xfs_btree_cur *cur, + int i, + union xfs_btree_key *key, + int line) +{ + __uint64_t l0, l1; + + cur->bc_ops->trace_key(cur, key, &l0, &l1); + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIK, + line, i, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + 0, 0, 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for arguments, for record. + */ +void +xfs_btree_trace_argr( + const char *func, + struct xfs_btree_cur *cur, + union xfs_btree_rec *rec, + int line) +{ + __uint64_t l0, l1, l2; + + cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2); + cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGR, + line, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + l2 >> 32, (int)l2, + 0, 0, 0, 0, 0); +} + +/* + * Add a trace buffer entry for the cursor/operation. 
+ */ +void +xfs_btree_trace_cursor( + const char *func, + struct xfs_btree_cur *cur, + int type, + int line) +{ + __uint32_t s0; + __uint64_t l0, l1; + char *s; + + switch (type) { + case XBT_ARGS: + s = "args"; + break; + case XBT_ENTRY: + s = "entry"; + break; + case XBT_ERROR: + s = "error"; + break; + case XBT_EXIT: + s = "exit"; + break; + default: + s = "unknown"; + break; + } + + cur->bc_ops->trace_cursor(cur, &s0, &l0, &l1); + cur->bc_ops->trace_enter(cur, func, s, XFS_BTREE_KTRACE_CUR, line, + s0, + l0 >> 32, (int)l0, + l1 >> 32, (int)l1, + (__psunsigned_t)cur->bc_bufs[0], + (__psunsigned_t)cur->bc_bufs[1], + (__psunsigned_t)cur->bc_bufs[2], + (__psunsigned_t)cur->bc_bufs[3], + (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1], + (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]); +} diff --git a/fs/xfs/xfs_btree_trace.h b/fs/xfs/xfs_btree_trace.h new file mode 100644 index 0000000..b3f5eb3 --- /dev/null +++ b/fs/xfs/xfs_btree_trace.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_BTREE_TRACE_H__ +#define __XFS_BTREE_TRACE_H__ + +struct xfs_btree_cur; +struct xfs_buf; + + +/* + * Trace hooks. + * i,j = integer (32 bit) + * b = btree block buffer (xfs_buf_t) + * p = btree ptr + * r = btree record + * k = btree key + */ + +#ifdef XFS_BTREE_TRACE + +/* + * Trace buffer entry types. + */ +#define XFS_BTREE_KTRACE_ARGBI 1 +#define XFS_BTREE_KTRACE_ARGBII 2 +#define XFS_BTREE_KTRACE_ARGFFFI 3 +#define XFS_BTREE_KTRACE_ARGI 4 +#define XFS_BTREE_KTRACE_ARGIPK 5 +#define XFS_BTREE_KTRACE_ARGIPR 6 +#define XFS_BTREE_KTRACE_ARGIK 7 +#define XFS_BTREE_KTRACE_ARGR 8 +#define XFS_BTREE_KTRACE_CUR 9 + +/* + * Sub-types for cursor traces. 
+ */ +#define XBT_ARGS 0 +#define XBT_ENTRY 1 +#define XBT_ERROR 2 +#define XBT_EXIT 3 + +void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *, + struct xfs_buf *, int, int); +void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *, + struct xfs_buf *, int, int, int); +void xfs_btree_trace_argfffi(const char *, struct xfs_btree_cur *, + xfs_dfiloff_t, xfs_dfsbno_t, xfs_dfilblks_t, int, int); +void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int); +void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int, + union xfs_btree_ptr, union xfs_btree_key *, int); +void xfs_btree_trace_argipr(const char *, struct xfs_btree_cur *, int, + union xfs_btree_ptr, union xfs_btree_rec *, int); +void xfs_btree_trace_argik(const char *, struct xfs_btree_cur *, int, + union xfs_btree_key *, int); +void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *, + union xfs_btree_rec *, int); +void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int); + + +#define XFS_ALLOCBT_TRACE_SIZE 4096 /* size of global trace buffer */ +extern ktrace_t *xfs_allocbt_trace_buf; + +#define XFS_INOBT_TRACE_SIZE 4096 /* size of global trace buffer */ +extern ktrace_t *xfs_inobt_trace_buf; + +#define XFS_BMBT_TRACE_SIZE 4096 /* size of global trace buffer */ +#define XFS_BMBT_KTRACE_SIZE 32 /* size of per-inode trace buffer */ +extern ktrace_t *xfs_bmbt_trace_buf; + + +#define XFS_BTREE_TRACE_ARGBI(c, b, i) \ + xfs_btree_trace_argbi(__func__, c, b, i, __LINE__) +#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \ + xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__) +#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j) \ + xfs_btree_trace_argfffi(__func__, c, o, b, i, j, __LINE__) +#define XFS_BTREE_TRACE_ARGI(c, i) \ + xfs_btree_trace_argi(__func__, c, i, __LINE__) +#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \ + xfs_btree_trace_argipk(__func__, c, i, p, k, __LINE__) +#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) \ + xfs_btree_trace_argipr(__func__, c, i, p, r, __LINE__) +#define XFS_BTREE_TRACE_ARGIK(c, i, k) \ + xfs_btree_trace_argik(__func__, c, i, k, __LINE__) +#define XFS_BTREE_TRACE_ARGR(c, r) \ + xfs_btree_trace_argr(__func__, c, r, __LINE__) +#define XFS_BTREE_TRACE_CURSOR(c, t) \ + xfs_btree_trace_cursor(__func__, c, t, __LINE__) +#else +#define XFS_BTREE_TRACE_ARGBI(c, b, i) +#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) +#define XFS_BTREE_TRACE_ARGFFFI(c, o, b, i, j) +#define XFS_BTREE_TRACE_ARGI(c, i) +#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s) +#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) +#define XFS_BTREE_TRACE_ARGIK(c, i, k) +#define XFS_BTREE_TRACE_ARGR(c, r) +#define XFS_BTREE_TRACE_CURSOR(c, t) +#endif /* XFS_BTREE_TRACE */ + +#endif /* __XFS_BTREE_TRACE_H__ */ -- cgit v0.10.2 From d112f2984592acb774187b3adddc107fb0825500 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Oct 2008 16:59:06 +1100 Subject: [XFS] Wait for all I/O on truncate to zero file size It's possible to have outstanding xfs_ioend_t's queued when the file size is zero. This can happen in the direct I/O path when a direct I/O write fails due to ENOSPC. In this case the xfs_ioend_t will still be queued (ie xfs_end_io_direct() does not know that the I/O failed so can't force the xfs_ioend_t to be flushed synchronously). When we truncate a file on unlink we don't know to wait for these xfs_ioend_ts and we can have a use-after-free situation if the inode is reclaimed before the xfs_ioend_t is finally processed. 
As was suggested by Dave Chinner lets wait for all I/Os to complete when truncating the file size to zero. SGI-PV: 981668 SGI-Modid: xfs-linux-melb:xfs-kern:32216a Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index cc0474d..2b1294b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1450,7 +1450,7 @@ xfs_itruncate_start( mp = ip->i_mount; /* wait for the completion of any pending DIOs */ - if (new_size < ip->i_size) + if (new_size == 0 || new_size < ip->i_size) vn_iowait(ip); /* -- cgit v0.10.2 From bc3048e3cd3806ccfd5b16b1a30e5d6013abbd3d Mon Sep 17 00:00:00 2001 From: Peter Leckie Date: Thu, 30 Oct 2008 17:05:04 +1100 Subject: [XFS] Clean up dquot pincount code. This is a code cleanup and optimization that removes a per mount point spinlock from the quota code and cleans up the code. The patch changes the pincount from being an int protected by a spinlock to an atomic_t allowing the pincount to be manipulated without holding the spinlock. This cleanup also protects against random wakup's of both the aild and xfssyncd by reevaluating the pincount after been woken. Two latter patches will address the Spurious wakeups. SGI-PV: 986789 SGI-Modid: xfs-linux-melb:xfs-kern:32215a Signed-off-by: Peter Leckie Signed-off-by: Christoph Hellwig Signed-off-by: David Chinner Signed-off-by: Donald Douwsma Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index f2705f2..d3f4fbb 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -101,7 +101,7 @@ xfs_qm_dqinit( if (brandnewdquot) { dqp->dq_flnext = dqp->dq_flprev = dqp; mutex_init(&dqp->q_qlock); - sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq"); + init_waitqueue_head(&dqp->q_pinwait); /* * Because we want to use a counting completion, complete @@ -131,7 +131,7 @@ xfs_qm_dqinit( dqp->q_res_bcount = 0; dqp->q_res_icount = 0; dqp->q_res_rtbcount = 0; - dqp->q_pincount = 0; + atomic_set(&dqp->q_pincount, 0); dqp->q_hash = NULL; ASSERT(dqp->dq_flnext == dqp->dq_flprev); @@ -1489,7 +1489,7 @@ xfs_qm_dqpurge( "xfs_qm_dqpurge: dquot %p flush failed", dqp); xfs_dqflock(dqp); } - ASSERT(dqp->q_pincount == 0); + ASSERT(atomic_read(&dqp->q_pincount) == 0); ASSERT(XFS_FORCED_SHUTDOWN(mp) || !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 8958d0f..7e45533 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -83,8 +83,8 @@ typedef struct xfs_dquot { xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ mutex_t q_qlock; /* quota lock */ struct completion q_flush; /* flush completion queue */ - uint q_pincount; /* pin count for this dquot */ - sv_t q_pinwait; /* sync var for pinning */ + atomic_t q_pincount; /* dquot pin count */ + wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ #ifdef XFS_DQUOT_TRACE struct ktrace *q_trace; /* trace header structure */ #endif diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index f028644..e33f864 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -88,25 +88,22 @@ xfs_qm_dquot_logitem_format( /* * Increment the pin count of the given dquot. - * This value is protected by pinlock spinlock in the xQM structure. 
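Pulling the scattered hunks together, the pin/unpin/wait logic after this cleanup reduces to the pattern below; this is a condensed sketch of code that appears in the hunks above and below, not an additional change:

	atomic_inc(&dqp->q_pincount);			/* pin */

	if (atomic_dec_and_test(&dqp->q_pincount))	/* unpin */
		wake_up(&dqp->q_pinwait);

	wait_event(dqp->q_pinwait,			/* wait for the unpin */
		   atomic_read(&dqp->q_pincount) == 0);

Because wait_event() re-evaluates the pincount after every wakeup, a spurious wakeup of the waiter is harmless, which is the point made above about the aild and xfssyncd, and no per-mount spinlock is needed around the counter any more.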
*/ STATIC void xfs_qm_dquot_logitem_pin( xfs_dq_logitem_t *logitem) { - xfs_dquot_t *dqp; + xfs_dquot_t *dqp = logitem->qli_dquot; - dqp = logitem->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); - spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); - dqp->q_pincount++; - spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); + atomic_inc(dqp->q_pincount); } /* * Decrement the pin count of the given dquot, and wake up * anyone in xfs_dqwait_unpin() if the count goes to 0. The - * dquot must have been previously pinned with a call to xfs_dqpin(). + * dquot must have been previously pinned with a call to + * xfs_qm_dquot_logitem_pin(). */ /* ARGSUSED */ STATIC void @@ -114,16 +111,11 @@ xfs_qm_dquot_logitem_unpin( xfs_dq_logitem_t *logitem, int stale) { - xfs_dquot_t *dqp; + xfs_dquot_t *dqp = logitem->qli_dquot; - dqp = logitem->qli_dquot; - ASSERT(dqp->q_pincount > 0); - spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); - dqp->q_pincount--; - if (dqp->q_pincount == 0) { - sv_broadcast(&dqp->q_pinwait); - } - spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); + ASSERT(atomic_read(&dqp->q_pincount) > 0); + if (atomic_dec_and_test(&dqp->q_pincount)) + wake_up(&dqp->q_pinwait); } /* ARGSUSED */ @@ -193,21 +185,14 @@ xfs_qm_dqunpin_wait( xfs_dquot_t *dqp) { ASSERT(XFS_DQ_IS_LOCKED(dqp)); - if (dqp->q_pincount == 0) { + if (atomic_read(&dqp->q_pincount) == 0) return; - } /* * Give the log a push so we don't wait here too long. */ xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE); - spin_lock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); - if (dqp->q_pincount == 0) { - spin_unlock(&(XFS_DQ_TO_QINF(dqp)->qi_pinlock)); - return; - } - sv_wait(&(dqp->q_pinwait), PINOD, - &(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s); + wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); } /* @@ -310,7 +295,7 @@ xfs_qm_dquot_logitem_trylock( uint retval; dqp = qip->qli_dquot; - if (dqp->q_pincount > 0) + if (atomic_read(&dqp->q_pincount) > 0) return (XFS_ITEM_PINNED); if (! xfs_qm_dqlock_nowait(dqp)) diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index df0ffef..270f775 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -1137,7 +1137,6 @@ xfs_qm_init_quotainfo( return error; } - spin_lock_init(&qinf->qi_pinlock); xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0); qinf->qi_dqreclaims = 0; @@ -1234,7 +1233,6 @@ xfs_qm_destroy_quotainfo( */ xfs_qm_rele_quotafs_ref(mp); - spinlock_destroy(&qi->qi_pinlock); xfs_qm_list_destroy(&qi->qi_dqlist); if (qi->qi_uquotaip) { diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index 44f2534..4f2de977 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -106,7 +106,6 @@ typedef struct xfs_qm { typedef struct xfs_quotainfo { xfs_inode_t *qi_uquotaip; /* user quota inode */ xfs_inode_t *qi_gquotaip; /* group quota inode */ - spinlock_t qi_pinlock; /* dquot pinning lock */ xfs_dqlist_t qi_dqlist; /* all dquots in filesys */ int qi_dqreclaims; /* a change here indicates a removal in the dqlist */ -- cgit v0.10.2 From d1de802155341ab63e64d37c58c61d6f358bb3ad Mon Sep 17 00:00:00 2001 From: Peter Leckie Date: Thu, 30 Oct 2008 17:05:18 +1100 Subject: [XFS] Fix build brakage from patch "Clean up dquot pincount code" This is a fix for patch " Clean up dquot pincount code" which introduced a build breakage due to a missing & in xfs_qm_dquot_logitem_pin. 
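The cleaned-up pinning scheme reduces to a bare atomic counter plus a wait queue, with the address of the counter passed to the atomic helpers (hence the missing '&' breaking the build). A rough userspace analogue using C11 atomics and pthreads, with illustrative names rather than the kernel API:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct dquot_like {
        atomic_int      q_pincount;
        pthread_mutex_t q_pinlock;      /* serialises the waiters only */
        pthread_cond_t  q_pinwait;
};

static void dq_pin(struct dquot_like *dq)
{
        atomic_fetch_add(&dq->q_pincount, 1);   /* note the '&' */
}

static void dq_unpin(struct dquot_like *dq)
{
        /* dec-and-test: the final unpin wakes anyone waiting for zero */
        if (atomic_fetch_sub(&dq->q_pincount, 1) == 1) {
                pthread_mutex_lock(&dq->q_pinlock);
                pthread_cond_broadcast(&dq->q_pinwait);
                pthread_mutex_unlock(&dq->q_pinlock);
        }
}

static void dq_unpin_wait(struct dquot_like *dq)
{
        pthread_mutex_lock(&dq->q_pinlock);
        while (atomic_load(&dq->q_pincount) != 0)
                pthread_cond_wait(&dq->q_pinwait, &dq->q_pinlock);
        pthread_mutex_unlock(&dq->q_pinlock);
}

int main(void)
{
        struct dquot_like dq = {
                .q_pincount = 0,
                .q_pinlock  = PTHREAD_MUTEX_INITIALIZER,
                .q_pinwait  = PTHREAD_COND_INITIALIZER,
        };

        dq_pin(&dq);
        dq_unpin(&dq);
        dq_unpin_wait(&dq);     /* returns immediately, count is zero */
        printf("pincount = %d\n", atomic_load(&dq.q_pincount));
        return 0;
}

The last unpin does the decrement-and-test and wakes the waiters, matching xfs_qm_dquot_logitem_unpin() and the wait_event() in xfs_qm_dqunpin_wait() above.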
SGI-PV: 986789 SGI-Modid: xfs-linux-melb:xfs-kern:32221a Signed-off-by: Peter Leckie Signed-off-by: Donald Douwsma Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index e33f864..48f0810 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -96,7 +96,7 @@ xfs_qm_dquot_logitem_pin( xfs_dquot_t *dqp = logitem->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); - atomic_inc(dqp->q_pincount); + atomic_inc(&dqp->q_pincount); } /* -- cgit v0.10.2 From 24ee0e49c9cce23acb1758728cb09e8d2b53bd33 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Oct 2008 17:05:26 +1100 Subject: [XFS] Make xfs_btree_check_ptr() debug-only code. SGI-PV: 985583 SGI-Modid: xfs-linux-melb:xfs-kern:32224a Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index b735c72..72a26bb 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -160,6 +160,7 @@ xfs_btree_check_lptr( return 0; } +#ifdef DEBUG /* * Check that (short) pointer is ok. */ @@ -197,6 +198,7 @@ xfs_btree_check_ptr( be32_to_cpu((&ptr->s)[index]), level); } } +#endif /* * Delete the btree cursor. -- cgit v0.10.2 From 847fff5ca881670ca8ec617afeb943950f0c804b Mon Sep 17 00:00:00 2001 From: Barry Naujok Date: Thu, 30 Oct 2008 17:05:38 +1100 Subject: [XFS] Sync up kernel and user-space headers SGI-PV: 986558 SGI-Modid: xfs-linux-melb:xfs-kern:32231a Signed-off-by: Barry Naujok Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 61b292a..729ee3e 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -192,15 +192,16 @@ typedef struct xfs_perag xfs_agino_t pagi_freecount; /* number of free inodes */ xfs_agino_t pagi_count; /* number of allocated inodes */ int pagb_count; /* pagb slots in use */ + xfs_perag_busy_t *pagb_list; /* unstable blocks */ #ifdef __KERNEL__ spinlock_t pagb_lock; /* lock for pagb_list */ -#endif - xfs_perag_busy_t *pagb_list; /* unstable blocks */ + atomic_t pagf_fstrms; /* # of filestreams active in this AG */ int pag_ici_init; /* incore inode cache initialised */ rwlock_t pag_ici_lock; /* incore inode lock */ struct radix_tree_root pag_ici_root; /* incore inode cache root */ +#endif } xfs_perag_t; #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 5aec15d..5881727 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -121,6 +121,19 @@ extern ktrace_t *xfs_alloc_trace_buf; #define XFS_ALLOC_KTRACE_BUSYSEARCH 6 #endif +void +xfs_alloc_mark_busy(xfs_trans_t *tp, + xfs_agnumber_t agno, + xfs_agblock_t bno, + xfs_extlen_t len); + +void +xfs_alloc_clear_busy(xfs_trans_t *tp, + xfs_agnumber_t ag, + int idx); + +#endif /* __KERNEL__ */ + /* * Compute and fill in value of m_ag_maxlevels. 
*/ @@ -196,18 +209,4 @@ xfs_free_extent( xfs_fsblock_t bno, /* starting block number of extent */ xfs_extlen_t len); /* length of extent */ -void -xfs_alloc_mark_busy(xfs_trans_t *tp, - xfs_agnumber_t agno, - xfs_agblock_t bno, - xfs_extlen_t len); - -void -xfs_alloc_clear_busy(xfs_trans_t *tp, - xfs_agnumber_t ag, - int idx); - - -#endif /* __KERNEL__ */ - #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h index 0b3b5ef..53d5e70 100644 --- a/fs/xfs/xfs_arch.h +++ b/fs/xfs/xfs_arch.h @@ -41,21 +41,36 @@ #endif #ifdef XFS_NATIVE_HOST -#define cpu_to_be16(val) ((__be16)(val)) -#define cpu_to_be32(val) ((__be32)(val)) -#define cpu_to_be64(val) ((__be64)(val)) -#define be16_to_cpu(val) ((__uint16_t)(val)) -#define be32_to_cpu(val) ((__uint32_t)(val)) -#define be64_to_cpu(val) ((__uint64_t)(val)) +#define cpu_to_be16(val) ((__force __be16)(__u16)(val)) +#define cpu_to_be32(val) ((__force __be32)(__u32)(val)) +#define cpu_to_be64(val) ((__force __be64)(__u64)(val)) +#define be16_to_cpu(val) ((__force __u16)(__be16)(val)) +#define be32_to_cpu(val) ((__force __u32)(__be32)(val)) +#define be64_to_cpu(val) ((__force __u64)(__be64)(val)) #else -#define cpu_to_be16(val) (__swab16((__uint16_t)(val))) -#define cpu_to_be32(val) (__swab32((__uint32_t)(val))) -#define cpu_to_be64(val) (__swab64((__uint64_t)(val))) -#define be16_to_cpu(val) (__swab16((__be16)(val))) -#define be32_to_cpu(val) (__swab32((__be32)(val))) -#define be64_to_cpu(val) (__swab64((__be64)(val))) +#define cpu_to_be16(val) ((__force __be16)__swab16((__u16)(val))) +#define cpu_to_be32(val) ((__force __be32)__swab32((__u32)(val))) +#define cpu_to_be64(val) ((__force __be64)__swab64((__u64)(val))) +#define be16_to_cpu(val) (__swab16((__force __u16)(__be16)(val))) +#define be32_to_cpu(val) (__swab32((__force __u32)(__be32)(val))) +#define be64_to_cpu(val) (__swab64((__force __u64)(__be64)(val))) #endif +static inline void be16_add_cpu(__be16 *a, __s16 b) +{ + *a = cpu_to_be16(be16_to_cpu(*a) + b); +} + +static inline void be32_add_cpu(__be32 *a, __s32 b) +{ + *a = cpu_to_be32(be32_to_cpu(*a) + b); +} + +static inline void be64_add_cpu(__be64 *a, __s64 b) +{ + *a = cpu_to_be64(be64_to_cpu(*a) + b); +} + #endif /* __KERNEL__ */ /* do we need conversion? */ diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h index 8e0e463..bca7b24 100644 --- a/fs/xfs/xfs_bit.h +++ b/fs/xfs/xfs_bit.h @@ -61,8 +61,7 @@ static inline int xfs_highbit64(__uint64_t v) /* Get low bit set out of 32-bit argument, -1 if none set */ static inline int xfs_lowbit32(__uint32_t v) { - unsigned long t = v; - return (v) ? find_first_bit(&t, 32) : -1; + return ffs(v) - 1; } /* Get low bit set out of 64-bit argument, -1 if none set */ diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 9f3e3a8..7c9d12c 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -137,9 +137,7 @@ typedef struct xfs_bmalloca { char conv; /* overwriting unwritten extents */ } xfs_bmalloca_t; -#ifdef __KERNEL__ - -#if defined(XFS_BMAP_TRACE) +#if defined(__KERNEL__) && defined(XFS_BMAP_TRACE) /* * Trace operations for bmap extent tracing */ @@ -163,9 +161,12 @@ xfs_bmap_trace_exlist( int whichfork); /* data or attr fork */ #define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ xfs_bmap_trace_exlist(__func__,ip,c,w) -#else + +#else /* __KERNEL__ && XFS_BMAP_TRACE */ + #define XFS_BMAP_TRACE_EXLIST(ip,c,w) -#endif + +#endif /* __KERNEL__ && XFS_BMAP_TRACE */ /* * Convert inode from non-attributed to attributed. 
@@ -206,20 +207,6 @@ xfs_bmap_compute_maxlevels( int whichfork); /* data or attr fork */ /* - * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi - * caller. Frees all the extents that need freeing, which must be done - * last due to locking considerations. - * - * Return 1 if the given transaction was committed and a new one allocated, - * and 0 otherwise. - */ -int /* error */ -xfs_bmap_finish( - struct xfs_trans **tp, /* transaction pointer addr */ - xfs_bmap_free_t *flist, /* i/o: list extents to free */ - int *committed); /* xact committed or not */ - -/* * Returns the file-relative block number of the first unused block in the file. * This is the lowest-address hole if the file has holes, else the first block * past the end of file. @@ -344,6 +331,32 @@ xfs_bunmapi( int *done); /* set if not done yet */ /* + * Check an extent list, which has just been read, for + * any bit in the extent flag field. + */ +int +xfs_check_nostate_extents( + struct xfs_ifork *ifp, + xfs_extnum_t idx, + xfs_extnum_t num); + +#ifdef __KERNEL__ + +/* + * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi + * caller. Frees all the extents that need freeing, which must be done + * last due to locking considerations. + * + * Return 1 if the given transaction was committed and a new one allocated, + * and 0 otherwise. + */ +int /* error */ +xfs_bmap_finish( + struct xfs_trans **tp, /* transaction pointer addr */ + xfs_bmap_free_t *flist, /* i/o: list extents to free */ + int *committed); /* xact committed or not */ + +/* * Fcntl interface to xfs_bmapi. */ int /* error code */ @@ -375,16 +388,6 @@ xfs_bmap_count_blocks( int *count); /* - * Check an extent list, which has just been read, for - * any bit in the extent flag field. - */ -int -xfs_check_nostate_extents( - struct xfs_ifork *ifp, - xfs_extnum_t idx, - xfs_extnum_t num); - -/* * Search the extent records for the entry containing block bno. * If bno lies in a hole, point to the next entry. If bno lies * past eof, *eofp will be set, and *prevp will contain the last diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 6f38e25..5669242 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -231,8 +231,6 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; be16_to_cpu((bb)->bb_numrecs) <= (mp)->m_bmap_dmxr[(level) != 0]) -#ifdef __KERNEL__ - /* * Prototypes for xfs_bmap.c to call. */ @@ -264,6 +262,5 @@ extern void xfs_bmbt_to_bmdr(xfs_bmbt_block_t *, int, xfs_bmdr_block_t *, int); extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); -#endif /* __KERNEL__ */ #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 2a22c58..7425b2b 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -327,8 +327,6 @@ typedef struct xfs_btree_cur #define XFS_BUF_TO_SBLOCK(bp) ((xfs_btree_sblock_t *)XFS_BUF_PTR(bp)) -#ifdef __KERNEL__ - /* * Check that long form block header is ok. */ @@ -515,8 +513,6 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block) return be16_to_cpu(block->bb_level); } -#endif /* __KERNEL__ */ - /* * Min and max functions for extlen, agblock, fileoff, and filblks types. 
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 599e270..70b710c 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -206,9 +206,8 @@ struct xfs_nameops { }; -#ifdef __KERNEL__ /*======================================================================== - * Function prototypes for the kernel. + * Function prototypes. *========================================================================*/ /* @@ -269,6 +268,5 @@ xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf); extern struct kmem_zone *xfs_da_state_zone; extern struct kmem_zone *xfs_dabuf_zone; -#endif /* __KERNEL__ */ #endif /* __XFS_DA_BTREE_H__ */ diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index c5745f6..ccf554a 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -56,7 +56,6 @@ static inline int xfs_ialloc_find_free(xfs_inofree_t *fp) } -#ifdef __KERNEL__ /* * Allocate an inode on disk. * Mode is used to tell whether the new inode will need space, and whether @@ -174,6 +173,4 @@ int xfs_inobt_lookup_le(struct xfs_btree_cur *cur, xfs_agino_t ino, extern int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_agino_t *ino, __int32_t *fcnt, xfs_inofree_t *free, int *stat); -#endif /* __KERNEL__ */ - #endif /* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/xfs_imap.h b/fs/xfs/xfs_imap.h index d364500..f9ce6289 100644 --- a/fs/xfs/xfs_imap.h +++ b/fs/xfs/xfs_imap.h @@ -30,11 +30,9 @@ typedef struct xfs_imap { ushort im_boffset; /* inode offset in block in bytes */ } xfs_imap_t; -#ifdef __KERNEL__ struct xfs_mount; struct xfs_trans; int xfs_imap(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, xfs_imap_t *, uint); -#endif #endif /* __XFS_IMAP_H__ */ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 2a69a7d..a8f1e68 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -20,7 +20,7 @@ struct xfs_dinode; struct xfs_dinode_core; - +struct xfs_inode; /* * Fork identifiers. @@ -84,54 +84,6 @@ typedef struct xfs_ifork { } xfs_ifork_t; /* - * Flags for xfs_ichgtime(). - */ -#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ -#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ - -/* - * Per-fork incore inode flags. - */ -#define XFS_IFINLINE 0x01 /* Inline data is read in */ -#define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */ -#define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */ -#define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ - -/* - * Flags for xfs_itobp(), xfs_imap() and xfs_dilocate(). - */ -#define XFS_IMAP_LOOKUP 0x1 -#define XFS_IMAP_BULKSTAT 0x2 - -#ifdef __KERNEL__ -struct bhv_desc; -struct cred; -struct ktrace; -struct xfs_buf; -struct xfs_bmap_free; -struct xfs_bmbt_irec; -struct xfs_bmbt_block; -struct xfs_inode; -struct xfs_inode_log_item; -struct xfs_mount; -struct xfs_trans; -struct xfs_dquot; - -#if defined(XFS_ILOCK_TRACE) -#define XFS_ILOCK_KTRACE_SIZE 32 -extern ktrace_t *xfs_ilock_trace_buf; -extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *); -#else -#define xfs_ilock_trace(i,n,f,ra) -#endif - -typedef struct dm_attrs_s { - __uint32_t da_dmevmask; /* DMIG event mask */ - __uint16_t da_dmstate; /* DMIG state info */ - __uint16_t da_pad; /* DMIG extra padding */ -} dm_attrs_t; - -/* * This is the xfs in-core inode structure. * Most of the on-disk inode is embedded in the i_d field. * @@ -191,6 +143,96 @@ typedef struct xfs_icdinode { __uint32_t di_gen; /* generation number */ } xfs_icdinode_t; +/* + * Flags for xfs_ichgtime(). 
+ */ +#define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ +#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ + +/* + * Per-fork incore inode flags. + */ +#define XFS_IFINLINE 0x01 /* Inline data is read in */ +#define XFS_IFEXTENTS 0x02 /* All extent pointers are read in */ +#define XFS_IFBROOT 0x04 /* i_broot points to the bmap b-tree root */ +#define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ + +/* + * Flags for xfs_itobp(), xfs_imap() and xfs_dilocate(). + */ +#define XFS_IMAP_LOOKUP 0x1 +#define XFS_IMAP_BULKSTAT 0x2 + +/* + * Fork handling. + */ + +#define XFS_IFORK_Q(ip) ((ip)->i_d.di_forkoff != 0) +#define XFS_IFORK_BOFF(ip) ((int)((ip)->i_d.di_forkoff << 3)) + +#define XFS_IFORK_PTR(ip,w) \ + ((w) == XFS_DATA_FORK ? \ + &(ip)->i_df : \ + (ip)->i_afp) +#define XFS_IFORK_DSIZE(ip) \ + (XFS_IFORK_Q(ip) ? \ + XFS_IFORK_BOFF(ip) : \ + XFS_LITINO((ip)->i_mount)) +#define XFS_IFORK_ASIZE(ip) \ + (XFS_IFORK_Q(ip) ? \ + XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \ + 0) +#define XFS_IFORK_SIZE(ip,w) \ + ((w) == XFS_DATA_FORK ? \ + XFS_IFORK_DSIZE(ip) : \ + XFS_IFORK_ASIZE(ip)) +#define XFS_IFORK_FORMAT(ip,w) \ + ((w) == XFS_DATA_FORK ? \ + (ip)->i_d.di_format : \ + (ip)->i_d.di_aformat) +#define XFS_IFORK_FMT_SET(ip,w,n) \ + ((w) == XFS_DATA_FORK ? \ + ((ip)->i_d.di_format = (n)) : \ + ((ip)->i_d.di_aformat = (n))) +#define XFS_IFORK_NEXTENTS(ip,w) \ + ((w) == XFS_DATA_FORK ? \ + (ip)->i_d.di_nextents : \ + (ip)->i_d.di_anextents) +#define XFS_IFORK_NEXT_SET(ip,w,n) \ + ((w) == XFS_DATA_FORK ? \ + ((ip)->i_d.di_nextents = (n)) : \ + ((ip)->i_d.di_anextents = (n))) + + + +#ifdef __KERNEL__ + +struct bhv_desc; +struct cred; +struct ktrace; +struct xfs_buf; +struct xfs_bmap_free; +struct xfs_bmbt_irec; +struct xfs_bmbt_block; +struct xfs_inode_log_item; +struct xfs_mount; +struct xfs_trans; +struct xfs_dquot; + +#if defined(XFS_ILOCK_TRACE) +#define XFS_ILOCK_KTRACE_SIZE 32 +extern ktrace_t *xfs_ilock_trace_buf; +extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *); +#else +#define xfs_ilock_trace(i,n,f,ra) +#endif + +typedef struct dm_attrs_s { + __uint32_t da_dmevmask; /* DMIG event mask */ + __uint16_t da_dmstate; /* DMIG state info */ + __uint16_t da_pad; /* DMIG extra padding */ +} dm_attrs_t; + typedef struct { struct xfs_inode *ip_mnext; /* next inode in mount list */ struct xfs_inode *ip_mprev; /* ptr to prev inode */ @@ -327,50 +369,26 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) spin_unlock(&ip->i_flags_lock); return ret; } -#endif /* __KERNEL__ */ - /* - * Fork handling. + * Manage the i_flush queue embedded in the inode. This completion + * queue synchronizes processes attempting to flush the in-core + * inode back to disk. */ +static inline void xfs_iflock(xfs_inode_t *ip) +{ + wait_for_completion(&ip->i_flush); +} -#define XFS_IFORK_Q(ip) ((ip)->i_d.di_forkoff != 0) -#define XFS_IFORK_BOFF(ip) ((int)((ip)->i_d.di_forkoff << 3)) - -#define XFS_IFORK_PTR(ip,w) \ - ((w) == XFS_DATA_FORK ? \ - &(ip)->i_df : \ - (ip)->i_afp) -#define XFS_IFORK_DSIZE(ip) \ - (XFS_IFORK_Q(ip) ? \ - XFS_IFORK_BOFF(ip) : \ - XFS_LITINO((ip)->i_mount)) -#define XFS_IFORK_ASIZE(ip) \ - (XFS_IFORK_Q(ip) ? \ - XFS_LITINO((ip)->i_mount) - XFS_IFORK_BOFF(ip) : \ - 0) -#define XFS_IFORK_SIZE(ip,w) \ - ((w) == XFS_DATA_FORK ? \ - XFS_IFORK_DSIZE(ip) : \ - XFS_IFORK_ASIZE(ip)) -#define XFS_IFORK_FORMAT(ip,w) \ - ((w) == XFS_DATA_FORK ? 
\ - (ip)->i_d.di_format : \ - (ip)->i_d.di_aformat) -#define XFS_IFORK_FMT_SET(ip,w,n) \ - ((w) == XFS_DATA_FORK ? \ - ((ip)->i_d.di_format = (n)) : \ - ((ip)->i_d.di_aformat = (n))) -#define XFS_IFORK_NEXTENTS(ip,w) \ - ((w) == XFS_DATA_FORK ? \ - (ip)->i_d.di_nextents : \ - (ip)->i_d.di_anextents) -#define XFS_IFORK_NEXT_SET(ip,w,n) \ - ((w) == XFS_DATA_FORK ? \ - ((ip)->i_d.di_nextents = (n)) : \ - ((ip)->i_d.di_anextents = (n))) +static inline int xfs_iflock_nowait(xfs_inode_t *ip) +{ + return try_wait_for_completion(&ip->i_flush); +} -#ifdef __KERNEL__ +static inline void xfs_ifunlock(xfs_inode_t *ip) +{ + complete(&ip->i_flush); +} /* * In-core inode flags. @@ -490,19 +508,11 @@ int xfs_finish_reclaim_all(struct xfs_mount *, int); /* * xfs_inode.c prototypes. */ -int xfs_itobp(struct xfs_mount *, struct xfs_trans *, - xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **, - xfs_daddr_t, uint, uint); int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, xfs_inode_t **, xfs_daddr_t, uint); -int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, int, struct xfs_buf **, boolean_t *, xfs_inode_t **); -void xfs_dinode_from_disk(struct xfs_icdinode *, - struct xfs_dinode_core *); -void xfs_dinode_to_disk(struct xfs_dinode_core *, - struct xfs_icdinode *); uint xfs_ip2xflags(struct xfs_inode *); uint xfs_dic2xflags(struct xfs_dinode *); @@ -514,15 +524,11 @@ int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); struct xfs_inode * xfs_inode_alloc(struct xfs_mount *, xfs_ino_t); -void xfs_idestroy_fork(xfs_inode_t *, int); void xfs_idestroy(xfs_inode_t *); -void xfs_idata_realloc(xfs_inode_t *, int, int); void xfs_iextract(xfs_inode_t *); void xfs_iext_realloc(xfs_inode_t *, int, int); -void xfs_iroot_realloc(xfs_inode_t *, int, int); void xfs_ipin(xfs_inode_t *); void xfs_iunpin(xfs_inode_t *); -int xfs_iextents_copy(xfs_inode_t *, xfs_bmbt_rec_t *, int); int xfs_iflush(xfs_inode_t *, uint); void xfs_iflush_all(struct xfs_mount *); void xfs_ichgtime(xfs_inode_t *, int); @@ -533,6 +539,21 @@ void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); void xfs_synchronize_atime(xfs_inode_t *); void xfs_mark_inode_dirty_sync(xfs_inode_t *); +#endif /* __KERNEL__ */ + +int xfs_itobp(struct xfs_mount *, struct xfs_trans *, + struct xfs_inode *, struct xfs_dinode **, + struct xfs_buf **, xfs_daddr_t, uint, uint); +void xfs_dinode_from_disk(struct xfs_icdinode *, + struct xfs_dinode_core *); +void xfs_dinode_to_disk(struct xfs_dinode_core *, + struct xfs_icdinode *); +void xfs_idestroy_fork(struct xfs_inode *, int); +void xfs_idata_realloc(struct xfs_inode *, int, int); +void xfs_iroot_realloc(struct xfs_inode *, int, int); +int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int); +int xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int); + xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t); void xfs_iext_insert(xfs_ifork_t *, xfs_extnum_t, xfs_extnum_t, xfs_bmbt_irec_t *); @@ -562,7 +583,8 @@ void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int); #define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) #ifdef DEBUG -void xfs_isize_check(struct xfs_mount *, xfs_inode_t *, xfs_fsize_t); +void xfs_isize_check(struct xfs_mount *, struct xfs_inode *, + xfs_fsize_t); #else /* DEBUG */ #define xfs_isize_check(mp, ip, isize) #endif /* DEBUG */ @@ -577,26 +599,4 @@ 
extern struct kmem_zone *xfs_ifork_zone; extern struct kmem_zone *xfs_inode_zone; extern struct kmem_zone *xfs_ili_zone; -/* - * Manage the i_flush queue embedded in the inode. This completion - * queue synchronizes processes attempting to flush the in-core - * inode back to disk. - */ -static inline void xfs_iflock(xfs_inode_t *ip) -{ - wait_for_completion(&ip->i_flush); -} - -static inline int xfs_iflock_nowait(xfs_inode_t *ip) -{ - return try_wait_for_completion(&ip->i_flush); -} - -static inline void xfs_ifunlock(xfs_inode_t *ip) -{ - complete(&ip->i_flush); -} - -#endif /* __KERNEL__ */ - #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 4051307..1ff04cc 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -112,6 +112,24 @@ typedef struct xfs_inode_log_format_64 { #define XFS_ILI_IOLOCKED_ANY (XFS_ILI_IOLOCKED_EXCL | XFS_ILI_IOLOCKED_SHARED) +#define XFS_ILOG_FBROOT(w) xfs_ilog_fbroot(w) +static inline int xfs_ilog_fbroot(int w) +{ + return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT); +} + +#define XFS_ILOG_FEXT(w) xfs_ilog_fext(w) +static inline int xfs_ilog_fext(int w) +{ + return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT); +} + +#define XFS_ILOG_FDATA(w) xfs_ilog_fdata(w) +static inline int xfs_ilog_fdata(int w) +{ + return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA); +} + #ifdef __KERNEL__ struct xfs_buf; @@ -148,26 +166,6 @@ typedef struct xfs_inode_log_item { } xfs_inode_log_item_t; -#define XFS_ILOG_FDATA(w) xfs_ilog_fdata(w) -static inline int xfs_ilog_fdata(int w) -{ - return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA); -} - -#endif /* __KERNEL__ */ - -#define XFS_ILOG_FBROOT(w) xfs_ilog_fbroot(w) -static inline int xfs_ilog_fbroot(int w) -{ - return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT); -} - -#define XFS_ILOG_FEXT(w) xfs_ilog_fext(w) -static inline int xfs_ilog_fext(int w) -{ - return (w == XFS_DATA_FORK ? 
XFS_ILOG_DEXT : XFS_ILOG_AEXT); -} - static inline int xfs_inode_clean(xfs_inode_t *ip) { return (!ip->i_itemp || @@ -175,9 +173,6 @@ static inline int xfs_inode_clean(xfs_inode_t *ip) !ip->i_update_core; } - -#ifdef __KERNEL__ - extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); extern void xfs_inode_item_destroy(struct xfs_inode *); extern void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 49d647e..56c3b12 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -44,14 +44,14 @@ typedef struct xfs_trans_reservations { } xfs_trans_reservations_t; #ifndef __KERNEL__ -/* - * Moved here from xfs_ag.h to avoid reordering header files - */ + #define XFS_DADDR_TO_AGNO(mp,d) \ ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks)) #define XFS_DADDR_TO_AGBNO(mp,d) \ ((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks)) -#else + +#else /* __KERNEL__ */ + struct cred; struct log; struct xfs_mount_args; @@ -507,7 +507,6 @@ typedef struct xfs_mod_sb { #define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock)) #define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock)) -extern void xfs_mod_sb(xfs_trans_t *, __int64_t); extern int xfs_log_sbcount(xfs_mount_t *, uint); extern int xfs_mountfs(xfs_mount_t *mp); extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); @@ -526,9 +525,6 @@ extern void xfs_freesb(xfs_mount_t *); extern int xfs_fs_writable(xfs_mount_t *); extern int xfs_syncsub(xfs_mount_t *, int, int *); extern int xfs_sync_inodes(xfs_mount_t *, int, int *); -extern xfs_agnumber_t xfs_initialize_perag(xfs_mount_t *, xfs_agnumber_t); -extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); -extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); extern int xfs_dmops_get(struct xfs_mount *, struct xfs_mount_args *); @@ -540,4 +536,9 @@ extern struct xfs_dmops xfs_dmcore_xfs; #endif /* __KERNEL__ */ +extern void xfs_mod_sb(struct xfs_trans *, __int64_t); +extern xfs_agnumber_t xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t); +extern void xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *); +extern void xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t); + #endif /* __XFS_MOUNT_H__ */ diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 74c80bd..1d89d50 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -18,6 +18,8 @@ #ifndef __XFS_TRANS_H__ #define __XFS_TRANS_H__ +struct xfs_log_item; + /* * This is the structure written in the log at the head of * every transaction. 
It identifies the type and id of the @@ -98,76 +100,6 @@ typedef struct xfs_trans_header { #define XFS_TRANS_TYPE_MAX 41 /* new transaction types need to be reflected in xfs_logprint(8) */ - -#ifdef __KERNEL__ -struct xfs_buf; -struct xfs_buftarg; -struct xfs_efd_log_item; -struct xfs_efi_log_item; -struct xfs_inode; -struct xfs_item_ops; -struct xfs_log_iovec; -struct xfs_log_item; -struct xfs_log_item_desc; -struct xfs_mount; -struct xfs_trans; -struct xfs_dquot_acct; - -typedef struct xfs_log_item { - struct list_head li_ail; /* AIL pointers */ - xfs_lsn_t li_lsn; /* last on-disk lsn */ - struct xfs_log_item_desc *li_desc; /* ptr to current desc*/ - struct xfs_mount *li_mountp; /* ptr to fs mount */ - uint li_type; /* item type */ - uint li_flags; /* misc flags */ - struct xfs_log_item *li_bio_list; /* buffer item list */ - void (*li_cb)(struct xfs_buf *, - struct xfs_log_item *); - /* buffer item iodone */ - /* callback func */ - struct xfs_item_ops *li_ops; /* function list */ -} xfs_log_item_t; - -#define XFS_LI_IN_AIL 0x1 -#define XFS_LI_ABORTED 0x2 - -typedef struct xfs_item_ops { - uint (*iop_size)(xfs_log_item_t *); - void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); - void (*iop_pin)(xfs_log_item_t *); - void (*iop_unpin)(xfs_log_item_t *, int); - void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *); - uint (*iop_trylock)(xfs_log_item_t *); - void (*iop_unlock)(xfs_log_item_t *); - xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); - void (*iop_push)(xfs_log_item_t *); - void (*iop_pushbuf)(xfs_log_item_t *); - void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); -} xfs_item_ops_t; - -#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip) -#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) -#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) -#define IOP_UNPIN(ip, flags) (*(ip)->li_ops->iop_unpin)(ip, flags) -#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp) -#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) -#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) -#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) -#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip) -#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip) -#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn) - -/* - * Return values for the IOP_TRYLOCK() routines. - */ -#define XFS_ITEM_SUCCESS 0 -#define XFS_ITEM_PINNED 1 -#define XFS_ITEM_LOCKED 2 -#define XFS_ITEM_FLUSHING 3 -#define XFS_ITEM_PUSHBUF 4 - -#endif /* __KERNEL__ */ - /* * This structure is used to track log items associated with * a transaction. It points to the log item and keeps some @@ -176,7 +108,7 @@ typedef struct xfs_item_ops { * once we get to commit processing (see xfs_trans_commit()). */ typedef struct xfs_log_item_desc { - xfs_log_item_t *lid_item; + struct xfs_log_item *lid_item; ushort lid_size; unsigned char lid_flags; unsigned char lid_index; @@ -276,94 +208,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) (xfs_caddr_t)(((xfs_log_item_chunk_t*)0)->lic_descs)); } -#ifdef __KERNEL__ -/* - * This structure is used to maintain a list of block ranges that have been - * freed in the transaction. The ranges are listed in the perag[] busy list - * between when they're freed and the transaction is committed to disk. 
- */ - -typedef struct xfs_log_busy_slot { - xfs_agnumber_t lbc_ag; - ushort lbc_idx; /* index in perag.busy[] */ -} xfs_log_busy_slot_t; - -#define XFS_LBC_NUM_SLOTS 31 -typedef struct xfs_log_busy_chunk { - struct xfs_log_busy_chunk *lbc_next; - uint lbc_free; /* free slots bitmask */ - ushort lbc_unused; /* first unused */ - xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS]; -} xfs_log_busy_chunk_t; - -#define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1) -#define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1) - -#define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK) -#define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot))) -#define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)])) -#define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK) -#define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot))) - -/* - * This is the type of function which can be given to xfs_trans_callback() - * to be called upon the transaction's commit to disk. - */ -typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *); - -/* - * This is the structure maintained for every active transaction. - */ -typedef struct xfs_trans { - unsigned int t_magic; /* magic number */ - xfs_log_callback_t t_logcb; /* log callback struct */ - unsigned int t_type; /* transaction type */ - unsigned int t_log_res; /* amt of log space resvd */ - unsigned int t_log_count; /* count for perm log res */ - unsigned int t_blk_res; /* # of blocks resvd */ - unsigned int t_blk_res_used; /* # of resvd blocks used */ - unsigned int t_rtx_res; /* # of rt extents resvd */ - unsigned int t_rtx_res_used; /* # of resvd rt extents used */ - xfs_log_ticket_t t_ticket; /* log mgr ticket */ - xfs_lsn_t t_lsn; /* log seq num of start of - * transaction. */ - xfs_lsn_t t_commit_lsn; /* log seq num of end of - * transaction. */ - struct xfs_mount *t_mountp; /* ptr to fs mount struct */ - struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ - xfs_trans_callback_t t_callback; /* transaction callback */ - void *t_callarg; /* callback arg */ - unsigned int t_flags; /* misc flags */ - int64_t t_icount_delta; /* superblock icount change */ - int64_t t_ifree_delta; /* superblock ifree change */ - int64_t t_fdblocks_delta; /* superblock fdblocks chg */ - int64_t t_res_fdblocks_delta; /* on-disk only chg */ - int64_t t_frextents_delta;/* superblock freextents chg*/ - int64_t t_res_frextents_delta; /* on-disk only chg */ -#ifdef DEBUG - int64_t t_ag_freeblks_delta; /* debugging counter */ - int64_t t_ag_flist_delta; /* debugging counter */ - int64_t t_ag_btree_delta; /* debugging counter */ -#endif - int64_t t_dblocks_delta;/* superblock dblocks change */ - int64_t t_agcount_delta;/* superblock agcount change */ - int64_t t_imaxpct_delta;/* superblock imaxpct change */ - int64_t t_rextsize_delta;/* superblock rextsize chg */ - int64_t t_rbmblocks_delta;/* superblock rbmblocks chg */ - int64_t t_rblocks_delta;/* superblock rblocks change */ - int64_t t_rextents_delta;/* superblocks rextents chg */ - int64_t t_rextslog_delta;/* superblocks rextslog chg */ - unsigned int t_items_free; /* log item descs free */ - xfs_log_item_chunk_t t_items; /* first log item desc chunk */ - xfs_trans_header_t t_header; /* header for in-log trans */ - unsigned int t_busy_free; /* busy descs free */ - xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */ - unsigned long t_pflags; /* saved process flags state */ -} xfs_trans_t; - -#endif /* __KERNEL__ */ - - #define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */ /* * Values for t_flags. 
@@ -906,6 +750,156 @@ typedef struct xfs_trans { #define XFS_DQUOT_REF 1 #ifdef __KERNEL__ + +struct xfs_buf; +struct xfs_buftarg; +struct xfs_efd_log_item; +struct xfs_efi_log_item; +struct xfs_inode; +struct xfs_item_ops; +struct xfs_log_iovec; +struct xfs_log_item_desc; +struct xfs_mount; +struct xfs_trans; +struct xfs_dquot_acct; + +typedef struct xfs_log_item { + struct list_head li_ail; /* AIL pointers */ + xfs_lsn_t li_lsn; /* last on-disk lsn */ + struct xfs_log_item_desc *li_desc; /* ptr to current desc*/ + struct xfs_mount *li_mountp; /* ptr to fs mount */ + uint li_type; /* item type */ + uint li_flags; /* misc flags */ + struct xfs_log_item *li_bio_list; /* buffer item list */ + void (*li_cb)(struct xfs_buf *, + struct xfs_log_item *); + /* buffer item iodone */ + /* callback func */ + struct xfs_item_ops *li_ops; /* function list */ +} xfs_log_item_t; + +#define XFS_LI_IN_AIL 0x1 +#define XFS_LI_ABORTED 0x2 + +typedef struct xfs_item_ops { + uint (*iop_size)(xfs_log_item_t *); + void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); + void (*iop_pin)(xfs_log_item_t *); + void (*iop_unpin)(xfs_log_item_t *, int); + void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *); + uint (*iop_trylock)(xfs_log_item_t *); + void (*iop_unlock)(xfs_log_item_t *); + xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); + void (*iop_push)(xfs_log_item_t *); + void (*iop_pushbuf)(xfs_log_item_t *); + void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); +} xfs_item_ops_t; + +#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip) +#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) +#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) +#define IOP_UNPIN(ip, flags) (*(ip)->li_ops->iop_unpin)(ip, flags) +#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp) +#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) +#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) +#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn) +#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip) +#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip) +#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn) + +/* + * Return values for the IOP_TRYLOCK() routines. + */ +#define XFS_ITEM_SUCCESS 0 +#define XFS_ITEM_PINNED 1 +#define XFS_ITEM_LOCKED 2 +#define XFS_ITEM_FLUSHING 3 +#define XFS_ITEM_PUSHBUF 4 + +/* + * This structure is used to maintain a list of block ranges that have been + * freed in the transaction. The ranges are listed in the perag[] busy list + * between when they're freed and the transaction is committed to disk. 
+ */ + +typedef struct xfs_log_busy_slot { + xfs_agnumber_t lbc_ag; + ushort lbc_idx; /* index in perag.busy[] */ +} xfs_log_busy_slot_t; + +#define XFS_LBC_NUM_SLOTS 31 +typedef struct xfs_log_busy_chunk { + struct xfs_log_busy_chunk *lbc_next; + uint lbc_free; /* free slots bitmask */ + ushort lbc_unused; /* first unused */ + xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS]; +} xfs_log_busy_chunk_t; + +#define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1) +#define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1) + +#define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK) +#define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot))) +#define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)])) +#define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK) +#define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot))) + +/* + * This is the type of function which can be given to xfs_trans_callback() + * to be called upon the transaction's commit to disk. + */ +typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *); + +/* + * This is the structure maintained for every active transaction. + */ +typedef struct xfs_trans { + unsigned int t_magic; /* magic number */ + xfs_log_callback_t t_logcb; /* log callback struct */ + unsigned int t_type; /* transaction type */ + unsigned int t_log_res; /* amt of log space resvd */ + unsigned int t_log_count; /* count for perm log res */ + unsigned int t_blk_res; /* # of blocks resvd */ + unsigned int t_blk_res_used; /* # of resvd blocks used */ + unsigned int t_rtx_res; /* # of rt extents resvd */ + unsigned int t_rtx_res_used; /* # of resvd rt extents used */ + xfs_log_ticket_t t_ticket; /* log mgr ticket */ + xfs_lsn_t t_lsn; /* log seq num of start of + * transaction. */ + xfs_lsn_t t_commit_lsn; /* log seq num of end of + * transaction. */ + struct xfs_mount *t_mountp; /* ptr to fs mount struct */ + struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ + xfs_trans_callback_t t_callback; /* transaction callback */ + void *t_callarg; /* callback arg */ + unsigned int t_flags; /* misc flags */ + int64_t t_icount_delta; /* superblock icount change */ + int64_t t_ifree_delta; /* superblock ifree change */ + int64_t t_fdblocks_delta; /* superblock fdblocks chg */ + int64_t t_res_fdblocks_delta; /* on-disk only chg */ + int64_t t_frextents_delta;/* superblock freextents chg*/ + int64_t t_res_frextents_delta; /* on-disk only chg */ +#ifdef DEBUG + int64_t t_ag_freeblks_delta; /* debugging counter */ + int64_t t_ag_flist_delta; /* debugging counter */ + int64_t t_ag_btree_delta; /* debugging counter */ +#endif + int64_t t_dblocks_delta;/* superblock dblocks change */ + int64_t t_agcount_delta;/* superblock agcount change */ + int64_t t_imaxpct_delta;/* superblock imaxpct change */ + int64_t t_rextsize_delta;/* superblock rextsize chg */ + int64_t t_rbmblocks_delta;/* superblock rbmblocks chg */ + int64_t t_rblocks_delta;/* superblock rblocks change */ + int64_t t_rextents_delta;/* superblocks rextents chg */ + int64_t t_rextslog_delta;/* superblocks rextslog chg */ + unsigned int t_items_free; /* log item descs free */ + xfs_log_item_chunk_t t_items; /* first log item desc chunk */ + xfs_trans_header_t t_header; /* header for in-log trans */ + unsigned int t_busy_free; /* busy descs free */ + xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */ + unsigned long t_pflags; /* saved process flags state */ +} xfs_trans_t; + /* * XFS transaction mechanism exported interfaces that are * actually macros. 
@@ -928,7 +922,6 @@ typedef struct xfs_trans { /* * XFS transaction mechanism exported interfaces. */ -void xfs_trans_init(struct xfs_mount *); xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint); xfs_trans_t *xfs_trans_dup(xfs_trans_t *); @@ -975,7 +968,6 @@ int _xfs_trans_commit(xfs_trans_t *, int *); #define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) void xfs_trans_cancel(xfs_trans_t *, int); -int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); int xfs_trans_ail_init(struct xfs_mount *); void xfs_trans_ail_destroy(struct xfs_mount *); void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); @@ -990,4 +982,7 @@ extern kmem_zone_t *xfs_trans_zone; #endif /* __KERNEL__ */ +void xfs_trans_init(struct xfs_mount *); +int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); + #endif /* __XFS_TRANS_H__ */ -- cgit v0.10.2 From 89b2839319cb0c0364d55dc6fd6d3838e864ab54 Mon Sep 17 00:00:00 2001 From: Barry Naujok Date: Thu, 30 Oct 2008 17:05:49 +1100 Subject: [XFS] Check agf_btreeblks is valid when reading in the AGF SGI-PV: 987683 SGI-Modid: xfs-linux-melb:xfs-kern:32232a Signed-off-by: Barry Naujok Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 54fa69e..0a2a872 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -2272,6 +2272,9 @@ xfs_alloc_read_agf( be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp); + if (xfs_sb_version_haslazysbcount(&mp->m_sb)) + agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <= + be32_to_cpu(agf->agf_length); if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, XFS_RANDOM_ALLOC_READ_AGF))) { XFS_CORRUPTION_ERROR("xfs_alloc_read_agf", @@ -2297,6 +2300,7 @@ xfs_alloc_read_agf( #ifdef DEBUG else if (!XFS_FORCED_SHUTDOWN(mp)) { ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); + ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] == -- cgit v0.10.2 From 34519daae6778d129d56688f75ade27f6011bac9 Mon Sep 17 00:00:00 2001 From: Barry Naujok Date: Thu, 30 Oct 2008 17:05:58 +1100 Subject: [XFS] Show buffer address with debug hexdump on corruption SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32233a Signed-off-by: Barry Naujok Signed-off-by: Eric Sandeen Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c index c27abef..6361042 100644 --- a/fs/xfs/support/debug.c +++ b/fs/xfs/support/debug.c @@ -84,5 +84,5 @@ assfail(char *expr, char *file, int line) void xfs_hex_dump(void *p, int length) { - print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1); + print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1); } -- cgit v0.10.2 From fe4fa4b8e463fa5848ef9e86ed75d27501d0da1e Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:06:08 +1100 Subject: [XFS] move sync code to its own file The sync code in XFS is spread around several files. While it used to make sense to have such a distribution, the code is about to be cleaned up and so centralising it in one spot as the first step makes sense. 
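The moved code dispatches on a bitmask of SYNC_* flags throughout (for example flags & (SYNC_ATTR|SYNC_DELWRI) and the SYNC_BDFLUSH/SYNC_WAIT tests in xfs_syncsub() below). A minimal stand-alone sketch of that flag-dispatch pattern, using made-up flag values rather than the real definitions from the XFS headers:

#include <stdio.h>

/* Illustrative values only; the real SYNC_* flags are defined elsewhere. */
#define SYNC_ATTR       0x0001
#define SYNC_DELWRI     0x0002
#define SYNC_WAIT       0x0004
#define SYNC_BDFLUSH    0x0008

static const char *sync_mode(int flags)
{
        if (!(flags & (SYNC_ATTR | SYNC_DELWRI)))
                return "force the log only";
        if (flags & SYNC_BDFLUSH)
                return "reclaim pass, do not sleep";
        return (flags & SYNC_WAIT) ? "flush inodes synchronously"
                                   : "flush inodes delwri/async";
}

int main(void)
{
        printf("%s\n", sync_mode(SYNC_DELWRI | SYNC_WAIT));
        printf("%s\n", sync_mode(SYNC_BDFLUSH | SYNC_ATTR));
        printf("%s\n", sync_mode(0));
        return 0;
}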
SGI-PV: 988139 SGI-Modid: xfs-linux-melb:xfs-kern:32282a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 75b2be7..51b87de 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -107,6 +107,7 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \ xfs_iops.o \ xfs_lrw.o \ xfs_super.o \ + xfs_sync.o \ xfs_vnode.o \ xfs_xattr.o) diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 0c71c21..727d0e4 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -59,6 +59,7 @@ #include "xfs_extfree_item.h" #include "xfs_mru_cache.h" #include "xfs_inode_item.h" +#include "xfs_sync.h" #include #include diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c new file mode 100644 index 0000000..c765eb2a --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -0,0 +1,605 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_dir2_sf.h" +#include "xfs_attr_sf.h" +#include "xfs_inode.h" +#include "xfs_dinode.h" +#include "xfs_error.h" +#include "xfs_mru_cache.h" +#include "xfs_filestream.h" +#include "xfs_vnodeops.h" +#include "xfs_utils.h" +#include "xfs_buf_item.h" +#include "xfs_inode_item.h" +#include "xfs_rw.h" + +/* + * xfs_sync flushes any pending I/O to file system vfsp. + * + * This routine is called by vfs_sync() to make sure that things make it + * out to disk eventually, on sync() system calls to flush out everything, + * and when the file system is unmounted. For the vfs_sync() case, all + * we really need to do is sync out the log to make all of our meta-data + * updates permanent (except for timestamps). For calls from pflushd(), + * dirty pages are kept moving by calling pdflush() on the inodes + * containing them. We also flush the inodes that we can lock without + * sleeping and the superblock if we can lock it without sleeping from + * vfs_sync() so that items at the tail of the log are always moving out. + * + * Flags: + * SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want + * to sleep if we can help it. All we really need + * to do is ensure that the log is synced at least + * periodically. We also push the inodes and + * superblock if we can lock them without sleeping + * and they are not pinned. + * SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not + * set, then we really want to lock each inode and flush + * it. 
+ * SYNC_WAIT - All the flushes that take place in this call should + * be synchronous. + * SYNC_DELWRI - This tells us to push dirty pages associated with + * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to + * determine if they should be flushed sync, async, or + * delwri. + * SYNC_CLOSE - This flag is passed when the system is being + * unmounted. We should sync and invalidate everything. + * SYNC_FSDATA - This indicates that the caller would like to make + * sure the superblock is safe on disk. We can ensure + * this by simply making sure the log gets flushed + * if SYNC_BDFLUSH is set, and by actually writing it + * out otherwise. + * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete + * before we return (including direct I/O). Forms the drain + * side of the write barrier needed to safely quiesce the + * filesystem. + * + */ +int +xfs_sync( + xfs_mount_t *mp, + int flags) +{ + int error; + + /* + * Get the Quota Manager to flush the dquots. + * + * If XFS quota support is not enabled or this filesystem + * instance does not use quotas XFS_QM_DQSYNC will always + * return zero. + */ + error = XFS_QM_DQSYNC(mp, flags); + if (error) { + /* + * If we got an IO error, we will be shutting down. + * So, there's nothing more for us to do here. + */ + ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp)); + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(error); + } + + if (flags & SYNC_IOWAIT) + xfs_filestream_flush(mp); + + return xfs_syncsub(mp, flags, NULL); +} + +/* + * xfs sync routine for internal use + * + * This routine supports all of the flags defined for the generic vfs_sync + * interface as explained above under xfs_sync. + * + */ +int +xfs_sync_inodes( + xfs_mount_t *mp, + int flags, + int *bypassed) +{ + xfs_inode_t *ip = NULL; + struct inode *vp = NULL; + int error; + int last_error; + uint64_t fflag; + uint lock_flags; + uint base_lock_flags; + boolean_t mount_locked; + boolean_t vnode_refed; + int preempt; + xfs_iptr_t *ipointer; +#ifdef DEBUG + boolean_t ipointer_in = B_FALSE; + +#define IPOINTER_SET ipointer_in = B_TRUE +#define IPOINTER_CLR ipointer_in = B_FALSE +#else +#define IPOINTER_SET +#define IPOINTER_CLR +#endif + + +/* Insert a marker record into the inode list after inode ip. The list + * must be locked when this is called. After the call the list will no + * longer be locked. + */ +#define IPOINTER_INSERT(ip, mp) { \ + ASSERT(ipointer_in == B_FALSE); \ + ipointer->ip_mnext = ip->i_mnext; \ + ipointer->ip_mprev = ip; \ + ip->i_mnext = (xfs_inode_t *)ipointer; \ + ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \ + preempt = 0; \ + XFS_MOUNT_IUNLOCK(mp); \ + mount_locked = B_FALSE; \ + IPOINTER_SET; \ + } + +/* Remove the marker from the inode list. If the marker was the only item + * in the list then there are no remaining inodes and we should zero out + * the whole list. If we are the current head of the list then move the head + * past us. 
+ */ +#define IPOINTER_REMOVE(ip, mp) { \ + ASSERT(ipointer_in == B_TRUE); \ + if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \ + ip = ipointer->ip_mnext; \ + ip->i_mprev = ipointer->ip_mprev; \ + ipointer->ip_mprev->i_mnext = ip; \ + if (mp->m_inodes == (xfs_inode_t *)ipointer) { \ + mp->m_inodes = ip; \ + } \ + } else { \ + ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \ + mp->m_inodes = NULL; \ + ip = NULL; \ + } \ + IPOINTER_CLR; \ + } + +#define XFS_PREEMPT_MASK 0x7f + + ASSERT(!(flags & SYNC_BDFLUSH)); + + if (bypassed) + *bypassed = 0; + if (mp->m_flags & XFS_MOUNT_RDONLY) + return 0; + error = 0; + last_error = 0; + preempt = 0; + + /* Allocate a reference marker */ + ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP); + + fflag = XFS_B_ASYNC; /* default is don't wait */ + if (flags & SYNC_DELWRI) + fflag = XFS_B_DELWRI; + if (flags & SYNC_WAIT) + fflag = 0; /* synchronous overrides all */ + + base_lock_flags = XFS_ILOCK_SHARED; + if (flags & (SYNC_DELWRI | SYNC_CLOSE)) { + /* + * We need the I/O lock if we're going to call any of + * the flush/inval routines. + */ + base_lock_flags |= XFS_IOLOCK_SHARED; + } + + XFS_MOUNT_ILOCK(mp); + + ip = mp->m_inodes; + + mount_locked = B_TRUE; + vnode_refed = B_FALSE; + + IPOINTER_CLR; + + do { + ASSERT(ipointer_in == B_FALSE); + ASSERT(vnode_refed == B_FALSE); + + lock_flags = base_lock_flags; + + /* + * There were no inodes in the list, just break out + * of the loop. + */ + if (ip == NULL) { + break; + } + + /* + * We found another sync thread marker - skip it + */ + if (ip->i_mount == NULL) { + ip = ip->i_mnext; + continue; + } + + vp = VFS_I(ip); + + /* + * If the vnode is gone then this is being torn down, + * call reclaim if it is flushed, else let regular flush + * code deal with it later in the loop. + */ + + if (vp == NULL) { + /* Skip ones already in reclaim */ + if (ip->i_flags & XFS_IRECLAIM) { + ip = ip->i_mnext; + continue; + } + if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { + ip = ip->i_mnext; + } else if ((xfs_ipincount(ip) == 0) && + xfs_iflock_nowait(ip)) { + IPOINTER_INSERT(ip, mp); + + xfs_finish_reclaim(ip, 1, + XFS_IFLUSH_DELWRI_ELSE_ASYNC); + + XFS_MOUNT_ILOCK(mp); + mount_locked = B_TRUE; + IPOINTER_REMOVE(ip, mp); + } else { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + ip = ip->i_mnext; + } + continue; + } + + if (VN_BAD(vp)) { + ip = ip->i_mnext; + continue; + } + + if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) { + XFS_MOUNT_IUNLOCK(mp); + kmem_free(ipointer); + return 0; + } + + /* + * Try to lock without sleeping. We're out of order with + * the inode list lock here, so if we fail we need to drop + * the mount lock and try again. If we're called from + * bdflush() here, then don't bother. + * + * The inode lock here actually coordinates with the + * almost spurious inode lock in xfs_ireclaim() to prevent + * the vnode we handle here without a reference from + * being freed while we reference it. If we lock the inode + * while it's on the mount list here, then the spurious inode + * lock in xfs_ireclaim() after the inode is pulled from + * the mount list will sleep until we release it here. + * This keeps the vnode from being freed while we reference + * it. 
+ */ + if (xfs_ilock_nowait(ip, lock_flags) == 0) { + if (vp == NULL) { + ip = ip->i_mnext; + continue; + } + + vp = vn_grab(vp); + if (vp == NULL) { + ip = ip->i_mnext; + continue; + } + + IPOINTER_INSERT(ip, mp); + xfs_ilock(ip, lock_flags); + + ASSERT(vp == VFS_I(ip)); + ASSERT(ip->i_mount == mp); + + vnode_refed = B_TRUE; + } + + /* From here on in the loop we may have a marker record + * in the inode list. + */ + + /* + * If we have to flush data or wait for I/O completion + * we need to drop the ilock that we currently hold. + * If we need to drop the lock, insert a marker if we + * have not already done so. + */ + if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) || + ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) { + if (mount_locked) { + IPOINTER_INSERT(ip, mp); + } + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + if (flags & SYNC_CLOSE) { + /* Shutdown case. Flush and invalidate. */ + if (XFS_FORCED_SHUTDOWN(mp)) + xfs_tosspages(ip, 0, -1, + FI_REMAPF); + else + error = xfs_flushinval_pages(ip, + 0, -1, FI_REMAPF); + } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) { + error = xfs_flush_pages(ip, 0, + -1, fflag, FI_NONE); + } + + /* + * When freezing, we need to wait ensure all I/O (including direct + * I/O) is complete to ensure no further data modification can take + * place after this point + */ + if (flags & SYNC_IOWAIT) + vn_iowait(ip); + + xfs_ilock(ip, XFS_ILOCK_SHARED); + } + + if ((flags & SYNC_ATTR) && + (ip->i_update_core || + (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) { + if (mount_locked) + IPOINTER_INSERT(ip, mp); + + if (flags & SYNC_WAIT) { + xfs_iflock(ip); + error = xfs_iflush(ip, XFS_IFLUSH_SYNC); + + /* + * If we can't acquire the flush lock, then the inode + * is already being flushed so don't bother waiting. + * + * If we can lock it then do a delwri flush so we can + * combine multiple inode flushes in each disk write. + */ + } else if (xfs_iflock_nowait(ip)) { + error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); + } else if (bypassed) { + (*bypassed)++; + } + } + + if (lock_flags != 0) { + xfs_iunlock(ip, lock_flags); + } + + if (vnode_refed) { + /* + * If we had to take a reference on the vnode + * above, then wait until after we've unlocked + * the inode to release the reference. This is + * because we can be already holding the inode + * lock when IRELE() calls xfs_inactive(). + * + * Make sure to drop the mount lock before calling + * IRELE() so that we don't trip over ourselves if + * we have to go for the mount lock again in the + * inactive code. + */ + if (mount_locked) { + IPOINTER_INSERT(ip, mp); + } + + IRELE(ip); + + vnode_refed = B_FALSE; + } + + if (error) { + last_error = error; + } + + /* + * bail out if the filesystem is corrupted. + */ + if (error == EFSCORRUPTED) { + if (!mount_locked) { + XFS_MOUNT_ILOCK(mp); + IPOINTER_REMOVE(ip, mp); + } + XFS_MOUNT_IUNLOCK(mp); + ASSERT(ipointer_in == B_FALSE); + kmem_free(ipointer); + return XFS_ERROR(error); + } + + /* Let other threads have a chance at the mount lock + * if we have looped many times without dropping the + * lock. 
+ */ + if ((++preempt & XFS_PREEMPT_MASK) == 0) { + if (mount_locked) { + IPOINTER_INSERT(ip, mp); + } + } + + if (mount_locked == B_FALSE) { + XFS_MOUNT_ILOCK(mp); + mount_locked = B_TRUE; + IPOINTER_REMOVE(ip, mp); + continue; + } + + ASSERT(ipointer_in == B_FALSE); + ip = ip->i_mnext; + + } while (ip != mp->m_inodes); + + XFS_MOUNT_IUNLOCK(mp); + + ASSERT(ipointer_in == B_FALSE); + + kmem_free(ipointer); + return XFS_ERROR(last_error); +} + +/* + * xfs sync routine for internal use + * + * This routine supports all of the flags defined for the generic vfs_sync + * interface as explained above under xfs_sync. + * + */ +int +xfs_syncsub( + xfs_mount_t *mp, + int flags, + int *bypassed) +{ + int error = 0; + int last_error = 0; + uint log_flags = XFS_LOG_FORCE; + xfs_buf_t *bp; + xfs_buf_log_item_t *bip; + + /* + * Sync out the log. This ensures that the log is periodically + * flushed even if there is not enough activity to fill it up. + */ + if (flags & SYNC_WAIT) + log_flags |= XFS_LOG_SYNC; + + xfs_log_force(mp, (xfs_lsn_t)0, log_flags); + + if (flags & (SYNC_ATTR|SYNC_DELWRI)) { + if (flags & SYNC_BDFLUSH) + xfs_finish_reclaim_all(mp, 1); + else + error = xfs_sync_inodes(mp, flags, bypassed); + } + + /* + * Flushing out dirty data above probably generated more + * log activity, so if this isn't vfs_sync() then flush + * the log again. + */ + if (flags & SYNC_DELWRI) { + xfs_log_force(mp, (xfs_lsn_t)0, log_flags); + } + + if (flags & SYNC_FSDATA) { + /* + * If this is vfs_sync() then only sync the superblock + * if we can lock it without sleeping and it is not pinned. + */ + if (flags & SYNC_BDFLUSH) { + bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); + if (bp != NULL) { + bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); + if ((bip != NULL) && + xfs_buf_item_dirty(bip)) { + if (!(XFS_BUF_ISPINNED(bp))) { + XFS_BUF_ASYNC(bp); + error = xfs_bwrite(mp, bp); + } else { + xfs_buf_relse(bp); + } + } else { + xfs_buf_relse(bp); + } + } + } else { + bp = xfs_getsb(mp, 0); + /* + * If the buffer is pinned then push on the log so + * we won't get stuck waiting in the write for + * someone, maybe ourselves, to flush the log. + * Even though we just pushed the log above, we + * did not have the superblock buffer locked at + * that point so it can become pinned in between + * there and here. + */ + if (XFS_BUF_ISPINNED(bp)) + xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + if (flags & SYNC_WAIT) + XFS_BUF_UNASYNC(bp); + else + XFS_BUF_ASYNC(bp); + error = xfs_bwrite(mp, bp); + } + if (error) { + last_error = error; + } + } + + /* + * Now check to see if the log needs a "dummy" transaction. + */ + if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { + xfs_trans_t *tp; + xfs_inode_t *ip; + + /* + * Put a dummy transaction in the log to tell + * recovery that all others are OK. + */ + tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); + if ((error = xfs_trans_reserve(tp, 0, + XFS_ICHANGE_LOG_RES(mp), + 0, 0, 0))) { + xfs_trans_cancel(tp, 0); + return error; + } + + ip = mp->m_rootip; + xfs_ilock(ip, XFS_ILOCK_EXCL); + + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + error = xfs_trans_commit(tp, 0); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_log_force(mp, (xfs_lsn_t)0, log_flags); + } + + /* + * When shutting down, we need to insure that the AIL is pushed + * to disk or the filesystem can appear corrupt from the PROM. 
+ */ + if ((flags & (SYNC_CLOSE|SYNC_WAIT)) == (SYNC_CLOSE|SYNC_WAIT)) { + XFS_bflush(mp->m_ddev_targp); + if (mp->m_rtdev_targp) { + XFS_bflush(mp->m_rtdev_targp); + } + } + + return XFS_ERROR(last_error); +} diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h new file mode 100644 index 0000000..f4c3b1e --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -0,0 +1,7 @@ +#ifndef XFS_SYNC_H +#define XFS_SYNC_H 1 + +int xfs_sync(struct xfs_mount *mp, int flags); +int xfs_syncsub(struct xfs_mount *mp, int flags, int *bypassed); + +#endif diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 439dd39..01e274b 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -56,6 +56,7 @@ #include "xfs_vnodeops.h" #include "xfs_vfsops.h" #include "xfs_utils.h" +#include "xfs_sync.h" STATIC void @@ -196,562 +197,3 @@ fscorrupt_out2: return XFS_ERROR(EFSCORRUPTED); } -/* - * xfs_sync flushes any pending I/O to file system vfsp. - * - * This routine is called by vfs_sync() to make sure that things make it - * out to disk eventually, on sync() system calls to flush out everything, - * and when the file system is unmounted. For the vfs_sync() case, all - * we really need to do is sync out the log to make all of our meta-data - * updates permanent (except for timestamps). For calls from pflushd(), - * dirty pages are kept moving by calling pdflush() on the inodes - * containing them. We also flush the inodes that we can lock without - * sleeping and the superblock if we can lock it without sleeping from - * vfs_sync() so that items at the tail of the log are always moving out. - * - * Flags: - * SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want - * to sleep if we can help it. All we really need - * to do is ensure that the log is synced at least - * periodically. We also push the inodes and - * superblock if we can lock them without sleeping - * and they are not pinned. - * SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not - * set, then we really want to lock each inode and flush - * it. - * SYNC_WAIT - All the flushes that take place in this call should - * be synchronous. - * SYNC_DELWRI - This tells us to push dirty pages associated with - * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to - * determine if they should be flushed sync, async, or - * delwri. - * SYNC_CLOSE - This flag is passed when the system is being - * unmounted. We should sync and invalidate everything. - * SYNC_FSDATA - This indicates that the caller would like to make - * sure the superblock is safe on disk. We can ensure - * this by simply making sure the log gets flushed - * if SYNC_BDFLUSH is set, and by actually writing it - * out otherwise. - * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete - * before we return (including direct I/O). Forms the drain - * side of the write barrier needed to safely quiesce the - * filesystem. - * - */ -int -xfs_sync( - xfs_mount_t *mp, - int flags) -{ - int error; - - /* - * Get the Quota Manager to flush the dquots. - * - * If XFS quota support is not enabled or this filesystem - * instance does not use quotas XFS_QM_DQSYNC will always - * return zero. - */ - error = XFS_QM_DQSYNC(mp, flags); - if (error) { - /* - * If we got an IO error, we will be shutting down. - * So, there's nothing more for us to do here. 
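/*
 * An illustrative sketch, not part of this patch: the flag
 * combinations documented in the comment block above correspond to
 * callers that appear elsewhere in this series; the freeze example is
 * an assumption based on the SYNC_DATA_QUIESCE mask added later on.
 */
	/* periodic writeback from xfssyncd: never block, just keep the
	 * log, dirty inodes and superblock moving */
	xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR);

	/* unmount path (xfs_fs_put_super): flush inodes and their data */
	xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI);

	/* data half of a freeze: flush everything and wait for all I/O,
	 * including direct I/O, to drain */
	xfs_sync(mp, SYNC_DATA_QUIESCE);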
- */ - ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp)); - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(error); - } - - if (flags & SYNC_IOWAIT) - xfs_filestream_flush(mp); - - return xfs_syncsub(mp, flags, NULL); -} - -/* - * xfs sync routine for internal use - * - * This routine supports all of the flags defined for the generic vfs_sync - * interface as explained above under xfs_sync. - * - */ -int -xfs_sync_inodes( - xfs_mount_t *mp, - int flags, - int *bypassed) -{ - xfs_inode_t *ip = NULL; - struct inode *vp = NULL; - int error; - int last_error; - uint64_t fflag; - uint lock_flags; - uint base_lock_flags; - boolean_t mount_locked; - boolean_t vnode_refed; - int preempt; - xfs_iptr_t *ipointer; -#ifdef DEBUG - boolean_t ipointer_in = B_FALSE; - -#define IPOINTER_SET ipointer_in = B_TRUE -#define IPOINTER_CLR ipointer_in = B_FALSE -#else -#define IPOINTER_SET -#define IPOINTER_CLR -#endif - - -/* Insert a marker record into the inode list after inode ip. The list - * must be locked when this is called. After the call the list will no - * longer be locked. - */ -#define IPOINTER_INSERT(ip, mp) { \ - ASSERT(ipointer_in == B_FALSE); \ - ipointer->ip_mnext = ip->i_mnext; \ - ipointer->ip_mprev = ip; \ - ip->i_mnext = (xfs_inode_t *)ipointer; \ - ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \ - preempt = 0; \ - XFS_MOUNT_IUNLOCK(mp); \ - mount_locked = B_FALSE; \ - IPOINTER_SET; \ - } - -/* Remove the marker from the inode list. If the marker was the only item - * in the list then there are no remaining inodes and we should zero out - * the whole list. If we are the current head of the list then move the head - * past us. - */ -#define IPOINTER_REMOVE(ip, mp) { \ - ASSERT(ipointer_in == B_TRUE); \ - if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \ - ip = ipointer->ip_mnext; \ - ip->i_mprev = ipointer->ip_mprev; \ - ipointer->ip_mprev->i_mnext = ip; \ - if (mp->m_inodes == (xfs_inode_t *)ipointer) { \ - mp->m_inodes = ip; \ - } \ - } else { \ - ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \ - mp->m_inodes = NULL; \ - ip = NULL; \ - } \ - IPOINTER_CLR; \ - } - -#define XFS_PREEMPT_MASK 0x7f - - ASSERT(!(flags & SYNC_BDFLUSH)); - - if (bypassed) - *bypassed = 0; - if (mp->m_flags & XFS_MOUNT_RDONLY) - return 0; - error = 0; - last_error = 0; - preempt = 0; - - /* Allocate a reference marker */ - ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP); - - fflag = XFS_B_ASYNC; /* default is don't wait */ - if (flags & SYNC_DELWRI) - fflag = XFS_B_DELWRI; - if (flags & SYNC_WAIT) - fflag = 0; /* synchronous overrides all */ - - base_lock_flags = XFS_ILOCK_SHARED; - if (flags & (SYNC_DELWRI | SYNC_CLOSE)) { - /* - * We need the I/O lock if we're going to call any of - * the flush/inval routines. - */ - base_lock_flags |= XFS_IOLOCK_SHARED; - } - - XFS_MOUNT_ILOCK(mp); - - ip = mp->m_inodes; - - mount_locked = B_TRUE; - vnode_refed = B_FALSE; - - IPOINTER_CLR; - - do { - ASSERT(ipointer_in == B_FALSE); - ASSERT(vnode_refed == B_FALSE); - - lock_flags = base_lock_flags; - - /* - * There were no inodes in the list, just break out - * of the loop. - */ - if (ip == NULL) { - break; - } - - /* - * We found another sync thread marker - skip it - */ - if (ip->i_mount == NULL) { - ip = ip->i_mnext; - continue; - } - - vp = VFS_I(ip); - - /* - * If the vnode is gone then this is being torn down, - * call reclaim if it is flushed, else let regular flush - * code deal with it later in the loop. 
- */ - - if (vp == NULL) { - /* Skip ones already in reclaim */ - if (ip->i_flags & XFS_IRECLAIM) { - ip = ip->i_mnext; - continue; - } - if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { - ip = ip->i_mnext; - } else if ((xfs_ipincount(ip) == 0) && - xfs_iflock_nowait(ip)) { - IPOINTER_INSERT(ip, mp); - - xfs_finish_reclaim(ip, 1, - XFS_IFLUSH_DELWRI_ELSE_ASYNC); - - XFS_MOUNT_ILOCK(mp); - mount_locked = B_TRUE; - IPOINTER_REMOVE(ip, mp); - } else { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - ip = ip->i_mnext; - } - continue; - } - - if (VN_BAD(vp)) { - ip = ip->i_mnext; - continue; - } - - if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) { - XFS_MOUNT_IUNLOCK(mp); - kmem_free(ipointer); - return 0; - } - - /* - * Try to lock without sleeping. We're out of order with - * the inode list lock here, so if we fail we need to drop - * the mount lock and try again. If we're called from - * bdflush() here, then don't bother. - * - * The inode lock here actually coordinates with the - * almost spurious inode lock in xfs_ireclaim() to prevent - * the vnode we handle here without a reference from - * being freed while we reference it. If we lock the inode - * while it's on the mount list here, then the spurious inode - * lock in xfs_ireclaim() after the inode is pulled from - * the mount list will sleep until we release it here. - * This keeps the vnode from being freed while we reference - * it. - */ - if (xfs_ilock_nowait(ip, lock_flags) == 0) { - if (vp == NULL) { - ip = ip->i_mnext; - continue; - } - - vp = vn_grab(vp); - if (vp == NULL) { - ip = ip->i_mnext; - continue; - } - - IPOINTER_INSERT(ip, mp); - xfs_ilock(ip, lock_flags); - - ASSERT(vp == VFS_I(ip)); - ASSERT(ip->i_mount == mp); - - vnode_refed = B_TRUE; - } - - /* From here on in the loop we may have a marker record - * in the inode list. - */ - - /* - * If we have to flush data or wait for I/O completion - * we need to drop the ilock that we currently hold. - * If we need to drop the lock, insert a marker if we - * have not already done so. - */ - if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) || - ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) { - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - if (flags & SYNC_CLOSE) { - /* Shutdown case. Flush and invalidate. */ - if (XFS_FORCED_SHUTDOWN(mp)) - xfs_tosspages(ip, 0, -1, - FI_REMAPF); - else - error = xfs_flushinval_pages(ip, - 0, -1, FI_REMAPF); - } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) { - error = xfs_flush_pages(ip, 0, - -1, fflag, FI_NONE); - } - - /* - * When freezing, we need to wait ensure all I/O (including direct - * I/O) is complete to ensure no further data modification can take - * place after this point - */ - if (flags & SYNC_IOWAIT) - vn_iowait(ip); - - xfs_ilock(ip, XFS_ILOCK_SHARED); - } - - if ((flags & SYNC_ATTR) && - (ip->i_update_core || - (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) { - if (mount_locked) - IPOINTER_INSERT(ip, mp); - - if (flags & SYNC_WAIT) { - xfs_iflock(ip); - error = xfs_iflush(ip, XFS_IFLUSH_SYNC); - - /* - * If we can't acquire the flush lock, then the inode - * is already being flushed so don't bother waiting. - * - * If we can lock it then do a delwri flush so we can - * combine multiple inode flushes in each disk write. 
- */ - } else if (xfs_iflock_nowait(ip)) { - error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); - } else if (bypassed) { - (*bypassed)++; - } - } - - if (lock_flags != 0) { - xfs_iunlock(ip, lock_flags); - } - - if (vnode_refed) { - /* - * If we had to take a reference on the vnode - * above, then wait until after we've unlocked - * the inode to release the reference. This is - * because we can be already holding the inode - * lock when IRELE() calls xfs_inactive(). - * - * Make sure to drop the mount lock before calling - * IRELE() so that we don't trip over ourselves if - * we have to go for the mount lock again in the - * inactive code. - */ - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } - - IRELE(ip); - - vnode_refed = B_FALSE; - } - - if (error) { - last_error = error; - } - - /* - * bail out if the filesystem is corrupted. - */ - if (error == EFSCORRUPTED) { - if (!mount_locked) { - XFS_MOUNT_ILOCK(mp); - IPOINTER_REMOVE(ip, mp); - } - XFS_MOUNT_IUNLOCK(mp); - ASSERT(ipointer_in == B_FALSE); - kmem_free(ipointer); - return XFS_ERROR(error); - } - - /* Let other threads have a chance at the mount lock - * if we have looped many times without dropping the - * lock. - */ - if ((++preempt & XFS_PREEMPT_MASK) == 0) { - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } - } - - if (mount_locked == B_FALSE) { - XFS_MOUNT_ILOCK(mp); - mount_locked = B_TRUE; - IPOINTER_REMOVE(ip, mp); - continue; - } - - ASSERT(ipointer_in == B_FALSE); - ip = ip->i_mnext; - - } while (ip != mp->m_inodes); - - XFS_MOUNT_IUNLOCK(mp); - - ASSERT(ipointer_in == B_FALSE); - - kmem_free(ipointer); - return XFS_ERROR(last_error); -} - -/* - * xfs sync routine for internal use - * - * This routine supports all of the flags defined for the generic vfs_sync - * interface as explained above under xfs_sync. - * - */ -int -xfs_syncsub( - xfs_mount_t *mp, - int flags, - int *bypassed) -{ - int error = 0; - int last_error = 0; - uint log_flags = XFS_LOG_FORCE; - xfs_buf_t *bp; - xfs_buf_log_item_t *bip; - - /* - * Sync out the log. This ensures that the log is periodically - * flushed even if there is not enough activity to fill it up. - */ - if (flags & SYNC_WAIT) - log_flags |= XFS_LOG_SYNC; - - xfs_log_force(mp, (xfs_lsn_t)0, log_flags); - - if (flags & (SYNC_ATTR|SYNC_DELWRI)) { - if (flags & SYNC_BDFLUSH) - xfs_finish_reclaim_all(mp, 1); - else - error = xfs_sync_inodes(mp, flags, bypassed); - } - - /* - * Flushing out dirty data above probably generated more - * log activity, so if this isn't vfs_sync() then flush - * the log again. - */ - if (flags & SYNC_DELWRI) { - xfs_log_force(mp, (xfs_lsn_t)0, log_flags); - } - - if (flags & SYNC_FSDATA) { - /* - * If this is vfs_sync() then only sync the superblock - * if we can lock it without sleeping and it is not pinned. - */ - if (flags & SYNC_BDFLUSH) { - bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); - if (bp != NULL) { - bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); - if ((bip != NULL) && - xfs_buf_item_dirty(bip)) { - if (!(XFS_BUF_ISPINNED(bp))) { - XFS_BUF_ASYNC(bp); - error = xfs_bwrite(mp, bp); - } else { - xfs_buf_relse(bp); - } - } else { - xfs_buf_relse(bp); - } - } - } else { - bp = xfs_getsb(mp, 0); - /* - * If the buffer is pinned then push on the log so - * we won't get stuck waiting in the write for - * someone, maybe ourselves, to flush the log. - * Even though we just pushed the log above, we - * did not have the superblock buffer locked at - * that point so it can become pinned in between - * there and here. 
- */ - if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); - if (flags & SYNC_WAIT) - XFS_BUF_UNASYNC(bp); - else - XFS_BUF_ASYNC(bp); - error = xfs_bwrite(mp, bp); - } - if (error) { - last_error = error; - } - } - - /* - * Now check to see if the log needs a "dummy" transaction. - */ - if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { - xfs_trans_t *tp; - xfs_inode_t *ip; - - /* - * Put a dummy transaction in the log to tell - * recovery that all others are OK. - */ - tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); - if ((error = xfs_trans_reserve(tp, 0, - XFS_ICHANGE_LOG_RES(mp), - 0, 0, 0))) { - xfs_trans_cancel(tp, 0); - return error; - } - - ip = mp->m_rootip; - xfs_ilock(ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_log_force(mp, (xfs_lsn_t)0, log_flags); - } - - /* - * When shutting down, we need to insure that the AIL is pushed - * to disk or the filesystem can appear corrupt from the PROM. - */ - if ((flags & (SYNC_CLOSE|SYNC_WAIT)) == (SYNC_CLOSE|SYNC_WAIT)) { - XFS_bflush(mp->m_ddev_targp); - if (mp->m_rtdev_targp) { - XFS_bflush(mp->m_rtdev_targp); - } - } - - return XFS_ERROR(last_error); -} diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h index a74b050..6701d0e 100644 --- a/fs/xfs/xfs_vfsops.h +++ b/fs/xfs/xfs_vfsops.h @@ -8,7 +8,6 @@ struct kstatfs; struct xfs_mount; struct xfs_mount_args; -int xfs_sync(struct xfs_mount *mp, int flags); void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, int lnnum); void xfs_attr_quiesce(struct xfs_mount *mp); -- cgit v0.10.2 From a167b17e899a930758506bbc18748078d6fd8c89 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:06:18 +1100 Subject: [XFS] move xfssyncd code to xfs_sync.c Move all the xfssyncd code to the new xfs_sync.c file. This places it closer to the actual code that it interacts with, rather than just being associated with high level VFS code. SGI-PV: 988139 SGI-Modid: xfs-linux-melb:xfs-kern:32283a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 727d0e4..767f38e 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -979,146 +979,6 @@ xfs_fs_clear_inode( ASSERT(XFS_I(inode) == NULL); } -/* - * Enqueue a work item to be picked up by the vfs xfssyncd thread. - * Doing this has two advantages: - * - It saves on stack space, which is tight in certain situations - * - It can be used (with care) as a mechanism to avoid deadlocks. - * Flushing while allocating in a full filesystem requires both. - */ -STATIC void -xfs_syncd_queue_work( - struct xfs_mount *mp, - void *data, - void (*syncer)(struct xfs_mount *, void *)) -{ - struct bhv_vfs_sync_work *work; - - work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP); - INIT_LIST_HEAD(&work->w_list); - work->w_syncer = syncer; - work->w_data = data; - work->w_mount = mp; - spin_lock(&mp->m_sync_lock); - list_add_tail(&work->w_list, &mp->m_sync_list); - spin_unlock(&mp->m_sync_lock); - wake_up_process(mp->m_sync_task); -} - -/* - * Flush delayed allocate data, attempting to free up reserved space - * from existing allocations. At this point a new allocation attempt - * has failed with ENOSPC and we are in the process of scratching our - * heads, looking about for more room... 
- */ -STATIC void -xfs_flush_inode_work( - struct xfs_mount *mp, - void *arg) -{ - struct inode *inode = arg; - filemap_flush(inode->i_mapping); - iput(inode); -} - -void -xfs_flush_inode( - xfs_inode_t *ip) -{ - struct inode *inode = VFS_I(ip); - - igrab(inode); - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); - delay(msecs_to_jiffies(500)); -} - -/* - * This is the "bigger hammer" version of xfs_flush_inode_work... - * (IOW, "If at first you don't succeed, use a Bigger Hammer"). - */ -STATIC void -xfs_flush_device_work( - struct xfs_mount *mp, - void *arg) -{ - struct inode *inode = arg; - sync_blockdev(mp->m_super->s_bdev); - iput(inode); -} - -void -xfs_flush_device( - xfs_inode_t *ip) -{ - struct inode *inode = VFS_I(ip); - - igrab(inode); - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); - delay(msecs_to_jiffies(500)); - xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); -} - -STATIC void -xfs_sync_worker( - struct xfs_mount *mp, - void *unused) -{ - int error; - - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) - error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR); - mp->m_sync_seq++; - wake_up(&mp->m_wait_single_sync_task); -} - -STATIC int -xfssyncd( - void *arg) -{ - struct xfs_mount *mp = arg; - long timeleft; - bhv_vfs_sync_work_t *work, *n; - LIST_HEAD (tmp); - - set_freezable(); - timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); - for (;;) { - timeleft = schedule_timeout_interruptible(timeleft); - /* swsusp */ - try_to_freeze(); - if (kthread_should_stop() && list_empty(&mp->m_sync_list)) - break; - - spin_lock(&mp->m_sync_lock); - /* - * We can get woken by laptop mode, to do a sync - - * that's the (only!) case where the list would be - * empty with time remaining. - */ - if (!timeleft || list_empty(&mp->m_sync_list)) { - if (!timeleft) - timeleft = xfs_syncd_centisecs * - msecs_to_jiffies(10); - INIT_LIST_HEAD(&mp->m_sync_work.w_list); - list_add_tail(&mp->m_sync_work.w_list, - &mp->m_sync_list); - } - list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list) - list_move(&work->w_list, &tmp); - spin_unlock(&mp->m_sync_lock); - - list_for_each_entry_safe(work, n, &tmp, w_list) { - (*work->w_syncer)(mp, work->w_data); - list_del(&work->w_list); - if (work == &mp->m_sync_work) - continue; - kmem_free(work); - } - } - - return 0; -} - STATIC void xfs_free_fsname( struct xfs_mount *mp) @@ -1137,8 +997,7 @@ xfs_fs_put_super( int unmount_event_flags = 0; int error; - kthread_stop(mp->m_sync_task); - + xfs_syncd_stop(mp); xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI); #ifdef HAVE_DMAPI @@ -1808,13 +1667,9 @@ xfs_fs_fill_super( goto fail_vnrele; } - mp->m_sync_work.w_syncer = xfs_sync_worker; - mp->m_sync_work.w_mount = mp; - mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); - if (IS_ERR(mp->m_sync_task)) { - error = -PTR_ERR(mp->m_sync_task); + error = xfs_syncd_init(mp); + if (error) goto fail_vnrele; - } xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index fe2ef4e..56dc48a 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -101,9 +101,6 @@ struct block_device; extern __uint64_t xfs_max_file_offset(unsigned int); -extern void xfs_flush_inode(struct xfs_inode *); -extern void xfs_flush_device(struct xfs_inode *); - extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); extern const struct export_operations xfs_export_operations; diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 
c765eb2a..a51534c 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -44,6 +44,9 @@ #include "xfs_inode_item.h" #include "xfs_rw.h" +#include +#include + /* * xfs_sync flushes any pending I/O to file system vfsp. * @@ -603,3 +606,163 @@ xfs_syncsub( return XFS_ERROR(last_error); } + +/* + * Enqueue a work item to be picked up by the vfs xfssyncd thread. + * Doing this has two advantages: + * - It saves on stack space, which is tight in certain situations + * - It can be used (with care) as a mechanism to avoid deadlocks. + * Flushing while allocating in a full filesystem requires both. + */ +STATIC void +xfs_syncd_queue_work( + struct xfs_mount *mp, + void *data, + void (*syncer)(struct xfs_mount *, void *)) +{ + struct bhv_vfs_sync_work *work; + + work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP); + INIT_LIST_HEAD(&work->w_list); + work->w_syncer = syncer; + work->w_data = data; + work->w_mount = mp; + spin_lock(&mp->m_sync_lock); + list_add_tail(&work->w_list, &mp->m_sync_list); + spin_unlock(&mp->m_sync_lock); + wake_up_process(mp->m_sync_task); +} + +/* + * Flush delayed allocate data, attempting to free up reserved space + * from existing allocations. At this point a new allocation attempt + * has failed with ENOSPC and we are in the process of scratching our + * heads, looking about for more room... + */ +STATIC void +xfs_flush_inode_work( + struct xfs_mount *mp, + void *arg) +{ + struct inode *inode = arg; + filemap_flush(inode->i_mapping); + iput(inode); +} + +void +xfs_flush_inode( + xfs_inode_t *ip) +{ + struct inode *inode = VFS_I(ip); + + igrab(inode); + xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); + delay(msecs_to_jiffies(500)); +} + +/* + * This is the "bigger hammer" version of xfs_flush_inode_work... + * (IOW, "If at first you don't succeed, use a Bigger Hammer"). + */ +STATIC void +xfs_flush_device_work( + struct xfs_mount *mp, + void *arg) +{ + struct inode *inode = arg; + sync_blockdev(mp->m_super->s_bdev); + iput(inode); +} + +void +xfs_flush_device( + xfs_inode_t *ip) +{ + struct inode *inode = VFS_I(ip); + + igrab(inode); + xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); + delay(msecs_to_jiffies(500)); + xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); +} + +STATIC void +xfs_sync_worker( + struct xfs_mount *mp, + void *unused) +{ + int error; + + if (!(mp->m_flags & XFS_MOUNT_RDONLY)) + error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR); + mp->m_sync_seq++; + wake_up(&mp->m_wait_single_sync_task); +} + +STATIC int +xfssyncd( + void *arg) +{ + struct xfs_mount *mp = arg; + long timeleft; + bhv_vfs_sync_work_t *work, *n; + LIST_HEAD (tmp); + + set_freezable(); + timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); + for (;;) { + timeleft = schedule_timeout_interruptible(timeleft); + /* swsusp */ + try_to_freeze(); + if (kthread_should_stop() && list_empty(&mp->m_sync_list)) + break; + + spin_lock(&mp->m_sync_lock); + /* + * We can get woken by laptop mode, to do a sync - + * that's the (only!) case where the list would be + * empty with time remaining. 
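/*
 * An illustrative sketch, not part of this patch: the point of
 * xfs_flush_inode() and xfs_flush_device() above is that the flush
 * runs on the xfssyncd stack rather than deep inside an allocation
 * call chain that has just hit ENOSPC.  A retrying caller might look
 * roughly like this; the retry label and counter are invented for
 * illustration only.
 */
	if (error == ENOSPC && flush_attempts == 0) {
		xfs_flush_inode(ip);	/* push this file's delalloc pages
					 * via xfssyncd, then wait ~500ms */
		flush_attempts++;
		goto retry_allocation;
	}
	if (error == ENOSPC && flush_attempts == 1) {
		xfs_flush_device(ip);	/* bigger hammer: sync the whole
					 * block device and force the log */
		flush_attempts++;
		goto retry_allocation;
	}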
+ */ + if (!timeleft || list_empty(&mp->m_sync_list)) { + if (!timeleft) + timeleft = xfs_syncd_centisecs * + msecs_to_jiffies(10); + INIT_LIST_HEAD(&mp->m_sync_work.w_list); + list_add_tail(&mp->m_sync_work.w_list, + &mp->m_sync_list); + } + list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list) + list_move(&work->w_list, &tmp); + spin_unlock(&mp->m_sync_lock); + + list_for_each_entry_safe(work, n, &tmp, w_list) { + (*work->w_syncer)(mp, work->w_data); + list_del(&work->w_list); + if (work == &mp->m_sync_work) + continue; + kmem_free(work); + } + } + + return 0; +} + +int +xfs_syncd_init( + struct xfs_mount *mp) +{ + mp->m_sync_work.w_syncer = xfs_sync_worker; + mp->m_sync_work.w_mount = mp; + mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); + if (IS_ERR(mp->m_sync_task)) + return -PTR_ERR(mp->m_sync_task); + return 0; +} + +void +xfs_syncd_stop( + struct xfs_mount *mp) +{ + kthread_stop(mp->m_sync_task); +} + diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index f4c3b1e..3746d15 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -1,7 +1,63 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ #ifndef XFS_SYNC_H #define XFS_SYNC_H 1 +struct xfs_mount; + +typedef struct bhv_vfs_sync_work { + struct list_head w_list; + struct xfs_mount *w_mount; + void *w_data; /* syncer routine argument */ + void (*w_syncer)(struct xfs_mount *, void *); +} bhv_vfs_sync_work_t; + +#define SYNC_ATTR 0x0001 /* sync attributes */ +#define SYNC_CLOSE 0x0002 /* close file system down */ +#define SYNC_DELWRI 0x0004 /* look at delayed writes */ +#define SYNC_WAIT 0x0008 /* wait for i/o to complete */ +#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */ +#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */ +#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ +#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ +#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ + +/* + * When remounting a filesystem read-only or freezing the filesystem, + * we have two phases to execute. This first phase is syncing the data + * before we quiesce the fielsystem, and the second is flushing all the + * inodes out after we've waited for all the transactions created by + * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE + * to ensure that the inodes are written to their location on disk + * rather than just existing in transactions in the log. This means + * after a quiesce there is no log replay required to write the inodes + * to disk (this is the main difference between a sync and a quiesce). 
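/*
 * An illustrative sketch, not part of this patch: driving the two
 * phases described above with the two masks defined immediately
 * below.  The real freeze/remount call sites are not part of this
 * series, so treat this as a sketch of intent rather than real code.
 */
	/* Phase 1: write out all dirty data and wait for it, including
	 * direct I/O, so no further modifications can slip in */
	xfs_sync(mp, SYNC_DATA_QUIESCE);

	/* ...let the transactions started by phase 1 complete... */

	/* Phase 2: write the inodes themselves back to their on-disk
	 * locations so no log replay is needed to make them current */
	xfs_sync(mp, SYNC_INODE_QUIESCE);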
+ */ +#define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT) +#define SYNC_INODE_QUIESCE (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT) + +int xfs_syncd_init(struct xfs_mount *mp); +void xfs_syncd_stop(struct xfs_mount *mp); + int xfs_sync(struct xfs_mount *mp, int flags); int xfs_syncsub(struct xfs_mount *mp, int flags, int *bypassed); +void xfs_flush_inode(struct xfs_inode *ip); +void xfs_flush_device(struct xfs_inode *ip); + #endif diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 7e60c77..0ab60bc 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h @@ -33,37 +33,6 @@ struct xfs_mount_args; typedef struct kstatfs bhv_statvfs_t; -typedef struct bhv_vfs_sync_work { - struct list_head w_list; - struct xfs_mount *w_mount; - void *w_data; /* syncer routine argument */ - void (*w_syncer)(struct xfs_mount *, void *); -} bhv_vfs_sync_work_t; - -#define SYNC_ATTR 0x0001 /* sync attributes */ -#define SYNC_CLOSE 0x0002 /* close file system down */ -#define SYNC_DELWRI 0x0004 /* look at delayed writes */ -#define SYNC_WAIT 0x0008 /* wait for i/o to complete */ -#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */ -#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */ -#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ -#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ -#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ - -/* - * When remounting a filesystem read-only or freezing the filesystem, - * we have two phases to execute. This first phase is syncing the data - * before we quiesce the fielsystem, and the second is flushing all the - * inodes out after we've waited for all the transactions created by - * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE - * to ensure that the inodes are written to their location on disk - * rather than just existing in transactions in the log. This means - * after a quiesce there is no log replay required to write the inodes - * to disk (this is the main difference between a sync and a quiesce). - */ -#define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT) -#define SYNC_INODE_QUIESCE (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT) - #define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */ #define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */ #define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 56c3b12..c684681 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -18,6 +18,7 @@ #ifndef __XFS_MOUNT_H__ #define __XFS_MOUNT_H__ +#include "xfs_sync.h" typedef struct xfs_trans_reservations { uint tr_write; /* extent alloc trans */ -- cgit v0.10.2 From 75c68f411b1242c8fdaf731078fdd4e77b14981d Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:06:28 +1100 Subject: [XFS] Remove xfs_iflush_all and clean up xfs_finish_reclaim_all() xfs_iflush_all() walks the m_inodes list to find inodes that need reclaiming. We already have such a list - the m_del_inodes list. Replace xfs_iflush_all() with a call to xfs_finish_reclaim_all() and clean up xfs_finish_reclaim_all() to handle the different flush modes now needed. Originally based on a patch from Christoph Hellwig. 
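For reference, the reworked xfs_finish_reclaim_all() in the diff further below reduces to the following loop shape. This is a simplified sketch only: the noblock trylock filtering and the declarations are elided.

restart:
	XFS_MOUNT_ILOCK(mp);
	list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) {
		/* (when noblock is set, inodes we cannot lock or that are
		 * pinned are skipped here instead of being reclaimed) */
		XFS_MOUNT_IUNLOCK(mp);
		if (xfs_finish_reclaim(ip, noblock, mode))
			delay(1);
		goto restart;	/* the list may have changed while unlocked */
	}
	XFS_MOUNT_IUNLOCK(mp);
	return 0;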
Version 3 o rediff against new linux-2.6/xfs_sync.c code Version 2 o revert xfs_syncsub() inode reclaim behaviour back to original code o xfs_quiesce_fs() should use XFS_IFLUSH_DELWRI_ELSE_ASYNC, not XFS_IFLUSH_ASYNC, to prevent change of behaviour. SGI-PV: 988139 SGI-Modid: xfs-linux-melb:xfs-kern:32284a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index a51534c..cd82ba5 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -504,7 +504,7 @@ xfs_syncsub( if (flags & (SYNC_ATTR|SYNC_DELWRI)) { if (flags & SYNC_BDFLUSH) - xfs_finish_reclaim_all(mp, 1); + xfs_finish_reclaim_all(mp, 1, XFS_IFLUSH_DELWRI_ELSE_ASYNC); else error = xfs_sync_inodes(mp, flags, bypassed); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2b1294b..0c65ba2 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3499,41 +3499,6 @@ corrupt_out: } -/* - * Flush all inactive inodes in mp. - */ -void -xfs_iflush_all( - xfs_mount_t *mp) -{ - xfs_inode_t *ip; - - again: - XFS_MOUNT_ILOCK(mp); - ip = mp->m_inodes; - if (ip == NULL) - goto out; - - do { - /* Make sure we skip markers inserted by sync */ - if (ip->i_mount == NULL) { - ip = ip->i_mnext; - continue; - } - - if (!VFS_I(ip)) { - XFS_MOUNT_IUNLOCK(mp); - xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC); - goto again; - } - - ASSERT(vn_count(VFS_I(ip)) == 0); - - ip = ip->i_mnext; - } while (ip != mp->m_inodes); - out: - XFS_MOUNT_IUNLOCK(mp); -} #ifdef XFS_ILOCK_TRACE ktrace_t *xfs_ilock_trace_buf; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index a8f1e68..104623b 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -503,7 +503,7 @@ uint xfs_ilock_map_shared(xfs_inode_t *); void xfs_iunlock_map_shared(xfs_inode_t *, uint); void xfs_ireclaim(xfs_inode_t *); int xfs_finish_reclaim(xfs_inode_t *, int, int); -int xfs_finish_reclaim_all(struct xfs_mount *, int); +int xfs_finish_reclaim_all(struct xfs_mount *, int, int); /* * xfs_inode.c prototypes. @@ -530,7 +530,6 @@ void xfs_iext_realloc(xfs_inode_t *, int, int); void xfs_ipin(xfs_inode_t *); void xfs_iunpin(xfs_inode_t *); int xfs_iflush(xfs_inode_t *, uint); -void xfs_iflush_all(struct xfs_mount *); void xfs_ichgtime(xfs_inode_t *, int); xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); void xfs_lock_inodes(xfs_inode_t **, int, uint); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 15f5dd2..5ec6032 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1241,7 +1241,7 @@ xfs_unmountfs( * need to force the log first. */ xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); - xfs_iflush_all(mp); + xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_ASYNC); XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 01e274b..0c5ee5e 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -66,7 +66,7 @@ xfs_quiesce_fs( int count = 0, pincount; xfs_flush_buftarg(mp->m_ddev_targp, 0); - xfs_finish_reclaim_all(mp, 0); + xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* This loop must run at least twice. 
* The first instance of the loop will flush diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 8b6812f..a671457 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2918,36 +2918,30 @@ xfs_finish_reclaim( } int -xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock) +xfs_finish_reclaim_all( + xfs_mount_t *mp, + int noblock, + int mode) { - int purged; xfs_inode_t *ip, *n; - int done = 0; - while (!done) { - purged = 0; - XFS_MOUNT_ILOCK(mp); - list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { - if (noblock) { - if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) - continue; - if (xfs_ipincount(ip) || - !xfs_iflock_nowait(ip)) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - continue; - } +restart: + XFS_MOUNT_ILOCK(mp); + list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { + if (noblock) { + if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) + continue; + if (xfs_ipincount(ip) || + !xfs_iflock_nowait(ip)) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + continue; } - XFS_MOUNT_IUNLOCK(mp); - if (xfs_finish_reclaim(ip, noblock, - XFS_IFLUSH_DELWRI_ELSE_ASYNC)) - delay(1); - purged = 1; - break; } - - done = !purged; + XFS_MOUNT_IUNLOCK(mp); + if (xfs_finish_reclaim(ip, noblock, mode)) + delay(1); + goto restart; } - XFS_MOUNT_IUNLOCK(mp); return 0; } -- cgit v0.10.2 From 2f8a3ce1c20f20e6494cdb77fed76bc474ca3ca5 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:07:20 +1100 Subject: [XFS] don't block in xfs_qm_dqflush() during async writeback. Normally dquots are written back via delayed write mechanisms. They are flushed to their backing buffer by xfssyncd, which is then pushed out by either AIL or xfsbufd flushing. The flush from the xfssyncd is supposed to be non-blocking, but xfs_qm_dqflush() always waits for pinned duots, which means that it will block for the length of time it takes to do a synchronous log force. This causes unnecessary extra log I/O to be issued whenever we try to flush a busy dquot. Avoid the log forces and blocking xfssyncd by making xfs_qm_dqflush() pay attention to what type of sync it is doing when it sees a pinned dquot and not waiting when doing non-blocking flushes. SGI-PV: 988147 SGI-Modid: xfs-linux-melb:xfs-kern:32287a Signed-off-by: David Chinner Signed-off-by: Peter Leckie Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index d3f4fbb..1e6bf39 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1221,16 +1221,14 @@ xfs_qm_dqflush( xfs_dqtrace_entry(dqp, "DQFLUSH"); /* - * If not dirty, nada. + * If not dirty, or it's pinned and we are not supposed to + * block, nada. */ - if (!XFS_DQ_IS_DIRTY(dqp)) { + if (!XFS_DQ_IS_DIRTY(dqp) || + (!(flags & XFS_QMOPT_SYNC) && atomic_read(&dqp->q_pincount) > 0)) { xfs_dqfunlock(dqp); - return (0); + return 0; } - - /* - * Cant flush a pinned dquot. Wait for it. - */ xfs_qm_dqunpin_wait(dqp); /* -- cgit v0.10.2 From 683a897080a053733778b36398186cb1b22c377f Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:07:29 +1100 Subject: [XFS] Use the inode tree for finding dirty inodes Update xfs_sync_inodes to walk the inode radix tree cache to find dirty inodes. This removes a huge bunch of nasty, messy code for traversing the mount inode list safely and removes another user of the mount inode list. Version 3 o rediff against new linux-2.6/xfs_sync.c code Version 2 o add comment explaining use of gang lookups for a single inode o use IRELE, not VN_RELE o move check for ag initialisation to caller. 
SGI-PV: 988139 SGI-Modid: xfs-linux-melb:xfs-kern:32290a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index cd82ba5..53d85ec 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -121,356 +121,197 @@ xfs_sync( } /* - * xfs sync routine for internal use - * - * This routine supports all of the flags defined for the generic vfs_sync - * interface as explained above under xfs_sync. - * + * Sync all the inodes in the given AG according to the + * direction given by the flags. */ -int -xfs_sync_inodes( +STATIC int +xfs_sync_inodes_ag( xfs_mount_t *mp, + int ag, int flags, - int *bypassed) + int *bypassed) { xfs_inode_t *ip = NULL; struct inode *vp = NULL; - int error; - int last_error; - uint64_t fflag; - uint lock_flags; - uint base_lock_flags; - boolean_t mount_locked; - boolean_t vnode_refed; - int preempt; - xfs_iptr_t *ipointer; -#ifdef DEBUG - boolean_t ipointer_in = B_FALSE; - -#define IPOINTER_SET ipointer_in = B_TRUE -#define IPOINTER_CLR ipointer_in = B_FALSE -#else -#define IPOINTER_SET -#define IPOINTER_CLR -#endif - - -/* Insert a marker record into the inode list after inode ip. The list - * must be locked when this is called. After the call the list will no - * longer be locked. - */ -#define IPOINTER_INSERT(ip, mp) { \ - ASSERT(ipointer_in == B_FALSE); \ - ipointer->ip_mnext = ip->i_mnext; \ - ipointer->ip_mprev = ip; \ - ip->i_mnext = (xfs_inode_t *)ipointer; \ - ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \ - preempt = 0; \ - XFS_MOUNT_IUNLOCK(mp); \ - mount_locked = B_FALSE; \ - IPOINTER_SET; \ - } - -/* Remove the marker from the inode list. If the marker was the only item - * in the list then there are no remaining inodes and we should zero out - * the whole list. If we are the current head of the list then move the head - * past us. - */ -#define IPOINTER_REMOVE(ip, mp) { \ - ASSERT(ipointer_in == B_TRUE); \ - if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \ - ip = ipointer->ip_mnext; \ - ip->i_mprev = ipointer->ip_mprev; \ - ipointer->ip_mprev->i_mnext = ip; \ - if (mp->m_inodes == (xfs_inode_t *)ipointer) { \ - mp->m_inodes = ip; \ - } \ - } else { \ - ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \ - mp->m_inodes = NULL; \ - ip = NULL; \ - } \ - IPOINTER_CLR; \ - } - -#define XFS_PREEMPT_MASK 0x7f - - ASSERT(!(flags & SYNC_BDFLUSH)); - - if (bypassed) - *bypassed = 0; - if (mp->m_flags & XFS_MOUNT_RDONLY) - return 0; - error = 0; - last_error = 0; - preempt = 0; - - /* Allocate a reference marker */ - ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP); + xfs_perag_t *pag = &mp->m_perag[ag]; + boolean_t vnode_refed = B_FALSE; + int nr_found; + int first_index = 0; + int error = 0; + int last_error = 0; + int fflag = XFS_B_ASYNC; + int lock_flags = XFS_ILOCK_SHARED; - fflag = XFS_B_ASYNC; /* default is don't wait */ if (flags & SYNC_DELWRI) fflag = XFS_B_DELWRI; if (flags & SYNC_WAIT) fflag = 0; /* synchronous overrides all */ - base_lock_flags = XFS_ILOCK_SHARED; if (flags & (SYNC_DELWRI | SYNC_CLOSE)) { /* * We need the I/O lock if we're going to call any of * the flush/inval routines. 
*/ - base_lock_flags |= XFS_IOLOCK_SHARED; + lock_flags |= XFS_IOLOCK_SHARED; } - XFS_MOUNT_ILOCK(mp); - - ip = mp->m_inodes; - - mount_locked = B_TRUE; - vnode_refed = B_FALSE; - - IPOINTER_CLR; - do { - ASSERT(ipointer_in == B_FALSE); - ASSERT(vnode_refed == B_FALSE); - - lock_flags = base_lock_flags; - /* - * There were no inodes in the list, just break out - * of the loop. + * use a gang lookup to find the next inode in the tree + * as the tree is sparse and a gang lookup walks to find + * the number of objects requested. */ - if (ip == NULL) { - break; - } + read_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, + (void**)&ip, first_index, 1); - /* - * We found another sync thread marker - skip it - */ - if (ip->i_mount == NULL) { - ip = ip->i_mnext; - continue; + if (!nr_found) { + read_unlock(&pag->pag_ici_lock); + break; } - vp = VFS_I(ip); + /* update the index for the next lookup */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); /* - * If the vnode is gone then this is being torn down, - * call reclaim if it is flushed, else let regular flush - * code deal with it later in the loop. + * skip inodes in reclaim. Let xfs_syncsub do that for + * us so we don't need to worry. */ - - if (vp == NULL) { - /* Skip ones already in reclaim */ - if (ip->i_flags & XFS_IRECLAIM) { - ip = ip->i_mnext; - continue; - } - if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { - ip = ip->i_mnext; - } else if ((xfs_ipincount(ip) == 0) && - xfs_iflock_nowait(ip)) { - IPOINTER_INSERT(ip, mp); - - xfs_finish_reclaim(ip, 1, - XFS_IFLUSH_DELWRI_ELSE_ASYNC); - - XFS_MOUNT_ILOCK(mp); - mount_locked = B_TRUE; - IPOINTER_REMOVE(ip, mp); - } else { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - ip = ip->i_mnext; - } + vp = VFS_I(ip); + if (!vp) { + read_unlock(&pag->pag_ici_lock); continue; } + /* bad inodes are dealt with elsewhere */ if (VN_BAD(vp)) { - ip = ip->i_mnext; + read_unlock(&pag->pag_ici_lock); continue; } + /* nothing to sync during shutdown */ if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) { - XFS_MOUNT_IUNLOCK(mp); - kmem_free(ipointer); + read_unlock(&pag->pag_ici_lock); return 0; } /* - * Try to lock without sleeping. We're out of order with - * the inode list lock here, so if we fail we need to drop - * the mount lock and try again. If we're called from - * bdflush() here, then don't bother. - * - * The inode lock here actually coordinates with the - * almost spurious inode lock in xfs_ireclaim() to prevent - * the vnode we handle here without a reference from - * being freed while we reference it. If we lock the inode - * while it's on the mount list here, then the spurious inode - * lock in xfs_ireclaim() after the inode is pulled from - * the mount list will sleep until we release it here. - * This keeps the vnode from being freed while we reference - * it. + * The inode lock here actually coordinates with the almost + * spurious inode lock in xfs_ireclaim() to prevent the vnode + * we handle here without a reference from being freed while we + * reference it. If we lock the inode while it's on the mount + * list here, then the spurious inode lock in xfs_ireclaim() + * after the inode is pulled from the mount list will sleep + * until we release it here. This keeps the vnode from being + * freed while we reference it. 
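/*
 * An illustrative sketch, not part of this patch: radix_tree_gang_lookup()
 * with a batch size of one is used here purely as a "find the next
 * populated slot at or after first_index" primitive, because the
 * per-AG inode tree is sparsely populated by agino.  Stripped to its
 * core, the walk looks like this:
 */
	while (1) {
		read_lock(&pag->pag_ici_lock);
		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
					(void **)&ip, first_index, 1);
		if (!nr_found) {
			read_unlock(&pag->pag_ici_lock);
			break;			/* no more inodes in this AG */
		}
		/* record the resume point before doing anything else */
		first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);

		/* ...take the ilock or a vnode reference while
		 * pag_ici_lock is still held, exactly as the code above
		 * does, then drop the tree lock and process the inode... */
		read_unlock(&pag->pag_ici_lock);
	}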
*/ if (xfs_ilock_nowait(ip, lock_flags) == 0) { - if (vp == NULL) { - ip = ip->i_mnext; - continue; - } - vp = vn_grab(vp); - if (vp == NULL) { - ip = ip->i_mnext; + read_unlock(&pag->pag_ici_lock); + if (!vp) continue; - } - - IPOINTER_INSERT(ip, mp); xfs_ilock(ip, lock_flags); ASSERT(vp == VFS_I(ip)); ASSERT(ip->i_mount == mp); vnode_refed = B_TRUE; + } else { + /* safe to unlock here as we have a reference */ + read_unlock(&pag->pag_ici_lock); } - - /* From here on in the loop we may have a marker record - * in the inode list. - */ - /* * If we have to flush data or wait for I/O completion * we need to drop the ilock that we currently hold. * If we need to drop the lock, insert a marker if we * have not already done so. */ - if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) || - ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) { - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } + if (flags & SYNC_CLOSE) { xfs_iunlock(ip, XFS_ILOCK_SHARED); - - if (flags & SYNC_CLOSE) { - /* Shutdown case. Flush and invalidate. */ - if (XFS_FORCED_SHUTDOWN(mp)) - xfs_tosspages(ip, 0, -1, - FI_REMAPF); - else - error = xfs_flushinval_pages(ip, - 0, -1, FI_REMAPF); - } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) { - error = xfs_flush_pages(ip, 0, - -1, fflag, FI_NONE); - } - - /* - * When freezing, we need to wait ensure all I/O (including direct - * I/O) is complete to ensure no further data modification can take - * place after this point - */ + if (XFS_FORCED_SHUTDOWN(mp)) + xfs_tosspages(ip, 0, -1, FI_REMAPF); + else + error = xfs_flushinval_pages(ip, 0, -1, + FI_REMAPF); + /* wait for I/O on freeze */ if (flags & SYNC_IOWAIT) vn_iowait(ip); xfs_ilock(ip, XFS_ILOCK_SHARED); } - if ((flags & SYNC_ATTR) && - (ip->i_update_core || - (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) { - if (mount_locked) - IPOINTER_INSERT(ip, mp); + if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) { + xfs_iunlock(ip, XFS_ILOCK_SHARED); + error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); + if (flags & SYNC_IOWAIT) + vn_iowait(ip); + xfs_ilock(ip, XFS_ILOCK_SHARED); + } + if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) { if (flags & SYNC_WAIT) { xfs_iflock(ip); - error = xfs_iflush(ip, XFS_IFLUSH_SYNC); - - /* - * If we can't acquire the flush lock, then the inode - * is already being flushed so don't bother waiting. - * - * If we can lock it then do a delwri flush so we can - * combine multiple inode flushes in each disk write. - */ + if (!xfs_inode_clean(ip)) + error = xfs_iflush(ip, XFS_IFLUSH_SYNC); + else + xfs_ifunlock(ip); } else if (xfs_iflock_nowait(ip)) { - error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); + if (!xfs_inode_clean(ip)) + error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); + else + xfs_ifunlock(ip); } else if (bypassed) { (*bypassed)++; } } - if (lock_flags != 0) { + if (lock_flags) xfs_iunlock(ip, lock_flags); - } if (vnode_refed) { - /* - * If we had to take a reference on the vnode - * above, then wait until after we've unlocked - * the inode to release the reference. This is - * because we can be already holding the inode - * lock when IRELE() calls xfs_inactive(). - * - * Make sure to drop the mount lock before calling - * IRELE() so that we don't trip over ourselves if - * we have to go for the mount lock again in the - * inactive code. - */ - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } - IRELE(ip); - vnode_refed = B_FALSE; } - if (error) { + if (error) last_error = error; - } - /* * bail out if the filesystem is corrupted. 
*/ - if (error == EFSCORRUPTED) { - if (!mount_locked) { - XFS_MOUNT_ILOCK(mp); - IPOINTER_REMOVE(ip, mp); - } - XFS_MOUNT_IUNLOCK(mp); - ASSERT(ipointer_in == B_FALSE); - kmem_free(ipointer); + if (error == EFSCORRUPTED) return XFS_ERROR(error); - } - - /* Let other threads have a chance at the mount lock - * if we have looped many times without dropping the - * lock. - */ - if ((++preempt & XFS_PREEMPT_MASK) == 0) { - if (mount_locked) { - IPOINTER_INSERT(ip, mp); - } - } - - if (mount_locked == B_FALSE) { - XFS_MOUNT_ILOCK(mp); - mount_locked = B_TRUE; - IPOINTER_REMOVE(ip, mp); - continue; - } - ASSERT(ipointer_in == B_FALSE); - ip = ip->i_mnext; + } while (nr_found); - } while (ip != mp->m_inodes); + return last_error; +} - XFS_MOUNT_IUNLOCK(mp); +int +xfs_sync_inodes( + xfs_mount_t *mp, + int flags, + int *bypassed) +{ + int error; + int last_error; + int i; - ASSERT(ipointer_in == B_FALSE); + if (bypassed) + *bypassed = 0; + if (mp->m_flags & XFS_MOUNT_RDONLY) + return 0; + error = 0; + last_error = 0; - kmem_free(ipointer); + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + if (!mp->m_perag[i].pag_ici_init) + continue; + error = xfs_sync_inodes_ag(mp, i, flags, bypassed); + if (error) + last_error = error; + if (error == EFSCORRUPTED) + break; + } return XFS_ERROR(last_error); } -- cgit v0.10.2 From 5b4d89ae0f5ae45c7fa1dfc616fd2bb8634bb7b7 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:08:03 +1100 Subject: [XFS] Traverse inode trees when releasing dquots Make releasing all inode dquots traverse the per-ag inode radix trees rather than the mount inode list. This removes another user of the mount inode list. Version 3 o fix comment relating to avoiding trying to release the quota inodes and those in reclaim. Version 2 o add comment explaining use of gang lookups for a single inode o use IRELE, not VN_RELE o move check for ag initialisation to caller. SGI-PV: 988139 SGI-Modid: xfs-linux-melb:xfs-kern:32291a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 1a3b803..26152b9 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -1022,101 +1022,92 @@ xfs_qm_export_flags( /* - * Go thru all the inodes in the file system, releasing their dquots. - * Note that the mount structure gets modified to indicate that quotas are off - * AFTER this, in the case of quotaoff. This also gets called from - * xfs_rootumount. + * Release all the dquots on the inodes in an AG. 
*/ -void -xfs_qm_dqrele_all_inodes( - struct xfs_mount *mp, - uint flags) +STATIC void +xfs_qm_dqrele_inodes_ag( + xfs_mount_t *mp, + int ag, + uint flags) { - xfs_inode_t *ip, *topino; - uint ireclaims; - struct inode *vp; - boolean_t vnode_refd; + xfs_inode_t *ip = NULL; + struct inode *vp = NULL; + xfs_perag_t *pag = &mp->m_perag[ag]; + int first_index = 0; + int nr_found; - ASSERT(mp->m_quotainfo); - - XFS_MOUNT_ILOCK(mp); -again: - ip = mp->m_inodes; - if (ip == NULL) { - XFS_MOUNT_IUNLOCK(mp); - return; - } do { - /* Skip markers inserted by xfs_sync */ - if (ip->i_mount == NULL) { - ip = ip->i_mnext; - continue; - } - /* Root inode, rbmip and rsumip have associated blocks */ - if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { - ASSERT(ip->i_udquot == NULL); - ASSERT(ip->i_gdquot == NULL); - ip = ip->i_mnext; - continue; + boolean_t vnode_refd = B_FALSE; + + /* + * use a gang lookup to find the next inode in the tree + * as the tree is sparse and a gang lookup walks to find + * the number of objects requested. + */ + read_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, + (void**)&ip, first_index, 1); + + if (!nr_found) { + read_unlock(&pag->pag_ici_lock); + break; } + + /* update the index for the next lookup */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + + /* skip quota inodes and those in reclaim */ vp = VFS_I(ip); - if (!vp) { + if (!vp || ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { ASSERT(ip->i_udquot == NULL); ASSERT(ip->i_gdquot == NULL); - ip = ip->i_mnext; + read_unlock(&pag->pag_ici_lock); continue; } - vnode_refd = B_FALSE; if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { - ireclaims = mp->m_ireclaims; - topino = mp->m_inodes; vp = vn_grab(vp); + read_unlock(&pag->pag_ici_lock); if (!vp) - goto again; - - XFS_MOUNT_IUNLOCK(mp); - /* XXX restart limit ? */ - xfs_ilock(ip, XFS_ILOCK_EXCL); + continue; vnode_refd = B_TRUE; + xfs_ilock(ip, XFS_ILOCK_EXCL); } else { - ireclaims = mp->m_ireclaims; - topino = mp->m_inodes; - XFS_MOUNT_IUNLOCK(mp); + read_unlock(&pag->pag_ici_lock); } - - /* - * We don't keep the mountlock across the dqrele() call, - * since it can take a while.. - */ if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { xfs_qm_dqrele(ip->i_udquot); ip->i_udquot = NULL; } - if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { + if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && + ip->i_gdquot) { xfs_qm_dqrele(ip->i_gdquot); ip->i_gdquot = NULL; } xfs_iunlock(ip, XFS_ILOCK_EXCL); - /* - * Wait until we've dropped the ilock and mountlock to - * do the vn_rele. Or be condemned to an eternity in the - * inactive code in hell. - */ if (vnode_refd) IRELE(ip); - XFS_MOUNT_ILOCK(mp); - /* - * If an inode was inserted or removed, we gotta - * start over again. - */ - if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) { - /* XXX use a sentinel */ - goto again; - } - ip = ip->i_mnext; - } while (ip != mp->m_inodes); + } while (nr_found); +} - XFS_MOUNT_IUNLOCK(mp); +/* + * Go thru all the inodes in the file system, releasing their dquots. + * Note that the mount structure gets modified to indicate that quotas are off + * AFTER this, in the case of quotaoff. This also gets called from + * xfs_rootumount. 
+ */ +void +xfs_qm_dqrele_all_inodes( + struct xfs_mount *mp, + uint flags) +{ + int i; + + ASSERT(mp->m_quotainfo); + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + if (!mp->m_perag[i].pag_ici_init) + continue; + xfs_qm_dqrele_inodes_ag(mp, i, flags); + } } /*------------------------------------------------------------------------*/ -- cgit v0.10.2 From 60197e8df364df326dcbb987519f367ad0ee1a11 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:11:19 +1100 Subject: [XFS] Cleanup maxrecs calculation. Clean up the way the maximum and minimum records for the btree blocks are calculated. For the alloc and inobt btrees all the values are pre-calculated in xfs_mount_common, and we switch the current loop around the ugly generic macros that use cpp token pasting to generate type names to two small helpers in normal C code. For the bmbt and bmdr trees these helpers also exist, but can be called during runtime, too. Here we also kill various macros dealing with them and inline the logic into the get_minrecs / get_maxrecs / get_dmaxrecs methods in xfs_bmap_btree.c. Note that all these new helpers take an xfs_mount * argument which will be needed to determine the size of a btree block once we add support for extended btree blocks with CRCs and other RAS information. SGI-PV: 988146 SGI-Modid: xfs-linux-melb:xfs-kern:32292a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 9e63f8c..6ff27b7 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -480,3 +480,19 @@ xfs_allocbt_init_cursor( return cur; } + +/* + * Calculate number of records in an alloc btree block. + */ +int +xfs_allocbt_maxrecs( + struct xfs_mount *mp, + int blocklen, + int leaf) +{ + blocklen -= sizeof(struct xfs_btree_sblock); + + if (leaf) + return blocklen / sizeof(xfs_alloc_rec_t); + return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t)); +} diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 22f1d70..ff1f71d 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -56,12 +56,6 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t; #define XFS_BUF_TO_ALLOC_BLOCK(bp) ((xfs_alloc_block_t *)XFS_BUF_PTR(bp)) /* - * Real block structures have a size equal to the disk block size. - */ -#define XFS_ALLOC_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_alloc_mxr[lev != 0]) -#define XFS_ALLOC_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_alloc_mnr[lev != 0]) - -/* * Minimum and maximum blocksize and sectorsize. * The blocksize upper limit is pretty much arbitrary. * The sectorsize upper limit is due to sizeof(sb_sectsize). 
@@ -98,5 +92,6 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t; extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t, xfs_btnum_t); +extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int); #endif /* __XFS_ALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index b7f99d7..09e4de4 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -3051,15 +3051,15 @@ xfs_bmap_btree_to_extents( __be64 *pp; /* ptr to block address */ xfs_bmbt_block_t *rblock;/* root btree block */ + mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(ifp->if_flags & XFS_IFEXTENTS); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); rblock = ifp->if_broot; ASSERT(be16_to_cpu(rblock->bb_level) == 1); ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); - ASSERT(XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes) == 1); - mp = ip->i_mount; - pp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, ifp->if_broot_bytes); + ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes); cbno = be64_to_cpu(*pp); *logflagsp = 0; #ifdef DEBUG @@ -4221,7 +4221,7 @@ xfs_bmap_compute_maxlevels( maxleafents = MAXAEXTNUM; sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); } - maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0); + maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0); minleafrecs = mp->m_bmap_dmnr[0]; minnoderecs = mp->m_bmap_dmnr[1]; maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; @@ -4555,7 +4555,7 @@ xfs_bmap_read_extents( */ level = be16_to_cpu(block->bb_level); ASSERT(level > 0); - pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); ASSERT(bno != NULLDFSBNO); ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); @@ -6205,13 +6205,13 @@ xfs_check_block( */ if (root) { - pp = XFS_BMAP_BROOT_PTR_ADDR(block, i, sz); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz); } else { pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, i, dmxr); } for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { if (root) { - thispa = XFS_BMAP_BROOT_PTR_ADDR(block, j, sz); + thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz); } else { thispa = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, j, dmxr); @@ -6266,7 +6266,7 @@ xfs_bmap_check_leaf_extents( level = be16_to_cpu(block->bb_level); ASSERT(level > 0); xfs_check_block(block, mp, 1, ifp->if_broot_bytes); - pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); ASSERT(bno != NULLDFSBNO); @@ -6426,7 +6426,7 @@ xfs_bmap_count_blocks( block = ifp->if_broot; level = be16_to_cpu(block->bb_level); ASSERT(level > 0); - pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes); + pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); ASSERT(bno != NULLDFSBNO); ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index c5eeb32..853828c 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -66,6 +66,7 @@ xfs_extent_state( */ void xfs_bmdr_to_bmbt( + struct xfs_mount *mp, xfs_bmdr_block_t *dblock, int dblocklen, xfs_bmbt_block_t *rblock, @@ -83,11 +84,11 @@ xfs_bmdr_to_bmbt( rblock->bb_numrecs = dblock->bb_numrecs; rblock->bb_leftsib = cpu_to_be64(NULLDFSBNO); rblock->bb_rightsib = cpu_to_be64(NULLDFSBNO); - dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, 
xfs_bmdr, 0); + dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); fkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1); tkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen); fpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr); - tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen); + tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); dmxr = be16_to_cpu(dblock->bb_numrecs); memcpy(tkp, fkp, sizeof(*fkp) * dmxr); memcpy(tpp, fpp, sizeof(*fpp) * dmxr); @@ -428,6 +429,7 @@ xfs_bmbt_set_state( */ void xfs_bmbt_to_bmdr( + struct xfs_mount *mp, xfs_bmbt_block_t *rblock, int rblocklen, xfs_bmdr_block_t *dblock, @@ -445,10 +447,10 @@ xfs_bmbt_to_bmdr( ASSERT(be16_to_cpu(rblock->bb_level) > 0); dblock->bb_level = rblock->bb_level; dblock->bb_numrecs = rblock->bb_numrecs; - dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0); + dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); fkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen); tkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1); - fpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen); + fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); tpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr); dmxr = be16_to_cpu(dblock->bb_numrecs); memcpy(tkp, fkp, sizeof(*fkp) * dmxr); @@ -626,15 +628,36 @@ xfs_bmbt_get_minrecs( struct xfs_btree_cur *cur, int level) { - return XFS_BMAP_BLOCK_IMINRECS(level, cur); + if (level == cur->bc_nlevels - 1) { + struct xfs_ifork *ifp; + + ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, + cur->bc_private.b.whichfork); + + return xfs_bmbt_maxrecs(cur->bc_mp, + ifp->if_broot_bytes, level == 0) / 2; + } + + return cur->bc_mp->m_bmap_dmnr[level != 0]; } -STATIC int +int xfs_bmbt_get_maxrecs( struct xfs_btree_cur *cur, int level) { - return XFS_BMAP_BLOCK_IMAXRECS(level, cur); + if (level == cur->bc_nlevels - 1) { + struct xfs_ifork *ifp; + + ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, + cur->bc_private.b.whichfork); + + return xfs_bmbt_maxrecs(cur->bc_mp, + ifp->if_broot_bytes, level == 0); + } + + return cur->bc_mp->m_bmap_dmxr[level != 0]; + } /* @@ -651,7 +674,10 @@ xfs_bmbt_get_dmaxrecs( struct xfs_btree_cur *cur, int level) { - return XFS_BMAP_BLOCK_DMAXRECS(level, cur); + if (level != cur->bc_nlevels - 1) + return cur->bc_mp->m_bmap_dmxr[level != 0]; + return xfs_bmdr_maxrecs(cur->bc_mp, cur->bc_private.b.forksize, + level == 0); } STATIC void @@ -871,3 +897,35 @@ xfs_bmbt_init_cursor( return cur; } + +/* + * Calculate number of records in a bmap btree block. + */ +int +xfs_bmbt_maxrecs( + struct xfs_mount *mp, + int blocklen, + int leaf) +{ + blocklen -= sizeof(struct xfs_btree_lblock); + + if (leaf) + return blocklen / sizeof(xfs_bmbt_rec_t); + return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)); +} + +/* + * Calculate number of records in a bmap btree inode root. 
+ */ +int +xfs_bmdr_maxrecs( + struct xfs_mount *mp, + int blocklen, + int leaf) +{ + blocklen -= sizeof(xfs_bmdr_block_t); + + if (leaf) + return blocklen / sizeof(xfs_bmdr_rec_t); + return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t)); +} diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 5669242..835be2a 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -151,33 +151,6 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; #define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp)) -#define XFS_BMAP_RBLOCK_DSIZE(lev,cur) ((cur)->bc_private.b.forksize) -#define XFS_BMAP_RBLOCK_ISIZE(lev,cur) \ - ((int)XFS_IFORK_PTR((cur)->bc_private.b.ip, \ - (cur)->bc_private.b.whichfork)->if_broot_bytes) - -#define XFS_BMAP_BLOCK_DMAXRECS(lev,cur) \ - (((lev) == (cur)->bc_nlevels - 1 ? \ - XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \ - xfs_bmdr, (lev) == 0) : \ - ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0]))) -#define XFS_BMAP_BLOCK_IMAXRECS(lev,cur) \ - (((lev) == (cur)->bc_nlevels - 1 ? \ - XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur),\ - xfs_bmbt, (lev) == 0) : \ - ((cur)->bc_mp->m_bmap_dmxr[(lev) != 0]))) - -#define XFS_BMAP_BLOCK_DMINRECS(lev,cur) \ - (((lev) == (cur)->bc_nlevels - 1 ? \ - XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur),\ - xfs_bmdr, (lev) == 0) : \ - ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))) -#define XFS_BMAP_BLOCK_IMINRECS(lev,cur) \ - (((lev) == (cur)->bc_nlevels - 1 ? \ - XFS_BTREE_BLOCK_MINRECS(XFS_BMAP_RBLOCK_ISIZE(lev,cur),\ - xfs_bmbt, (lev) == 0) : \ - ((cur)->bc_mp->m_bmap_dmnr[(lev) != 0]))) - #define XFS_BMAP_REC_DADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i)) #define XFS_BMAP_REC_IADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i)) @@ -192,8 +165,8 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \ be16_to_cpu((bb)->bb_level), cur))) #define XFS_BMAP_PTR_IADDR(bb,i,cur) \ - (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \ - be16_to_cpu((bb)->bb_level), cur))) + (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, xfs_bmbt_get_maxrecs(cur, \ + be16_to_cpu((bb)->bb_level)))) /* * These are to be used when we know the size of the block and @@ -203,11 +176,8 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; (XFS_BTREE_REC_ADDR(xfs_bmbt,bb,i)) #define XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) \ (XFS_BTREE_KEY_ADDR(xfs_bmbt,bb,i)) -#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz) \ - (XFS_BTREE_PTR_ADDR(xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz))) - -#define XFS_BMAP_BROOT_NUMRECS(bb) be16_to_cpu((bb)->bb_numrecs) -#define XFS_BMAP_BROOT_MAXRECS(sz) XFS_BTREE_BLOCK_MAXRECS(sz,xfs_bmbt,0) +#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb,i,sz) \ + (XFS_BTREE_PTR_ADDR(xfs_bmbt,bb,i,xfs_bmbt_maxrecs(mp, sz, 0))) #define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \ (int)(sizeof(xfs_bmbt_block_t) + \ @@ -234,7 +204,8 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; /* * Prototypes for xfs_bmap.c to call. 
*/ -extern void xfs_bmdr_to_bmbt(xfs_bmdr_block_t *, int, xfs_bmbt_block_t *, int); +extern void xfs_bmdr_to_bmbt(struct xfs_mount *, xfs_bmdr_block_t *, int, + xfs_bmbt_block_t *, int); extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r); extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); @@ -257,7 +228,12 @@ extern void xfs_bmbt_disk_set_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); -extern void xfs_bmbt_to_bmdr(xfs_bmbt_block_t *, int, xfs_bmdr_block_t *, int); +extern void xfs_bmbt_to_bmdr(struct xfs_mount *, xfs_bmbt_block_t *, int, + xfs_bmdr_block_t *, int); + +extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level); +extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf); +extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf); extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 7425b2b..795a124 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -148,19 +148,6 @@ do { \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) -/* - * Maximum and minimum records in a btree block. - * Given block size, type prefix, and leaf flag (0 or 1). - * The divisor below is equivalent to lf ? (e1) : (e2) but that produces - * compiler warnings. - */ -#define XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) \ - ((int)(((bsz) - (uint)sizeof(t ## _block_t)) / \ - (((lf) * (uint)sizeof(t ## _rec_t)) + \ - ((1 - (lf)) * \ - ((uint)sizeof(t ## _key_t) + (uint)sizeof(t ## _ptr_t)))))) -#define XFS_BTREE_BLOCK_MINRECS(bsz,t,lf) \ - (XFS_BTREE_BLOCK_MAXRECS(bsz,t,lf) / 2) /* * Record, key, and pointer address calculation macros. diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index c9065ea..2a00fcc 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -78,8 +78,7 @@ typedef struct xfs_dinode xfs_dinode_core_t di_core; /* * In adding anything between the core and the union, be - * sure to update the macros like XFS_LITINO below and - * XFS_BMAP_RBLOCK_DSIZE in xfs_bmap_btree.h. + * sure to update the macros like XFS_LITINO below. */ __be32 di_next_unlinked;/* agi unlinked list ptr */ union { diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index dcd4a95..46aabb3 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -365,3 +365,19 @@ xfs_inobt_init_cursor( return cur; } + +/* + * Calculate number of records in an inobt btree block. + */ +int +xfs_inobt_maxrecs( + struct xfs_mount *mp, + int blocklen, + int leaf) +{ + blocklen -= sizeof(struct xfs_btree_sblock); + + if (leaf) + return blocklen / sizeof(xfs_inobt_rec_t); + return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t)); +} diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index ff7406b..f0fc1e4 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -85,14 +85,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; #define XFS_INOBT_CLR_FREE(rp,i) ((rp)->ir_free &= ~XFS_INOBT_MASK(i)) /* - * Real block structures have a size equal to the disk block size. 
- */ -#define XFS_INOBT_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_inobt_mxr[lev != 0]) -#define XFS_INOBT_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_inobt_mnr[lev != 0]) -#define XFS_INOBT_IS_LAST_REC(cur) \ - ((cur)->bc_ptrs[0] == be16_to_cpu(XFS_BUF_TO_INOBT_BLOCK((cur)->bc_bufs[0])->bb_numrecs)) - -/* * Maximum number of inode btree levels. */ #define XFS_IN_MAXLEVELS(mp) ((mp)->m_in_maxlevels) @@ -118,5 +110,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); +extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 0c65ba2..73b604e 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -622,7 +622,7 @@ xfs_iformat_btree( ifp = XFS_IFORK_PTR(ip, whichfork); dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork); size = XFS_BMAP_BROOT_SPACE(dfp); - nrecs = XFS_BMAP_BROOT_NUMRECS(dfp); + nrecs = be16_to_cpu(dfp->bb_numrecs); /* * blow out if -- fork has less extents than can fit in @@ -650,8 +650,9 @@ xfs_iformat_btree( * Copy and convert from the on-disk structure * to the in-memory structure. */ - xfs_bmdr_to_bmbt(dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), - ifp->if_broot, size); + xfs_bmdr_to_bmbt(ip->i_mount, dfp, + XFS_DFORK_SIZE(dip, ip->i_mount, whichfork), + ifp->if_broot, size); ifp->if_flags &= ~XFS_IFEXTENTS; ifp->if_flags |= XFS_IFBROOT; @@ -2348,6 +2349,7 @@ xfs_iroot_realloc( int rec_diff, int whichfork) { + struct xfs_mount *mp = ip->i_mount; int cur_max; xfs_ifork_t *ifp; xfs_bmbt_block_t *new_broot; @@ -2383,7 +2385,7 @@ xfs_iroot_realloc( * location. The records don't change location because * they are kept butted up against the btree block header. */ - cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes); + cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); new_max = cur_max + rec_diff; new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); ifp->if_broot = (xfs_bmbt_block_t *) @@ -2391,10 +2393,10 @@ xfs_iroot_realloc( new_size, (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ KM_SLEEP); - op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, - ifp->if_broot_bytes); - np = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, - (int)new_size); + op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, + ifp->if_broot_bytes); + np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, + (int)new_size); ifp->if_broot_bytes = (int)new_size; ASSERT(ifp->if_broot_bytes <= XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ); @@ -2408,7 +2410,7 @@ xfs_iroot_realloc( * records, just get rid of the root and clear the status bit. */ ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0)); - cur_max = XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes); + cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); new_max = cur_max + rec_diff; ASSERT(new_max >= 0); if (new_max > 0) @@ -2442,9 +2444,9 @@ xfs_iroot_realloc( /* * Then copy the pointers. 
*/ - op = (char *)XFS_BMAP_BROOT_PTR_ADDR(ifp->if_broot, 1, + op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, ifp->if_broot_bytes); - np = (char *)XFS_BMAP_BROOT_PTR_ADDR(new_broot, 1, + np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1, (int)new_size); memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); } @@ -2920,7 +2922,7 @@ xfs_iflush_fork( ASSERT(ifp->if_broot_bytes <= (XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ)); - xfs_bmbt_to_bmdr(ifp->if_broot, ifp->if_broot_bytes, + xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, (xfs_bmdr_block_t *)cp, XFS_DFORK_SIZE(dip, mp, whichfork)); } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 82d46ce..23c3a78 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2452,7 +2452,7 @@ xlog_recover_do_inode_trans( break; case XFS_ILOG_DBROOT: - xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len, + xfs_bmbt_to_bmdr(mp, (xfs_bmbt_block_t *)src, len, &(dip->di_u.di_bmbt), XFS_DFORK_DSIZE(dip, mp)); break; @@ -2490,7 +2490,7 @@ xlog_recover_do_inode_trans( case XFS_ILOG_ABROOT: dest = XFS_DFORK_APTR(dip); - xfs_bmbt_to_bmdr((xfs_bmbt_block_t *)src, len, + xfs_bmbt_to_bmdr(mp, (xfs_bmbt_block_t *)src, len, (xfs_bmdr_block_t*)dest, XFS_DFORK_ASIZE(dip, mp)); break; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 5ec6032..40338ff 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -567,8 +567,6 @@ xfs_readsb(xfs_mount_t *mp, int flags) STATIC void xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) { - int i; - mp->m_agfrotor = mp->m_agirotor = 0; spin_lock_init(&mp->m_agirotor_lock); mp->m_maxagi = mp->m_sb.sb_agcount; @@ -605,24 +603,20 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) } ASSERT(mp->m_attroffset < XFS_LITINO(mp)); - for (i = 0; i < 2; i++) { - mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, - xfs_alloc, i == 0); - mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, - xfs_alloc, i == 0); - } - for (i = 0; i < 2; i++) { - mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, - xfs_bmbt, i == 0); - mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, - xfs_bmbt, i == 0); - } - for (i = 0; i < 2; i++) { - mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, - xfs_inobt, i == 0); - mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, - xfs_inobt, i == 0); - } + mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1); + mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0); + mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; + mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2; + + mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); + mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0); + mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2; + mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2; + + mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1); + mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0); + mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2; + mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2; mp->m_bsize = XFS_FSB_TO_BB(mp, 1); mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index c684681..f4644d7 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -276,12 +276,12 @@ typedef struct xfs_mount { uint m_blockmask; /* sb_blocksize-1 */ uint m_blockwsize; /* sb_blocksize in words */ uint m_blockwmask; /* blockwsize-1 */ - uint m_alloc_mxr[2]; /* XFS_ALLOC_BLOCK_MAXRECS */ - 
uint m_alloc_mnr[2]; /* XFS_ALLOC_BLOCK_MINRECS */ - uint m_bmap_dmxr[2]; /* XFS_BMAP_BLOCK_DMAXRECS */ - uint m_bmap_dmnr[2]; /* XFS_BMAP_BLOCK_DMINRECS */ - uint m_inobt_mxr[2]; /* XFS_INOBT_BLOCK_MAXRECS */ - uint m_inobt_mnr[2]; /* XFS_INOBT_BLOCK_MINRECS */ + uint m_alloc_mxr[2]; /* max alloc btree records */ + uint m_alloc_mnr[2]; /* min alloc btree records */ + uint m_bmap_dmxr[2]; /* max bmap btree records */ + uint m_bmap_dmnr[2]; /* min bmap btree records */ + uint m_inobt_mxr[2]; /* max inobt btree records */ + uint m_inobt_mnr[2]; /* min inobt btree records */ uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */ -- cgit v0.10.2 From 6c7699c047c50403149ad91331dd39de47dea070 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:11:29 +1100 Subject: [XFS] remove the mount inode list Now we've removed all users of the mount inode list, we can kill it. This reduces the size of the xfs_inode by 2 pointers. SGI-PV: 988139 SGI-Modid: xfs-linux-melb:xfs-kern:32293a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 4c92d19..1256746 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -76,7 +76,6 @@ xfs_iget_core( { struct inode *old_inode; xfs_inode_t *ip; - xfs_inode_t *iq; int error; unsigned long first_index, mask; xfs_perag_t *pag; @@ -255,24 +254,6 @@ finish_inode: write_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); - - /* - * Link ip to its mount and thread it on the mount's inode list. - */ - XFS_MOUNT_ILOCK(mp); - if ((iq = mp->m_inodes)) { - ASSERT(iq->i_mprev->i_mnext == iq); - ip->i_mprev = iq->i_mprev; - iq->i_mprev->i_mnext = ip; - iq->i_mprev = ip; - ip->i_mnext = iq; - } else { - ip->i_mnext = ip; - ip->i_mprev = ip; - } - mp->m_inodes = ip; - - XFS_MOUNT_IUNLOCK(mp); xfs_put_perag(mp, pag); return_ip: @@ -493,36 +474,15 @@ xfs_iextract( { xfs_mount_t *mp = ip->i_mount; xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); - xfs_inode_t *iq; write_lock(&pag->pag_ici_lock); radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); write_unlock(&pag->pag_ici_lock); xfs_put_perag(mp, pag); - /* - * Remove from mount's inode list. - */ - XFS_MOUNT_ILOCK(mp); - ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL)); - iq = ip->i_mnext; - iq->i_mprev = ip->i_mprev; - ip->i_mprev->i_mnext = iq; - - /* - * Fix up the head pointer if it points to the inode being deleted. - */ - if (mp->m_inodes == ip) { - if (ip == iq) { - mp->m_inodes = NULL; - } else { - mp->m_inodes = iq; - } - } - /* Deal with the deleted inodes list */ + XFS_MOUNT_ILOCK(mp); list_del_init(&ip->i_reclaim); - mp->m_ireclaims++; XFS_MOUNT_IUNLOCK(mp); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 104623b..55d50b8 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -233,16 +233,8 @@ typedef struct dm_attrs_s { __uint16_t da_pad; /* DMIG extra padding */ } dm_attrs_t; -typedef struct { - struct xfs_inode *ip_mnext; /* next inode in mount list */ - struct xfs_inode *ip_mprev; /* ptr to prev inode */ - struct xfs_mount *ip_mount; /* fs mount struct ptr */ -} xfs_iptr_t; - typedef struct xfs_inode { /* Inode linking and identification information. 
*/ - struct xfs_inode *i_mnext; /* next inode in mount list */ - struct xfs_inode *i_mprev; /* ptr to prev inode */ struct xfs_mount *i_mount; /* fs mount struct ptr */ struct list_head i_reclaim; /* reclaim list */ struct inode *i_vnode; /* vnode backpointer */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 40338ff..43e5917 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1282,11 +1282,6 @@ xfs_unmountfs( xfs_unmountfs_wait(mp); /* wait for async bufs */ xfs_log_unmount(mp); /* Done! No more fs ops. */ - /* - * All inodes from this mount point should be freed. - */ - ASSERT(mp->m_inodes == NULL); - if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) uuid_table_remove(&mp->m_sb.sb_uuid); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index f4644d7..0ba0526 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -248,7 +248,6 @@ typedef struct xfs_mount { xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ spinlock_t m_agirotor_lock;/* .. and lock protecting it */ xfs_agnumber_t m_maxagi; /* highest inode alloc group */ - struct xfs_inode *m_inodes; /* active inode list */ struct list_head m_del_inodes; /* inodes to reclaim */ mutex_t m_ilock; /* inode list mutex */ uint m_ireclaims; /* count of calls to reclaim*/ -- cgit v0.10.2 From 136341b41ad4883bd668120f727a52c42331fe8a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:11:40 +1100 Subject: [XFS] cleanup btree record / key / ptr addressing macros. Replace the generic record / key / ptr addressing macros that use cpp token pasting with simpler macros that do the job for just one given btree type. The new macros lose the cur argument and thus can be used outside the core btree code, but also gain an xfs_mount * argument to allow for checking the CRC flag in the near future. Note that many of these macros aren't actually used in the kernel code, but only in userspace (mostly in xfs_repair). SGI-PV: 988146 SGI-Modid: xfs-linux-melb:xfs-kern:32295a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 6ff27b7..72c083f 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -179,7 +179,7 @@ xfs_allocbt_update_lastrec( if (numrecs) { xfs_alloc_rec_t *rrp; - rrp = XFS_ALLOC_REC_ADDR(block, numrecs, cur); + rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs); len = rrp->ar_blockcount; } else { len = 0; diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index ff1f71d..579f9c7 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -78,16 +78,27 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t; /* * Record, key, and pointer address macros for btree blocks. 
+ * + * (note that some of these may appear unused, but they are used in userspace) */ -#define XFS_ALLOC_REC_ADDR(bb,i,cur) \ - XFS_BTREE_REC_ADDR(xfs_alloc, bb, i) - -#define XFS_ALLOC_KEY_ADDR(bb,i,cur) \ - XFS_BTREE_KEY_ADDR(xfs_alloc, bb, i) +#define XFS_ALLOC_REC_ADDR(mp, block, index) \ + ((xfs_alloc_rec_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_sblock) + \ + (((index) - 1) * sizeof(xfs_alloc_rec_t)))) -#define XFS_ALLOC_PTR_ADDR(bb,i,cur) \ - XFS_BTREE_PTR_ADDR(xfs_alloc, bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur)) +#define XFS_ALLOC_KEY_ADDR(mp, block, index) \ + ((xfs_alloc_key_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_sblock) + \ + ((index) - 1) * sizeof(xfs_alloc_key_t))) +#define XFS_ALLOC_PTR_ADDR(mp, block, index, maxrecs) \ + ((xfs_alloc_ptr_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_sblock) + \ + (maxrecs) * sizeof(xfs_alloc_key_t) + \ + ((index) - 1) * sizeof(xfs_alloc_ptr_t))) extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 09e4de4..3dab937 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -393,7 +393,7 @@ xfs_bmap_count_leaves( STATIC void xfs_bmap_disk_count_leaves( - xfs_extnum_t idx, + struct xfs_mount *mp, xfs_bmbt_block_t *block, int numrecs, int *count); @@ -3539,7 +3539,7 @@ xfs_bmap_extents_to_btree( ablock->bb_level = 0; ablock->bb_leftsib = cpu_to_be64(NULLDFSBNO); ablock->bb_rightsib = cpu_to_be64(NULLDFSBNO); - arp = XFS_BMAP_REC_IADDR(ablock, 1, cur); + arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); for (cnt = i = 0; i < nextents; i++) { ep = xfs_iext_get_ext(ifp, i); @@ -3554,11 +3554,13 @@ xfs_bmap_extents_to_btree( /* * Fill in the root key and pointer. */ - kp = XFS_BMAP_KEY_IADDR(block, 1, cur); - arp = XFS_BMAP_REC_IADDR(ablock, 1, cur); + kp = XFS_BMBT_KEY_ADDR(mp, block, 1); + arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp)); - pp = XFS_BMAP_PTR_IADDR(block, 1, cur); + pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur, + be16_to_cpu(block->bb_level))); *pp = cpu_to_be64(args.fsbno); + /* * Do all this logging at the end so that * the root is at the right level. @@ -4574,7 +4576,7 @@ xfs_bmap_read_extents( error0); if (level == 0) break; - pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); + pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); xfs_trans_brelse(tp, bp); @@ -4617,7 +4619,7 @@ xfs_bmap_read_extents( /* * Copy records into the extent records. */ - frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1); + frp = XFS_BMBT_REC_ADDR(mp, block, 1); start = i; for (j = 0; j < num_recs; j++, i++, frp++) { xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i); @@ -6187,12 +6189,7 @@ xfs_check_block( prevp = NULL; for( i = 1; i <= be16_to_cpu(block->bb_numrecs); i++) { dmxr = mp->m_bmap_dmxr[0]; - - if (root) { - keyp = XFS_BMAP_BROOT_KEY_ADDR(block, i, sz); - } else { - keyp = XFS_BTREE_KEY_ADDR(xfs_bmbt, block, i); - } + keyp = XFS_BMBT_KEY_ADDR(mp, block, i); if (prevp) { ASSERT(be64_to_cpu(prevp->br_startoff) < @@ -6203,19 +6200,16 @@ xfs_check_block( /* * Compare the block numbers to see if there are dups. 
*/ - - if (root) { + if (root) pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz); - } else { - pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, i, dmxr); - } + else + pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr); + for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { - if (root) { + if (root) thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz); - } else { - thispa = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, j, - dmxr); - } + else + thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); if (*thispa == *pp) { cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", __func__, j, i, @@ -6301,7 +6295,7 @@ xfs_bmap_check_leaf_extents( */ xfs_check_block(block, mp, 0, 0); - pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); + pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0); if (bp_release) { @@ -6337,14 +6331,14 @@ xfs_bmap_check_leaf_extents( * conform with the first entry in this one. */ - ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1); + ep = XFS_BMBT_REC_ADDR(mp, block, 1); if (i) { ASSERT(xfs_bmbt_disk_get_startoff(&last) + xfs_bmbt_disk_get_blockcount(&last) <= xfs_bmbt_disk_get_startoff(ep)); } for (j = 1; j < num_recs; j++) { - nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1); + nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1); ASSERT(xfs_bmbt_disk_get_startoff(ep) + xfs_bmbt_disk_get_blockcount(ep) <= xfs_bmbt_disk_get_startoff(nextp)); @@ -6482,7 +6476,7 @@ xfs_bmap_count_tree( } /* Dive to the next level */ - pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]); + pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); if (unlikely((error = xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) { @@ -6497,7 +6491,7 @@ xfs_bmap_count_tree( for (;;) { nextbno = be64_to_cpu(block->bb_rightsib); numrecs = be16_to_cpu(block->bb_numrecs); - xfs_bmap_disk_count_leaves(0, block, numrecs, count); + xfs_bmap_disk_count_leaves(mp, block, numrecs, count); xfs_trans_brelse(tp, bp); if (nextbno == NULLFSBLOCK) break; @@ -6536,7 +6530,7 @@ xfs_bmap_count_leaves( */ STATIC void xfs_bmap_disk_count_leaves( - xfs_extnum_t idx, + struct xfs_mount *mp, xfs_bmbt_block_t *block, int numrecs, int *count) @@ -6545,7 +6539,7 @@ xfs_bmap_disk_count_leaves( xfs_bmbt_rec_t *frp; for (b = 1; b <= numrecs; b++) { - frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b); + frp = XFS_BMBT_REC_ADDR(mp, block, b); *count += xfs_bmbt_disk_get_blockcount(frp); } } diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 853828c..11137c0 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -44,7 +44,6 @@ #include "xfs_error.h" #include "xfs_quota.h" - /* * Determine the extent state. 
*/ @@ -85,9 +84,9 @@ xfs_bmdr_to_bmbt( rblock->bb_leftsib = cpu_to_be64(NULLDFSBNO); rblock->bb_rightsib = cpu_to_be64(NULLDFSBNO); dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); - fkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1); - tkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen); - fpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr); + fkp = XFS_BMDR_KEY_ADDR(dblock, 1); + tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); + fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); tpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); dmxr = be16_to_cpu(dblock->bb_numrecs); memcpy(tkp, fkp, sizeof(*fkp) * dmxr); @@ -448,10 +447,10 @@ xfs_bmbt_to_bmdr( dblock->bb_level = rblock->bb_level; dblock->bb_numrecs = rblock->bb_numrecs; dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); - fkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen); - tkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1); + fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); + tkp = XFS_BMDR_KEY_ADDR(dblock, 1); fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); - tpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr); + tpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); dmxr = be16_to_cpu(dblock->bb_numrecs); memcpy(tkp, fkp, sizeof(*fkp) * dmxr); memcpy(tpp, fpp, sizeof(*fpp) * dmxr); diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 835be2a..7f00107 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -21,6 +21,7 @@ #define XFS_BMAP_MAGIC 0x424d4150 /* 'BMAP' */ struct xfs_btree_cur; +struct xfs_btree_block; struct xfs_btree_lblock; struct xfs_mount; struct xfs_inode; @@ -151,33 +152,50 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; #define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp)) -#define XFS_BMAP_REC_DADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i)) - -#define XFS_BMAP_REC_IADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i)) - -#define XFS_BMAP_KEY_DADDR(bb,i,cur) \ - (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i)) - -#define XFS_BMAP_KEY_IADDR(bb,i,cur) \ - (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i)) - -#define XFS_BMAP_PTR_DADDR(bb,i,cur) \ - (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \ - be16_to_cpu((bb)->bb_level), cur))) -#define XFS_BMAP_PTR_IADDR(bb,i,cur) \ - (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, xfs_bmbt_get_maxrecs(cur, \ - be16_to_cpu((bb)->bb_level)))) +#define XFS_BMBT_REC_ADDR(mp, block, index) \ + ((xfs_bmbt_rec_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_lblock) + \ + ((index) - 1) * sizeof(xfs_bmbt_rec_t))) + +#define XFS_BMBT_KEY_ADDR(mp, block, index) \ + ((xfs_bmbt_key_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_lblock) + \ + ((index) - 1) * sizeof(xfs_bmbt_key_t))) + +#define XFS_BMBT_PTR_ADDR(mp, block, index, maxrecs) \ + ((xfs_bmbt_ptr_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_lblock) + \ + (maxrecs) * sizeof(xfs_bmbt_key_t) + \ + ((index) - 1) * sizeof(xfs_bmbt_ptr_t))) + +#define XFS_BMDR_REC_ADDR(block, index) \ + ((xfs_bmdr_rec_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_bmdr_block) + \ + ((index) - 1) * sizeof(xfs_bmdr_rec_t))) + +#define XFS_BMDR_KEY_ADDR(block, index) \ + ((xfs_bmdr_key_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_bmdr_block) + \ + ((index) - 1) * sizeof(xfs_bmdr_key_t))) + +#define XFS_BMDR_PTR_ADDR(block, index, maxrecs) \ + ((xfs_bmdr_ptr_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_bmdr_block) + \ + (maxrecs) * sizeof(xfs_bmdr_key_t) + \ + ((index) - 1) * sizeof(xfs_bmdr_ptr_t))) /* * These are to be used when we know the size of the block and * we don't have a cursor. 
*/ -#define XFS_BMAP_BROOT_REC_ADDR(bb,i,sz) \ - (XFS_BTREE_REC_ADDR(xfs_bmbt,bb,i)) -#define XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) \ - (XFS_BTREE_KEY_ADDR(xfs_bmbt,bb,i)) -#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb,i,sz) \ - (XFS_BTREE_PTR_ADDR(xfs_bmbt,bb,i,xfs_bmbt_maxrecs(mp, sz, 0))) +#define XFS_BMAP_BROOT_PTR_ADDR(mp, bb, i, sz) \ + XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0)) #define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \ (int)(sizeof(xfs_bmbt_block_t) + \ diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 795a124..d6120a7 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -149,21 +149,6 @@ do { \ } \ } while (0) -/* - * Record, key, and pointer address calculation macros. - * Given block size, type prefix, block pointer, and index of requested entry - * (first entry numbered 1). - */ -#define XFS_BTREE_REC_ADDR(t,bb,i) \ - ((t ## _rec_t *)((char *)(bb) + sizeof(t ## _block_t) + \ - ((i) - 1) * sizeof(t ## _rec_t))) -#define XFS_BTREE_KEY_ADDR(t,bb,i) \ - ((t ## _key_t *)((char *)(bb) + sizeof(t ## _block_t) + \ - ((i) - 1) * sizeof(t ## _key_t))) -#define XFS_BTREE_PTR_ADDR(t,bb,i,mxr) \ - ((t ## _ptr_t *)((char *)(bb) + sizeof(t ## _block_t) + \ - (mxr) * sizeof(t ## _key_t) + ((i) - 1) * sizeof(t ## _ptr_t))) - #define XFS_BTREE_MAXLEVELS 8 /* max of all btrees */ struct xfs_btree_ops { diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 84583cf..8ce72ab 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -258,7 +258,7 @@ xfs_growfs_data_private( block->bb_numrecs = cpu_to_be16(1); block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); - arec = XFS_BTREE_REC_ADDR(xfs_alloc, block, 1); + arec = XFS_ALLOC_REC_ADDR(mp, block, 1); arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); arec->ar_blockcount = cpu_to_be32( agsize - be32_to_cpu(arec->ar_startblock)); @@ -279,7 +279,7 @@ xfs_growfs_data_private( block->bb_numrecs = cpu_to_be16(1); block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); - arec = XFS_BTREE_REC_ADDR(xfs_alloc, block, 1); + arec = XFS_ALLOC_REC_ADDR(mp, block, 1); arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); arec->ar_blockcount = cpu_to_be32( agsize - be32_to_cpu(arec->ar_startblock)); diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index f0fc1e4..fa12c85 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -97,16 +97,27 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; /* * Record, key, and pointer address macros for btree blocks. 
+ * + * (note that some of these may appear unused, but they are used in userspace) */ -#define XFS_INOBT_REC_ADDR(bb,i,cur) \ - (XFS_BTREE_REC_ADDR(xfs_inobt, bb, i)) - -#define XFS_INOBT_KEY_ADDR(bb,i,cur) \ - (XFS_BTREE_KEY_ADDR(xfs_inobt, bb, i)) - -#define XFS_INOBT_PTR_ADDR(bb,i,cur) \ - (XFS_BTREE_PTR_ADDR(xfs_inobt, bb, \ - i, XFS_INOBT_BLOCK_MAXRECS(1, cur))) +#define XFS_INOBT_REC_ADDR(mp, block, index) \ + ((xfs_inobt_rec_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_sblock) + \ + (((index) - 1) * sizeof(xfs_inobt_rec_t)))) + +#define XFS_INOBT_KEY_ADDR(mp, block, index) \ + ((xfs_inobt_key_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_sblock) + \ + ((index) - 1) * sizeof(xfs_inobt_key_t))) + +#define XFS_INOBT_PTR_ADDR(mp, block, index, maxrecs) \ + ((xfs_inobt_ptr_t *) \ + ((char *)(block) + \ + sizeof(struct xfs_btree_sblock) + \ + (maxrecs) * sizeof(xfs_inobt_key_t) + \ + ((index) - 1) * sizeof(xfs_inobt_ptr_t))) extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 73b604e..7b4f13c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2435,10 +2435,8 @@ xfs_iroot_realloc( /* * First copy the records. */ - op = (char *)XFS_BMAP_BROOT_REC_ADDR(ifp->if_broot, 1, - ifp->if_broot_bytes); - np = (char *)XFS_BMAP_BROOT_REC_ADDR(new_broot, 1, - (int)new_size); + op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1); + np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1); memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t)); /* -- cgit v0.10.2 From 7cc95a821df8f09a5d37a923cf8c3a7c3ee00c29 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:14:34 +1100 Subject: [XFS] Always use struct xfs_btree_block instead of short / longform structures. Always use the generic xfs_btree_block type instead of the short / long structures. Add XFS_BTREE_SBLOCK_LEN / XFS_BTREE_LBLOCK_LEN defines for the length of a short / long form block. The rationale for this is that we will grow more btree block header variants to support CRCs and other RAS information, and always accessing them through the same datatype with unions for the short / long form pointers makes implementing this much easier. 
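To spell that layout out, here is a minimal standalone sketch of the combined header described above: the first three fields are shared, the sibling pointers sit in a union of short (32 bit) and long (64 bit) forms, explicit header-length constants take the place of sizeof() on the combined structure, and an addressing macro finds record N by skipping the header. The field layout mirrors what the diff below introduces, but the names (demo_btree_block, demo_rec, DEMO_*) and the record type are illustrative stand-ins, not the kernel definitions.

/*
 * Combined btree block header sketch: the first three fields are
 * shared by both forms, the sibling pointers live in a union of the
 * short (32 bit) and long (64 bit) variants.  Illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

struct demo_btree_block {
	uint32_t	bb_magic;	/* magic number for block type */
	uint16_t	bb_level;	/* 0 is a leaf */
	uint16_t	bb_numrecs;	/* current # of data records */
	union {
		struct {
			uint32_t bb_leftsib;
			uint32_t bb_rightsib;
		} s;			/* short form pointers */
		struct {
			uint64_t bb_leftsib;
			uint64_t bb_rightsib;
		} l;			/* long form pointers */
	} bb_u;
};

/* header lengths used instead of sizeof() on the combined structure */
#define DEMO_SBLOCK_LEN	16	/* 8 byte common part + two 32 bit siblings */
#define DEMO_LBLOCK_LEN	24	/* 8 byte common part + two 64 bit siblings */

struct demo_rec {
	uint32_t	startblock;
	uint32_t	blockcount;
};

/* find record 'index' (counted from 1) by skipping the short form header */
#define DEMO_REC_ADDR(block, index) \
	((struct demo_rec *)((char *)(block) + DEMO_SBLOCK_LEN + \
		((index) - 1) * sizeof(struct demo_rec)))

int
main(void)
{
	uint64_t	buf[512] = { 0 };	/* stand-in for a 4k btree block */
	struct demo_rec	*rp = DEMO_REC_ADDR(buf, 1);

	rp->startblock = 1;
	rp->blockcount = 8;

	printf("short header %d, long header %d, rec 1 at offset %td\n",
	       DEMO_SBLOCK_LEN, DEMO_LBLOCK_LEN, (char *)rp - (char *)buf);
	return 0;
}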
SGI-PV: 988146 SGI-Modid: xfs-linux-melb:xfs-kern:32300a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 0a2a872..c47ce90 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -380,21 +380,20 @@ xfs_alloc_fixup_trees( return error; XFS_WANT_CORRUPTED_RETURN(i == 1); } + #ifdef DEBUG - { - xfs_alloc_block_t *bnoblock; - xfs_alloc_block_t *cntblock; - - if (bno_cur->bc_nlevels == 1 && - cnt_cur->bc_nlevels == 1) { - bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]); - cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]); - XFS_WANT_CORRUPTED_RETURN( - be16_to_cpu(bnoblock->bb_numrecs) == - be16_to_cpu(cntblock->bb_numrecs)); - } + if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) { + struct xfs_btree_block *bnoblock; + struct xfs_btree_block *cntblock; + + bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); + cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); + + XFS_WANT_CORRUPTED_RETURN( + bnoblock->bb_numrecs == cntblock->bb_numrecs); } #endif + /* * Deal with all four cases: the allocated record is contained * within the freespace record, so we can have new freespace diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 72c083f..733cb75 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -490,7 +490,7 @@ xfs_allocbt_maxrecs( int blocklen, int leaf) { - blocklen -= sizeof(struct xfs_btree_sblock); + blocklen -= XFS_ALLOC_BLOCK_LEN(mp); if (leaf) return blocklen / sizeof(xfs_alloc_rec_t); diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index 579f9c7..a6caa00 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -24,7 +24,6 @@ struct xfs_buf; struct xfs_btree_cur; -struct xfs_btree_sblock; struct xfs_mount; /* @@ -50,10 +49,6 @@ typedef struct xfs_alloc_rec_incore { /* btree pointer type */ typedef __be32 xfs_alloc_ptr_t; -/* btree block header type */ -typedef struct xfs_btree_sblock xfs_alloc_block_t; - -#define XFS_BUF_TO_ALLOC_BLOCK(bp) ((xfs_alloc_block_t *)XFS_BUF_PTR(bp)) /* * Minimum and maximum blocksize and sectorsize. @@ -77,6 +72,13 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t; #define XFS_CNT_BLOCK(mp) ((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1)) /* + * Btree block header size depends on a superblock flag. + * + * (not quite yet, but soon) + */ +#define XFS_ALLOC_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN + +/* * Record, key, and pointer address macros for btree blocks. 
* * (note that some of these may appear unused, but they are used in userspace) @@ -84,19 +86,19 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t; #define XFS_ALLOC_REC_ADDR(mp, block, index) \ ((xfs_alloc_rec_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_sblock) + \ + XFS_ALLOC_BLOCK_LEN(mp) + \ (((index) - 1) * sizeof(xfs_alloc_rec_t)))) #define XFS_ALLOC_KEY_ADDR(mp, block, index) \ ((xfs_alloc_key_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_sblock) + \ + XFS_ALLOC_BLOCK_LEN(mp) + \ ((index) - 1) * sizeof(xfs_alloc_key_t))) #define XFS_ALLOC_PTR_ADDR(mp, block, index, maxrecs) \ ((xfs_alloc_ptr_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_sblock) + \ + XFS_ALLOC_BLOCK_LEN(mp) + \ (maxrecs) * sizeof(xfs_alloc_key_t) + \ ((index) - 1) * sizeof(xfs_alloc_ptr_t))) diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3dab937..7796a0c 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -394,7 +394,7 @@ xfs_bmap_count_leaves( STATIC void xfs_bmap_disk_count_leaves( struct xfs_mount *mp, - xfs_bmbt_block_t *block, + struct xfs_btree_block *block, int numrecs, int *count); @@ -3042,14 +3042,14 @@ xfs_bmap_btree_to_extents( int whichfork) /* data or attr fork */ { /* REFERENCED */ - xfs_bmbt_block_t *cblock;/* child btree block */ + struct xfs_btree_block *cblock;/* child btree block */ xfs_fsblock_t cbno; /* child block number */ xfs_buf_t *cbp; /* child block's buffer */ int error; /* error return value */ xfs_ifork_t *ifp; /* inode fork data */ xfs_mount_t *mp; /* mount point structure */ __be64 *pp; /* ptr to block address */ - xfs_bmbt_block_t *rblock;/* root btree block */ + struct xfs_btree_block *rblock;/* root btree block */ mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); @@ -3069,8 +3069,8 @@ xfs_bmap_btree_to_extents( if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF))) return error; - cblock = XFS_BUF_TO_BMBT_BLOCK(cbp); - if ((error = xfs_btree_check_lblock(cur, cblock, 0, cbp))) + cblock = XFS_BUF_TO_BLOCK(cbp); + if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) return error; xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp); ip->i_d.di_nblocks--; @@ -3450,11 +3450,11 @@ xfs_bmap_extents_to_btree( int *logflagsp, /* inode logging flags */ int whichfork) /* data or attr fork */ { - xfs_bmbt_block_t *ablock; /* allocated (child) bt block */ + struct xfs_btree_block *ablock; /* allocated (child) bt block */ xfs_buf_t *abp; /* buffer for ablock */ xfs_alloc_arg_t args; /* allocation arguments */ xfs_bmbt_rec_t *arp; /* child record pointer */ - xfs_bmbt_block_t *block; /* btree root block */ + struct xfs_btree_block *block; /* btree root block */ xfs_btree_cur_t *cur; /* bmap btree cursor */ xfs_bmbt_rec_host_t *ep; /* extent record pointer */ int error; /* error return value */ @@ -3474,6 +3474,7 @@ xfs_bmap_extents_to_btree( */ xfs_iroot_realloc(ip, 1, whichfork); ifp->if_flags |= XFS_IFBROOT; + /* * Fill in the root. */ @@ -3481,8 +3482,9 @@ xfs_bmap_extents_to_btree( block->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); block->bb_level = cpu_to_be16(1); block->bb_numrecs = cpu_to_be16(1); - block->bb_leftsib = cpu_to_be64(NULLDFSBNO); - block->bb_rightsib = cpu_to_be64(NULLDFSBNO); + block->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); + block->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); + /* * Need a cursor. Can't allocate until bb_level is filled in. */ @@ -3534,11 +3536,11 @@ xfs_bmap_extents_to_btree( /* * Fill in the child block. 
*/ - ablock = XFS_BUF_TO_BMBT_BLOCK(abp); + ablock = XFS_BUF_TO_BLOCK(abp); ablock->bb_magic = cpu_to_be32(XFS_BMAP_MAGIC); ablock->bb_level = 0; - ablock->bb_leftsib = cpu_to_be64(NULLDFSBNO); - ablock->bb_rightsib = cpu_to_be64(NULLDFSBNO); + ablock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); + ablock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); for (cnt = i = 0; i < nextents; i++) { @@ -3550,7 +3552,8 @@ xfs_bmap_extents_to_btree( } } ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork)); - ablock->bb_numrecs = cpu_to_be16(cnt); + xfs_btree_set_numrecs(ablock, cnt); + /* * Fill in the root key and pointer. */ @@ -4533,7 +4536,7 @@ xfs_bmap_read_extents( xfs_inode_t *ip, /* incore inode */ int whichfork) /* data or attr fork */ { - xfs_bmbt_block_t *block; /* current btree block */ + struct xfs_btree_block *block; /* current btree block */ xfs_fsblock_t bno; /* block # of "block" */ xfs_buf_t *bp; /* buffer for "block" */ int error; /* error return value */ @@ -4570,7 +4573,7 @@ xfs_bmap_read_extents( if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF))) return error; - block = XFS_BUF_TO_BMBT_BLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); XFS_WANT_CORRUPTED_GOTO( XFS_BMAP_SANITY_CHECK(mp, block, level), error0); @@ -4596,7 +4599,7 @@ xfs_bmap_read_extents( xfs_extnum_t start; - num_recs = be16_to_cpu(block->bb_numrecs); + num_recs = xfs_btree_get_numrecs(block); if (unlikely(i + num_recs > room)) { ASSERT(i + num_recs <= room); xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, @@ -4613,7 +4616,7 @@ xfs_bmap_read_extents( /* * Read-ahead the next leaf block, if any. */ - nextbno = be64_to_cpu(block->bb_rightsib); + nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); if (nextbno != NULLFSBLOCK) xfs_btree_reada_bufl(mp, nextbno, 1); /* @@ -4650,7 +4653,7 @@ xfs_bmap_read_extents( if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF))) return error; - block = XFS_BUF_TO_BMBT_BLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); } ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork)); @@ -6175,7 +6178,7 @@ xfs_bmap_get_bp( void xfs_check_block( - xfs_bmbt_block_t *block, + struct xfs_btree_block *block, xfs_mount_t *mp, int root, short sz) @@ -6187,7 +6190,7 @@ xfs_check_block( ASSERT(be16_to_cpu(block->bb_level) > 0); prevp = NULL; - for( i = 1; i <= be16_to_cpu(block->bb_numrecs); i++) { + for( i = 1; i <= xfs_btree_get_numrecs(block); i++) { dmxr = mp->m_bmap_dmxr[0]; keyp = XFS_BMBT_KEY_ADDR(mp, block, i); @@ -6232,7 +6235,7 @@ xfs_bmap_check_leaf_extents( xfs_inode_t *ip, /* incore inode pointer */ int whichfork) /* data or attr fork */ { - xfs_bmbt_block_t *block; /* current btree block */ + struct xfs_btree_block *block; /* current btree block */ xfs_fsblock_t bno; /* block # of "block" */ xfs_buf_t *bp; /* buffer for "block" */ int error; /* error return value */ @@ -6282,7 +6285,7 @@ xfs_bmap_check_leaf_extents( if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, XFS_BMAP_BTREE_REF))) goto error_norelse; - block = XFS_BUF_TO_BMBT_BLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); XFS_WANT_CORRUPTED_GOTO( XFS_BMAP_SANITY_CHECK(mp, block, level), error0); @@ -6317,13 +6320,13 @@ xfs_bmap_check_leaf_extents( xfs_extnum_t num_recs; - num_recs = be16_to_cpu(block->bb_numrecs); + num_recs = xfs_btree_get_numrecs(block); /* * Read-ahead the next leaf block, if any. 
*/ - nextbno = be64_to_cpu(block->bb_rightsib); + nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); /* * Check all the extents to make sure they are OK. @@ -6367,7 +6370,7 @@ xfs_bmap_check_leaf_extents( if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, XFS_BMAP_BTREE_REF))) goto error_norelse; - block = XFS_BUF_TO_BMBT_BLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); } if (bp_release) { bp_release = 0; @@ -6397,7 +6400,7 @@ xfs_bmap_count_blocks( int whichfork, /* data or attr fork */ int *count) /* out: count of blocks */ { - xfs_bmbt_block_t *block; /* current btree block */ + struct xfs_btree_block *block; /* current btree block */ xfs_fsblock_t bno; /* block # of "block" */ xfs_ifork_t *ifp; /* fork structure */ int level; /* btree level, for checking */ @@ -6454,24 +6457,24 @@ xfs_bmap_count_tree( __be64 *pp; xfs_fsblock_t bno = blockno; xfs_fsblock_t nextbno; - xfs_bmbt_block_t *block, *nextblock; + struct xfs_btree_block *block, *nextblock; int numrecs; if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF))) return error; *count += 1; - block = XFS_BUF_TO_BMBT_BLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); if (--level) { /* Not at node above leafs, count this level of nodes */ - nextbno = be64_to_cpu(block->bb_rightsib); + nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); while (nextbno != NULLFSBLOCK) { if ((error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp, XFS_BMAP_BTREE_REF))) return error; *count += 1; - nextblock = XFS_BUF_TO_BMBT_BLOCK(nbp); - nextbno = be64_to_cpu(nextblock->bb_rightsib); + nextblock = XFS_BUF_TO_BLOCK(nbp); + nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib); xfs_trans_brelse(tp, nbp); } @@ -6489,7 +6492,7 @@ xfs_bmap_count_tree( } else { /* count all level 1 nodes and their leaves */ for (;;) { - nextbno = be64_to_cpu(block->bb_rightsib); + nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); numrecs = be16_to_cpu(block->bb_numrecs); xfs_bmap_disk_count_leaves(mp, block, numrecs, count); xfs_trans_brelse(tp, bp); @@ -6500,7 +6503,7 @@ xfs_bmap_count_tree( XFS_BMAP_BTREE_REF))) return error; *count += 1; - block = XFS_BUF_TO_BMBT_BLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); } } return 0; @@ -6531,7 +6534,7 @@ xfs_bmap_count_leaves( STATIC void xfs_bmap_disk_count_leaves( struct xfs_mount *mp, - xfs_bmbt_block_t *block, + struct xfs_btree_block *block, int numrecs, int *count) { diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 11137c0..e46e02b 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -68,7 +68,7 @@ xfs_bmdr_to_bmbt( struct xfs_mount *mp, xfs_bmdr_block_t *dblock, int dblocklen, - xfs_bmbt_block_t *rblock, + struct xfs_btree_block *rblock, int rblocklen) { int dmxr; @@ -81,8 +81,8 @@ xfs_bmdr_to_bmbt( rblock->bb_level = dblock->bb_level; ASSERT(be16_to_cpu(rblock->bb_level) > 0); rblock->bb_numrecs = dblock->bb_numrecs; - rblock->bb_leftsib = cpu_to_be64(NULLDFSBNO); - rblock->bb_rightsib = cpu_to_be64(NULLDFSBNO); + rblock->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); + rblock->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); fkp = XFS_BMDR_KEY_ADDR(dblock, 1); tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1); @@ -429,7 +429,7 @@ xfs_bmbt_set_state( void xfs_bmbt_to_bmdr( struct xfs_mount *mp, - xfs_bmbt_block_t *rblock, + struct xfs_btree_block *rblock, int rblocklen, xfs_bmdr_block_t *dblock, int dblocklen) @@ -441,8 +441,8 @@ xfs_bmbt_to_bmdr( __be64 *tpp; ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC); - ASSERT(be64_to_cpu(rblock->bb_leftsib) 
== NULLDFSBNO); - ASSERT(be64_to_cpu(rblock->bb_rightsib) == NULLDFSBNO); + ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO); + ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO); ASSERT(be16_to_cpu(rblock->bb_level) > 0); dblock->bb_level = rblock->bb_level; dblock->bb_numrecs = rblock->bb_numrecs; @@ -906,7 +906,7 @@ xfs_bmbt_maxrecs( int blocklen, int leaf) { - blocklen -= sizeof(struct xfs_btree_lblock); + blocklen -= XFS_BMBT_BLOCK_LEN(mp); if (leaf) return blocklen / sizeof(xfs_bmbt_rec_t); diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 7f00107..735a424 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -22,7 +22,6 @@ struct xfs_btree_cur; struct xfs_btree_block; -struct xfs_btree_lblock; struct xfs_mount; struct xfs_inode; struct xfs_trans; @@ -147,27 +146,29 @@ typedef struct xfs_bmbt_key { /* btree pointer type */ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; -/* btree block header type */ -typedef struct xfs_btree_lblock xfs_bmbt_block_t; - -#define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp)) +/* + * Btree block header size depends on a superblock flag. + * + * (not quite yet, but soon) + */ +#define XFS_BMBT_BLOCK_LEN(mp) XFS_BTREE_LBLOCK_LEN #define XFS_BMBT_REC_ADDR(mp, block, index) \ ((xfs_bmbt_rec_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_lblock) + \ + XFS_BMBT_BLOCK_LEN(mp) + \ ((index) - 1) * sizeof(xfs_bmbt_rec_t))) #define XFS_BMBT_KEY_ADDR(mp, block, index) \ ((xfs_bmbt_key_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_lblock) + \ + XFS_BMBT_BLOCK_LEN(mp) + \ ((index) - 1) * sizeof(xfs_bmbt_key_t))) #define XFS_BMBT_PTR_ADDR(mp, block, index, maxrecs) \ ((xfs_bmbt_ptr_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_lblock) + \ + XFS_BMBT_BLOCK_LEN(mp) + \ (maxrecs) * sizeof(xfs_bmbt_key_t) + \ ((index) - 1) * sizeof(xfs_bmbt_ptr_t))) @@ -198,7 +199,7 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; XFS_BMBT_PTR_ADDR(mp, bb, i, xfs_bmbt_maxrecs(mp, sz, 0)) #define XFS_BMAP_BROOT_SPACE_CALC(nrecs) \ - (int)(sizeof(xfs_bmbt_block_t) + \ + (int)(XFS_BTREE_LBLOCK_LEN + \ ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) #define XFS_BMAP_BROOT_SPACE(bb) \ @@ -223,7 +224,7 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t; * Prototypes for xfs_bmap.c to call. 
*/ extern void xfs_bmdr_to_bmbt(struct xfs_mount *, xfs_bmdr_block_t *, int, - xfs_bmbt_block_t *, int); + struct xfs_btree_block *, int); extern void xfs_bmbt_get_all(xfs_bmbt_rec_host_t *r, xfs_bmbt_irec_t *s); extern xfs_filblks_t xfs_bmbt_get_blockcount(xfs_bmbt_rec_host_t *r); extern xfs_fsblock_t xfs_bmbt_get_startblock(xfs_bmbt_rec_host_t *r); @@ -246,7 +247,7 @@ extern void xfs_bmbt_disk_set_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s); extern void xfs_bmbt_disk_set_allf(xfs_bmbt_rec_t *r, xfs_fileoff_t o, xfs_fsblock_t b, xfs_filblks_t c, xfs_exntst_t v); -extern void xfs_bmbt_to_bmdr(struct xfs_mount *, xfs_bmbt_block_t *, int, +extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int, xfs_bmdr_block_t *, int); extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 72a26bb..7ed5926 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -53,10 +53,10 @@ const __uint32_t xfs_magics[XFS_BTNUM_MAX] = { }; -int /* error (0 or EFSCORRUPTED) */ +STATIC int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_lblock( struct xfs_btree_cur *cur, /* btree cursor */ - struct xfs_btree_lblock *block, /* btree long form block pointer */ + struct xfs_btree_block *block, /* btree long form block pointer */ int level, /* level of the btree block */ struct xfs_buf *bp) /* buffer for block, if any */ { @@ -69,12 +69,14 @@ xfs_btree_check_lblock( be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && - block->bb_leftsib && - (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO || - XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) && - block->bb_rightsib && - (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO || - XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib))); + block->bb_u.l.bb_leftsib && + (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO || + XFS_FSB_SANITY_CHECK(mp, + be64_to_cpu(block->bb_u.l.bb_leftsib))) && + block->bb_u.l.bb_rightsib && + (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO || + XFS_FSB_SANITY_CHECK(mp, + be64_to_cpu(block->bb_u.l.bb_rightsib))); if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK, XFS_RANDOM_BTREE_CHECK_LBLOCK))) { @@ -90,7 +92,7 @@ xfs_btree_check_lblock( STATIC int /* error (0 or EFSCORRUPTED) */ xfs_btree_check_sblock( struct xfs_btree_cur *cur, /* btree cursor */ - struct xfs_btree_sblock *block, /* btree short form block pointer */ + struct xfs_btree_block *block, /* btree short form block pointer */ int level, /* level of the btree block */ struct xfs_buf *bp) /* buffer containing block */ { @@ -107,12 +109,12 @@ xfs_btree_check_sblock( be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && - (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK || - be32_to_cpu(block->bb_leftsib) < agflen) && - block->bb_leftsib && - (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK || - be32_to_cpu(block->bb_rightsib) < agflen) && - block->bb_rightsib; + (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK || + be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) && + block->bb_u.s.bb_leftsib && + (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK || + be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && + block->bb_u.s.bb_rightsib; if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK, XFS_RANDOM_BTREE_CHECK_SBLOCK))) { @@ -135,13 +137,10 @@ xfs_btree_check_block( int level, /* level of the btree block 
*/ struct xfs_buf *bp) /* buffer containing block, if any */ { - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - return xfs_btree_check_lblock(cur, - (struct xfs_btree_lblock *)block, level, bp); - } else { - return xfs_btree_check_sblock(cur, - (struct xfs_btree_sblock *)block, level, bp); - } + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + return xfs_btree_check_lblock(cur, block, level, bp); + else + return xfs_btree_check_sblock(cur, block, level, bp); } /* @@ -326,8 +325,8 @@ xfs_btree_dup_cursor( static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) { return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? - sizeof(struct xfs_btree_lblock) : - sizeof(struct xfs_btree_sblock); + XFS_BTREE_LBLOCK_LEN : + XFS_BTREE_SBLOCK_LEN; } /* @@ -510,7 +509,7 @@ xfs_btree_islastblock( xfs_btree_cur_t *cur, /* btree cursor */ int level) /* level to check */ { - xfs_btree_block_t *block; /* generic btree block pointer */ + struct xfs_btree_block *block; /* generic btree block pointer */ xfs_buf_t *bp; /* buffer containing block */ block = xfs_btree_get_block(cur, level, &bp); @@ -530,7 +529,7 @@ xfs_btree_firstrec( xfs_btree_cur_t *cur, /* btree cursor */ int level) /* level to change */ { - xfs_btree_block_t *block; /* generic btree block pointer */ + struct xfs_btree_block *block; /* generic btree block pointer */ xfs_buf_t *bp; /* buffer containing block */ /* @@ -559,7 +558,7 @@ xfs_btree_lastrec( xfs_btree_cur_t *cur, /* btree cursor */ int level) /* level to change */ { - xfs_btree_block_t *block; /* generic btree block pointer */ + struct xfs_btree_block *block; /* generic btree block pointer */ xfs_buf_t *bp; /* buffer containing block */ /* @@ -814,7 +813,7 @@ xfs_btree_setbuf( int lev, /* level in btree */ xfs_buf_t *bp) /* new buffer to set */ { - xfs_btree_block_t *b; /* btree block */ + struct xfs_btree_block *b; /* btree block */ xfs_buf_t *obp; /* old buffer pointer */ obp = cur->bc_bufs[lev]; @@ -1252,20 +1251,20 @@ xfs_btree_log_block( int first; /* first byte offset logged */ int last; /* last byte offset logged */ static const short soffsets[] = { /* table of offsets (short) */ - offsetof(struct xfs_btree_sblock, bb_magic), - offsetof(struct xfs_btree_sblock, bb_level), - offsetof(struct xfs_btree_sblock, bb_numrecs), - offsetof(struct xfs_btree_sblock, bb_leftsib), - offsetof(struct xfs_btree_sblock, bb_rightsib), - sizeof(struct xfs_btree_sblock) + offsetof(struct xfs_btree_block, bb_magic), + offsetof(struct xfs_btree_block, bb_level), + offsetof(struct xfs_btree_block, bb_numrecs), + offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib), + offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib), + XFS_BTREE_SBLOCK_LEN }; static const short loffsets[] = { /* table of offsets (long) */ - offsetof(struct xfs_btree_lblock, bb_magic), - offsetof(struct xfs_btree_lblock, bb_level), - offsetof(struct xfs_btree_lblock, bb_numrecs), - offsetof(struct xfs_btree_lblock, bb_leftsib), - offsetof(struct xfs_btree_lblock, bb_rightsib), - sizeof(struct xfs_btree_lblock) + offsetof(struct xfs_btree_block, bb_magic), + offsetof(struct xfs_btree_block, bb_level), + offsetof(struct xfs_btree_block, bb_numrecs), + offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib), + offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib), + XFS_BTREE_LBLOCK_LEN }; XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); @@ -3018,7 +3017,7 @@ xfs_btree_kill_iroot( if (index) { xfs_iroot_realloc(cur->bc_private.b.ip, index, cur->bc_private.b.whichfork); - block = (struct xfs_btree_block *)ifp->if_broot; + block = ifp->if_broot; } 
be16_add_cpu(&block->bb_numrecs, index); diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index d6120a7..789fffd 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -39,31 +39,16 @@ extern kmem_zone_t *xfs_btree_cur_zone; #define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) /* - * Short form header: space allocation btrees. - */ -typedef struct xfs_btree_sblock { - __be32 bb_magic; /* magic number for block type */ - __be16 bb_level; /* 0 is a leaf */ - __be16 bb_numrecs; /* current # of data records */ - __be32 bb_leftsib; /* left sibling block or NULLAGBLOCK */ - __be32 bb_rightsib; /* right sibling block or NULLAGBLOCK */ -} xfs_btree_sblock_t; - -/* - * Long form header: bmap btrees. - */ -typedef struct xfs_btree_lblock { - __be32 bb_magic; /* magic number for block type */ - __be16 bb_level; /* 0 is a leaf */ - __be16 bb_numrecs; /* current # of data records */ - __be64 bb_leftsib; /* left sibling block or NULLDFSBNO */ - __be64 bb_rightsib; /* right sibling block or NULLDFSBNO */ -} xfs_btree_lblock_t; - -/* - * Combined header and structure, used by common code. + * Generic btree header. + * + * This is a comination of the actual format used on disk for short and long + * format btrees. The first three fields are shared by both format, but + * the pointers are different and should be used with care. + * + * To get the size of the actual short or long form headers please use + * the size macros below. Never use sizeof(xfs_btree_block). */ -typedef struct xfs_btree_block { +struct xfs_btree_block { __be32 bb_magic; /* magic number for block type */ __be16 bb_level; /* 0 is a leaf */ __be16 bb_numrecs; /* current # of data records */ @@ -77,7 +62,11 @@ typedef struct xfs_btree_block { __be64 bb_rightsib; } l; /* long form pointers */ } bb_u; /* rest */ -} xfs_btree_block_t; +}; + +#define XFS_BTREE_SBLOCK_LEN 16 /* size of a short form block */ +#define XFS_BTREE_LBLOCK_LEN 24 /* size of a long form block */ + /* * Generic key, ptr and record wrapper structures. @@ -294,20 +283,8 @@ typedef struct xfs_btree_cur /* * Convert from buffer to btree block header. */ -#define XFS_BUF_TO_BLOCK(bp) ((xfs_btree_block_t *)XFS_BUF_PTR(bp)) -#define XFS_BUF_TO_LBLOCK(bp) ((xfs_btree_lblock_t *)XFS_BUF_PTR(bp)) -#define XFS_BUF_TO_SBLOCK(bp) ((xfs_btree_sblock_t *)XFS_BUF_PTR(bp)) - +#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)XFS_BUF_PTR(bp)) -/* - * Check that long form block header is ok. - */ -int /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_lblock( - struct xfs_btree_cur *cur, /* btree cursor */ - struct xfs_btree_lblock *block, /* btree long form block pointer */ - int level, /* level of the btree block */ - struct xfs_buf *bp); /* buffer containing block, if any */ /* * Check that block header is ok. diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index 2a00fcc..d7cf392 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -165,7 +165,7 @@ typedef enum xfs_dinode_fmt */ #define XFS_LITINO(mp) ((mp)->m_litino) #define XFS_BROOT_SIZE_ADJ \ - (sizeof(xfs_bmbt_block_t) - sizeof(xfs_bmdr_block_t)) + (XFS_BTREE_LBLOCK_LEN - sizeof(xfs_bmdr_block_t)) /* * Inode data & attribute fork sizes, per inode. 
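/*
 * For reference, the two header lengths defined in the xfs_btree.h hunk
 * above fall straight out of the on-disk layout of struct xfs_btree_block
 * (a sketch only; field widths as declared in that hunk):
 *
 *	bb_magic	__be32			 4 bytes
 *	bb_level	__be16			 2 bytes
 *	bb_numrecs	__be16			 2 bytes
 *	bb_u.s siblings	2 x __be32		 8 bytes  (short form)
 *	bb_u.l siblings	2 x __be64		16 bytes  (long form)
 *
 *	XFS_BTREE_SBLOCK_LEN = 4 + 2 + 2 +  8 = 16
 *	XFS_BTREE_LBLOCK_LEN = 4 + 2 + 2 + 16 = 24
 *
 * sizeof(struct xfs_btree_block) always accounts for the larger (long
 * form) union member, which is why the new header comment says never to
 * use it; the per-btree *_BLOCK_LEN macros are used instead.
 */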
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 8ce72ab..f1d0585 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -126,7 +126,7 @@ xfs_growfs_data_private( xfs_extlen_t agsize; xfs_extlen_t tmpsize; xfs_alloc_rec_t *arec; - xfs_btree_sblock_t *block; + struct xfs_btree_block *block; xfs_buf_t *bp; int bucket; int dpct; @@ -251,13 +251,13 @@ xfs_growfs_data_private( bp = xfs_buf_get(mp->m_ddev_targp, XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)), BTOBB(mp->m_sb.sb_blocksize), 0); - block = XFS_BUF_TO_SBLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); memset(block, 0, mp->m_sb.sb_blocksize); block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC); block->bb_level = 0; block->bb_numrecs = cpu_to_be16(1); - block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); - block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); + block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); + block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); arec = XFS_ALLOC_REC_ADDR(mp, block, 1); arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); arec->ar_blockcount = cpu_to_be32( @@ -272,13 +272,13 @@ xfs_growfs_data_private( bp = xfs_buf_get(mp->m_ddev_targp, XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)), BTOBB(mp->m_sb.sb_blocksize), 0); - block = XFS_BUF_TO_SBLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); memset(block, 0, mp->m_sb.sb_blocksize); block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC); block->bb_level = 0; block->bb_numrecs = cpu_to_be16(1); - block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); - block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); + block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); + block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); arec = XFS_ALLOC_REC_ADDR(mp, block, 1); arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); arec->ar_blockcount = cpu_to_be32( @@ -294,13 +294,13 @@ xfs_growfs_data_private( bp = xfs_buf_get(mp->m_ddev_targp, XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)), BTOBB(mp->m_sb.sb_blocksize), 0); - block = XFS_BUF_TO_SBLOCK(bp); + block = XFS_BUF_TO_BLOCK(bp); memset(block, 0, mp->m_sb.sb_blocksize); block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC); block->bb_level = 0; block->bb_numrecs = 0; - block->bb_leftsib = cpu_to_be32(NULLAGBLOCK); - block->bb_rightsib = cpu_to_be32(NULLAGBLOCK); + block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); + block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); error = xfs_bwrite(mp, bp); if (error) { goto error0; diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 46aabb3..99f2408 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -375,7 +375,7 @@ xfs_inobt_maxrecs( int blocklen, int leaf) { - blocklen -= sizeof(struct xfs_btree_sblock); + blocklen -= XFS_INOBT_BLOCK_LEN(mp); if (leaf) return blocklen / sizeof(xfs_inobt_rec_t); diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index fa12c85..37e5dd0 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -24,7 +24,6 @@ struct xfs_buf; struct xfs_btree_cur; -struct xfs_btree_sblock; struct xfs_mount; /* @@ -70,11 +69,6 @@ typedef struct xfs_inobt_key { /* btree pointer type */ typedef __be32 xfs_inobt_ptr_t; -/* btree block header type */ -typedef struct xfs_btree_sblock xfs_inobt_block_t; - -#define XFS_BUF_TO_INOBT_BLOCK(bp) ((xfs_inobt_block_t *)XFS_BUF_PTR(bp)) - /* * Bit manipulations for ir_free. */ @@ -96,6 +90,13 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; #define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) /* + * Btree block header size depends on a superblock flag. 
+ * + * (not quite yet, but soon) + */ +#define XFS_INOBT_BLOCK_LEN(mp) XFS_BTREE_SBLOCK_LEN + +/* * Record, key, and pointer address macros for btree blocks. * * (note that some of these may appear unused, but they are used in userspace) @@ -103,19 +104,19 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t; #define XFS_INOBT_REC_ADDR(mp, block, index) \ ((xfs_inobt_rec_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_sblock) + \ + XFS_INOBT_BLOCK_LEN(mp) + \ (((index) - 1) * sizeof(xfs_inobt_rec_t)))) #define XFS_INOBT_KEY_ADDR(mp, block, index) \ ((xfs_inobt_key_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_sblock) + \ + XFS_INOBT_BLOCK_LEN(mp) + \ ((index) - 1) * sizeof(xfs_inobt_key_t))) #define XFS_INOBT_PTR_ADDR(mp, block, index, maxrecs) \ ((xfs_inobt_ptr_t *) \ ((char *)(block) + \ - sizeof(struct xfs_btree_sblock) + \ + XFS_INOBT_BLOCK_LEN(mp) + \ (maxrecs) * sizeof(xfs_inobt_key_t) + \ ((index) - 1) * sizeof(xfs_inobt_ptr_t))) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7b4f13c..bc33762 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2352,7 +2352,7 @@ xfs_iroot_realloc( struct xfs_mount *mp = ip->i_mount; int cur_max; xfs_ifork_t *ifp; - xfs_bmbt_block_t *new_broot; + struct xfs_btree_block *new_broot; int new_max; size_t new_size; char *np; @@ -2373,8 +2373,7 @@ xfs_iroot_realloc( */ if (ifp->if_broot_bytes == 0) { new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(rec_diff); - ifp->if_broot = (xfs_bmbt_block_t*)kmem_alloc(new_size, - KM_SLEEP); + ifp->if_broot = kmem_alloc(new_size, KM_SLEEP); ifp->if_broot_bytes = (int)new_size; return; } @@ -2388,9 +2387,7 @@ xfs_iroot_realloc( cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0); new_max = cur_max + rec_diff; new_size = (size_t)XFS_BMAP_BROOT_SPACE_CALC(new_max); - ifp->if_broot = (xfs_bmbt_block_t *) - kmem_realloc(ifp->if_broot, - new_size, + ifp->if_broot = kmem_realloc(ifp->if_broot, new_size, (size_t)XFS_BMAP_BROOT_SPACE_CALC(cur_max), /* old size */ KM_SLEEP); op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, @@ -2418,11 +2415,11 @@ xfs_iroot_realloc( else new_size = 0; if (new_size > 0) { - new_broot = (xfs_bmbt_block_t *)kmem_alloc(new_size, KM_SLEEP); + new_broot = kmem_alloc(new_size, KM_SLEEP); /* * First copy over the btree block header. 
*/ - memcpy(new_broot, ifp->if_broot, sizeof(xfs_bmbt_block_t)); + memcpy(new_broot, ifp->if_broot, XFS_BTREE_LBLOCK_LEN); } else { new_broot = NULL; ifp->if_flags &= ~XFS_IFBROOT; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 55d50b8..6fd20fc 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -63,7 +63,7 @@ typedef struct xfs_ext_irec { typedef struct xfs_ifork { int if_bytes; /* bytes in if_u1 */ int if_real_bytes; /* bytes allocated in if_u1 */ - xfs_bmbt_block_t *if_broot; /* file's incore btree root */ + struct xfs_btree_block *if_broot; /* file's incore btree root */ short if_broot_bytes; /* bytes allocated for root */ unsigned char if_flags; /* per-fork flags */ unsigned char if_ext_max; /* max # of extent records */ @@ -213,7 +213,6 @@ struct ktrace; struct xfs_buf; struct xfs_bmap_free; struct xfs_bmbt_irec; -struct xfs_bmbt_block; struct xfs_inode_log_item; struct xfs_mount; struct xfs_trans; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 23c3a78..199c8ea 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2452,8 +2452,8 @@ xlog_recover_do_inode_trans( break; case XFS_ILOG_DBROOT: - xfs_bmbt_to_bmdr(mp, (xfs_bmbt_block_t *)src, len, - &(dip->di_u.di_bmbt), + xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len, + &dip->di_u.di_bmbt, XFS_DFORK_DSIZE(dip, mp)); break; @@ -2490,8 +2490,8 @@ xlog_recover_do_inode_trans( case XFS_ILOG_ABROOT: dest = XFS_DFORK_APTR(dip); - xfs_bmbt_to_bmdr(mp, (xfs_bmbt_block_t *)src, len, - (xfs_bmdr_block_t*)dest, + xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, + len, (xfs_bmdr_block_t*)dest, XFS_DFORK_ASIZE(dip, mp)); break; -- cgit v0.10.2 From 4e8938feba770b583fb13d249c17943961731a3e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:14:43 +1100 Subject: [XFS] Move XFS_BMAP_SANITY_CHECK out of line. Move the XFS_BMAP_SANITY_CHECK macro out of line and make it a properly typed function. Also pass the xfs_buf for the btree block instead of just the btree block header, as we will need some additional information for it to implement CRC checking of btree blocks. SGI-PV: 988146 SGI-Modid: xfs-linux-melb:xfs-kern:32301a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 7796a0c..db28905 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4524,6 +4524,22 @@ xfs_bmap_one_block( return rval; } +STATIC int +xfs_bmap_sanity_check( + struct xfs_mount *mp, + struct xfs_buf *bp, + int level) +{ + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + + if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC || + be16_to_cpu(block->bb_level) != level || + be16_to_cpu(block->bb_numrecs) == 0 || + be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) + return 0; + return 1; +} + /* * Read in the extents to if_extents. * All inode fields are set up by caller, we just traverse the btree @@ -4575,7 +4591,7 @@ xfs_bmap_read_extents( return error; block = XFS_BUF_TO_BLOCK(bp); XFS_WANT_CORRUPTED_GOTO( - XFS_BMAP_SANITY_CHECK(mp, block, level), + xfs_bmap_sanity_check(mp, bp, level), error0); if (level == 0) break; @@ -4611,7 +4627,7 @@ xfs_bmap_read_extents( goto error0; } XFS_WANT_CORRUPTED_GOTO( - XFS_BMAP_SANITY_CHECK(mp, block, 0), + xfs_bmap_sanity_check(mp, bp, 0), error0); /* * Read-ahead the next leaf block, if any. 
@@ -6287,7 +6303,7 @@ xfs_bmap_check_leaf_extents( goto error_norelse; block = XFS_BUF_TO_BLOCK(bp); XFS_WANT_CORRUPTED_GOTO( - XFS_BMAP_SANITY_CHECK(mp, block, level), + xfs_bmap_sanity_check(mp, bp, level), error0); if (level == 0) break; diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 735a424..a4555ab 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -213,13 +213,6 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; */ #define XFS_BM_MAXLEVELS(mp,w) ((mp)->m_bm_maxlevels[(w)]) -#define XFS_BMAP_SANITY_CHECK(mp,bb,level) \ - (be32_to_cpu((bb)->bb_magic) == XFS_BMAP_MAGIC && \ - be16_to_cpu((bb)->bb_level) == level && \ - be16_to_cpu((bb)->bb_numrecs) > 0 && \ - be16_to_cpu((bb)->bb_numrecs) <= (mp)->m_bmap_dmxr[(level) != 0]) - - /* * Prototypes for xfs_bmap.c to call. */ -- cgit v0.10.2 From 2af75df7be7ca86965bf73766f827575d1c26fbd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:14:53 +1100 Subject: [XFS] split out two helpers from xfs_syncsub Split out two helpers from xfs_syncsub for the dummy log commit and the superblock writeout. SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32303a Signed-off-by: Christoph Hellwig Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 53d85ec..59da332 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -315,6 +315,93 @@ xfs_sync_inodes( return XFS_ERROR(last_error); } +STATIC int +xfs_commit_dummy_trans( + struct xfs_mount *mp, + uint log_flags) +{ + struct xfs_inode *ip = mp->m_rootip; + struct xfs_trans *tp; + int error; + + /* + * Put a dummy transaction in the log to tell recovery + * that all others are OK. + */ + tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); + error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + /* XXX(hch): ignoring the error here.. */ + error = xfs_trans_commit(tp, 0); + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + xfs_log_force(mp, 0, log_flags); + return 0; +} + +STATIC int +xfs_sync_fsdata( + struct xfs_mount *mp, + int flags) +{ + struct xfs_buf *bp; + struct xfs_buf_log_item *bip; + int error = 0; + + /* + * If this is xfssyncd() then only sync the superblock if we can + * lock it without sleeping and it is not pinned. + */ + if (flags & SYNC_BDFLUSH) { + ASSERT(!(flags & SYNC_WAIT)); + + bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); + if (!bp) + goto out; + + bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *); + if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp)) + goto out_brelse; + } else { + bp = xfs_getsb(mp, 0); + + /* + * If the buffer is pinned then push on the log so we won't + * get stuck waiting in the write for someone, maybe + * ourselves, to flush the log. + * + * Even though we just pushed the log above, we did not have + * the superblock buffer locked at that point so it can + * become pinned in between there and here. 
+ */ + if (XFS_BUF_ISPINNED(bp)) + xfs_log_force(mp, 0, XFS_LOG_FORCE); + } + + + if (flags & SYNC_WAIT) + XFS_BUF_UNASYNC(bp); + else + XFS_BUF_ASYNC(bp); + + return xfs_bwrite(mp, bp); + + out_brelse: + xfs_buf_relse(bp); + out: + return error; +} + /* * xfs sync routine for internal use * @@ -331,8 +418,6 @@ xfs_syncsub( int error = 0; int last_error = 0; uint log_flags = XFS_LOG_FORCE; - xfs_buf_t *bp; - xfs_buf_log_item_t *bip; /* * Sync out the log. This ensures that the log is periodically @@ -355,83 +440,22 @@ xfs_syncsub( * log activity, so if this isn't vfs_sync() then flush * the log again. */ - if (flags & SYNC_DELWRI) { - xfs_log_force(mp, (xfs_lsn_t)0, log_flags); - } + if (flags & SYNC_DELWRI) + xfs_log_force(mp, 0, log_flags); if (flags & SYNC_FSDATA) { - /* - * If this is vfs_sync() then only sync the superblock - * if we can lock it without sleeping and it is not pinned. - */ - if (flags & SYNC_BDFLUSH) { - bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); - if (bp != NULL) { - bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); - if ((bip != NULL) && - xfs_buf_item_dirty(bip)) { - if (!(XFS_BUF_ISPINNED(bp))) { - XFS_BUF_ASYNC(bp); - error = xfs_bwrite(mp, bp); - } else { - xfs_buf_relse(bp); - } - } else { - xfs_buf_relse(bp); - } - } - } else { - bp = xfs_getsb(mp, 0); - /* - * If the buffer is pinned then push on the log so - * we won't get stuck waiting in the write for - * someone, maybe ourselves, to flush the log. - * Even though we just pushed the log above, we - * did not have the superblock buffer locked at - * that point so it can become pinned in between - * there and here. - */ - if (XFS_BUF_ISPINNED(bp)) - xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); - if (flags & SYNC_WAIT) - XFS_BUF_UNASYNC(bp); - else - XFS_BUF_ASYNC(bp); - error = xfs_bwrite(mp, bp); - } - if (error) { + error = xfs_sync_fsdata(mp, flags); + if (error) last_error = error; - } } /* * Now check to see if the log needs a "dummy" transaction. */ if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { - xfs_trans_t *tp; - xfs_inode_t *ip; - - /* - * Put a dummy transaction in the log to tell - * recovery that all others are OK. - */ - tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); - if ((error = xfs_trans_reserve(tp, 0, - XFS_ICHANGE_LOG_RES(mp), - 0, 0, 0))) { - xfs_trans_cancel(tp, 0); + error = xfs_commit_dummy_trans(mp, log_flags); + if (error) return error; - } - - ip = mp->m_rootip; - xfs_ilock(ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_log_force(mp, (xfs_lsn_t)0, log_flags); } /* -- cgit v0.10.2 From bc60a99323b3ec628273b5fa998285c87d464ca6 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:15:03 +1100 Subject: [XFS] Use struct inodes instead of vnodes to kill vn_grab With the sync code relocated to the linux-2.6 directory we can use struct inodes directly. If we do the same thing for the quota release code, we can remove vn_grab altogether. While here, convert the VN_BAD() checks to is_bad_inode() so we can remove vnodes entirely from this code. 
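For reference, a condensed sketch of the lookup pattern this change introduces
in xfs_sync_inodes_ag() (identifiers exactly as in the diff below; the
surrounding radix-tree walk and error handling are trimmed, so the fragment is
illustrative rather than compilable on its own):

    /* bad inodes are dealt with elsewhere */
    inode = VFS_I(ip);
    if (is_bad_inode(inode)) {              /* was VN_BAD(vp) */
            read_unlock(&pag->pag_ici_lock);
            continue;
    }

    /* take a real reference, or fall back to a non-blocking lock */
    inode_refed = B_FALSE;
    if (igrab(inode)) {                     /* was vn_grab(vp) */
            read_unlock(&pag->pag_ici_lock);
            xfs_ilock(ip, lock_flags);
            inode_refed = B_TRUE;
    } else {
            if (!xfs_ilock_nowait(ip, lock_flags)) {
                    /* leave it to reclaim */
                    read_unlock(&pag->pag_ici_lock);
                    continue;
            }
            read_unlock(&pag->pag_ici_lock);
    }

The same pattern, with IRELE(ip) on the way out when a reference was taken, is
what allows vn_grab() to be removed from xfs_vnode.h altogether.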
SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32304a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 59da332..461c1dc 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -131,10 +131,7 @@ xfs_sync_inodes_ag( int flags, int *bypassed) { - xfs_inode_t *ip = NULL; - struct inode *vp = NULL; xfs_perag_t *pag = &mp->m_perag[ag]; - boolean_t vnode_refed = B_FALSE; int nr_found; int first_index = 0; int error = 0; @@ -156,6 +153,10 @@ xfs_sync_inodes_ag( } do { + struct inode *inode; + boolean_t inode_refed; + xfs_inode_t *ip = NULL; + /* * use a gang lookup to find the next inode in the tree * as the tree is sparse and a gang lookup walks to find @@ -177,14 +178,14 @@ xfs_sync_inodes_ag( * skip inodes in reclaim. Let xfs_syncsub do that for * us so we don't need to worry. */ - vp = VFS_I(ip); - if (!vp) { + if (xfs_iflags_test(ip, (XFS_IRECLAIM|XFS_IRECLAIMABLE))) { read_unlock(&pag->pag_ici_lock); continue; } /* bad inodes are dealt with elsewhere */ - if (VN_BAD(vp)) { + inode = VFS_I(ip); + if (is_bad_inode(inode)) { read_unlock(&pag->pag_ici_lock); continue; } @@ -196,30 +197,29 @@ xfs_sync_inodes_ag( } /* - * The inode lock here actually coordinates with the almost - * spurious inode lock in xfs_ireclaim() to prevent the vnode - * we handle here without a reference from being freed while we - * reference it. If we lock the inode while it's on the mount - * list here, then the spurious inode lock in xfs_ireclaim() - * after the inode is pulled from the mount list will sleep - * until we release it here. This keeps the vnode from being - * freed while we reference it. + * If we can't get a reference on the VFS_I, the inode must be + * in reclaim. If we can get the inode lock without blocking, + * it is safe to flush the inode because we hold the tree lock + * and xfs_iextract will block right now. Hence if we lock the + * inode while holding the tree lock, xfs_ireclaim() is + * guaranteed to block on the inode lock we now hold and hence + * it is safe to reference the inode until we drop the inode + * locks completely. */ - if (xfs_ilock_nowait(ip, lock_flags) == 0) { - vp = vn_grab(vp); + inode_refed = B_FALSE; + if (igrab(inode)) { read_unlock(&pag->pag_ici_lock); - if (!vp) - continue; xfs_ilock(ip, lock_flags); - - ASSERT(vp == VFS_I(ip)); - ASSERT(ip->i_mount == mp); - - vnode_refed = B_TRUE; + inode_refed = B_TRUE; } else { - /* safe to unlock here as we have a reference */ + if (!xfs_ilock_nowait(ip, lock_flags)) { + /* leave it to reclaim */ + read_unlock(&pag->pag_ici_lock); + continue; + } read_unlock(&pag->pag_ici_lock); } + /* * If we have to flush data or wait for I/O completion * we need to drop the ilock that we currently hold. 
@@ -240,7 +240,7 @@ xfs_sync_inodes_ag( xfs_ilock(ip, XFS_ILOCK_SHARED); } - if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) { + if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) { xfs_iunlock(ip, XFS_ILOCK_SHARED); error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); if (flags & SYNC_IOWAIT) @@ -268,9 +268,8 @@ xfs_sync_inodes_ag( if (lock_flags) xfs_iunlock(ip, lock_flags); - if (vnode_refed) { + if (inode_refed) { IRELE(ip); - vnode_refed = B_FALSE; } if (error) diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index b52528b..dceb6db 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -90,10 +90,10 @@ vn_ioerror( */ static inline int xfs_icount(struct xfs_inode *ip) { - struct inode *vp = VFS_I(ip); + struct inode *inode = VFS_I(ip); - if (vp) - return vn_count(vp); + if (!inode) + return atomic_read(&inode->i_count); return -1; } diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 683ce16..bf89e41 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -80,11 +80,6 @@ do { \ iput(VFS_I(ip)); \ } while (0) -static inline struct inode *vn_grab(struct inode *vp) -{ - return igrab(vp); -} - /* * Dealing with bad inodes */ diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 26152b9..4254b07 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -1031,13 +1031,13 @@ xfs_qm_dqrele_inodes_ag( uint flags) { xfs_inode_t *ip = NULL; - struct inode *vp = NULL; xfs_perag_t *pag = &mp->m_perag[ag]; int first_index = 0; int nr_found; do { - boolean_t vnode_refd = B_FALSE; + boolean_t inode_refed; + struct inode *inode; /* * use a gang lookup to find the next inode in the tree @@ -1057,19 +1057,19 @@ xfs_qm_dqrele_inodes_ag( first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); /* skip quota inodes and those in reclaim */ - vp = VFS_I(ip); - if (!vp || ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { + inode = VFS_I(ip); + if (!inode || ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { ASSERT(ip->i_udquot == NULL); ASSERT(ip->i_gdquot == NULL); read_unlock(&pag->pag_ici_lock); continue; } if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { - vp = vn_grab(vp); + inode = igrab(inode); read_unlock(&pag->pag_ici_lock); - if (!vp) + if (!inode) continue; - vnode_refd = B_TRUE; + inode_refed = B_TRUE; xfs_ilock(ip, XFS_ILOCK_EXCL); } else { read_unlock(&pag->pag_ici_lock); @@ -1084,7 +1084,7 @@ xfs_qm_dqrele_inodes_ag( ip->i_gdquot = NULL; } xfs_iunlock(ip, XFS_ILOCK_EXCL); - if (vnode_refd) + if (inode_refed) IRELE(ip); } while (nr_found); } -- cgit v0.10.2 From 2030b5aba8a4bcaca5aca85968514fa58207d3bd Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:15:12 +1100 Subject: [XFS] use xfs_sync_inodes rather than xfs_syncsub Kill the unused arg in xfs_syncsub() and xfs_sync_inodes(). For callers of xfs_syncsub() that only want to flush inodes, replace xfs_syncsub() with direct calls to xfs_sync_inodes() as that is all that is being done with the specific flags being passed in. 
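The call-site change is mechanical; the xfs_quiesce_fs() loop in the
xfs_vfsops.c hunk below is representative (shown here as a before/after
sketch; the explicit log force replaces the one xfs_syncsub() used to issue
internally when SYNC_WAIT was set):

    /* before: flush inodes through the sync multiplexer */
    xfs_syncsub(mp, SYNC_INODE_QUIESCE, NULL);

    /* after: force the log, then flush the inodes directly */
    xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC);
    xfs_sync_inodes(mp, SYNC_INODE_QUIESCE);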
SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32305a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 461c1dc..7e9fb52 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -117,7 +117,7 @@ xfs_sync( if (flags & SYNC_IOWAIT) xfs_filestream_flush(mp); - return xfs_syncsub(mp, flags, NULL); + return xfs_syncsub(mp, flags); } /* @@ -128,8 +128,7 @@ STATIC int xfs_sync_inodes_ag( xfs_mount_t *mp, int ag, - int flags, - int *bypassed) + int flags) { xfs_perag_t *pag = &mp->m_perag[ag]; int nr_found; @@ -260,8 +259,6 @@ xfs_sync_inodes_ag( error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); else xfs_ifunlock(ip); - } else if (bypassed) { - (*bypassed)++; } } @@ -288,15 +285,12 @@ xfs_sync_inodes_ag( int xfs_sync_inodes( xfs_mount_t *mp, - int flags, - int *bypassed) + int flags) { int error; int last_error; int i; - if (bypassed) - *bypassed = 0; if (mp->m_flags & XFS_MOUNT_RDONLY) return 0; error = 0; @@ -305,7 +299,7 @@ xfs_sync_inodes( for (i = 0; i < mp->m_sb.sb_agcount; i++) { if (!mp->m_perag[i].pag_ici_init) continue; - error = xfs_sync_inodes_ag(mp, i, flags, bypassed); + error = xfs_sync_inodes_ag(mp, i, flags); if (error) last_error = error; if (error == EFSCORRUPTED) @@ -408,11 +402,10 @@ xfs_sync_fsdata( * interface as explained above under xfs_sync. * */ -int +STATIC int xfs_syncsub( xfs_mount_t *mp, - int flags, - int *bypassed) + int flags) { int error = 0; int last_error = 0; @@ -431,7 +424,7 @@ xfs_syncsub( if (flags & SYNC_BDFLUSH) xfs_finish_reclaim_all(mp, 1, XFS_IFLUSH_DELWRI_ELSE_ASYNC); else - error = xfs_sync_inodes(mp, flags, bypassed); + error = xfs_sync_inodes(mp, flags); } /* diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 3746d15..2954861 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -55,7 +55,7 @@ int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); int xfs_sync(struct xfs_mount *mp, int flags); -int xfs_syncsub(struct xfs_mount *mp, int flags, int *bypassed); +int xfs_sync_inodes(struct xfs_mount *mp, int flags); void xfs_flush_inode(struct xfs_inode *ip); void xfs_flush_device(struct xfs_inode *ip); diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 4254b07..9ff28e6 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -127,7 +127,7 @@ xfs_qm_quotactl( break; case Q_XQUOTASYNC: - return (xfs_sync_inodes(mp, SYNC_DELWRI, NULL)); + return xfs_sync_inodes(mp, SYNC_DELWRI); default: break; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 0ba0526..4e62802 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -523,8 +523,6 @@ extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); extern int xfs_readsb(xfs_mount_t *, int); extern void xfs_freesb(xfs_mount_t *); extern int xfs_fs_writable(xfs_mount_t *); -extern int xfs_syncsub(xfs_mount_t *, int, int *); -extern int xfs_sync_inodes(xfs_mount_t *, int, int *); extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); extern int xfs_dmops_get(struct xfs_mount *, struct xfs_mount_args *); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 0c5ee5e..d5396d6 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -68,15 +68,15 @@ xfs_quiesce_fs( xfs_flush_buftarg(mp->m_ddev_targp, 0); xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); - /* This loop must run at least twice. 
- * The first instance of the loop will flush - * most meta data but that will generate more - * meta data (typically directory updates). - * Which then must be flushed and logged before - * we can write the unmount record. + /* + * This loop must run at least twice. The first instance of the loop + * will flush most meta data but that will generate more meta data + * (typically directory updates). Which then must be flushed and + * logged before we can write the unmount record. */ do { - xfs_syncsub(mp, SYNC_INODE_QUIESCE, NULL); + xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC); + xfs_sync_inodes(mp, SYNC_INODE_QUIESCE); pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); if (!pincount) { delay(50); -- cgit v0.10.2 From dfd837a9eb79de4e50323a6f4e1ad8138d806cb7 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:15:21 +1100 Subject: [XFS] kill xfs_syncsub Now that the only caller is xfs_sync(), merge the two together as it makes no sense to keep them separate. SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32306a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 7e9fb52..d4b7b21 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -48,79 +48,6 @@ #include /* - * xfs_sync flushes any pending I/O to file system vfsp. - * - * This routine is called by vfs_sync() to make sure that things make it - * out to disk eventually, on sync() system calls to flush out everything, - * and when the file system is unmounted. For the vfs_sync() case, all - * we really need to do is sync out the log to make all of our meta-data - * updates permanent (except for timestamps). For calls from pflushd(), - * dirty pages are kept moving by calling pdflush() on the inodes - * containing them. We also flush the inodes that we can lock without - * sleeping and the superblock if we can lock it without sleeping from - * vfs_sync() so that items at the tail of the log are always moving out. - * - * Flags: - * SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want - * to sleep if we can help it. All we really need - * to do is ensure that the log is synced at least - * periodically. We also push the inodes and - * superblock if we can lock them without sleeping - * and they are not pinned. - * SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not - * set, then we really want to lock each inode and flush - * it. - * SYNC_WAIT - All the flushes that take place in this call should - * be synchronous. - * SYNC_DELWRI - This tells us to push dirty pages associated with - * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to - * determine if they should be flushed sync, async, or - * delwri. - * SYNC_CLOSE - This flag is passed when the system is being - * unmounted. We should sync and invalidate everything. - * SYNC_FSDATA - This indicates that the caller would like to make - * sure the superblock is safe on disk. We can ensure - * this by simply making sure the log gets flushed - * if SYNC_BDFLUSH is set, and by actually writing it - * out otherwise. - * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete - * before we return (including direct I/O). Forms the drain - * side of the write barrier needed to safely quiesce the - * filesystem. - * - */ -int -xfs_sync( - xfs_mount_t *mp, - int flags) -{ - int error; - - /* - * Get the Quota Manager to flush the dquots. 
- * - * If XFS quota support is not enabled or this filesystem - * instance does not use quotas XFS_QM_DQSYNC will always - * return zero. - */ - error = XFS_QM_DQSYNC(mp, flags); - if (error) { - /* - * If we got an IO error, we will be shutting down. - * So, there's nothing more for us to do here. - */ - ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp)); - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(error); - } - - if (flags & SYNC_IOWAIT) - xfs_filestream_flush(mp); - - return xfs_syncsub(mp, flags); -} - -/* * Sync all the inodes in the given AG according to the * direction given by the flags. */ @@ -396,22 +323,78 @@ xfs_sync_fsdata( } /* - * xfs sync routine for internal use + * xfs_sync flushes any pending I/O to file system vfsp. * - * This routine supports all of the flags defined for the generic vfs_sync - * interface as explained above under xfs_sync. + * This routine is called by vfs_sync() to make sure that things make it + * out to disk eventually, on sync() system calls to flush out everything, + * and when the file system is unmounted. For the vfs_sync() case, all + * we really need to do is sync out the log to make all of our meta-data + * updates permanent (except for timestamps). For calls from pflushd(), + * dirty pages are kept moving by calling pdflush() on the inodes + * containing them. We also flush the inodes that we can lock without + * sleeping and the superblock if we can lock it without sleeping from + * vfs_sync() so that items at the tail of the log are always moving out. + * + * Flags: + * SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want + * to sleep if we can help it. All we really need + * to do is ensure that the log is synced at least + * periodically. We also push the inodes and + * superblock if we can lock them without sleeping + * and they are not pinned. + * SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not + * set, then we really want to lock each inode and flush + * it. + * SYNC_WAIT - All the flushes that take place in this call should + * be synchronous. + * SYNC_DELWRI - This tells us to push dirty pages associated with + * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to + * determine if they should be flushed sync, async, or + * delwri. + * SYNC_CLOSE - This flag is passed when the system is being + * unmounted. We should sync and invalidate everything. + * SYNC_FSDATA - This indicates that the caller would like to make + * sure the superblock is safe on disk. We can ensure + * this by simply making sure the log gets flushed + * if SYNC_BDFLUSH is set, and by actually writing it + * out otherwise. + * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete + * before we return (including direct I/O). Forms the drain + * side of the write barrier needed to safely quiesce the + * filesystem. * */ -STATIC int -xfs_syncsub( +int +xfs_sync( xfs_mount_t *mp, int flags) { - int error = 0; + int error; int last_error = 0; uint log_flags = XFS_LOG_FORCE; /* + * Get the Quota Manager to flush the dquots. + * + * If XFS quota support is not enabled or this filesystem + * instance does not use quotas XFS_QM_DQSYNC will always + * return zero. + */ + error = XFS_QM_DQSYNC(mp, flags); + if (error) { + /* + * If we got an IO error, we will be shutting down. + * So, there's nothing more for us to do here. + */ + ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp)); + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(error); + } + + if (flags & SYNC_IOWAIT) + xfs_filestream_flush(mp); + + /* * Sync out the log. 
This ensures that the log is periodically * flushed even if there is not enough activity to fill it up. */ -- cgit v0.10.2 From aacaa880bfac8fecd44b279a49688643890358f5 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:15:29 +1100 Subject: [XFS] xfssyncd: don't call xfs_sync Start de-multiplexing xfs_sync() by making xfs_sync_worker() call the specific sync functions it needs. This is only a small, unique subset of the entire xfs_sync() code so is easier to follow. SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32307a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index d4b7b21..3c31137 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -526,6 +526,11 @@ xfs_flush_device( xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); } +/* + * Every sync period we need to unpin all items, reclaim inodes, sync + * quota and write out the superblock. We might need to cover the log + * to indicate it is idle. + */ STATIC void xfs_sync_worker( struct xfs_mount *mp, @@ -533,8 +538,15 @@ xfs_sync_worker( { int error; - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) - error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR); + if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { + xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + xfs_finish_reclaim_all(mp, 1, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + /* dgc: errors ignored here */ + error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); + error = xfs_sync_fsdata(mp, SYNC_BDFLUSH); + if (xfs_log_need_covered(mp)) + error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE); + } mp->m_sync_seq++; wake_up(&mp->m_wait_single_sync_task); } -- cgit v0.10.2 From be97d9d5577f6c8a36588e2f262c772c5422b128 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:15:38 +1100 Subject: [XFS] make SYNC_ATTR no longer use xfs_sync Continue to de-multiplex xfs_sync be replacing all SYNC_ATTR callers with direct calls xfs_sync_inodes(). Add an assert into xfs_sync() to ensure we caught all the SYNC_ATTR callers. SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32308a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 767f38e..38d380a 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -998,7 +998,8 @@ xfs_fs_put_super( int error; xfs_syncd_stop(mp); - xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI); + xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC); + xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI); #ifdef HAVE_DMAPI if (mp->m_flags & XFS_MOUNT_DMAPI) { diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 3c31137..002ccb6 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -342,9 +342,8 @@ xfs_sync_fsdata( * periodically. We also push the inodes and * superblock if we can lock them without sleeping * and they are not pinned. - * SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not - * set, then we really want to lock each inode and flush - * it. + * SYNC_ATTR - We need to flush the inodes. Now handled by direct calls + * to xfs_sync_inodes(). * SYNC_WAIT - All the flushes that take place in this call should * be synchronous. 
* SYNC_DELWRI - This tells us to push dirty pages associated with @@ -373,6 +372,8 @@ xfs_sync( int last_error = 0; uint log_flags = XFS_LOG_FORCE; + ASSERT(!(flags & SYNC_ATTR)); + /* * Get the Quota Manager to flush the dquots. * @@ -403,20 +404,18 @@ xfs_sync( xfs_log_force(mp, (xfs_lsn_t)0, log_flags); - if (flags & (SYNC_ATTR|SYNC_DELWRI)) { + if (flags & SYNC_DELWRI) { if (flags & SYNC_BDFLUSH) xfs_finish_reclaim_all(mp, 1, XFS_IFLUSH_DELWRI_ELSE_ASYNC); else error = xfs_sync_inodes(mp, flags); - } - - /* - * Flushing out dirty data above probably generated more - * log activity, so if this isn't vfs_sync() then flush - * the log again. - */ - if (flags & SYNC_DELWRI) + /* + * Flushing out dirty data above probably generated more + * log activity, so if this isn't vfs_sync() then flush + * the log again. + */ xfs_log_force(mp, 0, log_flags); + } if (flags & SYNC_FSDATA) { error = xfs_sync_fsdata(mp, flags); diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 2954861..5316915 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -49,7 +49,6 @@ typedef struct bhv_vfs_sync_work { * to disk (this is the main difference between a sync and a quiesce). */ #define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT) -#define SYNC_INODE_QUIESCE (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT) int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index d5396d6..c82b955 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -76,7 +76,7 @@ xfs_quiesce_fs( */ do { xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC); - xfs_sync_inodes(mp, SYNC_INODE_QUIESCE); + xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT); pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); if (!pincount) { delay(50); -- cgit v0.10.2 From e9f1c6ee12955fd8657f6f0f9a3d09112b1f1fdd Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:15:50 +1100 Subject: [XFS] make SYNC_DELWRI no longer use xfs_sync Continue to de-multiplex xfs_sync be replacing all SYNC_DELWRI callers with direct calls functions that do the work. Isolate the data quiesce case to a function in xfs_sync.c. Isolate the FSDATA case with explicit calls to xfs_sync_fsdata(). Version 2: o Push delwri related log forces into xfs_sync_inodes(). SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32309a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 38d380a..376f32d 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -998,7 +998,6 @@ xfs_fs_put_super( int error; xfs_syncd_stop(mp); - xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC); xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI); #ifdef HAVE_DMAPI @@ -1057,7 +1056,7 @@ xfs_fs_write_super( struct super_block *sb) { if (!(sb->s_flags & MS_RDONLY)) - xfs_sync(XFS_M(sb), SYNC_FSDATA); + xfs_sync_fsdata(XFS_M(sb), 0); sb->s_dirt = 0; } @@ -1068,7 +1067,6 @@ xfs_fs_sync_super( { struct xfs_mount *mp = XFS_M(sb); int error; - int flags; /* * Treat a sync operation like a freeze. This is to work @@ -1082,20 +1080,10 @@ xfs_fs_sync_super( * dirty the Linux inode until after the transaction I/O * completes. */ - if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) { - /* - * First stage of freeze - no more writers will make progress - * now we are here, so we flush delwri and delalloc buffers - * here, then wait for all I/O to complete. 
Data is frozen at - * that point. Metadata is not frozen, transactions can still - * occur here so don't bother flushing the buftarg (i.e - * SYNC_QUIESCE) because it'll just get dirty again. - */ - flags = SYNC_DATA_QUIESCE; - } else - flags = SYNC_FSDATA; - - error = xfs_sync(mp, flags); + if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) + error = xfs_quiesce_data(mp); + else + error = xfs_sync_fsdata(mp, 0); sb->s_dirt = 0; if (unlikely(laptop_mode)) { @@ -1233,8 +1221,7 @@ xfs_fs_remount( /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { - xfs_filestream_flush(mp); - xfs_sync(mp, SYNC_DATA_QUIESCE); + xfs_quiesce_data(mp); xfs_attr_quiesce(mp); mp->m_flags |= XFS_MOUNT_RDONLY; } diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 002ccb6..838070c 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -217,12 +217,16 @@ xfs_sync_inodes( int error; int last_error; int i; + int lflags = XFS_LOG_FORCE; if (mp->m_flags & XFS_MOUNT_RDONLY) return 0; error = 0; last_error = 0; + if (flags & SYNC_WAIT) + lflags |= XFS_LOG_SYNC; + for (i = 0; i < mp->m_sb.sb_agcount; i++) { if (!mp->m_perag[i].pag_ici_init) continue; @@ -232,6 +236,9 @@ xfs_sync_inodes( if (error == EFSCORRUPTED) break; } + if (flags & SYNC_DELWRI) + xfs_log_force(mp, 0, lflags); + return XFS_ERROR(last_error); } @@ -269,7 +276,7 @@ xfs_commit_dummy_trans( return 0; } -STATIC int +int xfs_sync_fsdata( struct xfs_mount *mp, int flags) @@ -323,6 +330,39 @@ xfs_sync_fsdata( } /* + * First stage of freeze - no more writers will make progress now we are here, + * so we flush delwri and delalloc buffers here, then wait for all I/O to + * complete. Data is frozen at that point. Metadata is not frozen, + * transactions can still occur here so don't bother flushing the buftarg (i.e + * SYNC_QUIESCE) because it'll just get dirty again. + */ +int +xfs_quiesce_data( + struct xfs_mount *mp) +{ + int error; + + /* push non-blocking */ + xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH); + XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); + xfs_filestream_flush(mp); + + /* push and block */ + xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT); + XFS_QM_DQSYNC(mp, SYNC_WAIT); + + /* write superblock and hoover shutdown errors */ + error = xfs_sync_fsdata(mp, 0); + + /* flush devices */ + XFS_bflush(mp->m_ddev_targp); + if (mp->m_rtdev_targp) + XFS_bflush(mp->m_rtdev_targp); + + return error; +} + +/* * xfs_sync flushes any pending I/O to file system vfsp. * * This routine is called by vfs_sync() to make sure that things make it diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 5316915..fcd4040 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -55,6 +55,9 @@ void xfs_syncd_stop(struct xfs_mount *mp); int xfs_sync(struct xfs_mount *mp, int flags); int xfs_sync_inodes(struct xfs_mount *mp, int flags); +int xfs_sync_fsdata(struct xfs_mount *mp, int flags); + +int xfs_quiesce_data(struct xfs_mount *mp); void xfs_flush_inode(struct xfs_inode *ip); void xfs_flush_device(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index c82b955..b55a9bb 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -75,7 +75,6 @@ xfs_quiesce_fs( * logged before we can write the unmount record. 
*/ do { - xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC); xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT); pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); if (!pincount) { -- cgit v0.10.2 From cb56a4b995d44b7990ca3acd18db571eedd0649f Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:16:00 +1100 Subject: [XFS] Kill SYNC_CLOSE SYNC_CLOSE is only ever used and checked in conjunction with SYNC_WAIT, and this only done in one spot. The only thing this does is make XFS_bflush() calls to the data buftargs. This will happen very shortly afterwards the xfs_sync() call anyway in the unmount path via the xfs_close_devices(), so this code is redundant and can be removed. That only user of SYNC_CLOSE is now gone, so kill the flag completely. SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32310a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 376f32d..60ecf47 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1026,16 +1026,6 @@ xfs_fs_put_super( error = xfs_unmount_flush(mp, 0); WARN_ON(error); - /* - * If we're forcing a shutdown, typically because of a media error, - * we want to make sure we invalidate dirty pages that belong to - * referenced vnodes as well. - */ - if (XFS_FORCED_SHUTDOWN(mp)) { - error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE); - ASSERT(error != EFSCORRUPTED); - } - if (mp->m_flags & XFS_MOUNT_DMAPI) { XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0, unmount_event_flags); diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 838070c..91a54a7 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -70,7 +70,7 @@ xfs_sync_inodes_ag( if (flags & SYNC_WAIT) fflag = 0; /* synchronous overrides all */ - if (flags & (SYNC_DELWRI | SYNC_CLOSE)) { + if (flags & SYNC_DELWRI) { /* * We need the I/O lock if we're going to call any of * the flush/inval routines. @@ -117,7 +117,7 @@ xfs_sync_inodes_ag( } /* nothing to sync during shutdown */ - if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) { + if (XFS_FORCED_SHUTDOWN(mp)) { read_unlock(&pag->pag_ici_lock); return 0; } @@ -152,20 +152,6 @@ xfs_sync_inodes_ag( * If we need to drop the lock, insert a marker if we * have not already done so. */ - if (flags & SYNC_CLOSE) { - xfs_iunlock(ip, XFS_ILOCK_SHARED); - if (XFS_FORCED_SHUTDOWN(mp)) - xfs_tosspages(ip, 0, -1, FI_REMAPF); - else - error = xfs_flushinval_pages(ip, 0, -1, - FI_REMAPF); - /* wait for I/O on freeze */ - if (flags & SYNC_IOWAIT) - vn_iowait(ip); - - xfs_ilock(ip, XFS_ILOCK_SHARED); - } - if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) { xfs_iunlock(ip, XFS_ILOCK_SHARED); error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); @@ -390,8 +376,6 @@ xfs_quiesce_data( * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to * determine if they should be flushed sync, async, or * delwri. - * SYNC_CLOSE - This flag is passed when the system is being - * unmounted. We should sync and invalidate everything. * SYNC_FSDATA - This indicates that the caller would like to make * sure the superblock is safe on disk. We can ensure * this by simply making sure the log gets flushed @@ -472,17 +456,6 @@ xfs_sync( return error; } - /* - * When shutting down, we need to insure that the AIL is pushed - * to disk or the filesystem can appear corrupt from the PROM. 
- */ - if ((flags & (SYNC_CLOSE|SYNC_WAIT)) == (SYNC_CLOSE|SYNC_WAIT)) { - XFS_bflush(mp->m_ddev_targp); - if (mp->m_rtdev_targp) { - XFS_bflush(mp->m_rtdev_targp); - } - } - return XFS_ERROR(last_error); } diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index fcd4040..2509db0 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -28,7 +28,6 @@ typedef struct bhv_vfs_sync_work { } bhv_vfs_sync_work_t; #define SYNC_ATTR 0x0001 /* sync attributes */ -#define SYNC_CLOSE 0x0002 /* close file system down */ #define SYNC_DELWRI 0x0004 /* look at delayed writes */ #define SYNC_WAIT 0x0008 /* wait for i/o to complete */ #define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */ -- cgit v0.10.2 From a4e4c4f4a8f9376158f8181a75285091f52a79e3 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:16:11 +1100 Subject: [XFS] Kill xfs_sync() There are no more callers to xfs_sync() now, so remove the function altogther. SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32311a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 91a54a7..ed24435 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -316,11 +316,21 @@ xfs_sync_fsdata( } /* - * First stage of freeze - no more writers will make progress now we are here, + * When remounting a filesystem read-only or freezing the filesystem, we have + * two phases to execute. This first phase is syncing the data before we + * quiesce the filesystem, and the second is flushing all the inodes out after + * we've waited for all the transactions created by the first phase to + * complete. The second phase ensures that the inodes are written to their + * location on disk rather than just existing in transactions in the log. This + * means after a quiesce there is no log replay required to write the inodes to + * disk (this is the main difference between a sync and a quiesce). + */ +/* + * First stage of freeze - no writers will make progress now we are here, * so we flush delwri and delalloc buffers here, then wait for all I/O to * complete. Data is frozen at that point. Metadata is not frozen, - * transactions can still occur here so don't bother flushing the buftarg (i.e - * SYNC_QUIESCE) because it'll just get dirty again. + * transactions can still occur here so don't bother flushing the buftarg + * because it'll just get dirty again. */ int xfs_quiesce_data( @@ -337,11 +347,10 @@ xfs_quiesce_data( xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT); XFS_QM_DQSYNC(mp, SYNC_WAIT); - /* write superblock and hoover shutdown errors */ + /* write superblock and hoover up shutdown errors */ error = xfs_sync_fsdata(mp, 0); - /* flush devices */ - XFS_bflush(mp->m_ddev_targp); + /* flush data-only devices */ if (mp->m_rtdev_targp) XFS_bflush(mp->m_rtdev_targp); @@ -349,117 +358,6 @@ xfs_quiesce_data( } /* - * xfs_sync flushes any pending I/O to file system vfsp. - * - * This routine is called by vfs_sync() to make sure that things make it - * out to disk eventually, on sync() system calls to flush out everything, - * and when the file system is unmounted. For the vfs_sync() case, all - * we really need to do is sync out the log to make all of our meta-data - * updates permanent (except for timestamps). For calls from pflushd(), - * dirty pages are kept moving by calling pdflush() on the inodes - * containing them. 
We also flush the inodes that we can lock without - * sleeping and the superblock if we can lock it without sleeping from - * vfs_sync() so that items at the tail of the log are always moving out. - * - * Flags: - * SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want - * to sleep if we can help it. All we really need - * to do is ensure that the log is synced at least - * periodically. We also push the inodes and - * superblock if we can lock them without sleeping - * and they are not pinned. - * SYNC_ATTR - We need to flush the inodes. Now handled by direct calls - * to xfs_sync_inodes(). - * SYNC_WAIT - All the flushes that take place in this call should - * be synchronous. - * SYNC_DELWRI - This tells us to push dirty pages associated with - * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to - * determine if they should be flushed sync, async, or - * delwri. - * SYNC_FSDATA - This indicates that the caller would like to make - * sure the superblock is safe on disk. We can ensure - * this by simply making sure the log gets flushed - * if SYNC_BDFLUSH is set, and by actually writing it - * out otherwise. - * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete - * before we return (including direct I/O). Forms the drain - * side of the write barrier needed to safely quiesce the - * filesystem. - * - */ -int -xfs_sync( - xfs_mount_t *mp, - int flags) -{ - int error; - int last_error = 0; - uint log_flags = XFS_LOG_FORCE; - - ASSERT(!(flags & SYNC_ATTR)); - - /* - * Get the Quota Manager to flush the dquots. - * - * If XFS quota support is not enabled or this filesystem - * instance does not use quotas XFS_QM_DQSYNC will always - * return zero. - */ - error = XFS_QM_DQSYNC(mp, flags); - if (error) { - /* - * If we got an IO error, we will be shutting down. - * So, there's nothing more for us to do here. - */ - ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp)); - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(error); - } - - if (flags & SYNC_IOWAIT) - xfs_filestream_flush(mp); - - /* - * Sync out the log. This ensures that the log is periodically - * flushed even if there is not enough activity to fill it up. - */ - if (flags & SYNC_WAIT) - log_flags |= XFS_LOG_SYNC; - - xfs_log_force(mp, (xfs_lsn_t)0, log_flags); - - if (flags & SYNC_DELWRI) { - if (flags & SYNC_BDFLUSH) - xfs_finish_reclaim_all(mp, 1, XFS_IFLUSH_DELWRI_ELSE_ASYNC); - else - error = xfs_sync_inodes(mp, flags); - /* - * Flushing out dirty data above probably generated more - * log activity, so if this isn't vfs_sync() then flush - * the log again. - */ - xfs_log_force(mp, 0, log_flags); - } - - if (flags & SYNC_FSDATA) { - error = xfs_sync_fsdata(mp, flags); - if (error) - last_error = error; - } - - /* - * Now check to see if the log needs a "dummy" transaction. - */ - if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { - error = xfs_commit_dummy_trans(mp, log_flags); - if (error) - return error; - } - - return XFS_ERROR(last_error); -} - -/* * Enqueue a work item to be picked up by the vfs xfssyncd thread. 
* Doing this has two advantages: * - It saves on stack space, which is tight in certain situations diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 2509db0..4591dc0 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -28,31 +28,14 @@ typedef struct bhv_vfs_sync_work { } bhv_vfs_sync_work_t; #define SYNC_ATTR 0x0001 /* sync attributes */ -#define SYNC_DELWRI 0x0004 /* look at delayed writes */ -#define SYNC_WAIT 0x0008 /* wait for i/o to complete */ -#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */ -#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */ -#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ -#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ -#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ - -/* - * When remounting a filesystem read-only or freezing the filesystem, - * we have two phases to execute. This first phase is syncing the data - * before we quiesce the fielsystem, and the second is flushing all the - * inodes out after we've waited for all the transactions created by - * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE - * to ensure that the inodes are written to their location on disk - * rather than just existing in transactions in the log. This means - * after a quiesce there is no log replay required to write the inodes - * to disk (this is the main difference between a sync and a quiesce). - */ -#define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT) +#define SYNC_DELWRI 0x0002 /* look at delayed writes */ +#define SYNC_WAIT 0x0004 /* wait for i/o to complete */ +#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */ +#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */ int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); -int xfs_sync(struct xfs_mount *mp, int flags); int xfs_sync_inodes(struct xfs_mount *mp, int flags); int xfs_sync_fsdata(struct xfs_mount *mp, int flags); diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 270f775..db1986a 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -987,14 +987,10 @@ xfs_qm_dqdetach( } /* - * This is called by VFS_SYNC and flags arg determines the caller, - * and its motives, as done in xfs_sync. - * - * vfs_sync: SYNC_FSDATA|SYNC_ATTR|SYNC_BDFLUSH 0x31 - * syscall sync: SYNC_FSDATA|SYNC_ATTR|SYNC_DELWRI 0x25 - * umountroot : SYNC_WAIT | SYNC_CLOSE | SYNC_ATTR | SYNC_FSDATA + * This is called to sync quotas. We can be told to use non-blocking + * semantics by either the SYNC_BDFLUSH flag or the absence of the + * SYNC_WAIT flag. */ - int xfs_qm_sync( xfs_mount_t *mp, diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 1256746..58865fe 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -431,14 +431,13 @@ xfs_ireclaim(xfs_inode_t *ip) xfs_iextract(ip); /* - * Here we do a spurious inode lock in order to coordinate with - * xfs_sync(). This is because xfs_sync() references the inodes - * in the mount list without taking references on the corresponding - * vnodes. We make that OK here by ensuring that we wait until - * the inode is unlocked in xfs_sync() before we go ahead and - * free it. We get both the regular lock and the io lock because - * the xfs_sync() code may need to drop the regular one but will - * still hold the io lock. + * Here we do a spurious inode lock in order to coordinate with inode + * cache radix tree lookups. 
This is because the lookup can reference + * the inodes in the cache without taking references. We make that OK + * here by ensuring that we wait until the inode is unlocked after the + * lookup before we go ahead and free it. We get both the ilock and + * the iolock because the code may need to drop the ilock one but will + * still hold the iolock. */ xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); -- cgit v0.10.2 From 76bf105cb16da6c847a13a3c77dc962ba1081713 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:16:21 +1100 Subject: [XFS] Move remaining quiesce code. With all the other filesystem sync code it in xfs_sync.c including the data quiesce code, it makes sense to move the remaining quiesce code to the same place. SGI-PV: 988140 SGI-Modid: xfs-linux-melb:xfs-kern:32312a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 60ecf47..15fb262 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1212,7 +1212,7 @@ xfs_fs_remount( /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { xfs_quiesce_data(mp); - xfs_attr_quiesce(mp); + xfs_quiesce_attr(mp); mp->m_flags |= XFS_MOUNT_RDONLY; } @@ -1221,7 +1221,7 @@ xfs_fs_remount( /* * Second stage of a freeze. The data is already frozen so we only - * need to take care of themetadata. Once that's done write a dummy + * need to take care of the metadata. Once that's done write a dummy * record to dirty the log in case of a crash while frozen. */ STATIC void @@ -1230,7 +1230,7 @@ xfs_fs_lockfs( { struct xfs_mount *mp = XFS_M(sb); - xfs_attr_quiesce(mp); + xfs_quiesce_attr(mp); xfs_fs_log_dummy(mp); } diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index ed24435..b2b7082 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -357,6 +357,61 @@ xfs_quiesce_data( return error; } +STATIC void +xfs_quiesce_fs( + struct xfs_mount *mp) +{ + int count = 0, pincount; + + xfs_flush_buftarg(mp->m_ddev_targp, 0); + xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + + /* + * This loop must run at least twice. The first instance of the loop + * will flush most meta data but that will generate more meta data + * (typically directory updates). Which then must be flushed and + * logged before we can write the unmount record. + */ + do { + xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT); + pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); + if (!pincount) { + delay(50); + count++; + } + } while (count < 2); +} + +/* + * Second stage of a quiesce. The data is already synced, now we have to take + * care of the metadata. New transactions are already blocked, so we need to + * wait for any remaining transactions to drain out before proceding. + */ +void +xfs_quiesce_attr( + struct xfs_mount *mp) +{ + int error = 0; + + /* wait for all modifications to complete */ + while (atomic_read(&mp->m_active_trans) > 0) + delay(100); + + /* flush inodes and push all remaining buffers out to disk */ + xfs_quiesce_fs(mp); + + ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); + + /* Push the superblock and write an unmount record */ + error = xfs_log_sbcount(mp, 1); + if (error) + xfs_fs_cmn_err(CE_WARN, mp, + "xfs_attr_quiesce: failed to log sb changes. " + "Frozen image may not be consistent."); + xfs_log_unmount_write(mp); + xfs_unmountfs_writesb(mp); +} + /* * Enqueue a work item to be picked up by the vfs xfssyncd thread. 
* Doing this has two advantages: diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 4591dc0..3b49aa3 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -40,6 +40,7 @@ int xfs_sync_inodes(struct xfs_mount *mp, int flags); int xfs_sync_fsdata(struct xfs_mount *mp, int flags); int xfs_quiesce_data(struct xfs_mount *mp); +void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inode(struct xfs_inode *ip); void xfs_flush_device(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index b55a9bb..883dd0f 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -59,61 +59,6 @@ #include "xfs_sync.h" -STATIC void -xfs_quiesce_fs( - xfs_mount_t *mp) -{ - int count = 0, pincount; - - xfs_flush_buftarg(mp->m_ddev_targp, 0); - xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); - - /* - * This loop must run at least twice. The first instance of the loop - * will flush most meta data but that will generate more meta data - * (typically directory updates). Which then must be flushed and - * logged before we can write the unmount record. - */ - do { - xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT); - pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); - if (!pincount) { - delay(50); - count++; - } - } while (count < 2); -} - -/* - * Second stage of a quiesce. The data is already synced, now we have to take - * care of the metadata. New transactions are already blocked, so we need to - * wait for any remaining transactions to drain out before proceding. - */ -void -xfs_attr_quiesce( - xfs_mount_t *mp) -{ - int error = 0; - - /* wait for all modifications to complete */ - while (atomic_read(&mp->m_active_trans) > 0) - delay(100); - - /* flush inodes and push all remaining buffers out to disk */ - xfs_quiesce_fs(mp); - - ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); - - /* Push the superblock and write an unmount record */ - error = xfs_log_sbcount(mp, 1); - if (error) - xfs_fs_cmn_err(CE_WARN, mp, - "xfs_attr_quiesce: failed to log sb changes. " - "Frozen image may not be consistent."); - xfs_log_unmount_write(mp); - xfs_unmountfs_writesb(mp); -} - /* * xfs_unmount_flush implements a set of flush operation on special * inodes, which are needed as a separate set of operations so that diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h index 6701d0e..6b8e0b5 100644 --- a/fs/xfs/xfs_vfsops.h +++ b/fs/xfs/xfs_vfsops.h @@ -10,6 +10,5 @@ struct xfs_mount_args; void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, int lnnum); -void xfs_attr_quiesce(struct xfs_mount *mp); #endif /* _XFS_VFSOPS_H */ -- cgit v0.10.2 From 3471394ba56f44761ce1c300f139478dbfb49d4a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:21:10 +1100 Subject: [XFS] fix instant oops with tracing enabled We can only read inode->i_count if the inode is actually there and not a NULL pointer. This was introduced in one of the recent sync patches. 
SGI-PV: 988255 SGI-Modid: xfs-linux-melb:xfs-kern:32315a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index dceb6db..ac827d2 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -92,7 +92,7 @@ static inline int xfs_icount(struct xfs_inode *ip) { struct inode *inode = VFS_I(ip); - if (!inode) + if (inode) return atomic_read(&inode->i_count); return -1; } -- cgit v0.10.2 From 6441e549157b749bae003cce70b4c8b62e4801fa Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:21:19 +1100 Subject: [XFS] factor xfs_iget_core() into hit and miss cases There are really two cases in xfs_iget_core(). The first is the cache hit case, the second is the miss case. They share very little code, and hence can easily be factored out into separate functions. This makes the code much easier to understand and subsequently modify. SGI-PV: 988141 SGI-Modid: xfs-linux-melb:xfs-kern:32317a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 58865fe..b2539b1 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -40,161 +40,119 @@ #include "xfs_utils.h" /* - * Look up an inode by number in the given file system. - * The inode is looked up in the cache held in each AG. - * If the inode is found in the cache, attach it to the provided - * vnode. - * - * If it is not in core, read it in from the file system's device, - * add it to the cache and attach the provided vnode. - * - * The inode is locked according to the value of the lock_flags parameter. - * This flag parameter indicates how and if the inode's IO lock and inode lock - * should be taken. - * - * mp -- the mount point structure for the current file system. It points - * to the inode hash table. - * tp -- a pointer to the current transaction if there is one. This is - * simply passed through to the xfs_iread() call. - * ino -- the number of the inode desired. This is the unique identifier - * within the file system for the inode being requested. - * lock_flags -- flags indicating how to lock the inode. See the comment - * for xfs_ilock() for a list of valid values. - * bno -- the block number starting the buffer containing the inode, - * if known (as by bulkstat), else 0. + * Check the validity of the inode we just found it the cache */ -STATIC int -xfs_iget_core( - struct inode *inode, - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_ino_t ino, - uint flags, - uint lock_flags, - xfs_inode_t **ipp, - xfs_daddr_t bno) +static int +xfs_iget_cache_hit( + struct inode *inode, + struct xfs_perag *pag, + struct xfs_inode *ip, + int flags, + int lock_flags) __releases(pag->pag_ici_lock) { - struct inode *old_inode; - xfs_inode_t *ip; - int error; - unsigned long first_index, mask; - xfs_perag_t *pag; - xfs_agino_t agino; + struct xfs_mount *mp = ip->i_mount; + struct inode *old_inode; + int error = 0; - /* the radix tree exists only in inode capable AGs */ - if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) - return EINVAL; - - /* get the perag structure and ensure that it's inode capable */ - pag = xfs_get_perag(mp, ino); - if (!pag->pagi_inodeok) - return EINVAL; - ASSERT(pag->pag_ici_init); - agino = XFS_INO_TO_AGINO(mp, ino); - -again: - read_lock(&pag->pag_ici_lock); - ip = radix_tree_lookup(&pag->pag_ici_root, agino); + /* + * If INEW is set this inode is being set up + * Pause and try again. 
+ */ + if (xfs_iflags_test(ip, XFS_INEW)) { + error = EAGAIN; + XFS_STATS_INC(xs_ig_frecycle); + goto out_error; + } - if (ip != NULL) { + old_inode = ip->i_vnode; + if (old_inode == NULL) { /* - * If INEW is set this inode is being set up + * If IRECLAIM is set this inode is + * on its way out of the system, * we need to pause and try again. */ - if (xfs_iflags_test(ip, XFS_INEW)) { - read_unlock(&pag->pag_ici_lock); - delay(1); + if (xfs_iflags_test(ip, XFS_IRECLAIM)) { + error = EAGAIN; XFS_STATS_INC(xs_ig_frecycle); + goto out_error; + } + ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE)); - goto again; + /* + * If lookup is racing with unlink, then we + * should return an error immediately so we + * don't remove it from the reclaim list and + * potentially leak the inode. + */ + if ((ip->i_d.di_mode == 0) && + !(flags & XFS_IGET_CREATE)) { + error = ENOENT; + goto out_error; } + xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); - old_inode = ip->i_vnode; - if (old_inode == NULL) { - /* - * If IRECLAIM is set this inode is - * on its way out of the system, - * we need to pause and try again. - */ - if (xfs_iflags_test(ip, XFS_IRECLAIM)) { - read_unlock(&pag->pag_ici_lock); - delay(1); - XFS_STATS_INC(xs_ig_frecycle); - - goto again; - } - ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE)); - - /* - * If lookup is racing with unlink, then we - * should return an error immediately so we - * don't remove it from the reclaim list and - * potentially leak the inode. - */ - if ((ip->i_d.di_mode == 0) && - !(flags & XFS_IGET_CREATE)) { - read_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); - return ENOENT; - } - - xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); - - XFS_STATS_INC(xs_ig_found); - xfs_iflags_clear(ip, XFS_IRECLAIMABLE); - read_unlock(&pag->pag_ici_lock); - - XFS_MOUNT_ILOCK(mp); - list_del_init(&ip->i_reclaim); - XFS_MOUNT_IUNLOCK(mp); - - goto finish_inode; - - } else if (inode != old_inode) { - /* The inode is being torn down, pause and - * try again. - */ - if (old_inode->i_state & (I_FREEING | I_CLEAR)) { - read_unlock(&pag->pag_ici_lock); - delay(1); - XFS_STATS_INC(xs_ig_frecycle); - - goto again; - } + xfs_iflags_clear(ip, XFS_IRECLAIMABLE); + read_unlock(&pag->pag_ici_lock); + + XFS_MOUNT_ILOCK(mp); + list_del_init(&ip->i_reclaim); + XFS_MOUNT_IUNLOCK(mp); + + } else if (inode != old_inode) { + /* The inode is being torn down, pause and + * try again. + */ + if (old_inode->i_state & (I_FREEING | I_CLEAR)) { + error = EAGAIN; + XFS_STATS_INC(xs_ig_frecycle); + goto out_error; + } /* Chances are the other vnode (the one in the inode) is being torn * down right now, and we landed on top of it. Question is, what do * we do? Unhook the old inode and hook up the new one? 
*/ - cmn_err(CE_PANIC, - "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p", - old_inode, inode); - } - - /* - * Inode cache hit - */ + cmn_err(CE_PANIC, + "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p", + old_inode, inode); + } else { read_unlock(&pag->pag_ici_lock); - XFS_STATS_INC(xs_ig_found); + } -finish_inode: - if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { - xfs_put_perag(mp, pag); - return ENOENT; - } + if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { + error = ENOENT; + goto out; + } - if (lock_flags != 0) - xfs_ilock(ip, lock_flags); + if (lock_flags != 0) + xfs_ilock(ip, lock_flags); - xfs_iflags_clear(ip, XFS_ISTALE); - xfs_itrace_exit_tag(ip, "xfs_iget.found"); - goto return_ip; - } + xfs_iflags_clear(ip, XFS_ISTALE); + xfs_itrace_exit_tag(ip, "xfs_iget.found"); + XFS_STATS_INC(xs_ig_found); + return 0; - /* - * Inode cache miss - */ +out_error: read_unlock(&pag->pag_ici_lock); - XFS_STATS_INC(xs_ig_missed); +out: + return error; +} + + +static int +xfs_iget_cache_miss( + struct xfs_mount *mp, + struct xfs_perag *pag, + xfs_trans_t *tp, + xfs_ino_t ino, + struct xfs_inode **ipp, + xfs_daddr_t bno, + int flags, + int lock_flags) __releases(pag->pag_ici_lock) +{ + struct xfs_inode *ip; + int error; + unsigned long first_index, mask; + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); /* * Read the disk inode attributes into a new inode structure and get @@ -202,17 +160,14 @@ finish_inode: */ error = xfs_iread(mp, tp, ino, &ip, bno, (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0); - if (error) { - xfs_put_perag(mp, pag); + if (error) return error; - } xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { - xfs_idestroy(ip); - xfs_put_perag(mp, pag); - return ENOENT; + error = ENOENT; + goto out_destroy; } /* @@ -220,9 +175,8 @@ finish_inode: * write spinlock. */ if (radix_tree_preload(GFP_KERNEL)) { - xfs_idestroy(ip); - delay(1); - goto again; + error = EAGAIN; + goto out_destroy; } if (lock_flags) @@ -231,32 +185,104 @@ finish_inode: mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); first_index = agino & mask; write_lock(&pag->pag_ici_lock); - /* - * insert the new inode - */ + + /* insert the new inode */ error = radix_tree_insert(&pag->pag_ici_root, agino, ip); if (unlikely(error)) { - BUG_ON(error != -EEXIST); - write_unlock(&pag->pag_ici_lock); - radix_tree_preload_end(); - if (lock_flags) - xfs_iunlock(ip, lock_flags); - xfs_idestroy(ip); + WARN_ON(error != -EEXIST); XFS_STATS_INC(xs_ig_dup); - goto again; + error = EAGAIN; + goto out_unlock; } - /* - * These values _must_ be set before releasing the radix tree lock! - */ + /* These values _must_ be set before releasing the radix tree lock! */ ip->i_udquot = ip->i_gdquot = NULL; xfs_iflags_set(ip, XFS_INEW); write_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); + *ipp = ip; + return 0; + +out_unlock: + write_unlock(&pag->pag_ici_lock); + radix_tree_preload_end(); +out_destroy: + xfs_idestroy(ip); + return error; +} + +/* + * Look up an inode by number in the given file system. + * The inode is looked up in the cache held in each AG. + * If the inode is found in the cache, attach it to the provided + * vnode. + * + * If it is not in core, read it in from the file system's device, + * add it to the cache and attach the provided vnode. + * + * The inode is locked according to the value of the lock_flags parameter. + * This flag parameter indicates how and if the inode's IO lock and inode lock + * should be taken. 
+ * + * mp -- the mount point structure for the current file system. It points + * to the inode hash table. + * tp -- a pointer to the current transaction if there is one. This is + * simply passed through to the xfs_iread() call. + * ino -- the number of the inode desired. This is the unique identifier + * within the file system for the inode being requested. + * lock_flags -- flags indicating how to lock the inode. See the comment + * for xfs_ilock() for a list of valid values. + * bno -- the block number starting the buffer containing the inode, + * if known (as by bulkstat), else 0. + */ +STATIC int +xfs_iget_core( + struct inode *inode, + xfs_mount_t *mp, + xfs_trans_t *tp, + xfs_ino_t ino, + uint flags, + uint lock_flags, + xfs_inode_t **ipp, + xfs_daddr_t bno) +{ + xfs_inode_t *ip; + int error; + xfs_perag_t *pag; + xfs_agino_t agino; + + /* the radix tree exists only in inode capable AGs */ + if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) + return EINVAL; + + /* get the perag structure and ensure that it's inode capable */ + pag = xfs_get_perag(mp, ino); + if (!pag->pagi_inodeok) + return EINVAL; + ASSERT(pag->pag_ici_init); + agino = XFS_INO_TO_AGINO(mp, ino); + +again: + error = 0; + read_lock(&pag->pag_ici_lock); + ip = radix_tree_lookup(&pag->pag_ici_root, agino); + + if (ip) { + error = xfs_iget_cache_hit(inode, pag, ip, flags, lock_flags); + if (error) + goto out_error_or_again; + } else { + read_unlock(&pag->pag_ici_lock); + XFS_STATS_INC(xs_ig_missed); + + error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno, + flags, lock_flags); + if (error) + goto out_error_or_again; + } xfs_put_perag(mp, pag); - return_ip: ASSERT(ip->i_df.if_ext_max == XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); @@ -276,6 +302,14 @@ finish_inode: if (ip->i_d.di_mode != 0) xfs_setup_inode(ip); return 0; + +out_error_or_again: + if (error == EAGAIN) { + delay(1); + goto again; + } + xfs_put_perag(mp, pag); + return error; } -- cgit v0.10.2 From 94b97e39b0c983e86f0028c456dcf213abc722a0 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:21:30 +1100 Subject: [XFS] Never call mark_inode_dirty_sync() directly Once the Linux inode and the XFS inode are combined, we cannot rely on just check if the linux inode exists as a method of determining if it is valid or not. Hence we should always call xfs_mark_inode_dirty_sync() instead as it does the correct checks to determine if the liinux inode is in a valid state or not. 
SGI-PV: 988141 SGI-Modid: xfs-linux-melb:xfs-kern:32318a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index a44d68e..8fbc97d 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -191,7 +191,7 @@ xfs_setfilesize( ip->i_d.di_size = isize; ip->i_update_core = 1; ip->i_update_size = 1; - mark_inode_dirty_sync(ioend->io_inode); + xfs_mark_inode_dirty_sync(ip); } xfs_iunlock(ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 095d271..3bfb3c0 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -128,7 +128,7 @@ xfs_ichgtime( if (sync_it) { SYNCHRONIZE(); ip->i_update_core = 1; - mark_inode_dirty_sync(inode); + xfs_mark_inode_dirty_sync(ip); } } diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 15fb262..b87e455 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -948,7 +948,7 @@ xfs_fs_write_inode( * it dirty again so we'll try again later. */ if (error) - mark_inode_dirty_sync(inode); + xfs_mark_inode_dirty_sync(XFS_I(inode)); return -error; } -- cgit v0.10.2 From 2cb1599f9b2ecdd7a9e59feeee647eb258966839 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:32:23 +1100 Subject: Inode: Allow external initialisers To allow XFS to combine the XFS and linux inodes into a single structure, we need to drive inode lookup from the XFS inode cache, not the generic inode cache. This means that we need initialise a struct inode from a context outside alloc_inode() as it is no longer used by XFS. Factor and export the struct inode initialisation code from alloc_inode() to inode_init_always() as a counterpart to inode_init_once(). i.e. we have to call this init function for each inode instantiation (always), as opposed inode_init_once() which is only called on slab object instantiation (once). Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/inode.c b/fs/inode.c index 0487ddb..e7ee999 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -108,84 +108,100 @@ static void wake_up_inode(struct inode *inode) wake_up_bit(&inode->i_state, __I_LOCK); } -static struct inode *alloc_inode(struct super_block *sb) +/** + * inode_init_always - perform inode structure intialisation + * @sb - superblock inode belongs to. + * @inode - inode to initialise + * + * These are initializations that need to be done on every inode + * allocation as the fields are not initialised by slab allocation. 
+ */ +struct inode *inode_init_always(struct super_block *sb, struct inode *inode) { static const struct address_space_operations empty_aops; static struct inode_operations empty_iops; static const struct file_operations empty_fops; - struct inode *inode; - - if (sb->s_op->alloc_inode) - inode = sb->s_op->alloc_inode(sb); - else - inode = (struct inode *) kmem_cache_alloc(inode_cachep, GFP_KERNEL); - if (inode) { - struct address_space * const mapping = &inode->i_data; - - inode->i_sb = sb; - inode->i_blkbits = sb->s_blocksize_bits; - inode->i_flags = 0; - atomic_set(&inode->i_count, 1); - inode->i_op = &empty_iops; - inode->i_fop = &empty_fops; - inode->i_nlink = 1; - atomic_set(&inode->i_writecount, 0); - inode->i_size = 0; - inode->i_blocks = 0; - inode->i_bytes = 0; - inode->i_generation = 0; + struct address_space * const mapping = &inode->i_data; + + inode->i_sb = sb; + inode->i_blkbits = sb->s_blocksize_bits; + inode->i_flags = 0; + atomic_set(&inode->i_count, 1); + inode->i_op = &empty_iops; + inode->i_fop = &empty_fops; + inode->i_nlink = 1; + atomic_set(&inode->i_writecount, 0); + inode->i_size = 0; + inode->i_blocks = 0; + inode->i_bytes = 0; + inode->i_generation = 0; #ifdef CONFIG_QUOTA - memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); + memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); #endif - inode->i_pipe = NULL; - inode->i_bdev = NULL; - inode->i_cdev = NULL; - inode->i_rdev = 0; - inode->dirtied_when = 0; - if (security_inode_alloc(inode)) { - if (inode->i_sb->s_op->destroy_inode) - inode->i_sb->s_op->destroy_inode(inode); - else - kmem_cache_free(inode_cachep, (inode)); - return NULL; - } + inode->i_pipe = NULL; + inode->i_bdev = NULL; + inode->i_cdev = NULL; + inode->i_rdev = 0; + inode->dirtied_when = 0; + if (security_inode_alloc(inode)) { + if (inode->i_sb->s_op->destroy_inode) + inode->i_sb->s_op->destroy_inode(inode); + else + kmem_cache_free(inode_cachep, (inode)); + return NULL; + } - spin_lock_init(&inode->i_lock); - lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); + spin_lock_init(&inode->i_lock); + lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); - mutex_init(&inode->i_mutex); - lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); + mutex_init(&inode->i_mutex); + lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); - init_rwsem(&inode->i_alloc_sem); - lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key); + init_rwsem(&inode->i_alloc_sem); + lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key); - mapping->a_ops = &empty_aops; - mapping->host = inode; - mapping->flags = 0; - mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); - mapping->assoc_mapping = NULL; - mapping->backing_dev_info = &default_backing_dev_info; - mapping->writeback_index = 0; + mapping->a_ops = &empty_aops; + mapping->host = inode; + mapping->flags = 0; + mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); + mapping->assoc_mapping = NULL; + mapping->backing_dev_info = &default_backing_dev_info; + mapping->writeback_index = 0; - /* - * If the block_device provides a backing_dev_info for client - * inodes then use that. Otherwise the inode share the bdev's - * backing_dev_info. - */ - if (sb->s_bdev) { - struct backing_dev_info *bdi; + /* + * If the block_device provides a backing_dev_info for client + * inodes then use that. Otherwise the inode share the bdev's + * backing_dev_info. 
+ */ + if (sb->s_bdev) { + struct backing_dev_info *bdi; - bdi = sb->s_bdev->bd_inode_backing_dev_info; - if (!bdi) - bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; - mapping->backing_dev_info = bdi; - } - inode->i_private = NULL; - inode->i_mapping = mapping; + bdi = sb->s_bdev->bd_inode_backing_dev_info; + if (!bdi) + bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; + mapping->backing_dev_info = bdi; } + inode->i_private = NULL; + inode->i_mapping = mapping; + return inode; } +EXPORT_SYMBOL(inode_init_always); + +static struct inode *alloc_inode(struct super_block *sb) +{ + struct inode *inode; + + if (sb->s_op->alloc_inode) + inode = sb->s_op->alloc_inode(sb); + else + inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL); + + if (inode) + return inode_init_always(sb, inode); + return NULL; +} void destroy_inode(struct inode *inode) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 5b248d6..04abead 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1881,6 +1881,7 @@ extern loff_t default_llseek(struct file *file, loff_t offset, int origin); extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); +extern struct inode * inode_init_always(struct super_block *, struct inode *); extern void inode_init_once(struct inode *); extern void iput(struct inode *); extern struct inode * igrab(struct inode *); -- cgit v0.10.2 From 8290c35f87304a6b73d4fd17b03580b4f7425de8 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:35:24 +1100 Subject: Inode: Allow external list initialisation To allow XFS to combine the XFS and linux inodes into a single structure, we need to drive inode lookup from the XFS inode cache, not the generic inode cache. This means that we need initialise a struct inode from a context outside alloc_inode() as it is no longer used by XFS. After inode allocation and initialisation, we need to add the inode to the superblock list, the in-use list, hash it and do some accounting. This all needs to be done with the inode_lock held and there are already several places in fs/inode.c that do this list manipulation. Factor out the common code, add a locking wrapper and export the function so ti can be called from XFS. Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/inode.c b/fs/inode.c index e7ee999..fbcf6c5 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -550,6 +550,49 @@ repeat: return node ? inode : NULL; } +static unsigned long hash(struct super_block *sb, unsigned long hashval) +{ + unsigned long tmp; + + tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / + L1_CACHE_BYTES; + tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); + return tmp & I_HASHMASK; +} + +static inline void +__inode_add_to_lists(struct super_block *sb, struct hlist_head *head, + struct inode *inode) +{ + inodes_stat.nr_inodes++; + list_add(&inode->i_list, &inode_in_use); + list_add(&inode->i_sb_list, &sb->s_inodes); + if (head) + hlist_add_head(&inode->i_hash, head); +} + +/** + * inode_add_to_lists - add a new inode to relevant lists + * @sb - superblock inode belongs to. + * @inode - inode to mark in use + * + * When an inode is allocated it needs to be accounted for, added to the in use + * list, the owning superblock and the inode hash. This needs to be done under + * the inode_lock, so export a function to do this rather than the inode lock + * itself. 
We calculate the hash list to add to here so it is all internal + * which requires the caller to have already set up the inode number in the + * inode to add. + */ +void inode_add_to_lists(struct super_block *sb, struct inode *inode) +{ + struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino); + + spin_lock(&inode_lock); + __inode_add_to_lists(sb, head, inode); + spin_unlock(&inode_lock); +} +EXPORT_SYMBOL_GPL(inode_add_to_lists); + /** * new_inode - obtain an inode * @sb: superblock @@ -577,9 +620,7 @@ struct inode *new_inode(struct super_block *sb) inode = alloc_inode(sb); if (inode) { spin_lock(&inode_lock); - inodes_stat.nr_inodes++; - list_add(&inode->i_list, &inode_in_use); - list_add(&inode->i_sb_list, &sb->s_inodes); + __inode_add_to_lists(sb, NULL, inode); inode->i_ino = ++last_ino; inode->i_state = 0; spin_unlock(&inode_lock); @@ -638,10 +679,7 @@ static struct inode * get_new_inode(struct super_block *sb, struct hlist_head *h if (set(inode, data)) goto set_failed; - inodes_stat.nr_inodes++; - list_add(&inode->i_list, &inode_in_use); - list_add(&inode->i_sb_list, &sb->s_inodes); - hlist_add_head(&inode->i_hash, head); + __inode_add_to_lists(sb, head, inode); inode->i_state = I_LOCK|I_NEW; spin_unlock(&inode_lock); @@ -687,10 +725,7 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he old = find_inode_fast(sb, head, ino); if (!old) { inode->i_ino = ino; - inodes_stat.nr_inodes++; - list_add(&inode->i_list, &inode_in_use); - list_add(&inode->i_sb_list, &sb->s_inodes); - hlist_add_head(&inode->i_hash, head); + __inode_add_to_lists(sb, head, inode); inode->i_state = I_LOCK|I_NEW; spin_unlock(&inode_lock); @@ -714,16 +749,6 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he return inode; } -static unsigned long hash(struct super_block *sb, unsigned long hashval) -{ - unsigned long tmp; - - tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / - L1_CACHE_BYTES; - tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); - return tmp & I_HASHMASK; -} - /** * iunique - get a unique inode number * @sb: superblock diff --git a/include/linux/fs.h b/include/linux/fs.h index 04abead..1deedf2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1883,6 +1883,7 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); extern struct inode * inode_init_always(struct super_block *, struct inode *); extern void inode_init_once(struct inode *); +extern void inode_add_to_lists(struct super_block *, struct inode *); extern void iput(struct inode *); extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); -- cgit v0.10.2 From bf904248a2adb3f3be4eb4fb1837ce3bb28cca76 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:36:14 +1100 Subject: [XFS] Combine the XFS and Linux inodes To avoid issues with different lifecycles of XFS and Linux inodes, embedd the linux inode inside the XFS inode. This means that the linux inode has the same lifecycle as the XFS inode, even when it has been released by the OS. XFS inodes don't live much longer than this (a short stint in reclaim at most), so there isn't significant memory usage penalties here. Version 3 o kill xfs_icount() Version 2 o remove unused commented out code from xfs_iget(). 
o kill useless cast in VFS_I() SGI-PV: 988141 SGI-Modid: xfs-linux-melb:xfs-kern:32323a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 3bfb3c0..37bb101 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -64,14 +64,14 @@ xfs_synchronize_atime( { struct inode *inode = VFS_I(ip); - if (inode) { + if (!(inode->i_state & I_CLEAR)) { ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; } } /* - * If the linux inode exists, mark it dirty. + * If the linux inode is valid, mark it dirty. * Used when commiting a dirty inode into a transaction so that * the inode will get written back by the linux code */ @@ -81,7 +81,7 @@ xfs_mark_inode_dirty_sync( { struct inode *inode = VFS_I(ip); - if (inode) + if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) mark_inode_dirty_sync(inode); } @@ -766,12 +766,21 @@ xfs_diflags_to_iflags( * When reading existing inodes from disk this is called directly * from xfs_iget, when creating a new inode it is called from * xfs_ialloc after setting up the inode. + * + * We are always called with an uninitialised linux inode here. + * We need to initialise the necessary fields and take a reference + * on it. */ void xfs_setup_inode( struct xfs_inode *ip) { - struct inode *inode = ip->i_vnode; + struct inode *inode = &ip->i_vnode; + + inode->i_ino = ip->i_ino; + inode->i_state = I_NEW|I_LOCK; + inode_add_to_lists(ip->i_mount->m_super, inode); + ASSERT(atomic_read(&inode->i_count) == 1); inode->i_mode = ip->i_d.di_mode; inode->i_nlink = ip->i_d.di_nlink; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index b87e455..c6ef684 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -72,7 +72,6 @@ static struct quotactl_ops xfs_quotactl_operations; static struct super_operations xfs_super_operations; -static kmem_zone_t *xfs_vnode_zone; static kmem_zone_t *xfs_ioend_zone; mempool_t *xfs_ioend_pool; @@ -867,29 +866,24 @@ xfsaild_stop( } - +/* Catch misguided souls that try to use this interface on XFS */ STATIC struct inode * xfs_fs_alloc_inode( struct super_block *sb) { - return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP); + BUG(); } +/* + * we need to provide an empty inode free function to prevent + * the generic code from trying to free our combined inode. + */ STATIC void xfs_fs_destroy_inode( - struct inode *inode) -{ - kmem_zone_free(xfs_vnode_zone, inode); -} - -STATIC void -xfs_fs_inode_init_once( - void *vnode) + struct inode *inode) { - inode_init_once((struct inode *)vnode); } - /* * Slab object creation initialisation for the XFS inode. * This covers only the idempotent fields in the XFS inode; @@ -898,13 +892,18 @@ xfs_fs_inode_init_once( * fields in the xfs inode that left in the initialise state * when freeing the inode. 
*/ -void -xfs_inode_init_once( +STATIC void +xfs_fs_inode_init_once( void *inode) { struct xfs_inode *ip = inode; memset(ip, 0, sizeof(struct xfs_inode)); + + /* vfs inode */ + inode_init_once(VFS_I(ip)); + + /* xfs inode */ atomic_set(&ip->i_iocount, 0); atomic_set(&ip->i_pincount, 0); spin_lock_init(&ip->i_flags_lock); @@ -975,8 +974,6 @@ xfs_fs_clear_inode( if (xfs_reclaim(ip)) panic("%s: cannot reclaim 0x%p\n", __func__, inode); } - - ASSERT(XFS_I(inode) == NULL); } STATIC void @@ -1829,16 +1826,10 @@ xfs_free_trace_bufs(void) STATIC int __init xfs_init_zones(void) { - xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode", - KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | - KM_ZONE_SPREAD, - xfs_fs_inode_init_once); - if (!xfs_vnode_zone) - goto out; xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); if (!xfs_ioend_zone) - goto out_destroy_vnode_zone; + goto out; xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE, xfs_ioend_zone); @@ -1854,6 +1845,7 @@ xfs_init_zones(void) "xfs_bmap_free_item"); if (!xfs_bmap_free_item_zone) goto out_destroy_log_ticket_zone; + xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), "xfs_btree_cur"); if (!xfs_btree_cur_zone) @@ -1901,8 +1893,8 @@ xfs_init_zones(void) xfs_inode_zone = kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", - KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | - KM_ZONE_SPREAD, xfs_inode_init_once); + KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD, + xfs_fs_inode_init_once); if (!xfs_inode_zone) goto out_destroy_efi_zone; @@ -1950,8 +1942,6 @@ xfs_init_zones(void) mempool_destroy(xfs_ioend_pool); out_destroy_ioend_zone: kmem_zone_destroy(xfs_ioend_zone); - out_destroy_vnode_zone: - kmem_zone_destroy(xfs_vnode_zone); out: return -ENOMEM; } @@ -1976,7 +1966,6 @@ xfs_destroy_zones(void) kmem_zone_destroy(xfs_log_ticket_zone); mempool_destroy(xfs_ioend_pool); kmem_zone_destroy(xfs_ioend_zone); - kmem_zone_destroy(xfs_vnode_zone); } diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index ac827d2..ad18262 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -84,25 +84,12 @@ vn_ioerror( #ifdef XFS_INODE_TRACE -/* - * Reference count of Linux inode if present, -1 if the xfs_inode - * has no associated Linux inode. - */ -static inline int xfs_icount(struct xfs_inode *ip) -{ - struct inode *inode = VFS_I(ip); - - if (inode) - return atomic_read(&inode->i_count); - return -1; -} - #define KTRACE_ENTER(ip, vk, s, line, ra) \ ktrace_enter( (ip)->i_trace, \ /* 0 */ (void *)(__psint_t)(vk), \ /* 1 */ (void *)(s), \ /* 2 */ (void *)(__psint_t) line, \ -/* 3 */ (void *)(__psint_t)xfs_icount(ip), \ +/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \ /* 4 */ (void *)(ra), \ /* 5 */ NULL, \ /* 6 */ (void *)(__psint_t)current_cpu(), \ diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index b2539b1..c4414e8 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -44,77 +44,65 @@ */ static int xfs_iget_cache_hit( - struct inode *inode, struct xfs_perag *pag, struct xfs_inode *ip, int flags, int lock_flags) __releases(pag->pag_ici_lock) { struct xfs_mount *mp = ip->i_mount; - struct inode *old_inode; int error = 0; /* * If INEW is set this inode is being set up + * If IRECLAIM is set this inode is being torn down * Pause and try again. 
*/ - if (xfs_iflags_test(ip, XFS_INEW)) { + if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { error = EAGAIN; XFS_STATS_INC(xs_ig_frecycle); goto out_error; } - old_inode = ip->i_vnode; - if (old_inode == NULL) { + /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ + if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { + /* - * If IRECLAIM is set this inode is - * on its way out of the system, - * we need to pause and try again. + * If lookup is racing with unlink, then we should return an + * error immediately so we don't remove it from the reclaim + * list and potentially leak the inode. */ - if (xfs_iflags_test(ip, XFS_IRECLAIM)) { - error = EAGAIN; - XFS_STATS_INC(xs_ig_frecycle); + + if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { + error = ENOENT; goto out_error; } - ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + + xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); /* - * If lookup is racing with unlink, then we - * should return an error immediately so we - * don't remove it from the reclaim list and - * potentially leak the inode. + * We need to re-initialise the VFS inode as it has been + * 'freed' by the VFS. Do this here so we can deal with + * errors cleanly, then tag it so it can be set up correctly + * later. */ - if ((ip->i_d.di_mode == 0) && - !(flags & XFS_IGET_CREATE)) { - error = ENOENT; + if (!inode_init_always(mp->m_super, VFS_I(ip))) { + error = ENOMEM; goto out_error; } - xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); - + xfs_iflags_set(ip, XFS_INEW); xfs_iflags_clear(ip, XFS_IRECLAIMABLE); read_unlock(&pag->pag_ici_lock); XFS_MOUNT_ILOCK(mp); list_del_init(&ip->i_reclaim); XFS_MOUNT_IUNLOCK(mp); - - } else if (inode != old_inode) { - /* The inode is being torn down, pause and - * try again. - */ - if (old_inode->i_state & (I_FREEING | I_CLEAR)) { - error = EAGAIN; - XFS_STATS_INC(xs_ig_frecycle); - goto out_error; - } -/* Chances are the other vnode (the one in the inode) is being torn -* down right now, and we landed on top of it. Question is, what do -* we do? Unhook the old inode and hook up the new one? -*/ - cmn_err(CE_PANIC, - "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p", - old_inode, inode); + } else if (!igrab(VFS_I(ip))) { + /* If the VFS inode is being torn down, pause and try again. */ + error = EAGAIN; + XFS_STATS_INC(xs_ig_frecycle); + goto out_error; } else { + /* we've got a live one */ read_unlock(&pag->pag_ici_lock); } @@ -215,11 +203,11 @@ out_destroy: /* * Look up an inode by number in the given file system. * The inode is looked up in the cache held in each AG. - * If the inode is found in the cache, attach it to the provided - * vnode. + * If the inode is found in the cache, initialise the vfs inode + * if necessary. * * If it is not in core, read it in from the file system's device, - * add it to the cache and attach the provided vnode. + * add it to the cache and initialise the vfs inode. * * The inode is locked according to the value of the lock_flags parameter. * This flag parameter indicates how and if the inode's IO lock and inode lock @@ -236,9 +224,8 @@ out_destroy: * bno -- the block number starting the buffer containing the inode, * if known (as by bulkstat), else 0. 
*/ -STATIC int -xfs_iget_core( - struct inode *inode, +int +xfs_iget( xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, @@ -269,7 +256,7 @@ again: ip = radix_tree_lookup(&pag->pag_ici_root, agino); if (ip) { - error = xfs_iget_cache_hit(inode, pag, ip, flags, lock_flags); + error = xfs_iget_cache_hit(pag, ip, flags, lock_flags); if (error) goto out_error_or_again; } else { @@ -283,23 +270,16 @@ again: } xfs_put_perag(mp, pag); - ASSERT(ip->i_df.if_ext_max == - XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); - xfs_iflags_set(ip, XFS_IMODIFIED); *ipp = ip; - /* - * Set up the Linux with the Linux inode. - */ - ip->i_vnode = inode; - inode->i_private = ip; - + ASSERT(ip->i_df.if_ext_max == + XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); /* * If we have a real type for an on-disk inode, we can set ops(&unlock) * now. If it's a new inode being created, xfs_ialloc will handle it. */ - if (ip->i_d.di_mode != 0) + if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0) xfs_setup_inode(ip); return 0; @@ -314,75 +294,6 @@ out_error_or_again: /* - * The 'normal' internal xfs_iget, if needed it will - * 'allocate', or 'get', the vnode. - */ -int -xfs_iget( - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_ino_t ino, - uint flags, - uint lock_flags, - xfs_inode_t **ipp, - xfs_daddr_t bno) -{ - struct inode *inode; - xfs_inode_t *ip; - int error; - - XFS_STATS_INC(xs_ig_attempts); - -retry: - inode = iget_locked(mp->m_super, ino); - if (!inode) - /* If we got no inode we are out of memory */ - return ENOMEM; - - if (inode->i_state & I_NEW) { - XFS_STATS_INC(vn_active); - XFS_STATS_INC(vn_alloc); - - error = xfs_iget_core(inode, mp, tp, ino, flags, - lock_flags, ipp, bno); - if (error) { - make_bad_inode(inode); - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - iput(inode); - } - return error; - } - - /* - * If the inode is not fully constructed due to - * filehandle mismatches wait for the inode to go - * away and try again. - * - * iget_locked will call __wait_on_freeing_inode - * to wait for the inode to go away. - */ - if (is_bad_inode(inode)) { - iput(inode); - delay(1); - goto retry; - } - - ip = XFS_I(inode); - if (!ip) { - iput(inode); - delay(1); - goto retry; - } - - if (lock_flags != 0) - xfs_ilock(ip, lock_flags); - XFS_STATS_INC(xs_ig_found); - *ipp = ip; - return 0; -} - -/* * Look for the inode corresponding to the given ino in the hash table. * If it is there and its i_transp pointer matches tp, return it. * Otherwise, return NULL. @@ -482,14 +393,6 @@ xfs_ireclaim(xfs_inode_t *ip) XFS_QM_DQDETACH(ip->i_mount, ip); /* - * Pull our behavior descriptor from the vnode chain. - */ - if (ip->i_vnode) { - ip->i_vnode->i_private = NULL; - ip->i_vnode = NULL; - } - - /* * Free all memory associated with the inode. */ xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bc33762..99d9118 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -813,6 +813,16 @@ xfs_inode_alloc( ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(list_empty(&ip->i_reclaim)); + /* + * initialise the VFS inode here to get failures + * out of the way early. + */ + if (!inode_init_always(mp->m_super, VFS_I(ip))) { + kmem_zone_free(xfs_inode_zone, ip); + return NULL; + } + + /* initialise the xfs inode */ ip->i_ino = ino; ip->i_mount = mp; ip->i_blkno = 0; @@ -1086,6 +1096,7 @@ xfs_ialloc( uint flags; int error; timespec_t tv; + int filestreams = 0; /* * Call the space management code to pick @@ -1093,9 +1104,8 @@ xfs_ialloc( */ error = xfs_dialloc(tp, pip ? 
pip->i_ino : 0, mode, okalloc, ialloc_context, call_again, &ino); - if (error != 0) { + if (error) return error; - } if (*call_again || ino == NULLFSINO) { *ipp = NULL; return 0; @@ -1109,9 +1119,8 @@ xfs_ialloc( */ error = xfs_trans_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); - if (error != 0) { + if (error) return error; - } ASSERT(ip != NULL); ip->i_d.di_mode = (__uint16_t)mode; @@ -1192,13 +1201,12 @@ xfs_ialloc( flags |= XFS_ILOG_DEV; break; case S_IFREG: - if (pip && xfs_inode_is_filestream(pip)) { - error = xfs_filestream_associate(pip, ip); - if (error < 0) - return -error; - if (!error) - xfs_iflags_set(ip, XFS_IFILESTREAM); - } + /* + * we can't set up filestreams until after the VFS inode + * is set up properly. + */ + if (pip && xfs_inode_is_filestream(pip)) + filestreams = 1; /* fall through */ case S_IFDIR: if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { @@ -1264,6 +1272,15 @@ xfs_ialloc( /* now that we have an i_mode we can setup inode ops and unlock */ xfs_setup_inode(ip); + /* now we have set up the vfs inode we can associate the filestream */ + if (filestreams) { + error = xfs_filestream_associate(pip, ip); + if (error < 0) + return -error; + if (!error) + xfs_iflags_set(ip, XFS_IFILESTREAM); + } + *ipp = ip; return 0; } @@ -2650,6 +2667,10 @@ xfs_idestroy_fork( * It must free the inode itself and any buffers allocated for * if_extents/if_data and if_broot. It must also free the lock * associated with the inode. + * + * Note: because we don't initialise everything on reallocation out + * of the zone, we must ensure we nullify everything correctly before + * freeing the structure. */ void xfs_idestroy( diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 6fd20fc..345b43a 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -236,7 +236,6 @@ typedef struct xfs_inode { /* Inode linking and identification information. */ struct xfs_mount *i_mount; /* fs mount struct ptr */ struct list_head i_reclaim; /* reclaim list */ - struct inode *i_vnode; /* vnode backpointer */ struct xfs_dquot *i_udquot; /* user dquot */ struct xfs_dquot *i_gdquot; /* group dquot */ @@ -271,6 +270,10 @@ typedef struct xfs_inode { xfs_fsize_t i_size; /* in-memory size */ xfs_fsize_t i_new_size; /* size when write completes */ atomic_t i_iocount; /* outstanding I/O count */ + + /* VFS inode */ + struct inode i_vnode; /* embedded VFS inode */ + /* Trace buffers per inode. 
*/ #ifdef XFS_INODE_TRACE struct ktrace *i_trace; /* general inode trace */ @@ -298,13 +301,13 @@ typedef struct xfs_inode { /* Convert from vfs inode to xfs inode */ static inline struct xfs_inode *XFS_I(struct inode *inode) { - return (struct xfs_inode *)inode->i_private; + return container_of(inode, struct xfs_inode, i_vnode); } /* convert from xfs inode to vfs inode */ static inline struct inode *VFS_I(struct xfs_inode *ip) { - return (struct inode *)ip->i_vnode; + return &ip->i_vnode; } /* diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index a671457..7fb577c 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2833,6 +2833,7 @@ xfs_reclaim( if (!ip->i_update_core && (ip->i_itemp == NULL)) { xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_iflock(ip); + xfs_iflags_set(ip, XFS_IRECLAIMABLE); return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); } else { xfs_mount_t *mp = ip->i_mount; @@ -2841,8 +2842,6 @@ xfs_reclaim( XFS_MOUNT_ILOCK(mp); spin_lock(&ip->i_flags_lock); __xfs_iflags_set(ip, XFS_IRECLAIMABLE); - VFS_I(ip)->i_private = NULL; - ip->i_vnode = NULL; spin_unlock(&ip->i_flags_lock); list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); XFS_MOUNT_IUNLOCK(mp); @@ -2857,10 +2856,6 @@ xfs_finish_reclaim( int sync_mode) { xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); - struct inode *vp = VFS_I(ip); - - if (vp && VN_BAD(vp)) - goto reclaim; /* The hash lock here protects a thread in xfs_iget_core from * racing with us on linking the inode back with a vnode. @@ -2870,7 +2865,7 @@ xfs_finish_reclaim( write_lock(&pag->pag_ici_lock); spin_lock(&ip->i_flags_lock); if (__xfs_iflags_test(ip, XFS_IRECLAIM) || - (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) { + !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { spin_unlock(&ip->i_flags_lock); write_unlock(&pag->pag_ici_lock); if (locked) { @@ -2904,15 +2899,13 @@ xfs_finish_reclaim( * In the case of a forced shutdown we rely on xfs_iflush() to * wait for the inode to be unpinned before returning an error. */ - if (xfs_iflush(ip, sync_mode) == 0) { + if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { /* synchronize with xfs_iflush_done */ xfs_iflock(ip); xfs_ifunlock(ip); } xfs_iunlock(ip, XFS_ILOCK_EXCL); - - reclaim: xfs_ireclaim(ip); return 0; } -- cgit v0.10.2 From 99fa8cb3c580d4445fe8fc239454e8f37a3b6847 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:36:40 +1100 Subject: [XFS] Prevent use-after-free caused by synchronous inode reclaim With the combined linux and XFS inode, we need to ensure that the combined structure is not freed before the generic code is finished with the inode. As it turns out, there is a case where the XFS inode is freed before the linux inode - when xfs_reclaim() is called from ->clear_inode() on a clean inode, the xfs inode is freed during that call. The generic code references the inode after the ->clear_inode() call, so this is a use after free situation. Fix the problem by moving the xfs_reclaim() call to ->destroy_inode() instead of in ->clear_inode(). This ensures the combined inode structure is not freed until after the generic code has finished with it. 
SGI-PV: 988141 SGI-Modid: xfs-linux-melb:xfs-kern:32324a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index c6ef684..bb535a7 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -875,13 +875,18 @@ xfs_fs_alloc_inode( } /* - * we need to provide an empty inode free function to prevent - * the generic code from trying to free our combined inode. + * Now that the generic code is guaranteed not to be accessing + * the linux inode, we can reclaim the inode. */ STATIC void xfs_fs_destroy_inode( struct inode *inode) { + xfs_inode_t *ip = XFS_I(inode); + + XFS_STATS_INC(vn_reclaim); + if (xfs_reclaim(ip)) + panic("%s: cannot reclaim 0x%p\n", __func__, inode); } /* @@ -958,22 +963,13 @@ xfs_fs_clear_inode( { xfs_inode_t *ip = XFS_I(inode); - /* - * ip can be null when xfs_iget_core calls xfs_idestroy if we - * find an inode with di_mode == 0 but without IGET_CREATE set. - */ - if (ip) { - xfs_itrace_entry(ip); - XFS_STATS_INC(vn_rele); - XFS_STATS_INC(vn_remove); - XFS_STATS_INC(vn_reclaim); - XFS_STATS_DEC(vn_active); - - xfs_inactive(ip); - xfs_iflags_clear(ip, XFS_IMODIFIED); - if (xfs_reclaim(ip)) - panic("%s: cannot reclaim 0x%p\n", __func__, inode); - } + xfs_itrace_entry(ip); + XFS_STATS_INC(vn_rele); + XFS_STATS_INC(vn_remove); + XFS_STATS_DEC(vn_active); + + xfs_inactive(ip); + xfs_iflags_clear(ip, XFS_IMODIFIED); } STATIC void -- cgit v0.10.2 From 493dca6178cf4a6ae2a16c602d6cb455bb7d31bd Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 30 Oct 2008 17:36:52 +1100 Subject: [XFS] Fix build warning - xfs_fs_alloc_inode() needs a return statement SGI-PV: 988141 SGI-Modid: xfs-linux-melb:xfs-kern:32325a Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index bb535a7..1d67d7f 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -872,6 +872,7 @@ xfs_fs_alloc_inode( struct super_block *sb) { BUG(); + return NULL; } /* -- cgit v0.10.2 From fce08f2f3bd0d08feeb4cea70e44aa3471d9bb4c Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:37:03 +1100 Subject: [XFS] move inode reclaim functions to xfs_sync.c Background inode reclaim is run by the xfssyncd. Move the reclaim worker functions to be close to the sync code as the are very similar in structure and are both run from the same background thread. SGI-PV: 988142 SGI-Modid: xfs-linux-melb:xfs-kern:32329a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index b2b7082..79038ea 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -583,3 +583,94 @@ xfs_syncd_stop( kthread_stop(mp->m_sync_task); } +int +xfs_finish_reclaim( + xfs_inode_t *ip, + int locked, + int sync_mode) +{ + xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); + + /* The hash lock here protects a thread in xfs_iget_core from + * racing with us on linking the inode back with a vnode. + * Once we have the XFS_IRECLAIM flag set it will not touch + * us. 
+ */ + write_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + if (__xfs_iflags_test(ip, XFS_IRECLAIM) || + !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { + spin_unlock(&ip->i_flags_lock); + write_unlock(&pag->pag_ici_lock); + if (locked) { + xfs_ifunlock(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } + return 1; + } + __xfs_iflags_set(ip, XFS_IRECLAIM); + spin_unlock(&ip->i_flags_lock); + write_unlock(&pag->pag_ici_lock); + xfs_put_perag(ip->i_mount, pag); + + /* + * If the inode is still dirty, then flush it out. If the inode + * is not in the AIL, then it will be OK to flush it delwri as + * long as xfs_iflush() does not keep any references to the inode. + * We leave that decision up to xfs_iflush() since it has the + * knowledge of whether it's OK to simply do a delwri flush of + * the inode or whether we need to wait until the inode is + * pulled from the AIL. + * We get the flush lock regardless, though, just to make sure + * we don't free it while it is being flushed. + */ + if (!locked) { + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_iflock(ip); + } + + /* + * In the case of a forced shutdown we rely on xfs_iflush() to + * wait for the inode to be unpinned before returning an error. + */ + if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { + /* synchronize with xfs_iflush_done */ + xfs_iflock(ip); + xfs_ifunlock(ip); + } + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_ireclaim(ip); + return 0; +} + +int +xfs_finish_reclaim_all( + xfs_mount_t *mp, + int noblock, + int mode) +{ + xfs_inode_t *ip, *n; + +restart: + XFS_MOUNT_ILOCK(mp); + list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { + if (noblock) { + if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) + continue; + if (xfs_ipincount(ip) || + !xfs_iflock_nowait(ip)) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + continue; + } + } + XFS_MOUNT_IUNLOCK(mp); + if (xfs_finish_reclaim(ip, noblock, mode)) + delay(1); + goto restart; + } + XFS_MOUNT_IUNLOCK(mp); + return 0; +} + + diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 3b49aa3..23117a1 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -45,4 +45,7 @@ void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inode(struct xfs_inode *ip); void xfs_flush_device(struct xfs_inode *ip); +int xfs_finish_reclaim(struct xfs_inode *ip, int locked, int sync_mode); +int xfs_finish_reclaim_all(struct xfs_mount *mp, int noblock, int mode); + #endif diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 345b43a..64e50ff 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -496,8 +496,6 @@ int xfs_isilocked(xfs_inode_t *, uint); uint xfs_ilock_map_shared(xfs_inode_t *); void xfs_iunlock_map_shared(xfs_inode_t *, uint); void xfs_ireclaim(xfs_inode_t *); -int xfs_finish_reclaim(xfs_inode_t *, int, int); -int xfs_finish_reclaim_all(struct xfs_mount *, int, int); /* * xfs_inode.c prototypes. diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 7fb577c..cdcc835 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2849,96 +2849,6 @@ xfs_reclaim( return 0; } -int -xfs_finish_reclaim( - xfs_inode_t *ip, - int locked, - int sync_mode) -{ - xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); - - /* The hash lock here protects a thread in xfs_iget_core from - * racing with us on linking the inode back with a vnode. - * Once we have the XFS_IRECLAIM flag set it will not touch - * us. 
- */ - write_lock(&pag->pag_ici_lock); - spin_lock(&ip->i_flags_lock); - if (__xfs_iflags_test(ip, XFS_IRECLAIM) || - !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { - spin_unlock(&ip->i_flags_lock); - write_unlock(&pag->pag_ici_lock); - if (locked) { - xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } - return 1; - } - __xfs_iflags_set(ip, XFS_IRECLAIM); - spin_unlock(&ip->i_flags_lock); - write_unlock(&pag->pag_ici_lock); - xfs_put_perag(ip->i_mount, pag); - - /* - * If the inode is still dirty, then flush it out. If the inode - * is not in the AIL, then it will be OK to flush it delwri as - * long as xfs_iflush() does not keep any references to the inode. - * We leave that decision up to xfs_iflush() since it has the - * knowledge of whether it's OK to simply do a delwri flush of - * the inode or whether we need to wait until the inode is - * pulled from the AIL. - * We get the flush lock regardless, though, just to make sure - * we don't free it while it is being flushed. - */ - if (!locked) { - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_iflock(ip); - } - - /* - * In the case of a forced shutdown we rely on xfs_iflush() to - * wait for the inode to be unpinned before returning an error. - */ - if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { - /* synchronize with xfs_iflush_done */ - xfs_iflock(ip); - xfs_ifunlock(ip); - } - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_ireclaim(ip); - return 0; -} - -int -xfs_finish_reclaim_all( - xfs_mount_t *mp, - int noblock, - int mode) -{ - xfs_inode_t *ip, *n; - -restart: - XFS_MOUNT_ILOCK(mp); - list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { - if (noblock) { - if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) - continue; - if (xfs_ipincount(ip) || - !xfs_iflock_nowait(ip)) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - continue; - } - } - XFS_MOUNT_IUNLOCK(mp); - if (xfs_finish_reclaim(ip, noblock, mode)) - delay(1); - goto restart; - } - XFS_MOUNT_IUNLOCK(mp); - return 0; -} - /* * xfs_alloc_file_space() * This routine allocates disk space for the given file. -- cgit v0.10.2 From 1dc3318ae1c1cc11f9fb8279a806de448e2b90e8 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:37:15 +1100 Subject: [XFS] rename inode reclaim functions The function names xfs_finish_reclaim and xfs_finish_reclaim_all are not very descriptive of what they are reclaiming. Rename to xfs_reclaim_inode[s] to match the xfs_sync_inodes() function. SGI-PV: 988142 SGI-Modid: xfs-linux-melb:xfs-kern:32330a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 79038ea..34413ce 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -364,7 +364,7 @@ xfs_quiesce_fs( int count = 0, pincount; xfs_flush_buftarg(mp->m_ddev_targp, 0); - xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* * This loop must run at least twice. 
The first instance of the loop @@ -505,7 +505,7 @@ xfs_sync_worker( if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); - xfs_finish_reclaim_all(mp, 1, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); /* dgc: errors ignored here */ error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); error = xfs_sync_fsdata(mp, SYNC_BDFLUSH); @@ -584,7 +584,7 @@ xfs_syncd_stop( } int -xfs_finish_reclaim( +xfs_reclaim_inode( xfs_inode_t *ip, int locked, int sync_mode) @@ -645,7 +645,7 @@ xfs_finish_reclaim( } int -xfs_finish_reclaim_all( +xfs_reclaim_inodes( xfs_mount_t *mp, int noblock, int mode) @@ -665,7 +665,7 @@ restart: } } XFS_MOUNT_IUNLOCK(mp); - if (xfs_finish_reclaim(ip, noblock, mode)) + if (xfs_reclaim_inode(ip, noblock, mode)) delay(1); goto restart; } diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 23117a1..c1bcd50 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -45,7 +45,7 @@ void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inode(struct xfs_inode *ip); void xfs_flush_device(struct xfs_inode *ip); -int xfs_finish_reclaim(struct xfs_inode *ip, int locked, int sync_mode); -int xfs_finish_reclaim_all(struct xfs_mount *mp, int noblock, int mode); +int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); +int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); #endif diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 43e5917..3704bae 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1235,7 +1235,7 @@ xfs_unmountfs( * need to force the log first. */ xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); - xfs_finish_reclaim_all(mp, 0, XFS_IFLUSH_ASYNC); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC); XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index cdcc835..0794563 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2834,7 +2834,7 @@ xfs_reclaim( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_iflock(ip); xfs_iflags_set(ip, XFS_IRECLAIMABLE); - return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); + return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); } else { xfs_mount_t *mp = ip->i_mount; -- cgit v0.10.2 From 396beb85311689e38634926058d9a3bb0576ca8a Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:37:26 +1100 Subject: [XFS] mark inodes for reclaim via a tag in the inode radix tree Prepare for removing the deleted inode list by marking inodes for reclaim in the inode radix trees so that we can use the radix trees to find reclaimable inodes. 
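The building block is the generic radix-tree tag API; a minimal sketch of the set/clear half of the pattern follows (helper names here are illustrative, and the caller is assumed to hold the per-AG and inode flag locks as the patch does):

#include <linux/radix-tree.h>

#define RECLAIM_TAG	0	/* plays the role of XFS_ICI_RECLAIM_TAG */

/* tag one entry so a later tag-based walk can find it cheaply */
static void mark_reclaimable(struct radix_tree_root *root, unsigned long agino)
{
	radix_tree_tag_set(root, agino, RECLAIM_TAG);
}

/* drop the tag when the inode is reused or finally reclaimed */
static void clear_reclaimable(struct radix_tree_root *root, unsigned long agino)
{
	radix_tree_tag_clear(root, agino, RECLAIM_TAG);
}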
SGI-PV: 988142 SGI-Modid: xfs-linux-melb:xfs-kern:32331a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 34413ce..9e7f4dc 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -644,6 +644,47 @@ xfs_reclaim_inode( return 0; } +void +xfs_inode_set_reclaim_tag( + xfs_inode_t *ip) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + + read_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + radix_tree_tag_set(&pag->pag_ici_root, + XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + spin_unlock(&ip->i_flags_lock); + read_unlock(&pag->pag_ici_lock); + xfs_put_perag(mp, pag); +} + +void +__xfs_inode_clear_reclaim_tag( + xfs_mount_t *mp, + xfs_perag_t *pag, + xfs_inode_t *ip) +{ + radix_tree_tag_clear(&pag->pag_ici_root, + XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); +} + +void +xfs_inode_clear_reclaim_tag( + xfs_inode_t *ip) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + + read_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + __xfs_inode_clear_reclaim_tag(mp, pag, ip); + spin_unlock(&ip->i_flags_lock); + read_unlock(&pag->pag_ici_lock); + xfs_put_perag(mp, pag); +} + int xfs_reclaim_inodes( xfs_mount_t *mp, diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index c1bcd50..5f6de1e 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -48,4 +48,8 @@ void xfs_flush_device(struct xfs_inode *ip); int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); +void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); +void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); +void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, + struct xfs_inode *ip); #endif diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 729ee3e..2bfd863 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -204,6 +204,11 @@ typedef struct xfs_perag #endif } xfs_perag_t; +/* + * tags for inode radix tree + */ +#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */ + #define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels) #define XFS_MIN_FREELIST_RAW(bl,cl,mp) \ (MIN(bl + 1, XFS_AG_MAXLEVELS(mp)) + MIN(cl + 1, XFS_AG_MAXLEVELS(mp))) diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index c4414e8..a0387f1 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -91,6 +91,9 @@ xfs_iget_cache_hit( } xfs_iflags_set(ip, XFS_INEW); xfs_iflags_clear(ip, XFS_IRECLAIMABLE); + + /* clear the radix tree reclaim flag as well. */ + __xfs_inode_clear_reclaim_tag(mp, pag, ip); read_unlock(&pag->pag_ici_lock); XFS_MOUNT_ILOCK(mp); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 0794563..f89a73e 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2845,6 +2845,7 @@ xfs_reclaim( spin_unlock(&ip->i_flags_lock); list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); XFS_MOUNT_IUNLOCK(mp); + xfs_inode_set_reclaim_tag(ip); } return 0; } -- cgit v0.10.2 From 7a3be02baef7bdec43965103441bde5de4dd8601 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:37:37 +1100 Subject: [XFS] use the inode radix tree for reclaiming inodes Use the reclaim tag to walk the radix tree and find the inodes under reclaim. This was the only user of the deleted inode list. 
SGI-PV: 988142 SGI-Modid: xfs-linux-melb:xfs-kern:32333a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 9e7f4dc..bbb40e2 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -685,32 +685,93 @@ xfs_inode_clear_reclaim_tag( xfs_put_perag(mp, pag); } -int -xfs_reclaim_inodes( + +STATIC void +xfs_reclaim_inodes_ag( xfs_mount_t *mp, - int noblock, + int ag, + int noblock, int mode) { - xfs_inode_t *ip, *n; + xfs_inode_t *ip = NULL; + xfs_perag_t *pag = &mp->m_perag[ag]; + int nr_found; + int first_index; + int skipped; restart: - XFS_MOUNT_ILOCK(mp); - list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) { + first_index = 0; + skipped = 0; + do { + /* + * use a gang lookup to find the next inode in the tree + * as the tree is sparse and a gang lookup walks to find + * the number of objects requested. + */ + read_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, + (void**)&ip, first_index, 1, + XFS_ICI_RECLAIM_TAG); + + if (!nr_found) { + read_unlock(&pag->pag_ici_lock); + break; + } + + /* update the index for the next lookup */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + + ASSERT(xfs_iflags_test(ip, (XFS_IRECLAIMABLE|XFS_IRECLAIM))); + + /* ignore if already under reclaim */ + if (xfs_iflags_test(ip, XFS_IRECLAIM)) { + read_unlock(&pag->pag_ici_lock); + continue; + } + if (noblock) { - if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { + read_unlock(&pag->pag_ici_lock); continue; + } if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { xfs_iunlock(ip, XFS_ILOCK_EXCL); + read_unlock(&pag->pag_ici_lock); continue; } } - XFS_MOUNT_IUNLOCK(mp); + read_unlock(&pag->pag_ici_lock); + + /* + * hmmm - this is an inode already in reclaim. Do + * we even bother catching it here? + */ if (xfs_reclaim_inode(ip, noblock, mode)) - delay(1); + skipped++; + } while (nr_found); + + if (skipped) { + delay(1); goto restart; } - XFS_MOUNT_IUNLOCK(mp); + return; + +} + +int +xfs_reclaim_inodes( + xfs_mount_t *mp, + int noblock, + int mode) +{ + int i; + + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + if (!mp->m_perag[i].pag_ici_init) + continue; + xfs_reclaim_inodes_ag(mp, i, noblock, mode); + } return 0; } -- cgit v0.10.2 From 116545130cbc5214523c2f994a11c81ef9eb9186 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:37:49 +1100 Subject: [XFS] kill deleted inodes list Now that the deleted inodes list is unused, kill it. This also removes the i_reclaim list head from the xfs_inode, shrinking it by two pointers. 
SGI-PV: 988142 SGI-Modid: xfs-linux-melb:xfs-kern:32334a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 1d67d7f..206a949 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -913,7 +913,6 @@ xfs_fs_inode_init_once( atomic_set(&ip->i_iocount, 0); atomic_set(&ip->i_pincount, 0); spin_lock_init(&ip->i_flags_lock); - INIT_LIST_HEAD(&ip->i_reclaim); init_waitqueue_head(&ip->i_ipin_wait); /* * Because we want to use a counting completion, complete @@ -1546,7 +1545,6 @@ xfs_fs_fill_super( goto out_free_args; spin_lock_init(&mp->m_sb_lock); - mutex_init(&mp->m_ilock); mutex_init(&mp->m_growlock); atomic_set(&mp->m_active_trans, 0); INIT_LIST_HEAD(&mp->m_sync_list); diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index bbb40e2..22006b5 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -644,6 +644,11 @@ xfs_reclaim_inode( return 0; } +/* + * We set the inode flag atomically with the radix tree tag. + * Once we get tag lookups on the radix tree, this inode flag + * can go away. + */ void xfs_inode_set_reclaim_tag( xfs_inode_t *ip) @@ -655,6 +660,7 @@ xfs_inode_set_reclaim_tag( spin_lock(&ip->i_flags_lock); radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + __xfs_iflags_set(ip, XFS_IRECLAIMABLE); spin_unlock(&ip->i_flags_lock); read_unlock(&pag->pag_ici_lock); xfs_put_perag(mp, pag); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index a0387f1..8001338 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -95,10 +95,6 @@ xfs_iget_cache_hit( /* clear the radix tree reclaim flag as well. */ __xfs_inode_clear_reclaim_tag(mp, pag, ip); read_unlock(&pag->pag_ici_lock); - - XFS_MOUNT_ILOCK(mp); - list_del_init(&ip->i_reclaim); - XFS_MOUNT_IUNLOCK(mp); } else if (!igrab(VFS_I(ip))) { /* If the VFS inode is being torn down, pause and try again. */ error = EAGAIN; @@ -419,11 +415,7 @@ xfs_iextract( write_unlock(&pag->pag_ici_lock); xfs_put_perag(mp, pag); - /* Deal with the deleted inodes list */ - XFS_MOUNT_ILOCK(mp); - list_del_init(&ip->i_reclaim); mp->m_ireclaims++; - XFS_MOUNT_IUNLOCK(mp); } /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 99d9118..4eb629f 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -811,7 +811,7 @@ xfs_inode_alloc( ASSERT(atomic_read(&ip->i_iocount) == 0); ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); - ASSERT(list_empty(&ip->i_reclaim)); + ASSERT(completion_done(&ip->i_flush)); /* * initialise the VFS inode here to get failures @@ -2729,7 +2729,7 @@ xfs_idestroy( ASSERT(atomic_read(&ip->i_iocount) == 0); ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); - ASSERT(list_empty(&ip->i_reclaim)); + ASSERT(completion_done(&ip->i_flush)); kmem_zone_free(xfs_inode_zone, ip); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 64e50ff..a5aeb9c 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -235,7 +235,6 @@ typedef struct dm_attrs_s { typedef struct xfs_inode { /* Inode linking and identification information. 
*/ struct xfs_mount *i_mount; /* fs mount struct ptr */ - struct list_head i_reclaim; /* reclaim list */ struct xfs_dquot *i_udquot; /* user dquot */ struct xfs_dquot *i_gdquot; /* group dquot */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 3704bae..177976d 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -580,7 +580,6 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) mp->m_blockmask = sbp->sb_blocksize - 1; mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; mp->m_blockwmask = mp->m_blockwsize - 1; - INIT_LIST_HEAD(&mp->m_del_inodes); /* * Setup for attributes, in case they get created. diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 4e62802..67cf0b2 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -248,8 +248,6 @@ typedef struct xfs_mount { xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ spinlock_t m_agirotor_lock;/* .. and lock protecting it */ xfs_agnumber_t m_maxagi; /* highest inode alloc group */ - struct list_head m_del_inodes; /* inodes to reclaim */ - mutex_t m_ilock; /* inode list mutex */ uint m_ireclaims; /* count of calls to reclaim*/ uint m_readio_log; /* min read size log bytes */ uint m_readio_blocks; /* min read size blocks */ @@ -312,8 +310,7 @@ typedef struct xfs_mount { int m_attr_magicpct;/* 37% of the blocksize */ int m_dir_magicpct; /* 37% of the dir blocksize */ __uint8_t m_mk_sharedro; /* mark shared ro on unmount */ - __uint8_t m_inode_quiesce;/* call quiesce on new inodes. - field governed by m_ilock */ + __uint8_t m_inode_quiesce;/* call quiesce on new inodes. */ __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ int m_dirblksize; /* directory block sz--bytes */ diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index f89a73e..1d15a32 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2835,18 +2835,8 @@ xfs_reclaim( xfs_iflock(ip); xfs_iflags_set(ip, XFS_IRECLAIMABLE); return xfs_reclaim_inode(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC); - } else { - xfs_mount_t *mp = ip->i_mount; - - /* Protect sync and unpin from us */ - XFS_MOUNT_ILOCK(mp); - spin_lock(&ip->i_flags_lock); - __xfs_iflags_set(ip, XFS_IRECLAIMABLE); - spin_unlock(&ip->i_flags_lock); - list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); - XFS_MOUNT_IUNLOCK(mp); - xfs_inode_set_reclaim_tag(ip); } + xfs_inode_set_reclaim_tag(ip); return 0; } -- cgit v0.10.2 From 8c38ab032094ff1d903c79db689607b1ebae13ca Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:38:00 +1100 Subject: [XFS] Prevent looping in xfs_sync_inodes_ag If the last block of the AG has inodes in it and the AG is an exactly power-of-2 size then the last inode in the AG points to the last block in the AG. If we try to find the next inode in the AG by adding one to the inode number, we increment the inode number past the size of the AG. The result is that the macro XFS_INO_TO_AGINO() will strip the AG portion of the inode number and return an inode number of zero. That is, instead of terminating the lookup loop because we hit the inode number went outside the valid range for the AG, the search index returns to zero and we start traversing the radix tree from the start again. This results in an endless loop in xfs_sync_inodes_ag(). Fix it be detecting if the new search index decreases as a result of incrementing the current inode number. That indicate an overflow and hence that we have finished processing the AG so we can terminate the loop. 
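The wrap-around is easiest to see in isolation. A small stand-alone illustration (the 20-bit AG-relative inode number width is an assumption for the example, not the real geometry macros):

#include <assert.h>
#include <stdint.h>

#define AGINO_BITS	20
#define INO_TO_AGINO(ino)	((uint32_t)((ino) & ((1u << AGINO_BITS) - 1)))

int main(void)
{
	/* last inode of AG 3: every AG-relative bit is set */
	uint64_t last_ino = (3ull << AGINO_BITS) | ((1u << AGINO_BITS) - 1);

	uint32_t cur  = INO_TO_AGINO(last_ino);		/* 0xfffff */
	uint32_t next = INO_TO_AGINO(last_ino + 1);	/* carry hits the AG bits: 0 */

	/* the check added by this patch: a decreasing index means end of AG */
	assert(next < cur);
	return 0;
}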
SGI-PV: 988142 SGI-Modid: xfs-linux-melb:xfs-kern:32335a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 22006b5..ee1648b 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -59,7 +59,7 @@ xfs_sync_inodes_ag( { xfs_perag_t *pag = &mp->m_perag[ag]; int nr_found; - int first_index = 0; + uint32_t first_index = 0; int error = 0; int last_error = 0; int fflag = XFS_B_ASYNC; @@ -97,8 +97,17 @@ xfs_sync_inodes_ag( break; } - /* update the index for the next lookup */ + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { + read_unlock(&pag->pag_ici_lock); + break; + } /* * skip inodes in reclaim. Let xfs_syncsub do that for @@ -702,7 +711,7 @@ xfs_reclaim_inodes_ag( xfs_inode_t *ip = NULL; xfs_perag_t *pag = &mp->m_perag[ag]; int nr_found; - int first_index; + uint32_t first_index; int skipped; restart: @@ -724,8 +733,17 @@ restart: break; } - /* update the index for the next lookup */ + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { + read_unlock(&pag->pag_ici_lock); + break; + } ASSERT(xfs_iflags_test(ip, (XFS_IRECLAIMABLE|XFS_IRECLAIM))); -- cgit v0.10.2 From a7444053fb3ebd3d905e3c7a7bd5ea80a54b083a Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:38:12 +1100 Subject: [XFS] Account for allocated blocks when expanding directories When we create a directory, we reserve a number of blocks for the maximum possible expansion of of the directory due to various btree splits, freespace allocation, etc. Unfortunately, each allocation is not reflected in the total number of blocks still available to the transaction, so the maximal reservation is used over and over again. This leads to problems where an allocation group has only enough blocks for *some* of the allocations required for the directory modification. After the first N allocations, the remaining blocks in the allocation group drops below the total reservation, and subsequent allocations fail because the allocator will not allow the allocation to proceed if the AG does not have the enough blocks available for the entire allocation total. This results in an ENOSPC occurring after an allocation has already occurred. This results in aborting the directory operation (leaving the directory in an inconsistent state) and cancelling a dirty transaction, which results in a filesystem shutdown. Avoid the problem by reflecting the number of blocks allocated in any directory expansion in the total number of blocks available to the modification in progress. This prevents a directory modification from being aborted part way through with an ENOSPC. 
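A toy simulation of the accounting with made-up numbers (a 40-block worst-case reservation, an AG with 50 free blocks, three 10-block expansion steps):

#include <assert.h>

int main(void)
{
	int ag_free = 50;	/* free blocks left in the AG */
	int total = 40;		/* outstanding reservation for the directory op */
	int step;

	for (step = 0; step < 3; step++) {
		/* the allocator insists the AG can cover the whole reservation */
		assert(ag_free >= total);
		ag_free -= 10;	/* this expansion consumed 10 blocks */
		total -= 10;	/* the fix: shrink the reservation to match */
	}
	return 0;
}

Dropping the final subtraction reproduces the failure described above: the third step finds only 30 free blocks against a 40-block reservation and fails with ENOSPC in the middle of the directory modification.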
SGI-PV: 988144 SGI-Modid: xfs-linux-melb:xfs-kern:32340a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 9e561a9..a11a839 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -1566,11 +1566,14 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) int nmap, error, w, count, c, got, i, mapi; xfs_trans_t *tp; xfs_mount_t *mp; + xfs_drfsbno_t nblks; dp = args->dp; mp = dp->i_mount; w = args->whichfork; tp = args->trans; + nblks = dp->i_d.di_nblocks; + /* * For new directories adjust the file offset and block count. */ @@ -1647,6 +1650,8 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) } if (mapp != &map) kmem_free(mapp); + /* account for newly allocated blocks in reserved blocks total */ + args->total -= dp->i_d.di_nblocks - nblks; *new_blkno = (xfs_dablk_t)bno; return 0; } diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index 80e0dc5..1afb122 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -525,11 +525,13 @@ xfs_dir2_grow_inode( xfs_mount_t *mp; int nmap; /* number of bmap entries */ xfs_trans_t *tp; + xfs_drfsbno_t nblks; xfs_dir2_trace_args_s("grow_inode", args, space); dp = args->dp; tp = args->trans; mp = dp->i_mount; + nblks = dp->i_d.di_nblocks; /* * Set lowest possible block in the space requested. */ @@ -622,7 +624,11 @@ xfs_dir2_grow_inode( */ if (mapp != &map) kmem_free(mapp); + + /* account for newly allocated blocks in reserved blocks total */ + args->total -= dp->i_d.di_nblocks - nblks; *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); + /* * Update file's size if this is the data space and it grew. */ -- cgit v0.10.2 From 82fa9012458d867936d7bf130e6e14bdebc6873c Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:38:26 +1100 Subject: [XFS] Allocate the struct xfs_ail Rather than embedding the struct xfs_ail in the struct xfs_mount, allocate it during AIL initialisation. Add a back pointer to the struct xfs_ail so that we can pass around the xfs_ail and still be able to access the xfs_mount if need be. This is th first step involved in isolating the AIL implementation from the surrounding filesystem code. 
SGI-PV: 988143 SGI-Modid: xfs-linux-melb:xfs-kern:32346a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 206a949..655508c 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -814,18 +814,18 @@ xfs_setup_devices( */ void xfsaild_wakeup( - xfs_mount_t *mp, + struct xfs_ail *ailp, xfs_lsn_t threshold_lsn) { - mp->m_ail.xa_target = threshold_lsn; - wake_up_process(mp->m_ail.xa_task); + ailp->xa_target = threshold_lsn; + wake_up_process(ailp->xa_task); } int xfsaild( void *data) { - xfs_mount_t *mp = (xfs_mount_t *)data; + struct xfs_ail *ailp = data; xfs_lsn_t last_pushed_lsn = 0; long tout = 0; @@ -837,11 +837,11 @@ xfsaild( /* swsusp */ try_to_freeze(); - ASSERT(mp->m_log); - if (XFS_FORCED_SHUTDOWN(mp)) + ASSERT(ailp->xa_mount->m_log); + if (XFS_FORCED_SHUTDOWN(ailp->xa_mount)) continue; - tout = xfsaild_push(mp, &last_pushed_lsn); + tout = xfsaild_push(ailp, &last_pushed_lsn); } return 0; @@ -849,20 +849,20 @@ xfsaild( int xfsaild_start( - xfs_mount_t *mp) + struct xfs_ail *ailp) { - mp->m_ail.xa_target = 0; - mp->m_ail.xa_task = kthread_run(xfsaild, mp, "xfsaild"); - if (IS_ERR(mp->m_ail.xa_task)) - return -PTR_ERR(mp->m_ail.xa_task); + ailp->xa_target = 0; + ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild"); + if (IS_ERR(ailp->xa_task)) + return -PTR_ERR(ailp->xa_task); return 0; } void xfsaild_stop( - xfs_mount_t *mp) + struct xfs_ail *ailp) { - kthread_stop(mp->m_ail.xa_task); + kthread_stop(ailp->xa_task); } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 67cf0b2..287f900 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -63,6 +63,7 @@ struct xfs_extdelta; struct xfs_swapext; struct xfs_mru_cache; struct xfs_nameops; +struct xfs_ail; /* * Prototypes and functions for the Data Migration subsystem. 
@@ -224,18 +225,11 @@ extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); #define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) #endif -typedef struct xfs_ail { - struct list_head xa_ail; - uint xa_gen; - struct task_struct *xa_task; - xfs_lsn_t xa_target; -} xfs_ail_t; - typedef struct xfs_mount { struct super_block *m_super; xfs_tid_t m_tid; /* next unused tid for fs */ spinlock_t m_ail_lock; /* fs AIL mutex */ - xfs_ail_t m_ail; /* fs active log item list */ + struct xfs_ail *m_ail; /* fs active log item list */ xfs_sb_t m_sb; /* copy of fs superblock */ spinlock_t m_sb_lock; /* sb counter lock */ struct xfs_buf *m_sb_bp; /* buffer for superblock */ diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 1f77c00..db72b52 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -28,13 +28,13 @@ #include "xfs_trans_priv.h" #include "xfs_error.h" -STATIC void xfs_ail_insert(xfs_ail_t *, xfs_log_item_t *); -STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_t *, xfs_log_item_t *); -STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_t *); -STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_t *, xfs_log_item_t *); +STATIC void xfs_ail_insert(struct xfs_ail *, xfs_log_item_t *); +STATIC xfs_log_item_t * xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *); +STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *); +STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *); #ifdef DEBUG -STATIC void xfs_ail_check(xfs_ail_t *, xfs_log_item_t *); +STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *); #else #define xfs_ail_check(a,l) #endif /* DEBUG */ @@ -57,7 +57,7 @@ xfs_trans_tail_ail( xfs_log_item_t *lip; spin_lock(&mp->m_ail_lock); - lip = xfs_ail_min(&mp->m_ail); + lip = xfs_ail_min(mp->m_ail); if (lip == NULL) { lsn = (xfs_lsn_t)0; } else { @@ -91,10 +91,10 @@ xfs_trans_push_ail( { xfs_log_item_t *lip; - lip = xfs_ail_min(&mp->m_ail); + lip = xfs_ail_min(mp->m_ail); if (lip && !XFS_FORCED_SHUTDOWN(mp)) { - if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0) - xfsaild_wakeup(mp, threshold_lsn); + if (XFS_LSN_CMP(threshold_lsn, mp->m_ail->xa_target) > 0) + xfsaild_wakeup(mp->m_ail, threshold_lsn); } } @@ -111,12 +111,12 @@ xfs_trans_first_push_ail( { xfs_log_item_t *lip; - lip = xfs_ail_min(&mp->m_ail); - *gen = (int)mp->m_ail.xa_gen; + lip = xfs_ail_min(mp->m_ail); + *gen = (int)mp->m_ail->xa_gen; if (lsn == 0) return lip; - list_for_each_entry(lip, &mp->m_ail.xa_ail, li_ail) { + list_for_each_entry(lip, &mp->m_ail->xa_ail, li_ail) { if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) return lip; } @@ -129,17 +129,18 @@ xfs_trans_first_push_ail( */ long xfsaild_push( - xfs_mount_t *mp, + struct xfs_ail *ailp, xfs_lsn_t *last_lsn) { long tout = 1000; /* milliseconds */ xfs_lsn_t last_pushed_lsn = *last_lsn; - xfs_lsn_t target = mp->m_ail.xa_target; + xfs_lsn_t target = ailp->xa_target; xfs_lsn_t lsn; xfs_log_item_t *lip; int gen; int restarts; int flush_log, count, stuck; + xfs_mount_t *mp = ailp->xa_mount; #define XFS_TRANS_PUSH_AIL_RESTARTS 10 @@ -331,7 +332,7 @@ xfs_trans_unlocked_item( * the call to xfs_log_move_tail() doesn't do anything if there's * not enough free space to wake people up so we're safe calling it. 
*/ - min_lip = xfs_ail_min(&mp->m_ail); + min_lip = xfs_ail_min(mp->m_ail); if (min_lip == lip) xfs_log_move_tail(mp, 1); @@ -362,10 +363,10 @@ xfs_trans_update_ail( xfs_log_item_t *dlip=NULL; xfs_log_item_t *mlip; /* ptr to minimum lip */ - mlip = xfs_ail_min(&mp->m_ail); + mlip = xfs_ail_min(mp->m_ail); if (lip->li_flags & XFS_LI_IN_AIL) { - dlip = xfs_ail_delete(&mp->m_ail, lip); + dlip = xfs_ail_delete(mp->m_ail, lip); ASSERT(dlip == lip); } else { lip->li_flags |= XFS_LI_IN_AIL; @@ -373,11 +374,11 @@ xfs_trans_update_ail( lip->li_lsn = lsn; - xfs_ail_insert(&mp->m_ail, lip); - mp->m_ail.xa_gen++; + xfs_ail_insert(mp->m_ail, lip); + mp->m_ail->xa_gen++; if (mlip == dlip) { - mlip = xfs_ail_min(&mp->m_ail); + mlip = xfs_ail_min(mp->m_ail); spin_unlock(&mp->m_ail_lock); xfs_log_move_tail(mp, mlip->li_lsn); } else { @@ -411,17 +412,17 @@ xfs_trans_delete_ail( xfs_log_item_t *mlip; if (lip->li_flags & XFS_LI_IN_AIL) { - mlip = xfs_ail_min(&mp->m_ail); - dlip = xfs_ail_delete(&mp->m_ail, lip); + mlip = xfs_ail_min(mp->m_ail); + dlip = xfs_ail_delete(mp->m_ail, lip); ASSERT(dlip == lip); lip->li_flags &= ~XFS_LI_IN_AIL; lip->li_lsn = 0; - mp->m_ail.xa_gen++; + mp->m_ail->xa_gen++; if (mlip == dlip) { - mlip = xfs_ail_min(&mp->m_ail); + mlip = xfs_ail_min(mp->m_ail); spin_unlock(&mp->m_ail_lock); xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); } else { @@ -459,8 +460,8 @@ xfs_trans_first_ail( { xfs_log_item_t *lip; - lip = xfs_ail_min(&mp->m_ail); - *gen = (int)mp->m_ail.xa_gen; + lip = xfs_ail_min(mp->m_ail); + *gen = (int)mp->m_ail->xa_gen; return lip; } @@ -482,11 +483,11 @@ xfs_trans_next_ail( xfs_log_item_t *nlip; ASSERT(mp && lip && gen); - if (mp->m_ail.xa_gen == *gen) { - nlip = xfs_ail_next(&mp->m_ail, lip); + if (mp->m_ail->xa_gen == *gen) { + nlip = xfs_ail_next(mp->m_ail, lip); } else { - nlip = xfs_ail_min(&mp->m_ail); - *gen = (int)mp->m_ail.xa_gen; + nlip = xfs_ail_min(mp->m_ail); + *gen = (int)mp->m_ail->xa_gen; if (restarts != NULL) { XFS_STATS_INC(xs_push_ail_restarts); (*restarts)++; @@ -515,15 +516,25 @@ int xfs_trans_ail_init( xfs_mount_t *mp) { - INIT_LIST_HEAD(&mp->m_ail.xa_ail); - return xfsaild_start(mp); + struct xfs_ail *ailp; + + ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); + if (!ailp) + return ENOMEM; + + ailp->xa_mount = mp; + INIT_LIST_HEAD(&ailp->xa_ail); + return xfsaild_start(ailp); } void xfs_trans_ail_destroy( xfs_mount_t *mp) { - xfsaild_stop(mp); + struct xfs_ail *ailp = mp->m_ail; + + xfsaild_stop(ailp); + kmem_free(ailp); } /* @@ -534,7 +545,7 @@ xfs_trans_ail_destroy( */ STATIC void xfs_ail_insert( - xfs_ail_t *ailp, + struct xfs_ail *ailp, xfs_log_item_t *lip) /* ARGSUSED */ { @@ -568,7 +579,7 @@ xfs_ail_insert( /*ARGSUSED*/ STATIC xfs_log_item_t * xfs_ail_delete( - xfs_ail_t *ailp, + struct xfs_ail *ailp, xfs_log_item_t *lip) /* ARGSUSED */ { @@ -585,7 +596,7 @@ xfs_ail_delete( */ STATIC xfs_log_item_t * xfs_ail_min( - xfs_ail_t *ailp) + struct xfs_ail *ailp) /* ARGSUSED */ { if (list_empty(&ailp->xa_ail)) @@ -601,7 +612,7 @@ xfs_ail_min( */ STATIC xfs_log_item_t * xfs_ail_next( - xfs_ail_t *ailp, + struct xfs_ail *ailp, xfs_log_item_t *lip) /* ARGSUSED */ { @@ -617,7 +628,7 @@ xfs_ail_next( */ STATIC void xfs_ail_check( - xfs_ail_t *ailp, + struct xfs_ail *ailp, xfs_log_item_t *lip) { xfs_log_item_t *prev_lip; diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 3c748c4..98317fd 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -56,13 +56,20 @@ struct xfs_log_item *xfs_trans_first_ail(struct 
xfs_mount *, int *); struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *, struct xfs_log_item *, int *, int *); - /* * AIL push thread support */ -long xfsaild_push(struct xfs_mount *, xfs_lsn_t *); -void xfsaild_wakeup(struct xfs_mount *, xfs_lsn_t); -int xfsaild_start(struct xfs_mount *); -void xfsaild_stop(struct xfs_mount *); +struct xfs_ail { + struct xfs_mount *xa_mount; + struct list_head xa_ail; + uint xa_gen; + struct task_struct *xa_task; + xfs_lsn_t xa_target; +}; + +long xfsaild_push(struct xfs_ail *, xfs_lsn_t *); +void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t); +int xfsaild_start(struct xfs_ail *); +void xfsaild_stop(struct xfs_ail *); #endif /* __XFS_TRANS_PRIV_H__ */ -- cgit v0.10.2 From 27d8d5fe0ef9daeaafbdd32b14b32a2211930062 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:38:39 +1100 Subject: [XFS] Use a cursor for AIL traversal. To replace the current generation number ensuring sanity of the AIL traversal, replace it with an external cursor that is linked to the AIL. Basically, we store the next item in the cursor whenever we want to drop the AIL lock to do something to the current item. When we regain the lock. the current item may already be free, so we can't reference it, but the next item in the traversal is already held in the cursor. When we move or delete an object, we search all the active cursors and if there is an item match we clear the cursor(s) that point to the object. This forces the traversal to restart transparently. We don't invalidate the cursor on insert because the cursor still points to a valid item. If the intem is inserted between the current item and the cursor it does not matter; the traversal is considered to be past the insertion point so it will be picked up in the next traversal. Hence traversal restarts pretty much disappear altogether with this method of traversal, which should substantially reduce the overhead of pushing on a busy AIL. 
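The invalidation mechanism is just a tagged pointer; a self-contained sketch of the idea (names are illustrative and the cursor list handling is omitted):

#include <stdint.h>

struct cursor {
	void	*next;	/* next item to visit, low bit set when invalidated */
};

/* called when the item a cursor points at is removed from the list */
static inline void cursor_invalidate(struct cursor *cur)
{
	cur->next = (void *)((uintptr_t)cur->next | 1);
}

/* a tagged pointer tells the traversal to restart from the list head */
static inline int cursor_needs_restart(const struct cursor *cur)
{
	return ((uintptr_t)cur->next & 1) != 0;
}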
Version 2 o add restart logic o comment cursor interface o minor cleanups SGI-PV: 988143 SGI-Modid: xfs-linux-melb:xfs-kern:32347a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 0b02c64..4184085 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -900,7 +900,7 @@ xfs_log_move_tail(xfs_mount_t *mp, int xfs_log_need_covered(xfs_mount_t *mp) { - int needed = 0, gen; + int needed = 0; xlog_t *log = mp->m_log; if (!xfs_fs_writable(mp)) @@ -909,7 +909,7 @@ xfs_log_need_covered(xfs_mount_t *mp) spin_lock(&log->l_icloglock); if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || (log->l_covered_state == XLOG_STATE_COVER_NEED2)) - && !xfs_trans_first_ail(mp, &gen) + && !xfs_trans_first_ail(mp, NULL) && xlog_iclogs_empty(log)) { if (log->l_covered_state == XLOG_STATE_COVER_NEED) log->l_covered_state = XLOG_STATE_COVER_DONE; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 199c8ea..37ba489 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -54,10 +54,8 @@ STATIC void xlog_recover_insert_item_backq(xlog_recover_item_t **q, xlog_recover_item_t *item); #if defined(DEBUG) STATIC void xlog_recover_check_summary(xlog_t *); -STATIC void xlog_recover_check_ail(xfs_mount_t *, xfs_log_item_t *, int); #else #define xlog_recover_check_summary(log) -#define xlog_recover_check_ail(mp, lip, gen) #endif @@ -2710,8 +2708,8 @@ xlog_recover_do_efd_trans( xfs_efd_log_format_t *efd_formatp; xfs_efi_log_item_t *efip = NULL; xfs_log_item_t *lip; - int gen; __uint64_t efi_id; + struct xfs_ail_cursor cur; if (pass == XLOG_RECOVER_PASS1) { return; @@ -2730,7 +2728,8 @@ xlog_recover_do_efd_trans( */ mp = log->l_mp; spin_lock(&mp->m_ail_lock); - lip = xfs_trans_first_ail(mp, &gen); + xfs_trans_ail_cursor_init(mp->m_ail, &cur); + lip = xfs_trans_first_ail(mp, &cur); while (lip != NULL) { if (lip->li_type == XFS_LI_EFI) { efip = (xfs_efi_log_item_t *)lip; @@ -2741,11 +2740,13 @@ xlog_recover_do_efd_trans( */ xfs_trans_delete_ail(mp, lip); xfs_efi_item_free(efip); - return; + spin_lock(&mp->m_ail_lock); + break; } } - lip = xfs_trans_next_ail(mp, lip, &gen, NULL); + lip = xfs_trans_next_ail(mp, &cur); } + xfs_trans_ail_cursor_done(mp->m_ail, &cur); spin_unlock(&mp->m_ail_lock); } @@ -3030,33 +3031,6 @@ abort_error: } /* - * Verify that once we've encountered something other than an EFI - * in the AIL that there are no more EFIs in the AIL. - */ -#if defined(DEBUG) -STATIC void -xlog_recover_check_ail( - xfs_mount_t *mp, - xfs_log_item_t *lip, - int gen) -{ - int orig_gen = gen; - - do { - ASSERT(lip->li_type != XFS_LI_EFI); - lip = xfs_trans_next_ail(mp, lip, &gen, NULL); - /* - * The check will be bogus if we restart from the - * beginning of the AIL, so ASSERT that we don't. - * We never should since we're holding the AIL lock - * the entire time. - */ - ASSERT(gen == orig_gen); - } while (lip != NULL); -} -#endif /* DEBUG */ - -/* * When this is called, all of the EFIs which did not have * corresponding EFDs should be in the AIL. What we do now * is free the extents associated with each one. 
@@ -3080,20 +3054,25 @@ xlog_recover_process_efis( { xfs_log_item_t *lip; xfs_efi_log_item_t *efip; - int gen; xfs_mount_t *mp; int error = 0; + struct xfs_ail_cursor cur; mp = log->l_mp; spin_lock(&mp->m_ail_lock); - lip = xfs_trans_first_ail(mp, &gen); + xfs_trans_ail_cursor_init(mp->m_ail, &cur); + lip = xfs_trans_first_ail(mp, &cur); while (lip != NULL) { /* * We're done when we see something other than an EFI. + * There should be no EFIs left in the AIL now. */ if (lip->li_type != XFS_LI_EFI) { - xlog_recover_check_ail(mp, lip, gen); +#ifdef DEBUG + for (; lip; lip = xfs_trans_next_ail(mp, &cur)) + ASSERT(lip->li_type != XFS_LI_EFI); +#endif break; } @@ -3102,17 +3081,19 @@ xlog_recover_process_efis( */ efip = (xfs_efi_log_item_t *)lip; if (efip->efi_flags & XFS_EFI_RECOVERED) { - lip = xfs_trans_next_ail(mp, lip, &gen, NULL); + lip = xfs_trans_next_ail(mp, &cur); continue; } spin_unlock(&mp->m_ail_lock); error = xlog_recover_process_efi(mp, efip); - if (error) - return error; spin_lock(&mp->m_ail_lock); - lip = xfs_trans_next_ail(mp, lip, &gen, NULL); + if (error) + goto out; + lip = xfs_trans_next_ail(mp, &cur); } +out: + xfs_trans_ail_cursor_done(mp->m_ail, &cur); spin_unlock(&mp->m_ail_lock); return error; } diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index db72b52..7b8bfcf 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -99,26 +99,137 @@ xfs_trans_push_ail( } /* + * AIL traversal cursor initialisation. + * + * The cursor keeps track of where our current traversal is up + * to by tracking the next ƣtem in the list for us. However, for + * this to be safe, removing an object from the AIL needs to invalidate + * any cursor that points to it. hence the traversal cursor needs to + * be linked to the struct xfs_ail so that deletion can search all the + * active cursors for invalidation. + * + * We don't link the push cursor because it is embedded in the struct + * xfs_ail and hence easily findable. + */ +void +xfs_trans_ail_cursor_init( + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur) +{ + cur->item = NULL; + if (cur == &ailp->xa_cursors) + return; + + cur->next = ailp->xa_cursors.next; + ailp->xa_cursors.next = cur; +} + +/* + * Set the cursor to the next item, because when we look + * up the cursor the current item may have been freed. + */ +STATIC void +xfs_trans_ail_cursor_set( + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + struct xfs_log_item *lip) +{ + if (lip) + cur->item = xfs_ail_next(ailp, lip); +} + +/* + * Get the next item in the traversal and advance the cursor. + * If the cursor was invalidated (inidicated by a lip of 1), + * restart the traversal. + */ +STATIC struct xfs_log_item * +xfs_trans_ail_cursor_next( + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur) +{ + struct xfs_log_item *lip = cur->item; + + if ((__psint_t)lip & 1) + lip = xfs_ail_min(ailp); + xfs_trans_ail_cursor_set(ailp, cur, lip); + return lip; +} + +/* + * Invalidate any cursor that is pointing to this item. This is + * called when an item is removed from the AIL. Any cursor pointing + * to this object is now invalid and the traversal needs to be + * terminated so it doesn't reference a freed object. We set the + * cursor item to a value of 1 so we can distinguish between an + * invalidation and the end of the list when getting the next item + * from the cursor. 
+ */ +STATIC void +xfs_trans_ail_cursor_clear( + struct xfs_ail *ailp, + struct xfs_log_item *lip) +{ + struct xfs_ail_cursor *cur; + + /* need to search all cursors */ + for (cur = &ailp->xa_cursors; cur; cur = cur->next) { + if (cur->item == lip) + cur->item = (struct xfs_log_item *) + ((__psint_t)cur->item | 1); + } +} + +/* + * Now that the traversal is complete, we need to remove the cursor + * from the list of traversing cursors. Avoid removing the embedded + * push cursor, but use the fact it is alway present to make the + * list deletion simple. + */ +void +xfs_trans_ail_cursor_done( + struct xfs_ail *ailp, + struct xfs_ail_cursor *done) +{ + struct xfs_ail_cursor *prev = NULL; + struct xfs_ail_cursor *cur; + + done->item = NULL; + if (done == &ailp->xa_cursors) + return; + prev = &ailp->xa_cursors; + for (cur = prev->next; cur; prev = cur, cur = prev->next) { + if (cur == done) { + prev->next = cur->next; + break; + } + } + ASSERT(cur); +} + +/* * Return the item in the AIL with the current lsn. * Return the current tree generation number for use * in calls to xfs_trans_next_ail(). */ STATIC xfs_log_item_t * xfs_trans_first_push_ail( - xfs_mount_t *mp, - int *gen, - xfs_lsn_t lsn) + struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + xfs_lsn_t lsn) { - xfs_log_item_t *lip; + xfs_log_item_t *lip; - lip = xfs_ail_min(mp->m_ail); - *gen = (int)mp->m_ail->xa_gen; + lip = xfs_ail_min(ailp); + xfs_trans_ail_cursor_set(ailp, cur, lip); if (lsn == 0) return lip; - list_for_each_entry(lip, &mp->m_ail->xa_ail, li_ail) { - if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) + list_for_each_entry(lip, &ailp->xa_ail, li_ail) { + if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) { + xfs_trans_ail_cursor_set(ailp, cur, lip); return lip; + } } return NULL; @@ -137,22 +248,21 @@ xfsaild_push( xfs_lsn_t target = ailp->xa_target; xfs_lsn_t lsn; xfs_log_item_t *lip; - int gen; - int restarts; int flush_log, count, stuck; xfs_mount_t *mp = ailp->xa_mount; - -#define XFS_TRANS_PUSH_AIL_RESTARTS 10 + struct xfs_ail_cursor *cur = &ailp->xa_cursors; spin_lock(&mp->m_ail_lock); - lip = xfs_trans_first_push_ail(mp, &gen, *last_lsn); + xfs_trans_ail_cursor_init(ailp, cur); + lip = xfs_trans_first_push_ail(ailp, cur, *last_lsn); if (!lip || XFS_FORCED_SHUTDOWN(mp)) { /* * AIL is empty or our push has reached the end. */ + xfs_trans_ail_cursor_done(ailp, cur); spin_unlock(&mp->m_ail_lock); last_pushed_lsn = 0; - goto out; + return tout; } XFS_STATS_INC(xs_push_ail); @@ -170,7 +280,7 @@ xfsaild_push( */ tout = 10; lsn = lip->li_lsn; - flush_log = stuck = count = restarts = 0; + flush_log = stuck = count = 0; while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) { int lock_result; /* @@ -245,13 +355,12 @@ xfsaild_push( if (stuck > 100) break; - lip = xfs_trans_next_ail(mp, lip, &gen, &restarts); + lip = xfs_trans_ail_cursor_next(ailp, cur); if (lip == NULL) break; - if (restarts > XFS_TRANS_PUSH_AIL_RESTARTS) - break; lsn = lip->li_lsn; } + xfs_trans_ail_cursor_done(ailp, cur); spin_unlock(&mp->m_ail_lock); if (flush_log) { @@ -275,8 +384,7 @@ xfsaild_push( */ tout += 20; last_pushed_lsn = 0; - } else if ((restarts > XFS_TRANS_PUSH_AIL_RESTARTS) || - ((stuck * 100) / count > 90)) { + } else if ((stuck * 100) / count > 90) { /* * Either there is a lot of contention on the AIL or we * are stuck due to operations in progress. 
"Stuck" in this @@ -288,7 +396,6 @@ xfsaild_push( */ tout += 10; } -out: *last_lsn = last_pushed_lsn; return tout; } /* xfsaild_push */ @@ -348,9 +455,6 @@ xfs_trans_unlocked_item( * we move in the AIL is the minimum one, update the tail lsn in the * log manager. * - * Increment the AIL's generation count to indicate that the tree - * has changed. - * * This function must be called with the AIL lock held. The lock * is dropped before returning. */ @@ -368,14 +472,13 @@ xfs_trans_update_ail( if (lip->li_flags & XFS_LI_IN_AIL) { dlip = xfs_ail_delete(mp->m_ail, lip); ASSERT(dlip == lip); + xfs_trans_ail_cursor_clear(mp->m_ail, dlip); } else { lip->li_flags |= XFS_LI_IN_AIL; } lip->li_lsn = lsn; - xfs_ail_insert(mp->m_ail, lip); - mp->m_ail->xa_gen++; if (mlip == dlip) { mlip = xfs_ail_min(mp->m_ail); @@ -415,11 +518,11 @@ xfs_trans_delete_ail( mlip = xfs_ail_min(mp->m_ail); dlip = xfs_ail_delete(mp->m_ail, lip); ASSERT(dlip == lip); + xfs_trans_ail_cursor_clear(mp->m_ail, dlip); lip->li_flags &= ~XFS_LI_IN_AIL; lip->li_lsn = 0; - mp->m_ail->xa_gen++; if (mlip == dlip) { mlip = xfs_ail_min(mp->m_ail); @@ -455,46 +558,29 @@ xfs_trans_delete_ail( */ xfs_log_item_t * xfs_trans_first_ail( - xfs_mount_t *mp, - int *gen) + struct xfs_mount *mp, + struct xfs_ail_cursor *cur) { - xfs_log_item_t *lip; + xfs_log_item_t *lip; + struct xfs_ail *ailp = mp->m_ail; - lip = xfs_ail_min(mp->m_ail); - *gen = (int)mp->m_ail->xa_gen; + lip = xfs_ail_min(ailp); + xfs_trans_ail_cursor_set(ailp, cur, lip); return lip; } /* - * If the generation count of the tree has not changed since the - * caller last took something from the AIL, then return the elmt - * in the tree which follows the one given. If the count has changed, - * then return the minimum elmt of the AIL and bump the restarts counter - * if one is given. + * Grab the next item in the AIL from the cursor passed in. */ xfs_log_item_t * xfs_trans_next_ail( - xfs_mount_t *mp, - xfs_log_item_t *lip, - int *gen, - int *restarts) + struct xfs_mount *mp, + struct xfs_ail_cursor *cur) { - xfs_log_item_t *nlip; - - ASSERT(mp && lip && gen); - if (mp->m_ail->xa_gen == *gen) { - nlip = xfs_ail_next(mp->m_ail, lip); - } else { - nlip = xfs_ail_min(mp->m_ail); - *gen = (int)mp->m_ail->xa_gen; - if (restarts != NULL) { - XFS_STATS_INC(xs_push_ail_restarts); - (*restarts)++; - } - } + struct xfs_ail *ailp = mp->m_ail; - return (nlip); + return xfs_trans_ail_cursor_next(ailp, cur); } @@ -517,6 +603,7 @@ xfs_trans_ail_init( xfs_mount_t *mp) { struct xfs_ail *ailp; + int error; ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); if (!ailp) @@ -524,7 +611,15 @@ xfs_trans_ail_init( ailp->xa_mount = mp; INIT_LIST_HEAD(&ailp->xa_ail); - return xfsaild_start(ailp); + error = xfsaild_start(ailp); + if (error) + goto out_free_ailp; + mp->m_ail = ailp; + return 0; + +out_free_ailp: + kmem_free(ailp); + return error; } void diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 98317fd..f114d38 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -44,20 +44,33 @@ xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, xfs_extlen_t idx); /* - * From xfs_trans_ail.c + * AIL traversal cursor. + * + * Rather than using a generation number for detecting changes in the ail, use + * a cursor that is protected by the ail lock. The aild cursor exists in the + * struct xfs_ail, but other traversals can declare it on the stack and link it + * to the ail list. 
+ * + * When an object is deleted from or moved int the AIL, the cursor list is + * searched to see if the object is a designated cursor item. If it is, it is + * deleted from the cursor so that the next time the cursor is used traversal + * will return to the start. + * + * This means a traversal colliding with a removal will cause a restart of the + * list scan, rather than any insertion or deletion anywhere in the list. The + * low bit of the item pointer is set if the cursor has been invalidated so + * that we can tell the difference between invalidation and reaching the end + * of the list to trigger traversal restarts. */ -void xfs_trans_update_ail(struct xfs_mount *mp, - struct xfs_log_item *lip, xfs_lsn_t lsn) - __releases(mp->m_ail_lock); -void xfs_trans_delete_ail(struct xfs_mount *mp, - struct xfs_log_item *lip) - __releases(mp->m_ail_lock); -struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *, int *); -struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *, - struct xfs_log_item *, int *, int *); +struct xfs_ail_cursor { + struct xfs_ail_cursor *next; + struct xfs_log_item *item; +}; /* - * AIL push thread support + * Private AIL structures. + * + * Eventually we need to drive the locking in here as well. */ struct xfs_ail { struct xfs_mount *xa_mount; @@ -65,8 +78,28 @@ struct xfs_ail { uint xa_gen; struct task_struct *xa_task; xfs_lsn_t xa_target; + struct xfs_ail_cursor xa_cursors; }; +/* + * From xfs_trans_ail.c + */ +void xfs_trans_update_ail(struct xfs_mount *mp, + struct xfs_log_item *lip, xfs_lsn_t lsn) + __releases(mp->m_ail_lock); +void xfs_trans_delete_ail(struct xfs_mount *mp, + struct xfs_log_item *lip) + __releases(mp->m_ail_lock); +struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *mp, + struct xfs_ail_cursor *cur); +struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *mp, + struct xfs_ail_cursor *cur); + +void xfs_trans_ail_cursor_init(struct xfs_ail *ailp, + struct xfs_ail_cursor *cur); +void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, + struct xfs_ail_cursor *cur); + long xfsaild_push(struct xfs_ail *, xfs_lsn_t *); void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t); int xfsaild_start(struct xfs_ail *); -- cgit v0.10.2 From 5b00f14fbd60d42441f78c0e414a539cbfba5cb9 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:39:00 +1100 Subject: [XFS] move the AIl traversal over to a consistent interface With the new cursor interface, it makes sense to make all the traversing code use the cursor interface and make the old one go away. This means more of the AIL interfacing is done by passing struct xfs_ail pointers around the place instead of struct xfs_mount pointers. We can replace the use of xfs_trans_first_ail() in xfs_log_need_covered() as it is only checking if the AIL is empty. We can do that with a call to xfs_trans_ail_tail() instead, where a zero LSN returned indicates and empty AIL... 
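For the covering check that means no cursor is needed at all; something along these lines is enough (a sketch built on the xfs_trans_ail_tail() this patch introduces):

/* true when the AIL holds no log items */
static inline int xfs_ail_empty_sketch(struct xfs_ail *ailp)
{
	return xfs_trans_ail_tail(ailp) == 0;
}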
SGI-PV: 988143 SGI-Modid: xfs-linux-melb:xfs-kern:32348a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 4184085..31fbb2e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -909,7 +909,7 @@ xfs_log_need_covered(xfs_mount_t *mp) spin_lock(&log->l_icloglock); if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || (log->l_covered_state == XLOG_STATE_COVER_NEED2)) - && !xfs_trans_first_ail(mp, NULL) + && !xfs_trans_ail_tail(mp->m_ail) && xlog_iclogs_empty(log)) { if (log->l_covered_state == XLOG_STATE_COVER_NEED) log->l_covered_state = XLOG_STATE_COVER_DONE; @@ -946,7 +946,7 @@ xlog_assign_tail_lsn(xfs_mount_t *mp) xfs_lsn_t tail_lsn; xlog_t *log = mp->m_log; - tail_lsn = xfs_trans_tail_ail(mp); + tail_lsn = xfs_trans_ail_tail(mp->m_ail); spin_lock(&log->l_grant_lock); if (tail_lsn != 0) { log->l_tail_lsn = tail_lsn; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 37ba489..45ea0d9 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2728,8 +2728,7 @@ xlog_recover_do_efd_trans( */ mp = log->l_mp; spin_lock(&mp->m_ail_lock); - xfs_trans_ail_cursor_init(mp->m_ail, &cur); - lip = xfs_trans_first_ail(mp, &cur); + lip = xfs_trans_ail_cursor_first(mp->m_ail, &cur, 0); while (lip != NULL) { if (lip->li_type == XFS_LI_EFI) { efip = (xfs_efi_log_item_t *)lip; @@ -2744,7 +2743,7 @@ xlog_recover_do_efd_trans( break; } } - lip = xfs_trans_next_ail(mp, &cur); + lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur); } xfs_trans_ail_cursor_done(mp->m_ail, &cur); spin_unlock(&mp->m_ail_lock); @@ -3061,8 +3060,7 @@ xlog_recover_process_efis( mp = log->l_mp; spin_lock(&mp->m_ail_lock); - xfs_trans_ail_cursor_init(mp->m_ail, &cur); - lip = xfs_trans_first_ail(mp, &cur); + lip = xfs_trans_ail_cursor_first(mp->m_ail, &cur, 0); while (lip != NULL) { /* * We're done when we see something other than an EFI. @@ -3070,7 +3068,8 @@ xlog_recover_process_efis( */ if (lip->li_type != XFS_LI_EFI) { #ifdef DEBUG - for (; lip; lip = xfs_trans_next_ail(mp, &cur)) + for (; lip; + lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur)) ASSERT(lip->li_type != XFS_LI_EFI); #endif break; @@ -3081,7 +3080,7 @@ xlog_recover_process_efis( */ efip = (xfs_efi_log_item_t *)lip; if (efip->efi_flags & XFS_EFI_RECOVERED) { - lip = xfs_trans_next_ail(mp, &cur); + lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur); continue; } @@ -3090,7 +3089,7 @@ xlog_recover_process_efis( spin_lock(&mp->m_ail_lock); if (error) goto out; - lip = xfs_trans_next_ail(mp, &cur); + lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur); } out: xfs_trans_ail_cursor_done(mp->m_ail, &cur); diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 1d89d50..ae2ae3e 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -971,7 +971,6 @@ void xfs_trans_cancel(xfs_trans_t *, int); int xfs_trans_ail_init(struct xfs_mount *); void xfs_trans_ail_destroy(struct xfs_mount *); void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); -xfs_lsn_t xfs_trans_tail_ail(struct xfs_mount *); void xfs_trans_unlocked_item(struct xfs_mount *, xfs_log_item_t *); xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 7b8bfcf..286934d 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -50,20 +50,20 @@ STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *); * lsn of the last item in the AIL. 
*/ xfs_lsn_t -xfs_trans_tail_ail( - xfs_mount_t *mp) +xfs_trans_ail_tail( + struct xfs_ail *ailp) { xfs_lsn_t lsn; xfs_log_item_t *lip; - spin_lock(&mp->m_ail_lock); - lip = xfs_ail_min(mp->m_ail); + spin_lock(&ailp->xa_mount->m_ail_lock); + lip = xfs_ail_min(ailp); if (lip == NULL) { lsn = (xfs_lsn_t)0; } else { lsn = lip->li_lsn; } - spin_unlock(&mp->m_ail_lock); + spin_unlock(&ailp->xa_mount->m_ail_lock); return lsn; } @@ -111,7 +111,7 @@ xfs_trans_push_ail( * We don't link the push cursor because it is embedded in the struct * xfs_ail and hence easily findable. */ -void +STATIC void xfs_trans_ail_cursor_init( struct xfs_ail *ailp, struct xfs_ail_cursor *cur) @@ -143,7 +143,7 @@ xfs_trans_ail_cursor_set( * If the cursor was invalidated (inidicated by a lip of 1), * restart the traversal. */ -STATIC struct xfs_log_item * +struct xfs_log_item * xfs_trans_ail_cursor_next( struct xfs_ail *ailp, struct xfs_ail_cursor *cur) @@ -157,30 +157,6 @@ xfs_trans_ail_cursor_next( } /* - * Invalidate any cursor that is pointing to this item. This is - * called when an item is removed from the AIL. Any cursor pointing - * to this object is now invalid and the traversal needs to be - * terminated so it doesn't reference a freed object. We set the - * cursor item to a value of 1 so we can distinguish between an - * invalidation and the end of the list when getting the next item - * from the cursor. - */ -STATIC void -xfs_trans_ail_cursor_clear( - struct xfs_ail *ailp, - struct xfs_log_item *lip) -{ - struct xfs_ail_cursor *cur; - - /* need to search all cursors */ - for (cur = &ailp->xa_cursors; cur; cur = cur->next) { - if (cur->item == lip) - cur->item = (struct xfs_log_item *) - ((__psint_t)cur->item | 1); - } -} - -/* * Now that the traversal is complete, we need to remove the cursor * from the list of traversing cursors. Avoid removing the embedded * push cursor, but use the fact it is alway present to make the @@ -208,31 +184,55 @@ xfs_trans_ail_cursor_done( } /* + * Invalidate any cursor that is pointing to this item. This is + * called when an item is removed from the AIL. Any cursor pointing + * to this object is now invalid and the traversal needs to be + * terminated so it doesn't reference a freed object. We set the + * cursor item to a value of 1 so we can distinguish between an + * invalidation and the end of the list when getting the next item + * from the cursor. + */ +STATIC void +xfs_trans_ail_cursor_clear( + struct xfs_ail *ailp, + struct xfs_log_item *lip) +{ + struct xfs_ail_cursor *cur; + + /* need to search all cursors */ + for (cur = &ailp->xa_cursors; cur; cur = cur->next) { + if (cur->item == lip) + cur->item = (struct xfs_log_item *) + ((__psint_t)cur->item | 1); + } +} + +/* * Return the item in the AIL with the current lsn. * Return the current tree generation number for use * in calls to xfs_trans_next_ail(). 
*/ -STATIC xfs_log_item_t * -xfs_trans_first_push_ail( +xfs_log_item_t * +xfs_trans_ail_cursor_first( struct xfs_ail *ailp, struct xfs_ail_cursor *cur, xfs_lsn_t lsn) { xfs_log_item_t *lip; + xfs_trans_ail_cursor_init(ailp, cur); lip = xfs_ail_min(ailp); - xfs_trans_ail_cursor_set(ailp, cur, lip); if (lsn == 0) - return lip; + goto out; list_for_each_entry(lip, &ailp->xa_ail, li_ail) { - if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) { - xfs_trans_ail_cursor_set(ailp, cur, lip); - return lip; - } + if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) + break; } - - return NULL; + lip = NULL; +out: + xfs_trans_ail_cursor_set(ailp, cur, lip); + return lip; } /* @@ -254,7 +254,7 @@ xfsaild_push( spin_lock(&mp->m_ail_lock); xfs_trans_ail_cursor_init(ailp, cur); - lip = xfs_trans_first_push_ail(ailp, cur, *last_lsn); + lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn); if (!lip || XFS_FORCED_SHUTDOWN(mp)) { /* * AIL is empty or our push has reached the end. @@ -552,39 +552,6 @@ xfs_trans_delete_ail( /* - * Return the item in the AIL with the smallest lsn. - * Return the current tree generation number for use - * in calls to xfs_trans_next_ail(). - */ -xfs_log_item_t * -xfs_trans_first_ail( - struct xfs_mount *mp, - struct xfs_ail_cursor *cur) -{ - xfs_log_item_t *lip; - struct xfs_ail *ailp = mp->m_ail; - - lip = xfs_ail_min(ailp); - xfs_trans_ail_cursor_set(ailp, cur, lip); - - return lip; -} - -/* - * Grab the next item in the AIL from the cursor passed in. - */ -xfs_log_item_t * -xfs_trans_next_ail( - struct xfs_mount *mp, - struct xfs_ail_cursor *cur) -{ - struct xfs_ail *ailp = mp->m_ail; - - return xfs_trans_ail_cursor_next(ailp, cur); -} - - -/* * The active item list (AIL) is a doubly linked list of log * items sorted by ascending lsn. The base of the list is * a forw/back pointer pair embedded in the xfs mount structure. diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index f114d38..aa58535 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -90,14 +90,15 @@ void xfs_trans_update_ail(struct xfs_mount *mp, void xfs_trans_delete_ail(struct xfs_mount *mp, struct xfs_log_item *lip) __releases(mp->m_ail_lock); -struct xfs_log_item *xfs_trans_first_ail(struct xfs_mount *mp, - struct xfs_ail_cursor *cur); -struct xfs_log_item *xfs_trans_next_ail(struct xfs_mount *mp, - struct xfs_ail_cursor *cur); -void xfs_trans_ail_cursor_init(struct xfs_ail *ailp, +xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp); + +struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, + struct xfs_ail_cursor *cur, + xfs_lsn_t lsn); +struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); -void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, +void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, struct xfs_ail_cursor *cur); long xfsaild_push(struct xfs_ail *, xfs_lsn_t *); -- cgit v0.10.2 From 7b2e2a31f5c23b5f028af8c895137b4c512cc1c8 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:39:12 +1100 Subject: [XFS] Allow 64 bit machines to avoid the AIL lock during flushes When copying lsn's from the log item to the inode or dquot flush lsn, we currently grab the AIL lock. We do this because the LSN is a 64 bit quantity and it needs to be read atomically. The lock is used to guarantee atomicity for 32 bit platforms. Make the LSN copying a small function, and make the function used conditional on BITS_PER_LONG so that 64 bit machines don't need to take the AIL lock in these places. 
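To make the effect concrete, here is one call site before and after, condensed from the xfs_iflush_int() hunk below (this is not new code, just the same change shown side by side):

	/* before: every flush-lsn copy serialised on the AIL lock */
	spin_lock(&mp->m_ail_lock);
	iip->ili_flush_lsn = iip->ili_item.li_lsn;
	spin_unlock(&mp->m_ail_lock);

	/* after: the helper only takes the lock when BITS_PER_LONG != 64 */
	xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
			       &iip->ili_item.li_lsn);

On a 64 bit kernel the copy is a single aligned 64 bit store, so no locking is required; the ASSERT(sizeof(xfs_lsn_t) == 8) kept in both variants documents that assumption.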
SGI-PV: 988143 SGI-Modid: xfs-linux-melb:xfs-kern:32349a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 1e6bf39..59c1081 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1272,10 +1272,8 @@ xfs_qm_dqflush( dqp->dq_flags &= ~(XFS_DQ_DIRTY); mp = dqp->q_mount; - /* lsn is 64 bits */ - spin_lock(&mp->m_ail_lock); - dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn; - spin_unlock(&mp->m_ail_lock); + xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, + &dqp->q_logitem.qli_item.li_lsn); /* * Attach an iodone routine so that we can remove this dquot from the diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4eb629f..2951ffd 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2214,9 +2214,9 @@ xfs_ifree_cluster( iip = (xfs_inode_log_item_t *)lip; ASSERT(iip->ili_logged == 1); lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done; - spin_lock(&mp->m_ail_lock); - iip->ili_flush_lsn = iip->ili_item.li_lsn; - spin_unlock(&mp->m_ail_lock); + xfs_trans_ail_copy_lsn(mp->m_ail, + &iip->ili_flush_lsn, + &iip->ili_item.li_lsn); xfs_iflags_set(iip->ili_inode, XFS_ISTALE); pre_flushed++; } @@ -2237,9 +2237,8 @@ xfs_ifree_cluster( iip->ili_last_fields = iip->ili_format.ilf_fields; iip->ili_format.ilf_fields = 0; iip->ili_logged = 1; - spin_lock(&mp->m_ail_lock); - iip->ili_flush_lsn = iip->ili_item.li_lsn; - spin_unlock(&mp->m_ail_lock); + xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, + &iip->ili_item.li_lsn); xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t*,xfs_log_item_t*)) @@ -3476,10 +3475,8 @@ xfs_iflush_int( iip->ili_format.ilf_fields = 0; iip->ili_logged = 1; - ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */ - spin_lock(&mp->m_ail_lock); - iip->ili_flush_lsn = iip->ili_item.li_lsn; - spin_unlock(&mp->m_ail_lock); + xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, + &iip->ili_item.li_lsn); /* * Attach the function xfs_iflush_done to the inode's diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index aa58535..708cff7 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -106,4 +106,27 @@ void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t); int xfsaild_start(struct xfs_ail *); void xfsaild_stop(struct xfs_ail *); +#if BITS_PER_LONG != 64 +static inline void +xfs_trans_ail_copy_lsn( + struct xfs_ail *ailp, + xfs_lsn_t *dst, + xfs_lsn_t *src) +{ + ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */ + spin_lock(&ailp->xa_mount->m_ail_lock); + *dst = *src; + spin_unlock(&ailp->xa_mount->m_ail_lock); +} +#else +static inline void +xfs_trans_ail_copy_lsn( + struct xfs_ail *ailp, + xfs_lsn_t *dst, + xfs_lsn_t *src) +{ + ASSERT(sizeof(xfs_lsn_t) == 8); + *dst = *src; +} +#endif #endif /* __XFS_TRANS_PRIV_H__ */ -- cgit v0.10.2 From c7e8f268278a292d3823b4352182fa7755a71410 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:39:23 +1100 Subject: [XFS] Move the AIL lock into the struct xfs_ail Bring the ail lock inside the struct xfs_ail. This means the AIL can be entirely manipulated via the struct xfs_ail rather than needing both the struct xfs_mount and the struct xfs_ail. 
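For reference, the private AIL structure as it stands after this patch, reconstructed from the xfs_trans_priv.h hunks in this and the preceding patches (field order is approximate):

struct xfs_ail {
	struct xfs_mount	*xa_mount;
	struct list_head	xa_ail;
	uint			xa_gen;
	struct task_struct	*xa_task;
	xfs_lsn_t		xa_target;
	struct xfs_ail_cursor	xa_cursors;
	spinlock_t		xa_lock;	/* added by this patch */
};

Every call site converted below now takes mp->m_ail->xa_lock where it previously took mp->m_ail_lock; later patches in the series hide even that indirection behind l_ailp and li_ailp back pointers.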
SGI-PV: 988143 SGI-Modid: xfs-linux-melb:xfs-kern:32350a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 59c1081..0d7a62b 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1333,7 +1333,7 @@ xfs_qm_dqflush_done( if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && qip->qli_item.li_lsn == qip->qli_flush_lsn) { - spin_lock(&dqp->q_mount->m_ail_lock); + spin_lock(&dqp->q_mount->m_ail->xa_lock); /* * xfs_trans_delete_ail() drops the AIL lock. */ @@ -1341,7 +1341,7 @@ xfs_qm_dqflush_done( xfs_trans_delete_ail(dqp->q_mount, (xfs_log_item_t*)qip); else - spin_unlock(&dqp->q_mount->m_ail_lock); + spin_unlock(&dqp->q_mount->m_ail->xa_lock); } /* diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 48f0810..0e1fa51 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -555,7 +555,7 @@ xfs_qm_qoffend_logitem_committed( xfs_qoff_logitem_t *qfs; qfs = qfe->qql_start_lip; - spin_lock(&qfs->qql_item.li_mountp->m_ail_lock); + spin_lock(&qfs->qql_item.li_mountp->m_ail->xa_lock); /* * Delete the qoff-start logitem from the AIL. * xfs_trans_delete_ail() drops the AIL lock. diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 002fc26..c557fd6 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -408,7 +408,7 @@ xfs_buf_item_unpin( XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); } else { - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); xfs_buf_item_relse(bp); ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); @@ -1138,7 +1138,7 @@ xfs_buf_iodone( * * Either way, AIL is useless if we're forcing a shutdown. */ - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); /* * xfs_trans_delete_ail() drops the AIL lock. */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 8aa28f7..f1dcd80 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -111,7 +111,7 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) xfs_mount_t *mp; mp = efip->efi_item.li_mountp; - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { /* * xfs_trans_delete_ail() drops the AIL lock. 
@@ -120,7 +120,7 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); } } @@ -138,7 +138,7 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) xfs_log_item_desc_t *lidp; mp = efip->efi_item.li_mountp; - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { /* * free the xaction descriptor pointing to this item @@ -153,7 +153,7 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); } } @@ -352,7 +352,7 @@ xfs_efi_release(xfs_efi_log_item_t *efip, ASSERT(efip->efi_next_extent > 0); ASSERT(efip->efi_flags & XFS_EFI_COMMITTED); - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); ASSERT(efip->efi_next_extent >= nextents); efip->efi_next_extent -= nextents; extents_left = efip->efi_next_extent; @@ -363,7 +363,7 @@ xfs_efi_release(xfs_efi_log_item_t *efip, xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip); xfs_efi_item_free(efip); } else { - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); } } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2951ffd..6d82c23 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2715,11 +2715,11 @@ xfs_idestroy( ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || XFS_FORCED_SHUTDOWN(ip->i_mount)); if (lip->li_flags & XFS_LI_IN_AIL) { - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); if (lip->li_flags & XFS_LI_IN_AIL) xfs_trans_delete_ail(mp, lip); else - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); } xfs_inode_item_destroy(ip); ip->i_itemp = NULL; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 97c7452..291d30a 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -991,7 +991,7 @@ xfs_iflush_done( */ if (iip->ili_logged && (iip->ili_item.li_lsn == iip->ili_flush_lsn)) { - spin_lock(&ip->i_mount->m_ail_lock); + spin_lock(&ip->i_mount->m_ail->xa_lock); if (iip->ili_item.li_lsn == iip->ili_flush_lsn) { /* * xfs_trans_delete_ail() drops the AIL lock. @@ -999,7 +999,7 @@ xfs_iflush_done( xfs_trans_delete_ail(ip->i_mount, (xfs_log_item_t*)iip); } else { - spin_unlock(&ip->i_mount->m_ail_lock); + spin_unlock(&ip->i_mount->m_ail->xa_lock); } } @@ -1038,14 +1038,14 @@ xfs_iflush_abort( mp = ip->i_mount; if (iip) { if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { /* * xfs_trans_delete_ail() drops the AIL lock. */ xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip); } else - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); } iip->ili_logged = 0; /* diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 31fbb2e..a2f7422 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -567,7 +567,6 @@ xfs_log_mount( /* * Initialize the AIL now we have a log. 
*/ - spin_lock_init(&mp->m_ail_lock); error = xfs_trans_ail_init(mp); if (error) { cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 45ea0d9..a484feb 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2681,7 +2681,7 @@ xlog_recover_do_efi_trans( efip->efi_next_extent = efi_formatp->efi_nextents; efip->efi_flags |= XFS_EFI_COMMITTED; - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); /* * xfs_trans_update_ail() drops the AIL lock. */ @@ -2727,7 +2727,7 @@ xlog_recover_do_efd_trans( * in the AIL. */ mp = log->l_mp; - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); lip = xfs_trans_ail_cursor_first(mp->m_ail, &cur, 0); while (lip != NULL) { if (lip->li_type == XFS_LI_EFI) { @@ -2739,14 +2739,14 @@ xlog_recover_do_efd_trans( */ xfs_trans_delete_ail(mp, lip); xfs_efi_item_free(efip); - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); break; } } lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur); } xfs_trans_ail_cursor_done(mp->m_ail, &cur); - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); } /* @@ -3058,7 +3058,7 @@ xlog_recover_process_efis( struct xfs_ail_cursor cur; mp = log->l_mp; - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); lip = xfs_trans_ail_cursor_first(mp->m_ail, &cur, 0); while (lip != NULL) { @@ -3084,16 +3084,16 @@ xlog_recover_process_efis( continue; } - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); error = xlog_recover_process_efi(mp, efip); - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); if (error) goto out; lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur); } out: xfs_trans_ail_cursor_done(mp->m_ail, &cur); - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); return error; } diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 287f900..d3b7525 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -228,7 +228,6 @@ extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); typedef struct xfs_mount { struct super_block *m_super; xfs_tid_t m_tid; /* next unused tid for fs */ - spinlock_t m_ail_lock; /* fs AIL mutex */ struct xfs_ail *m_ail; /* fs active log item list */ xfs_sb_t m_sb; /* copy of fs superblock */ spinlock_t m_sb_lock; /* sb counter lock */ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 4e1c22a..99ba0e2 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1425,7 +1425,7 @@ xfs_trans_chunk_committed( * the test below. */ mp = lip->li_mountp; - spin_lock(&mp->m_ail_lock); + spin_lock(&mp->m_ail->xa_lock); if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) { /* * This will set the item's lsn to item_lsn @@ -1436,7 +1436,7 @@ xfs_trans_chunk_committed( */ xfs_trans_update_ail(mp, lip, item_lsn); } else { - spin_unlock(&mp->m_ail_lock); + spin_unlock(&mp->m_ail->xa_lock); } /* diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 286934d..0cd47a7 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. + * Copyright (c) 2008 Dave Chinner * All Rights Reserved. 
* * This program is free software; you can redistribute it and/or @@ -56,14 +57,14 @@ xfs_trans_ail_tail( xfs_lsn_t lsn; xfs_log_item_t *lip; - spin_lock(&ailp->xa_mount->m_ail_lock); + spin_lock(&ailp->xa_lock); lip = xfs_ail_min(ailp); if (lip == NULL) { lsn = (xfs_lsn_t)0; } else { lsn = lip->li_lsn; } - spin_unlock(&ailp->xa_mount->m_ail_lock); + spin_unlock(&ailp->xa_lock); return lsn; } @@ -252,7 +253,7 @@ xfsaild_push( xfs_mount_t *mp = ailp->xa_mount; struct xfs_ail_cursor *cur = &ailp->xa_cursors; - spin_lock(&mp->m_ail_lock); + spin_lock(&ailp->xa_lock); xfs_trans_ail_cursor_init(ailp, cur); lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn); if (!lip || XFS_FORCED_SHUTDOWN(mp)) { @@ -260,7 +261,7 @@ xfsaild_push( * AIL is empty or our push has reached the end. */ xfs_trans_ail_cursor_done(ailp, cur); - spin_unlock(&mp->m_ail_lock); + spin_unlock(&ailp->xa_lock); last_pushed_lsn = 0; return tout; } @@ -295,7 +296,7 @@ xfsaild_push( * skip to the next item in the list. */ lock_result = IOP_TRYLOCK(lip); - spin_unlock(&mp->m_ail_lock); + spin_unlock(&ailp->xa_lock); switch (lock_result) { case XFS_ITEM_SUCCESS: XFS_STATS_INC(xs_push_ail_success); @@ -332,7 +333,7 @@ xfsaild_push( break; } - spin_lock(&mp->m_ail_lock); + spin_lock(&ailp->xa_lock); /* should we bother continuing? */ if (XFS_FORCED_SHUTDOWN(mp)) break; @@ -361,7 +362,7 @@ xfsaild_push( lsn = lip->li_lsn; } xfs_trans_ail_cursor_done(ailp, cur); - spin_unlock(&mp->m_ail_lock); + spin_unlock(&ailp->xa_lock); if (flush_log) { /* @@ -462,30 +463,31 @@ void xfs_trans_update_ail( xfs_mount_t *mp, xfs_log_item_t *lip, - xfs_lsn_t lsn) __releases(mp->m_ail_lock) + xfs_lsn_t lsn) __releases(ailp->xa_lock) { - xfs_log_item_t *dlip=NULL; + struct xfs_ail *ailp = mp->m_ail; + xfs_log_item_t *dlip = NULL; xfs_log_item_t *mlip; /* ptr to minimum lip */ - mlip = xfs_ail_min(mp->m_ail); + mlip = xfs_ail_min(ailp); if (lip->li_flags & XFS_LI_IN_AIL) { - dlip = xfs_ail_delete(mp->m_ail, lip); + dlip = xfs_ail_delete(ailp, lip); ASSERT(dlip == lip); - xfs_trans_ail_cursor_clear(mp->m_ail, dlip); + xfs_trans_ail_cursor_clear(ailp, dlip); } else { lip->li_flags |= XFS_LI_IN_AIL; } lip->li_lsn = lsn; - xfs_ail_insert(mp->m_ail, lip); + xfs_ail_insert(ailp, lip); if (mlip == dlip) { - mlip = xfs_ail_min(mp->m_ail); - spin_unlock(&mp->m_ail_lock); + mlip = xfs_ail_min(ailp); + spin_unlock(&ailp->xa_lock); xfs_log_move_tail(mp, mlip->li_lsn); } else { - spin_unlock(&mp->m_ail_lock); + spin_unlock(&ailp->xa_lock); } @@ -509,27 +511,28 @@ xfs_trans_update_ail( void xfs_trans_delete_ail( xfs_mount_t *mp, - xfs_log_item_t *lip) __releases(mp->m_ail_lock) + xfs_log_item_t *lip) __releases(ailp->xa_lock) { + struct xfs_ail *ailp = mp->m_ail; xfs_log_item_t *dlip; xfs_log_item_t *mlip; if (lip->li_flags & XFS_LI_IN_AIL) { - mlip = xfs_ail_min(mp->m_ail); - dlip = xfs_ail_delete(mp->m_ail, lip); + mlip = xfs_ail_min(ailp); + dlip = xfs_ail_delete(ailp, lip); ASSERT(dlip == lip); - xfs_trans_ail_cursor_clear(mp->m_ail, dlip); + xfs_trans_ail_cursor_clear(ailp, dlip); lip->li_flags &= ~XFS_LI_IN_AIL; lip->li_lsn = 0; if (mlip == dlip) { - mlip = xfs_ail_min(mp->m_ail); - spin_unlock(&mp->m_ail_lock); + mlip = xfs_ail_min(ailp); + spin_unlock(&ailp->xa_lock); xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); } else { - spin_unlock(&mp->m_ail_lock); + spin_unlock(&ailp->xa_lock); } } else { @@ -537,13 +540,11 @@ xfs_trans_delete_ail( * If the file system is not being shutdown, we are in * serious trouble if we get to this stage. 
*/ - if (XFS_FORCED_SHUTDOWN(mp)) - spin_unlock(&mp->m_ail_lock); - else { + spin_unlock(&ailp->xa_lock); + if (!XFS_FORCED_SHUTDOWN(mp)) { xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, "%s: attempting to delete a log item that is not in the AIL", __func__); - spin_unlock(&mp->m_ail_lock); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); } } @@ -578,6 +579,7 @@ xfs_trans_ail_init( ailp->xa_mount = mp; INIT_LIST_HEAD(&ailp->xa_ail); + spin_lock_init(&ailp->xa_lock); error = xfsaild_start(ailp); if (error) goto out_free_ailp; diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 708cff7..6ca0a7a 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -79,6 +79,7 @@ struct xfs_ail { struct task_struct *xa_task; xfs_lsn_t xa_target; struct xfs_ail_cursor xa_cursors; + spinlock_t xa_lock; }; /* @@ -114,9 +115,9 @@ xfs_trans_ail_copy_lsn( xfs_lsn_t *src) { ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */ - spin_lock(&ailp->xa_mount->m_ail_lock); + spin_lock(&ailp->xa_lock); *dst = *src; - spin_unlock(&ailp->xa_mount->m_ail_lock); + spin_unlock(&ailp->xa_lock); } #else static inline void -- cgit v0.10.2 From a9c21c1b9deaced836034e77fe25fe0b55c21f02 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:39:35 +1100 Subject: [XFS] Given the log a pointer to the AIL When we need to go from the log to the AIL, we have to go via the xfs_mount. Add a xfs_ail pointer to the log so we can go directly to the AIL associated with the log. SGI-PV: 988143 SGI-Modid: xfs-linux-melb:xfs-kern:32351a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a2f7422..405a41a 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -572,6 +572,7 @@ xfs_log_mount( cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); goto error; } + mp->m_log->l_ailp = mp->m_ail; /* * skip log recovery on a norecovery mount. pretend it all @@ -908,7 +909,7 @@ xfs_log_need_covered(xfs_mount_t *mp) spin_lock(&log->l_icloglock); if (((log->l_covered_state == XLOG_STATE_COVER_NEED) || (log->l_covered_state == XLOG_STATE_COVER_NEED2)) - && !xfs_trans_ail_tail(mp->m_ail) + && !xfs_trans_ail_tail(log->l_ailp) && xlog_iclogs_empty(log)) { if (log->l_covered_state == XLOG_STATE_COVER_NEED) log->l_covered_state = XLOG_STATE_COVER_DONE; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index e7d8f84..de7ef6c 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -404,6 +404,7 @@ typedef struct xlog_in_core { typedef struct log { /* The following fields don't need locking */ struct xfs_mount *l_mp; /* mount point */ + struct xfs_ail *l_ailp; /* AIL log is working with */ struct xfs_buf *l_xbuf; /* extra buffer for log * wrapping */ struct xfs_buftarg *l_targ; /* buftarg of log */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index a484feb..0bbde7b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2681,7 +2681,7 @@ xlog_recover_do_efi_trans( efip->efi_next_extent = efi_formatp->efi_nextents; efip->efi_flags |= XFS_EFI_COMMITTED; - spin_lock(&mp->m_ail->xa_lock); + spin_lock(&log->l_ailp->xa_lock); /* * xfs_trans_update_ail() drops the AIL lock. */ @@ -2710,6 +2710,7 @@ xlog_recover_do_efd_trans( xfs_log_item_t *lip; __uint64_t efi_id; struct xfs_ail_cursor cur; + struct xfs_ail *ailp; if (pass == XLOG_RECOVER_PASS1) { return; @@ -2727,8 +2728,9 @@ xlog_recover_do_efd_trans( * in the AIL. 
*/ mp = log->l_mp; - spin_lock(&mp->m_ail->xa_lock); - lip = xfs_trans_ail_cursor_first(mp->m_ail, &cur, 0); + ailp = log->l_ailp; + spin_lock(&ailp->xa_lock); + lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); while (lip != NULL) { if (lip->li_type == XFS_LI_EFI) { efip = (xfs_efi_log_item_t *)lip; @@ -2739,14 +2741,14 @@ xlog_recover_do_efd_trans( */ xfs_trans_delete_ail(mp, lip); xfs_efi_item_free(efip); - spin_lock(&mp->m_ail->xa_lock); + spin_lock(&ailp->xa_lock); break; } } - lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur); + lip = xfs_trans_ail_cursor_next(ailp, &cur); } - xfs_trans_ail_cursor_done(mp->m_ail, &cur); - spin_unlock(&mp->m_ail->xa_lock); + xfs_trans_ail_cursor_done(ailp, &cur); + spin_unlock(&ailp->xa_lock); } /* @@ -3053,14 +3055,13 @@ xlog_recover_process_efis( { xfs_log_item_t *lip; xfs_efi_log_item_t *efip; - xfs_mount_t *mp; int error = 0; struct xfs_ail_cursor cur; + struct xfs_ail *ailp; - mp = log->l_mp; - spin_lock(&mp->m_ail->xa_lock); - - lip = xfs_trans_ail_cursor_first(mp->m_ail, &cur, 0); + ailp = log->l_ailp; + spin_lock(&ailp->xa_lock); + lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); while (lip != NULL) { /* * We're done when we see something other than an EFI. @@ -3068,8 +3069,7 @@ xlog_recover_process_efis( */ if (lip->li_type != XFS_LI_EFI) { #ifdef DEBUG - for (; lip; - lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur)) + for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur)) ASSERT(lip->li_type != XFS_LI_EFI); #endif break; @@ -3080,20 +3080,20 @@ xlog_recover_process_efis( */ efip = (xfs_efi_log_item_t *)lip; if (efip->efi_flags & XFS_EFI_RECOVERED) { - lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur); + lip = xfs_trans_ail_cursor_next(ailp, &cur); continue; } - spin_unlock(&mp->m_ail->xa_lock); - error = xlog_recover_process_efi(mp, efip); - spin_lock(&mp->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); + error = xlog_recover_process_efi(log->l_mp, efip); + spin_lock(&ailp->xa_lock); if (error) goto out; - lip = xfs_trans_ail_cursor_next(mp->m_ail, &cur); + lip = xfs_trans_ail_cursor_next(ailp, &cur); } out: - xfs_trans_ail_cursor_done(mp->m_ail, &cur); - spin_unlock(&mp->m_ail->xa_lock); + xfs_trans_ail_cursor_done(ailp, &cur); + spin_unlock(&ailp->xa_lock); return error; } -- cgit v0.10.2 From fc1829f34d30899701dfd5890030d39e13e1f47d Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:39:46 +1100 Subject: [XFS] Add ail pointer into log items Add an xfs_ail pointer to log items so that the log items can reference the AIL directly during callbacks without needed a struct xfs_mount. SGI-PV: 988143 SGI-Modid: xfs-linux-melb:xfs-kern:32352a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index c557fd6..793e53c 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -731,6 +731,7 @@ xfs_buf_item_init( bip->bli_item.li_type = XFS_LI_BUF; bip->bli_item.li_ops = &xfs_buf_item_ops; bip->bli_item.li_mountp = mp; + bip->bli_item.li_ailp = mp->m_ail; bip->bli_buf = bp; xfs_buf_hold(bp); bip->bli_format.blf_type = XFS_LI_BUF; @@ -1123,11 +1124,13 @@ xfs_buf_iodone( xfs_buf_log_item_t *bip) { struct xfs_mount *mp; + struct xfs_ail *ailp; ASSERT(bip->bli_buf == bp); xfs_buf_rele(bp); mp = bip->bli_item.li_mountp; + ailp = bip->bli_item.li_ailp; /* * If we are forcibly shutting down, this may well be @@ -1138,7 +1141,7 @@ xfs_buf_iodone( * * Either way, AIL is useless if we're forcing a shutdown. 
*/ - spin_lock(&mp->m_ail->xa_lock); + spin_lock(&ailp->xa_lock); /* * xfs_trans_delete_ail() drops the AIL lock. */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index f1dcd80..dab5737 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -108,10 +108,12 @@ xfs_efi_item_pin(xfs_efi_log_item_t *efip) STATIC void xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) { - xfs_mount_t *mp; + xfs_mount_t *mp; + struct xfs_ail *ailp; mp = efip->efi_item.li_mountp; - spin_lock(&mp->m_ail->xa_lock); + ailp = efip->efi_item.li_ailp; + spin_lock(&ailp->xa_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { /* * xfs_trans_delete_ail() drops the AIL lock. @@ -120,7 +122,7 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; - spin_unlock(&mp->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); } } @@ -134,11 +136,13 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) STATIC void xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) { - xfs_mount_t *mp; + xfs_mount_t *mp; + struct xfs_ail *ailp; xfs_log_item_desc_t *lidp; mp = efip->efi_item.li_mountp; - spin_lock(&mp->m_ail->xa_lock); + ailp = efip->efi_item.li_ailp; + spin_lock(&ailp->xa_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { /* * free the xaction descriptor pointing to this item @@ -153,7 +157,7 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; - spin_unlock(&mp->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); } } @@ -268,6 +272,7 @@ xfs_efi_init(xfs_mount_t *mp, efip->efi_item.li_type = XFS_LI_EFI; efip->efi_item.li_ops = &xfs_efi_item_ops; efip->efi_item.li_mountp = mp; + efip->efi_item.li_ailp = mp->m_ail; efip->efi_format.efi_nextents = nextents; efip->efi_format.efi_id = (__psint_t)(void*)efip; @@ -345,14 +350,16 @@ void xfs_efi_release(xfs_efi_log_item_t *efip, uint nextents) { - xfs_mount_t *mp; - int extents_left; + xfs_mount_t *mp; + struct xfs_ail *ailp; + int extents_left; mp = efip->efi_item.li_mountp; + ailp = efip->efi_item.li_ailp; ASSERT(efip->efi_next_extent > 0); ASSERT(efip->efi_flags & XFS_EFI_COMMITTED); - spin_lock(&mp->m_ail->xa_lock); + spin_lock(&ailp->xa_lock); ASSERT(efip->efi_next_extent >= nextents); efip->efi_next_extent -= nextents; extents_left = efip->efi_next_extent; @@ -363,7 +370,7 @@ xfs_efi_release(xfs_efi_log_item_t *efip, xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip); xfs_efi_item_free(efip); } else { - spin_unlock(&mp->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); } } @@ -565,6 +572,7 @@ xfs_efd_init(xfs_mount_t *mp, efdp->efd_item.li_type = XFS_LI_EFD; efdp->efd_item.li_ops = &xfs_efd_item_ops; efdp->efd_item.li_mountp = mp; + efdp->efd_item.li_ailp = mp->m_ail; efdp->efd_efip = efip; efdp->efd_format.efd_nextents = nextents; efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 291d30a..47594f4 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -932,6 +932,7 @@ xfs_inode_item_init( iip->ili_item.li_type = XFS_LI_INODE; iip->ili_item.li_ops = &xfs_inode_item_ops; iip->ili_item.li_mountp = mp; + iip->ili_item.li_ailp = mp->m_ail; iip->ili_inode = ip; /* diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 99ba0e2..5163e12 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1383,11 +1383,13 @@ xfs_trans_chunk_committed( xfs_log_item_desc_t *lidp; xfs_log_item_t *lip; 
xfs_lsn_t item_lsn; - struct xfs_mount *mp; int i; lidp = licp->lic_descs; for (i = 0; i < licp->lic_unused; i++, lidp++) { + struct xfs_mount *mp; + struct xfs_ail *ailp; + if (xfs_lic_isfree(licp, i)) { continue; } @@ -1425,7 +1427,8 @@ xfs_trans_chunk_committed( * the test below. */ mp = lip->li_mountp; - spin_lock(&mp->m_ail->xa_lock); + ailp = lip->li_ailp; + spin_lock(&ailp->xa_lock); if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) { /* * This will set the item's lsn to item_lsn @@ -1436,7 +1439,7 @@ xfs_trans_chunk_committed( */ xfs_trans_update_ail(mp, lip, item_lsn); } else { - spin_unlock(&mp->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); } /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index ae2ae3e..0df5154 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -768,6 +768,7 @@ typedef struct xfs_log_item { xfs_lsn_t li_lsn; /* last on-disk lsn */ struct xfs_log_item_desc *li_desc; /* ptr to current desc*/ struct xfs_mount *li_mountp; /* ptr to fs mount */ + struct xfs_ail *li_ailp; /* ptr to AIL */ uint li_type; /* item type */ uint li_flags; /* misc flags */ struct xfs_log_item *li_bio_list; /* buffer item list */ diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c index 3c666e8..e110bf5 100644 --- a/fs/xfs/xfs_trans_item.c +++ b/fs/xfs/xfs_trans_item.c @@ -22,6 +22,14 @@ #include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" +/* XXX: from here down needed until struct xfs_trans has it's own ailp */ +#include "xfs_bit.h" +#include "xfs_buf_item.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" STATIC int xfs_trans_unlock_chunk(xfs_log_item_chunk_t *, int, int, xfs_lsn_t); @@ -79,6 +87,7 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip) lidp->lid_size = 0; lip->li_desc = lidp; lip->li_mountp = tp->t_mountp; + lip->li_ailp = tp->t_mountp->m_ail; return lidp; } @@ -120,6 +129,7 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip) lidp->lid_size = 0; lip->li_desc = lidp; lip->li_mountp = tp->t_mountp; + lip->li_ailp = tp->t_mountp->m_ail; return lidp; } -- cgit v0.10.2 From 783a2f656f9674c31d4019708a94af93fa1d1c22 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:39:58 +1100 Subject: [XFS] Finish removing the mount pointer from the AIL API Change all the remaining AIL API functions that are passed struct xfs_mount pointers to pass pointers directly to the struct xfs_ail being used. With this conversion, all external access to the AIL is via the struct xfs_ail. Hence the operation and referencing of the AIL is almost entirely independent of the xfs_mount that is using it - it is now much more tightly tied to the log and the items it is tracking in the log than it is tied to the xfs_mount. SGI-PV: 988143 SGI-Modid: xfs-linux-melb:xfs-kern:32353a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 0d7a62b..591ca66 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -1319,8 +1319,10 @@ xfs_qm_dqflush_done( xfs_dq_logitem_t *qip) { xfs_dquot_t *dqp; + struct xfs_ail *ailp; dqp = qip->qli_dquot; + ailp = qip->qli_item.li_ailp; /* * We only want to pull the item from the AIL if its @@ -1333,15 +1335,12 @@ xfs_qm_dqflush_done( if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && qip->qli_item.li_lsn == qip->qli_flush_lsn) { - spin_lock(&dqp->q_mount->m_ail->xa_lock); - /* - * xfs_trans_delete_ail() drops the AIL lock. 
- */ + /* xfs_trans_ail_delete() drops the AIL lock. */ + spin_lock(&ailp->xa_lock); if (qip->qli_item.li_lsn == qip->qli_flush_lsn) - xfs_trans_delete_ail(dqp->q_mount, - (xfs_log_item_t*)qip); + xfs_trans_ail_delete(ailp, (xfs_log_item_t*)qip); else - spin_unlock(&dqp->q_mount->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); } /* @@ -1371,7 +1370,7 @@ xfs_dqunlock( mutex_unlock(&(dqp->q_qlock)); if (dqp->q_logitem.qli_dquot == dqp) { /* Once was dqp->q_mount, but might just have been cleared */ - xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_mountp, + xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp, (xfs_log_item_t*)&(dqp->q_logitem)); } } diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 0e1fa51..1728f6a 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -553,14 +553,16 @@ xfs_qm_qoffend_logitem_committed( xfs_lsn_t lsn) { xfs_qoff_logitem_t *qfs; + struct xfs_ail *ailp; qfs = qfe->qql_start_lip; - spin_lock(&qfs->qql_item.li_mountp->m_ail->xa_lock); + ailp = qfs->qql_item.li_ailp; + spin_lock(&ailp->xa_lock); /* * Delete the qoff-start logitem from the AIL. - * xfs_trans_delete_ail() drops the AIL lock. + * xfs_trans_ail_delete() drops the AIL lock. */ - xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs); + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs); kmem_free(qfs); kmem_free(qfe); return (xfs_lsn_t)-1; diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 793e53c..d245d04 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -375,7 +375,7 @@ xfs_buf_item_unpin( xfs_buf_log_item_t *bip, int stale) { - xfs_mount_t *mp; + struct xfs_ail *ailp; xfs_buf_t *bp; int freed; @@ -387,7 +387,7 @@ xfs_buf_item_unpin( xfs_buftrace("XFS_UNPIN", bp); freed = atomic_dec_and_test(&bip->bli_refcount); - mp = bip->bli_item.li_mountp; + ailp = bip->bli_item.li_ailp; xfs_bunpin(bp); if (freed && stale) { ASSERT(bip->bli_flags & XFS_BLI_STALE); @@ -399,17 +399,17 @@ xfs_buf_item_unpin( xfs_buftrace("XFS_UNPIN STALE", bp); /* * If we get called here because of an IO error, we may - * or may not have the item on the AIL. xfs_trans_delete_ail() + * or may not have the item on the AIL. xfs_trans_ail_delete() * will take care of that situation. - * xfs_trans_delete_ail() drops the AIL lock. + * xfs_trans_ail_delete() drops the AIL lock. */ if (bip->bli_flags & XFS_BLI_STALE_INODE) { xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip); XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); } else { - spin_lock(&mp->m_ail->xa_lock); - xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); + spin_lock(&ailp->xa_lock); + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); xfs_buf_item_relse(bp); ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); } @@ -1123,29 +1123,23 @@ xfs_buf_iodone( xfs_buf_t *bp, xfs_buf_log_item_t *bip) { - struct xfs_mount *mp; - struct xfs_ail *ailp; + struct xfs_ail *ailp = bip->bli_item.li_ailp; ASSERT(bip->bli_buf == bp); xfs_buf_rele(bp); - mp = bip->bli_item.li_mountp; - ailp = bip->bli_item.li_ailp; /* * If we are forcibly shutting down, this may well be * off the AIL already. That's because we simulate the * log-committed callbacks to unpin these buffers. Or we may never * have put this item on AIL because of the transaction was - * aborted forcibly. xfs_trans_delete_ail() takes care of these. + * aborted forcibly. xfs_trans_ail_delete() takes care of these. * * Either way, AIL is useless if we're forcing a shutdown. 
*/ spin_lock(&ailp->xa_lock); - /* - * xfs_trans_delete_ail() drops the AIL lock. - */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); xfs_buf_item_free(bip); } diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index dab5737..05a4bdd 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -108,17 +108,12 @@ xfs_efi_item_pin(xfs_efi_log_item_t *efip) STATIC void xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) { - xfs_mount_t *mp; - struct xfs_ail *ailp; + struct xfs_ail *ailp = efip->efi_item.li_ailp; - mp = efip->efi_item.li_mountp; - ailp = efip->efi_item.li_ailp; spin_lock(&ailp->xa_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { - /* - * xfs_trans_delete_ail() drops the AIL lock. - */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip); + /* xfs_trans_ail_delete() drops the AIL lock. */ + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip); xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; @@ -136,12 +131,9 @@ xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) STATIC void xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) { - xfs_mount_t *mp; - struct xfs_ail *ailp; + struct xfs_ail *ailp = efip->efi_item.li_ailp; xfs_log_item_desc_t *lidp; - mp = efip->efi_item.li_mountp; - ailp = efip->efi_item.li_ailp; spin_lock(&ailp->xa_lock); if (efip->efi_flags & XFS_EFI_CANCELED) { /* @@ -149,11 +141,9 @@ xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp) */ lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) efip); xfs_trans_free_item(tp, lidp); - /* - * pull the item off the AIL. - * xfs_trans_delete_ail() drops the AIL lock. - */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip); + + /* xfs_trans_ail_delete() drops the AIL lock. */ + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip); xfs_efi_item_free(efip); } else { efip->efi_flags |= XFS_EFI_COMMITTED; @@ -350,12 +340,9 @@ void xfs_efi_release(xfs_efi_log_item_t *efip, uint nextents) { - xfs_mount_t *mp; - struct xfs_ail *ailp; + struct xfs_ail *ailp = efip->efi_item.li_ailp; int extents_left; - mp = efip->efi_item.li_mountp; - ailp = efip->efi_item.li_ailp; ASSERT(efip->efi_next_extent > 0); ASSERT(efip->efi_flags & XFS_EFI_COMMITTED); @@ -364,10 +351,8 @@ xfs_efi_release(xfs_efi_log_item_t *efip, efip->efi_next_extent -= nextents; extents_left = efip->efi_next_extent; if (extents_left == 0) { - /* - * xfs_trans_delete_ail() drops the AIL lock. - */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip); + /* xfs_trans_ail_delete() drops the AIL lock. */ + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)efip); xfs_efi_item_free(efip); } else { spin_unlock(&ailp->xa_lock); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 8001338..a1f209b 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -38,6 +38,8 @@ #include "xfs_ialloc.h" #include "xfs_quota.h" #include "xfs_utils.h" +#include "xfs_trans_priv.h" +#include "xfs_inode_item.h" /* * Check the validity of the inode we just found it the cache @@ -616,7 +618,7 @@ xfs_iunlock( * it is in the AIL and anyone is waiting on it. Don't do * this if the caller has asked us not to. 
*/ - xfs_trans_unlocked_item(ip->i_mount, + xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp, (xfs_log_item_t*)(ip->i_itemp)); } xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 6d82c23..c83f699 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2709,17 +2709,17 @@ xfs_idestroy( * inode still in the AIL. If it is there, we should remove * it to prevent a use-after-free from occurring. */ - xfs_mount_t *mp = ip->i_mount; xfs_log_item_t *lip = &ip->i_itemp->ili_item; + struct xfs_ail *ailp = lip->li_ailp; ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || XFS_FORCED_SHUTDOWN(ip->i_mount)); if (lip->li_flags & XFS_LI_IN_AIL) { - spin_lock(&mp->m_ail->xa_lock); + spin_lock(&ailp->xa_lock); if (lip->li_flags & XFS_LI_IN_AIL) - xfs_trans_delete_ail(mp, lip); + xfs_trans_ail_delete(ailp, lip); else - spin_unlock(&mp->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); } xfs_inode_item_destroy(ip); ip->i_itemp = NULL; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 47594f4..aa9bf05 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -977,9 +977,8 @@ xfs_iflush_done( xfs_buf_t *bp, xfs_inode_log_item_t *iip) { - xfs_inode_t *ip; - - ip = iip->ili_inode; + xfs_inode_t *ip = iip->ili_inode; + struct xfs_ail *ailp = iip->ili_item.li_ailp; /* * We only want to pull the item from the AIL if it is @@ -992,15 +991,12 @@ xfs_iflush_done( */ if (iip->ili_logged && (iip->ili_item.li_lsn == iip->ili_flush_lsn)) { - spin_lock(&ip->i_mount->m_ail->xa_lock); + spin_lock(&ailp->xa_lock); if (iip->ili_item.li_lsn == iip->ili_flush_lsn) { - /* - * xfs_trans_delete_ail() drops the AIL lock. - */ - xfs_trans_delete_ail(ip->i_mount, - (xfs_log_item_t*)iip); + /* xfs_trans_ail_delete() drops the AIL lock. */ + xfs_trans_ail_delete(ailp, (xfs_log_item_t*)iip); } else { - spin_unlock(&ip->i_mount->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); } } @@ -1032,21 +1028,20 @@ void xfs_iflush_abort( xfs_inode_t *ip) { - xfs_inode_log_item_t *iip; + xfs_inode_log_item_t *iip = ip->i_itemp; xfs_mount_t *mp; iip = ip->i_itemp; mp = ip->i_mount; if (iip) { + struct xfs_ail *ailp = iip->ili_item.li_ailp; if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { - spin_lock(&mp->m_ail->xa_lock); + spin_lock(&ailp->xa_lock); if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { - /* - * xfs_trans_delete_ail() drops the AIL lock. - */ - xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip); + /* xfs_trans_ail_delete() drops the AIL lock. */ + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)iip); } else - spin_unlock(&mp->m_ail->xa_lock); + spin_unlock(&ailp->xa_lock); } iip->ili_logged = 0; /* diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 405a41a..5184017 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1413,7 +1413,7 @@ xlog_grant_push_ail(xfs_mount_t *mp, */ if (threshold_lsn && !XLOG_FORCED_SHUTDOWN(log)) - xfs_trans_push_ail(mp, threshold_lsn); + xfs_trans_ail_push(log->l_ailp, threshold_lsn); } /* xlog_grant_push_ail */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 0bbde7b..cff901e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2683,9 +2683,9 @@ xlog_recover_do_efi_trans( spin_lock(&log->l_ailp->xa_lock); /* - * xfs_trans_update_ail() drops the AIL lock. + * xfs_trans_ail_update() drops the AIL lock. 
*/ - xfs_trans_update_ail(mp, (xfs_log_item_t *)efip, lsn); + xfs_trans_ail_update(log->l_ailp, (xfs_log_item_t *)efip, lsn); return 0; } @@ -2704,13 +2704,12 @@ xlog_recover_do_efd_trans( xlog_recover_item_t *item, int pass) { - xfs_mount_t *mp; xfs_efd_log_format_t *efd_formatp; xfs_efi_log_item_t *efip = NULL; xfs_log_item_t *lip; __uint64_t efi_id; struct xfs_ail_cursor cur; - struct xfs_ail *ailp; + struct xfs_ail *ailp = log->l_ailp; if (pass == XLOG_RECOVER_PASS1) { return; @@ -2727,8 +2726,6 @@ xlog_recover_do_efd_trans( * Search for the efi with the id in the efd format structure * in the AIL. */ - mp = log->l_mp; - ailp = log->l_ailp; spin_lock(&ailp->xa_lock); lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); while (lip != NULL) { @@ -2736,10 +2733,10 @@ xlog_recover_do_efd_trans( efip = (xfs_efi_log_item_t *)lip; if (efip->efi_format.efi_id == efi_id) { /* - * xfs_trans_delete_ail() drops the + * xfs_trans_ail_delete() drops the * AIL lock. */ - xfs_trans_delete_ail(mp, lip); + xfs_trans_ail_delete(ailp, lip); xfs_efi_item_free(efip); spin_lock(&ailp->xa_lock); break; diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 5163e12..ad137ef 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1387,7 +1387,6 @@ xfs_trans_chunk_committed( lidp = licp->lic_descs; for (i = 0; i < licp->lic_unused; i++, lidp++) { - struct xfs_mount *mp; struct xfs_ail *ailp; if (xfs_lic_isfree(licp, i)) { @@ -1426,7 +1425,6 @@ xfs_trans_chunk_committed( * This would cause the earlier transaction to fail * the test below. */ - mp = lip->li_mountp; ailp = lip->li_ailp; spin_lock(&ailp->xa_lock); if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) { @@ -1435,9 +1433,9 @@ xfs_trans_chunk_committed( * and update the position of the item in * the AIL. * - * xfs_trans_update_ail() drops the AIL lock. + * xfs_trans_ail_update() drops the AIL lock. */ - xfs_trans_update_ail(mp, lip, item_lsn); + xfs_trans_ail_update(ailp, lip, item_lsn); } else { spin_unlock(&ailp->xa_lock); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 0df5154..d6fe4a8 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -971,9 +971,6 @@ int _xfs_trans_commit(xfs_trans_t *, void xfs_trans_cancel(xfs_trans_t *, int); int xfs_trans_ail_init(struct xfs_mount *); void xfs_trans_ail_destroy(struct xfs_mount *); -void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); -void xfs_trans_unlocked_item(struct xfs_mount *, - xfs_log_item_t *); xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx); diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 0cd47a7..67ee466 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -86,16 +86,16 @@ xfs_trans_ail_tail( * any of the objects, so the lock is not needed. 
*/ void -xfs_trans_push_ail( - xfs_mount_t *mp, - xfs_lsn_t threshold_lsn) +xfs_trans_ail_push( + struct xfs_ail *ailp, + xfs_lsn_t threshold_lsn) { - xfs_log_item_t *lip; + xfs_log_item_t *lip; - lip = xfs_ail_min(mp->m_ail); - if (lip && !XFS_FORCED_SHUTDOWN(mp)) { - if (XFS_LSN_CMP(threshold_lsn, mp->m_ail->xa_target) > 0) - xfsaild_wakeup(mp->m_ail, threshold_lsn); + lip = xfs_ail_min(ailp); + if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { + if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) + xfsaild_wakeup(ailp, threshold_lsn); } } @@ -412,7 +412,7 @@ xfsaild_push( */ void xfs_trans_unlocked_item( - xfs_mount_t *mp, + struct xfs_ail *ailp, xfs_log_item_t *lip) { xfs_log_item_t *min_lip; @@ -424,7 +424,7 @@ xfs_trans_unlocked_item( * over some potentially valid data. */ if (!(lip->li_flags & XFS_LI_IN_AIL) || - XFS_FORCED_SHUTDOWN(mp)) { + XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { return; } @@ -440,10 +440,10 @@ xfs_trans_unlocked_item( * the call to xfs_log_move_tail() doesn't do anything if there's * not enough free space to wake people up so we're safe calling it. */ - min_lip = xfs_ail_min(mp->m_ail); + min_lip = xfs_ail_min(ailp); if (min_lip == lip) - xfs_log_move_tail(mp, 1); + xfs_log_move_tail(ailp->xa_mount, 1); } /* xfs_trans_unlocked_item */ @@ -460,12 +460,11 @@ xfs_trans_unlocked_item( * is dropped before returning. */ void -xfs_trans_update_ail( - xfs_mount_t *mp, +xfs_trans_ail_update( + struct xfs_ail *ailp, xfs_log_item_t *lip, xfs_lsn_t lsn) __releases(ailp->xa_lock) { - struct xfs_ail *ailp = mp->m_ail; xfs_log_item_t *dlip = NULL; xfs_log_item_t *mlip; /* ptr to minimum lip */ @@ -485,7 +484,7 @@ xfs_trans_update_ail( if (mlip == dlip) { mlip = xfs_ail_min(ailp); spin_unlock(&ailp->xa_lock); - xfs_log_move_tail(mp, mlip->li_lsn); + xfs_log_move_tail(ailp->xa_mount, mlip->li_lsn); } else { spin_unlock(&ailp->xa_lock); } @@ -509,11 +508,10 @@ xfs_trans_update_ail( * is dropped before returning. */ void -xfs_trans_delete_ail( - xfs_mount_t *mp, +xfs_trans_ail_delete( + struct xfs_ail *ailp, xfs_log_item_t *lip) __releases(ailp->xa_lock) { - struct xfs_ail *ailp = mp->m_ail; xfs_log_item_t *dlip; xfs_log_item_t *mlip; @@ -530,7 +528,8 @@ xfs_trans_delete_ail( if (mlip == dlip) { mlip = xfs_ail_min(ailp); spin_unlock(&ailp->xa_lock); - xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); + xfs_log_move_tail(ailp->xa_mount, + (mlip ? mlip->li_lsn : 0)); } else { spin_unlock(&ailp->xa_lock); } @@ -540,6 +539,8 @@ xfs_trans_delete_ail( * If the file system is not being shutdown, we are in * serious trouble if we get to this stage. */ + struct xfs_mount *mp = ailp->xa_mount; + spin_unlock(&ailp->xa_lock); if (!XFS_FORCED_SHUTDOWN(mp)) { xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 4e855b5..8ee2f8c 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -527,9 +527,8 @@ xfs_trans_brelse(xfs_trans_t *tp, lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); if (lip->li_type == XFS_LI_BUF) { bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); - xfs_trans_unlocked_item( - bip->bli_item.li_mountp, - lip); + xfs_trans_unlocked_item(bip->bli_item.li_ailp, + lip); } } xfs_buf_relse(bp); @@ -626,7 +625,7 @@ xfs_trans_brelse(xfs_trans_t *tp, * tell the AIL that the buffer is being unlocked. 
*/ if (bip != NULL) { - xfs_trans_unlocked_item(bip->bli_item.li_mountp, + xfs_trans_unlocked_item(bip->bli_item.li_ailp, (xfs_log_item_t*)bip); } diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 6ca0a7a..73e2ad3 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -85,12 +85,15 @@ struct xfs_ail { /* * From xfs_trans_ail.c */ -void xfs_trans_update_ail(struct xfs_mount *mp, - struct xfs_log_item *lip, xfs_lsn_t lsn) - __releases(mp->m_ail_lock); -void xfs_trans_delete_ail(struct xfs_mount *mp, - struct xfs_log_item *lip) - __releases(mp->m_ail_lock); +void xfs_trans_ail_update(struct xfs_ail *ailp, + struct xfs_log_item *lip, xfs_lsn_t lsn) + __releases(ailp->xa_lock); +void xfs_trans_ail_delete(struct xfs_ail *ailp, + struct xfs_log_item *lip) + __releases(ailp->xa_lock); +void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); +void xfs_trans_unlocked_item(struct xfs_ail *, + xfs_log_item_t *); xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp); -- cgit v0.10.2 From 5a792c4579af8466246408e38fd4eff45d8493b8 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:40:09 +1100 Subject: [XFS] XFS: Check for valid transaction headers in recovery When we are about to add a new item to a transaction in recovery, we need to check that it is valid first. Currently we just assert that header magic number matches, but in production systems that is not present and we add a corrupted transaction to the list to be processed. This results in a kernel oops later when processing the corrupted transaction. Instead, if we detect a corrupted transaction, abort recovery and leave the user to clean up the mess that has occurred. SGI-PV: 988145 SGI-Modid: xfs-linux-melb:xfs-kern:32356a Signed-off-by: David Chinner Signed-off-by: Tim Shimmin Signed-off-by: Eric Sandeen Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index cff901e..b411d494 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1417,7 +1417,13 @@ xlog_recover_add_to_trans( return 0; item = trans->r_itemq; if (item == NULL) { - ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC); + /* we need to catch log corruptions here */ + if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { + xlog_warn("XFS: xlog_recover_add_to_trans: " + "bad header magic number"); + ASSERT(0); + return XFS_ERROR(EIO); + } if (len == sizeof(xfs_trans_header_t)) xlog_recover_add_item(&trans->r_itemq); memcpy(&trans->r_theader, dp, len); /* d, s, l */ -- cgit v0.10.2 From 9d565ffa33d574c2a22442f9d95ca2bd0be7cc42 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:53:24 +1100 Subject: [XFS] kill struct xfs_mount_args No need to parse the mount option into a structure before applying it to struct xfs_mount. The content of xfs_start_flags gets merged into xfs_parseargs. Calls inbetween don't care and can use mount members instead of the args struct. This patch uncovered that the mount option for shared filesystems wasn't ever exposed on Linux. The code to handle it is #if 0'ed in this patch pending a decision on this feature. I'll send a writeup about it to the list soon. 
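The shape of the conversion, reduced to a single mount option and condensed from the xfs_parseargs() hunks below (the surrounding option-parsing loop is unchanged):

	/* before: options were staged in a temporary struct xfs_mount_args */
	} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
		args->flags |= XFSMNT_WSYNC;

	/* after: options are applied directly to the struct xfs_mount */
	} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
		mp->m_flags |= XFS_MOUNT_WSYNC;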
SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32371a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 655508c..5638a99 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -18,7 +18,6 @@ #include "xfs.h" #include "xfs_bit.h" #include "xfs_log.h" -#include "xfs_clnt.h" #include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" @@ -75,32 +74,6 @@ static struct super_operations xfs_super_operations; static kmem_zone_t *xfs_ioend_zone; mempool_t *xfs_ioend_pool; -STATIC struct xfs_mount_args * -xfs_args_allocate( - struct super_block *sb, - int silent) -{ - struct xfs_mount_args *args; - - args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL); - if (!args) - return NULL; - - args->logbufs = args->logbufsize = -1; - strncpy(args->fsname, sb->s_id, MAXNAMELEN); - - /* Copy the already-parsed mount(2) flags we're interested in */ - if (sb->s_flags & MS_DIRSYNC) - args->flags |= XFSMNT_DIRSYNC; - if (sb->s_flags & MS_SYNCHRONOUS) - args->flags |= XFSMNT_WSYNC; - if (silent) - args->flags |= XFSMNT_QUIET; - args->flags |= XFSMNT_32BITINODES; - - return args; -} - #define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ #define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ #define MNTOPT_LOGDEV "logdev" /* log device */ @@ -189,26 +162,54 @@ suffix_strtoul(char *s, char **endp, unsigned int base) return simple_strtoul((const char *)s, endp, base) << shift_left_factor; } +/* + * This function fills in xfs_mount_t fields based on mount args. + * Note: the superblock has _not_ yet been read in. + * + * Note that this function leaks the various device name allocations on + * failure. The caller takes care of them. + */ STATIC int xfs_parseargs( struct xfs_mount *mp, char *options, - struct xfs_mount_args *args, - int update) + char **mtpt) { + struct super_block *sb = mp->m_super; char *this_char, *value, *eov; - int dsunit, dswidth, vol_dsunit, vol_dswidth; - int iosize; + int dsunit = 0; + int dswidth = 0; + int iosize = 0; int dmapi_implies_ikeep = 1; + uchar_t iosizelog = 0; + + /* + * Copy binary VFS mount flags we are interested in. + */ + if (sb->s_flags & MS_RDONLY) + mp->m_flags |= XFS_MOUNT_RDONLY; + if (sb->s_flags & MS_DIRSYNC) + mp->m_flags |= XFS_MOUNT_DIRSYNC; + if (sb->s_flags & MS_SYNCHRONOUS) + mp->m_flags |= XFS_MOUNT_WSYNC; + + /* + * Set some default flags that could be cleared by the mount option + * parsing. + */ + mp->m_flags |= XFS_MOUNT_BARRIER; + mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; + mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - args->flags |= XFSMNT_BARRIER; - args->flags2 |= XFSMNT2_COMPAT_IOSIZE; + /* + * These can be overridden by the mount option parsing. 
+ */ + mp->m_logbufs = -1; + mp->m_logbsize = -1; if (!options) goto done; - iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0; - while ((this_char = strsep(&options, ",")) != NULL) { if (!*this_char) continue; @@ -222,7 +223,7 @@ xfs_parseargs( this_char); return EINVAL; } - args->logbufs = simple_strtoul(value, &eov, 10); + mp->m_logbufs = simple_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -230,7 +231,7 @@ xfs_parseargs( this_char); return EINVAL; } - args->logbufsize = suffix_strtoul(value, &eov, 10); + mp->m_logbsize = suffix_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -238,7 +239,9 @@ xfs_parseargs( this_char); return EINVAL; } - strncpy(args->logname, value, MAXNAMELEN); + mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_logname) + return ENOMEM; } else if (!strcmp(this_char, MNTOPT_MTPT)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -246,7 +249,9 @@ xfs_parseargs( this_char); return EINVAL; } - strncpy(args->mtpt, value, MAXNAMELEN); + *mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL); + if (!*mtpt) + return ENOMEM; } else if (!strcmp(this_char, MNTOPT_RTDEV)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -254,7 +259,9 @@ xfs_parseargs( this_char); return EINVAL; } - strncpy(args->rtname, value, MAXNAMELEN); + mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_rtname) + return ENOMEM; } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -263,8 +270,7 @@ xfs_parseargs( return EINVAL; } iosize = simple_strtoul(value, &eov, 10); - args->flags |= XFSMNT_IOSIZE; - args->iosizelog = (uint8_t) iosize; + iosizelog = (uint8_t) iosize; } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -273,8 +279,7 @@ xfs_parseargs( return EINVAL; } iosize = suffix_strtoul(value, &eov, 10); - args->flags |= XFSMNT_IOSIZE; - args->iosizelog = ffs(iosize) - 1; + iosizelog = ffs(iosize) - 1; } else if (!strcmp(this_char, MNTOPT_GRPID) || !strcmp(this_char, MNTOPT_BSDGROUPS)) { mp->m_flags |= XFS_MOUNT_GRPID; @@ -282,23 +287,25 @@ xfs_parseargs( !strcmp(this_char, MNTOPT_SYSVGROUPS)) { mp->m_flags &= ~XFS_MOUNT_GRPID; } else if (!strcmp(this_char, MNTOPT_WSYNC)) { - args->flags |= XFSMNT_WSYNC; + mp->m_flags |= XFS_MOUNT_WSYNC; } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { - args->flags |= XFSMNT_OSYNCISOSYNC; + mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC; } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { - args->flags |= XFSMNT_NORECOVERY; + mp->m_flags |= XFS_MOUNT_NORECOVERY; } else if (!strcmp(this_char, MNTOPT_INO64)) { - args->flags |= XFSMNT_INO64; -#if !XFS_BIG_INUMS +#if XFS_BIG_INUMS + mp->m_flags |= XFS_MOUNT_INO64; + mp->m_inoadd = XFS_INO64_OFFSET; +#else cmn_err(CE_WARN, "XFS: %s option not allowed on this system", this_char); return EINVAL; #endif } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { - args->flags |= XFSMNT_NOALIGN; + mp->m_flags |= XFS_MOUNT_NOALIGN; } else if (!strcmp(this_char, MNTOPT_SWALLOC)) { - args->flags |= XFSMNT_SWALLOC; + mp->m_flags |= XFS_MOUNT_SWALLOC; } else if (!strcmp(this_char, MNTOPT_SUNIT)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -316,7 +323,7 @@ xfs_parseargs( } dswidth = simple_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { - args->flags &= ~XFSMNT_32BITINODES; + mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; #if !XFS_BIG_INUMS cmn_err(CE_WARN, "XFS: 
%s option not allowed on this system", @@ -324,56 +331,60 @@ xfs_parseargs( return EINVAL; #endif } else if (!strcmp(this_char, MNTOPT_NOUUID)) { - args->flags |= XFSMNT_NOUUID; + mp->m_flags |= XFS_MOUNT_NOUUID; } else if (!strcmp(this_char, MNTOPT_BARRIER)) { - args->flags |= XFSMNT_BARRIER; + mp->m_flags |= XFS_MOUNT_BARRIER; } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) { - args->flags &= ~XFSMNT_BARRIER; + mp->m_flags &= ~XFS_MOUNT_BARRIER; } else if (!strcmp(this_char, MNTOPT_IKEEP)) { - args->flags |= XFSMNT_IKEEP; + mp->m_flags |= XFS_MOUNT_IKEEP; } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { dmapi_implies_ikeep = 0; - args->flags &= ~XFSMNT_IKEEP; + mp->m_flags &= ~XFS_MOUNT_IKEEP; } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { - args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE; + mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE; } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { - args->flags2 |= XFSMNT2_COMPAT_IOSIZE; + mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; } else if (!strcmp(this_char, MNTOPT_ATTR2)) { - args->flags |= XFSMNT_ATTR2; + mp->m_flags |= XFS_MOUNT_ATTR2; } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { - args->flags &= ~XFSMNT_ATTR2; - args->flags |= XFSMNT_NOATTR2; + mp->m_flags &= ~XFS_MOUNT_ATTR2; + mp->m_flags |= XFS_MOUNT_NOATTR2; } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { - args->flags2 |= XFSMNT2_FILESTREAMS; + mp->m_flags |= XFS_MOUNT_FILESTREAMS; } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { - args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA); - args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA); + mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | + XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | + XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_QUOTA) || !strcmp(this_char, MNTOPT_UQUOTA) || !strcmp(this_char, MNTOPT_USRQUOTA)) { - args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF; + mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | + XFS_UQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) || !strcmp(this_char, MNTOPT_UQUOTANOENF)) { - args->flags |= XFSMNT_UQUOTA; - args->flags &= ~XFSMNT_UQUOTAENF; + mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_UQUOTA_ENFD; } else if (!strcmp(this_char, MNTOPT_PQUOTA) || !strcmp(this_char, MNTOPT_PRJQUOTA)) { - args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF; + mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | + XFS_OQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { - args->flags |= XFSMNT_PQUOTA; - args->flags &= ~XFSMNT_PQUOTAENF; + mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_OQUOTA_ENFD; } else if (!strcmp(this_char, MNTOPT_GQUOTA) || !strcmp(this_char, MNTOPT_GRPQUOTA)) { - args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF; + mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | + XFS_OQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { - args->flags |= XFSMNT_GQUOTA; - args->flags &= ~XFSMNT_GQUOTAENF; + mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_OQUOTA_ENFD; } else if (!strcmp(this_char, MNTOPT_DMAPI)) { - args->flags |= XFSMNT_DMAPI; + mp->m_flags |= XFS_MOUNT_DMAPI; } else if (!strcmp(this_char, MNTOPT_XDSM)) { - args->flags |= XFSMNT_DMAPI; + mp->m_flags |= XFS_MOUNT_DMAPI; } else if (!strcmp(this_char, MNTOPT_DMI)) { - args->flags |= XFSMNT_DMAPI; + mp->m_flags |= XFS_MOUNT_DMAPI; } else if (!strcmp(this_char, "ihashsize")) { cmn_err(CE_WARN, "XFS: ihashsize no longer used, option is deprecated."); @@ -391,27 +402,29 @@ xfs_parseargs( } } - if 
(args->flags & XFSMNT_NORECOVERY) { - if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) { - cmn_err(CE_WARN, - "XFS: no-recovery mounts must be read-only."); - return EINVAL; - } + /* + * no recovery flag requires a read-only mount + */ + if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && + !(mp->m_flags & XFS_MOUNT_RDONLY)) { + cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only."); + return EINVAL; } - if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) { + if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { cmn_err(CE_WARN, "XFS: sunit and swidth options incompatible with the noalign option"); return EINVAL; } - if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) { + if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && + (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { cmn_err(CE_WARN, "XFS: cannot mount with both project and group quota"); return EINVAL; } - if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') { + if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) { printk("XFS: %s option needs the mount point option as well\n", MNTOPT_DMAPI); return EINVAL; @@ -439,27 +452,66 @@ xfs_parseargs( * Note that if "ikeep" or "noikeep" mount options are * supplied, then they are honored. */ - if ((args->flags & XFSMNT_DMAPI) && dmapi_implies_ikeep) - args->flags |= XFSMNT_IKEEP; + if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep) + mp->m_flags |= XFS_MOUNT_IKEEP; - if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { +done: + if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) { + /* + * At this point the superblock has not been read + * in, therefore we do not know the block size. + * Before the mount call ends we will convert + * these to FSBs. + */ if (dsunit) { - args->sunit = dsunit; - args->flags |= XFSMNT_RETERR; - } else { - args->sunit = vol_dsunit; + mp->m_dalign = dsunit; + mp->m_flags |= XFS_MOUNT_RETERR; } - dswidth ? 
(args->swidth = dswidth) : - (args->swidth = vol_dswidth); - } else { - args->sunit = args->swidth = 0; + + if (dswidth) + mp->m_swidth = dswidth; + } + + if (mp->m_logbufs != -1 && + mp->m_logbufs != 0 && + (mp->m_logbufs < XLOG_MIN_ICLOGS || + mp->m_logbufs > XLOG_MAX_ICLOGS)) { + cmn_err(CE_WARN, + "XFS: invalid logbufs value: %d [not %d-%d]", + mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); + return XFS_ERROR(EINVAL); + } + if (mp->m_logbsize != -1 && + mp->m_logbsize != 0 && + (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || + mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || + !is_power_of_2(mp->m_logbsize))) { + cmn_err(CE_WARN, + "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", + mp->m_logbsize); + return XFS_ERROR(EINVAL); + } + + mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_fsname) + return ENOMEM; + mp->m_fsname_len = strlen(mp->m_fsname) + 1; + + if (iosizelog) { + if (iosizelog > XFS_MAX_IO_LOG || + iosizelog < XFS_MIN_IO_LOG) { + cmn_err(CE_WARN, + "XFS: invalid log iosize: %d [not %d-%d]", + iosizelog, XFS_MIN_IO_LOG, + XFS_MAX_IO_LOG); + return XFS_ERROR(EINVAL); + } + + mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; + mp->m_readio_log = iosizelog; + mp->m_writeio_log = iosizelog; } -done: - if (args->flags & XFSMNT_32BITINODES) - mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - if (args->flags2) - args->flags |= XFSMNT_FLAGS2; return 0; } @@ -705,8 +757,7 @@ xfs_close_devices( */ STATIC int xfs_open_devices( - struct xfs_mount *mp, - struct xfs_mount_args *args) + struct xfs_mount *mp) { struct block_device *ddev = mp->m_super->s_bdev; struct block_device *logdev = NULL, *rtdev = NULL; @@ -715,14 +766,14 @@ xfs_open_devices( /* * Open real time and log devices - order is important. */ - if (args->logname[0]) { - error = xfs_blkdev_get(mp, args->logname, &logdev); + if (mp->m_logname) { + error = xfs_blkdev_get(mp, mp->m_logname, &logdev); if (error) goto out; } - if (args->rtname[0]) { - error = xfs_blkdev_get(mp, args->rtname, &rtdev); + if (mp->m_rtname) { + error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev); if (error) goto out_close_logdev; @@ -1288,175 +1339,28 @@ xfs_fs_setxquota( /* * This function fills in xfs_mount_t fields based on mount args. - * Note: the superblock has _not_ yet been read in. - */ -STATIC int -xfs_start_flags( - struct xfs_mount_args *ap, - struct xfs_mount *mp) -{ - int error; - - /* Values are in BBs */ - if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { - /* - * At this point the superblock has not been read - * in, therefore we do not know the block size. - * Before the mount call ends we will convert - * these to FSBs. 
- */ - mp->m_dalign = ap->sunit; - mp->m_swidth = ap->swidth; - } - - if (ap->logbufs != -1 && - ap->logbufs != 0 && - (ap->logbufs < XLOG_MIN_ICLOGS || - ap->logbufs > XLOG_MAX_ICLOGS)) { - cmn_err(CE_WARN, - "XFS: invalid logbufs value: %d [not %d-%d]", - ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); - return XFS_ERROR(EINVAL); - } - mp->m_logbufs = ap->logbufs; - if (ap->logbufsize != -1 && - ap->logbufsize != 0 && - (ap->logbufsize < XLOG_MIN_RECORD_BSIZE || - ap->logbufsize > XLOG_MAX_RECORD_BSIZE || - !is_power_of_2(ap->logbufsize))) { - cmn_err(CE_WARN, - "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", - ap->logbufsize); - return XFS_ERROR(EINVAL); - } - - error = ENOMEM; - - mp->m_logbsize = ap->logbufsize; - mp->m_fsname_len = strlen(ap->fsname) + 1; - - mp->m_fsname = kstrdup(ap->fsname, GFP_KERNEL); - if (!mp->m_fsname) - goto out; - - if (ap->rtname[0]) { - mp->m_rtname = kstrdup(ap->rtname, GFP_KERNEL); - if (!mp->m_rtname) - goto out_free_fsname; - - } - - if (ap->logname[0]) { - mp->m_logname = kstrdup(ap->logname, GFP_KERNEL); - if (!mp->m_logname) - goto out_free_rtname; - } - - if (ap->flags & XFSMNT_WSYNC) - mp->m_flags |= XFS_MOUNT_WSYNC; -#if XFS_BIG_INUMS - if (ap->flags & XFSMNT_INO64) { - mp->m_flags |= XFS_MOUNT_INO64; - mp->m_inoadd = XFS_INO64_OFFSET; - } -#endif - if (ap->flags & XFSMNT_RETERR) - mp->m_flags |= XFS_MOUNT_RETERR; - if (ap->flags & XFSMNT_NOALIGN) - mp->m_flags |= XFS_MOUNT_NOALIGN; - if (ap->flags & XFSMNT_SWALLOC) - mp->m_flags |= XFS_MOUNT_SWALLOC; - if (ap->flags & XFSMNT_OSYNCISOSYNC) - mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC; - if (ap->flags & XFSMNT_32BITINODES) - mp->m_flags |= XFS_MOUNT_32BITINODES; - - if (ap->flags & XFSMNT_IOSIZE) { - if (ap->iosizelog > XFS_MAX_IO_LOG || - ap->iosizelog < XFS_MIN_IO_LOG) { - cmn_err(CE_WARN, - "XFS: invalid log iosize: %d [not %d-%d]", - ap->iosizelog, XFS_MIN_IO_LOG, - XFS_MAX_IO_LOG); - return XFS_ERROR(EINVAL); - } - - mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; - mp->m_readio_log = mp->m_writeio_log = ap->iosizelog; - } - - if (ap->flags & XFSMNT_IKEEP) - mp->m_flags |= XFS_MOUNT_IKEEP; - if (ap->flags & XFSMNT_DIRSYNC) - mp->m_flags |= XFS_MOUNT_DIRSYNC; - if (ap->flags & XFSMNT_ATTR2) - mp->m_flags |= XFS_MOUNT_ATTR2; - if (ap->flags & XFSMNT_NOATTR2) - mp->m_flags |= XFS_MOUNT_NOATTR2; - - if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE) - mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; - - /* - * no recovery flag requires a read-only mount - */ - if (ap->flags & XFSMNT_NORECOVERY) { - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { - cmn_err(CE_WARN, - "XFS: tried to mount a FS read-write without recovery!"); - return XFS_ERROR(EINVAL); - } - mp->m_flags |= XFS_MOUNT_NORECOVERY; - } - - if (ap->flags & XFSMNT_NOUUID) - mp->m_flags |= XFS_MOUNT_NOUUID; - if (ap->flags & XFSMNT_BARRIER) - mp->m_flags |= XFS_MOUNT_BARRIER; - else - mp->m_flags &= ~XFS_MOUNT_BARRIER; - - if (ap->flags2 & XFSMNT2_FILESTREAMS) - mp->m_flags |= XFS_MOUNT_FILESTREAMS; - - if (ap->flags & XFSMNT_DMAPI) - mp->m_flags |= XFS_MOUNT_DMAPI; - return 0; - - - out_free_rtname: - kfree(mp->m_rtname); - out_free_fsname: - kfree(mp->m_fsname); - out: - return error; -} - -/* - * This function fills in xfs_mount_t fields based on mount args. * Note: the superblock _has_ now been read in. 
*/ STATIC int xfs_finish_flags( - struct xfs_mount_args *ap, struct xfs_mount *mp) { int ronly = (mp->m_flags & XFS_MOUNT_RDONLY); /* Fail a mount where the logbuf is smaller then the log stripe */ if (xfs_sb_version_haslogv2(&mp->m_sb)) { - if ((ap->logbufsize <= 0) && - (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) { + if (mp->m_logbsize <= 0 && + mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) { mp->m_logbsize = mp->m_sb.sb_logsunit; - } else if (ap->logbufsize > 0 && - ap->logbufsize < mp->m_sb.sb_logsunit) { + } else if (mp->m_logbsize > 0 && + mp->m_logbsize < mp->m_sb.sb_logsunit) { cmn_err(CE_WARN, "XFS: logbuf size must be greater than or equal to log stripe size"); return XFS_ERROR(EINVAL); } } else { /* Fail a mount if the logbuf is larger than 32K */ - if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) { + if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { cmn_err(CE_WARN, "XFS: logbuf size for version 1 logs must be 16K or 32K"); return XFS_ERROR(EINVAL); @@ -1468,7 +1372,7 @@ xfs_finish_flags( * told by noattr2 to turn it off */ if (xfs_sb_version_hasattr2(&mp->m_sb) && - !(ap->flags & XFSMNT_NOATTR2)) + !(mp->m_flags & XFS_MOUNT_NOATTR2)) mp->m_flags |= XFS_MOUNT_ATTR2; /* @@ -1480,6 +1384,7 @@ xfs_finish_flags( return XFS_ERROR(EROFS); } +#if 0 /* shared mounts were never supported on Linux */ /* * check for shared mount. */ @@ -1502,25 +1407,11 @@ xfs_finish_flags( /* * Shared XFS V0 can't deal with DMI. Return EINVAL. */ - if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI)) + if (mp->m_sb.sb_shared_vn == 0 && + (mp->m_flags & XFS_MOUNT_DMAPI)) return XFS_ERROR(EINVAL); } - - if (ap->flags & XFSMNT_UQUOTA) { - mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); - if (ap->flags & XFSMNT_UQUOTAENF) - mp->m_qflags |= XFS_UQUOTA_ENFD; - } - - if (ap->flags & XFSMNT_GQUOTA) { - mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); - if (ap->flags & XFSMNT_GQUOTAENF) - mp->m_qflags |= XFS_OQUOTA_ENFD; - } else if (ap->flags & XFSMNT_PQUOTA) { - mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); - if (ap->flags & XFSMNT_PQUOTAENF) - mp->m_qflags |= XFS_OQUOTA_ENFD; - } +#endif return 0; } @@ -1533,16 +1424,12 @@ xfs_fs_fill_super( { struct inode *root; struct xfs_mount *mp = NULL; - struct xfs_mount_args *args; int flags = 0, error = ENOMEM; - - args = xfs_args_allocate(sb, silent); - if (!args) - return -ENOMEM; + char *mtpt = NULL; mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); if (!mp) - goto out_free_args; + goto out; spin_lock_init(&mp->m_sb_lock); mutex_init(&mp->m_growlock); @@ -1554,12 +1441,9 @@ xfs_fs_fill_super( mp->m_super = sb; sb->s_fs_info = mp; - if (sb->s_flags & MS_RDONLY) - mp->m_flags |= XFS_MOUNT_RDONLY; - - error = xfs_parseargs(mp, (char *)data, args, 0); + error = xfs_parseargs(mp, (char *)data, &mtpt); if (error) - goto out_free_mp; + goto out_free_fsname; sb_min_blocksize(sb, BBSIZE); sb->s_xattr = xfs_xattr_handlers; @@ -1567,33 +1451,28 @@ xfs_fs_fill_super( sb->s_qcop = &xfs_quotactl_operations; sb->s_op = &xfs_super_operations; - error = xfs_dmops_get(mp, args); + error = xfs_dmops_get(mp); if (error) - goto out_free_mp; - error = xfs_qmops_get(mp, args); + goto out_free_fsname; + error = xfs_qmops_get(mp); if (error) goto out_put_dmops; - if (args->flags & XFSMNT_QUIET) + if (silent) flags |= XFS_MFSI_QUIET; - error = xfs_open_devices(mp, args); + error = xfs_open_devices(mp); if (error) goto out_put_qmops; if (xfs_icsb_init_counters(mp)) mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; - /* - * Setup flags based on mount(2) options and then the 
superblock - */ - error = xfs_start_flags(args, mp); - if (error) - goto out_free_fsname; error = xfs_readsb(mp, flags); if (error) - goto out_free_fsname; - error = xfs_finish_flags(args, mp); + goto out_destroy_counters; + + error = xfs_finish_flags(mp); if (error) goto out_free_sb; @@ -1612,7 +1491,7 @@ xfs_fs_fill_super( if (error) goto out_filestream_unmount; - XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname); + XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname); sb->s_dirt = 1; sb->s_magic = XFS_SB_MAGIC; @@ -1641,27 +1520,27 @@ xfs_fs_fill_super( if (error) goto fail_vnrele; - xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); + kfree(mtpt); - kfree(args); + xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); return 0; out_filestream_unmount: xfs_filestream_unmount(mp); out_free_sb: xfs_freesb(mp); - out_free_fsname: - xfs_free_fsname(mp); + out_destroy_counters: xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); out_put_qmops: xfs_qmops_put(mp); out_put_dmops: xfs_dmops_put(mp); - out_free_mp: + out_free_fsname: + xfs_free_fsname(mp); + kfree(mtpt); kfree(mp); - out_free_args: - kfree(args); + out: return -error; fail_vnrele: diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index db1986a..5b198d1 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -20,7 +20,6 @@ #include "xfs_bit.h" #include "xfs_log.h" #include "xfs_inum.h" -#include "xfs_clnt.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index eea2e60..9556df9 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -20,7 +20,6 @@ #include "xfs_bit.h" #include "xfs_log.h" #include "xfs_inum.h" -#include "xfs_clnt.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h deleted file mode 100644 index d2ce5dd..0000000 --- a/fs/xfs/xfs_clnt.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_CLNT_H__ -#define __XFS_CLNT_H__ - -/* - * XFS arguments structure, constructed from the arguments we - * are passed via the mount system call. - * - * NOTE: The mount system call is handled differently between - * Linux and IRIX. In IRIX we worked work with a binary data - * structure coming in across the syscall interface from user - * space (the mount userspace knows about each filesystem type - * and the set of valid options for it, and converts the users - * argument string into a binary structure _before_ making the - * system call), and the ABI issues that this implies. - * - * In Linux, we are passed a comma separated set of options; - * ie. a NULL terminated string of characters. 
Userspace mount - * code does not have any knowledge of mount options expected by - * each filesystem type and so each filesystem parses its mount - * options in kernel space. - * - * For the Linux port, we kept this structure pretty much intact - * and use it internally (because the existing code groks it). - */ -struct xfs_mount_args { - int flags; /* flags -> see XFSMNT_... macros below */ - int flags2; /* flags -> see XFSMNT2_... macros below */ - int logbufs; /* Number of log buffers, -1 to default */ - int logbufsize; /* Size of log buffers, -1 to default */ - char fsname[MAXNAMELEN+1]; /* data device name */ - char rtname[MAXNAMELEN+1]; /* realtime device filename */ - char logname[MAXNAMELEN+1]; /* journal device filename */ - char mtpt[MAXNAMELEN+1]; /* filesystem mount point */ - int sunit; /* stripe unit (BBs) */ - int swidth; /* stripe width (BBs), multiple of sunit */ - uchar_t iosizelog; /* log2 of the preferred I/O size */ - int ihashsize; /* inode hash table size (buckets) */ -}; - -/* - * XFS mount option flags -- args->flags1 - */ -#define XFSMNT_ATTR2 0x00000001 /* allow ATTR2 EA format */ -#define XFSMNT_WSYNC 0x00000002 /* safe mode nfs mount - * compatible */ -#define XFSMNT_INO64 0x00000004 /* move inode numbers up - * past 2^32 */ -#define XFSMNT_UQUOTA 0x00000008 /* user quota accounting */ -#define XFSMNT_PQUOTA 0x00000010 /* IRIX prj quota accounting */ -#define XFSMNT_UQUOTAENF 0x00000020 /* user quota limit - * enforcement */ -#define XFSMNT_PQUOTAENF 0x00000040 /* IRIX project quota limit - * enforcement */ -#define XFSMNT_QUIET 0x00000080 /* don't report mount errors */ -#define XFSMNT_NOALIGN 0x00000200 /* don't allocate at - * stripe boundaries*/ -#define XFSMNT_RETERR 0x00000400 /* return error to user */ -#define XFSMNT_NORECOVERY 0x00000800 /* no recovery, implies - * read-only mount */ -#define XFSMNT_SHARED 0x00001000 /* shared XFS mount */ -#define XFSMNT_IOSIZE 0x00002000 /* optimize for I/O size */ -#define XFSMNT_OSYNCISOSYNC 0x00004000 /* o_sync is REALLY o_sync */ - /* (osyncisdsync is default) */ -#define XFSMNT_NOATTR2 0x00008000 /* turn off ATTR2 EA format */ -#define XFSMNT_32BITINODES 0x00200000 /* restrict inodes to 32 - * bits of address space */ -#define XFSMNT_GQUOTA 0x00400000 /* group quota accounting */ -#define XFSMNT_GQUOTAENF 0x00800000 /* group quota limit - * enforcement */ -#define XFSMNT_NOUUID 0x01000000 /* Ignore fs uuid */ -#define XFSMNT_DMAPI 0x02000000 /* enable dmapi/xdsm */ -#define XFSMNT_BARRIER 0x04000000 /* use write barriers */ -#define XFSMNT_IKEEP 0x08000000 /* inode cluster delete */ -#define XFSMNT_SWALLOC 0x10000000 /* turn on stripe width - * allocation */ -#define XFSMNT_DIRSYNC 0x40000000 /* sync creat,link,unlink,rename - * symlink,mkdir,rmdir,mknod */ -#define XFSMNT_FLAGS2 0x80000000 /* more flags set in flags2 */ - -/* - * XFS mount option flags -- args->flags2 - */ -#define XFSMNT2_COMPAT_IOSIZE 0x00000001 /* don't report large preferred - * I/O size in stat(2) */ -#define XFSMNT2_FILESTREAMS 0x00000002 /* enable the filestreams - * allocator */ - -#endif /* __XFS_CLNT_H__ */ diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c index a1e55fb..e71e258 100644 --- a/fs/xfs/xfs_dmops.c +++ b/fs/xfs/xfs_dmops.c @@ -25,7 +25,6 @@ #include "xfs_inum.h" #include "xfs_ag.h" #include "xfs_mount.h" -#include "xfs_clnt.h" static struct xfs_dmops xfs_dmcore_stub = { @@ -38,9 +37,9 @@ static struct xfs_dmops xfs_dmcore_stub = { }; int -xfs_dmops_get(struct xfs_mount *mp, struct xfs_mount_args *args) 
+xfs_dmops_get(struct xfs_mount *mp) { - if (args->flags & XFSMNT_DMAPI) { + if (mp->m_flags & XFS_MOUNT_DMAPI) { cmn_err(CE_WARN, "XFS: dmapi support not available in this kernel."); return EINVAL; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index d3b7525..e3f618c 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -515,9 +515,9 @@ extern void xfs_freesb(xfs_mount_t *); extern int xfs_fs_writable(xfs_mount_t *); extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t); -extern int xfs_dmops_get(struct xfs_mount *, struct xfs_mount_args *); +extern int xfs_dmops_get(struct xfs_mount *); extern void xfs_dmops_put(struct xfs_mount *); -extern int xfs_qmops_get(struct xfs_mount *, struct xfs_mount_args *); +extern int xfs_qmops_get(struct xfs_mount *); extern void xfs_qmops_put(struct xfs_mount *); extern struct xfs_dmops xfs_dmcore_xfs; diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c index a294e58..27f8058 100644 --- a/fs/xfs/xfs_qmops.c +++ b/fs/xfs/xfs_qmops.c @@ -28,7 +28,6 @@ #include "xfs_mount.h" #include "xfs_quota.h" #include "xfs_error.h" -#include "xfs_clnt.h" STATIC struct xfs_dquot * @@ -131,9 +130,9 @@ static struct xfs_qmops xfs_qmcore_stub = { }; int -xfs_qmops_get(struct xfs_mount *mp, struct xfs_mount_args *args) +xfs_qmops_get(struct xfs_mount *mp) { - if (args->flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA)) { + if (XFS_IS_QUOTA_RUNNING(mp)) { #ifdef CONFIG_XFS_QUOTA mp->m_qm_ops = &xfs_qmcore_xfs; #else diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 883dd0f..305d9f3 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -49,7 +49,6 @@ #include "xfs_extfree_item.h" #include "xfs_acl.h" #include "xfs_attr.h" -#include "xfs_clnt.h" #include "xfs_mru_cache.h" #include "xfs_filestream.h" #include "xfs_fsops.h" -- cgit v0.10.2 From 469fc23d5dd172665c69099db776c39bb1b70f92 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:54:57 +1100 Subject: [XFS] fix the noquota mount option Noquota should clear all mount options, and not just user and group quota. Probably doesn't matter very much in real life. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32372a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 5638a99..62d4858 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -355,6 +355,7 @@ xfs_parseargs( } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | + XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_QUOTA) || !strcmp(this_char, MNTOPT_UQUOTA) || -- cgit v0.10.2 From 1ec7944beb6f3c29f1e58a66422130133727e9e1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:55:08 +1100 Subject: [XFS] fix biosize option iosizelog shouldn't be the same as iosize but the logarithm of it. Then again the current biosize option doesn't make much sense to me as it doesn't set the preferred I/O size as mentioned in the comment next to it but rather the allocation size and thus is identical to the allocsize option (except for the missing logarithm). It's also not documented in Documentation/filesystems/xfs.txt or the mount manpage. 
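The point of the fix is that iosizelog must hold the base-2 logarithm of the byte count, not the byte count itself, and for a power-of-two size ffs(iosize) - 1 is exactly that logarithm. A quick userspace check of the two expressions, using the ffs() from <strings.h> and assuming a biosize value of 65536 purely for illustration:

    #include <stdio.h>
    #include <stdint.h>
    #include <strings.h>    /* ffs() */

    int main(void)
    {
            unsigned int iosize = 65536;            /* e.g. a biosize=65536 mount option */

            uint8_t wrong = (uint8_t)iosize;        /* old code: truncates to 0 */
            int     right = ffs(iosize) - 1;        /* new code: log2(65536) = 16 */

            printf("iosize=%u  (uint8_t)iosize=%u  ffs(iosize)-1=%d\n",
                   iosize, (unsigned)wrong, right);
            return 0;
    }

Casting 65536 to uint8_t silently truncates it to 0, while ffs(65536) - 1 yields 16, the intended log2 value.
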
SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32373a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 62d4858..c3d004b 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -270,7 +270,7 @@ xfs_parseargs( return EINVAL; } iosize = simple_strtoul(value, &eov, 10); - iosizelog = (uint8_t) iosize; + iosizelog = ffs(iosize) - 1; } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { if (!value || !*value) { cmn_err(CE_WARN, -- cgit v0.10.2 From 2b7035fd7473c799ca3372092d72c768c7db329d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 17:55:18 +1100 Subject: [XFS] Trivial xfs_remove comment fixup The dp to ip comment should be for the unconditional xfs_droplink call, and the "." link obviously only exists for directories, so it should be in the is_dir conditional. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32374a Signed-off-by: Christoph Hellwig Signed-off-by: Donald Douwsma Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 1d15a32..1c89011 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2009,7 +2009,7 @@ xfs_remove( goto out_bmap_cancel; /* - * Drop the link from dp to ip. + * Drop the "." link from ip to self. */ error = xfs_droplink(tp, ip); if (error) @@ -2024,7 +2024,7 @@ xfs_remove( } /* - * Drop the "." link from ip to self. + * Drop the link from dp to ip. */ error = xfs_droplink(tp, ip); if (error) -- cgit v0.10.2 From 56e73ec47d749047f441e6b9d60d964535d31c3b Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:55:27 +1100 Subject: [XFS] Can't lock inodes in radix tree preload region When we are inside a radix tree preload region, we cannot sleep. Recently we moved the inode locking inside the preload region for the inode radix tree. Fix that, and fix a missed unlock in another error path in the same code at the same time. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32385a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Christoph Hellwig diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index a1f209b..377c0cd 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -159,18 +159,19 @@ xfs_iget_cache_miss( goto out_destroy; } + if (lock_flags) + xfs_ilock(ip, lock_flags); + /* * Preload the radix tree so we can insert safely under the - * write spinlock. + * write spinlock. Note that we cannot sleep inside the preload + * region. */ if (radix_tree_preload(GFP_KERNEL)) { error = EAGAIN; - goto out_destroy; + goto out_unlock; } - if (lock_flags) - xfs_ilock(ip, lock_flags); - mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); first_index = agino & mask; write_lock(&pag->pag_ici_lock); @@ -181,7 +182,7 @@ xfs_iget_cache_miss( WARN_ON(error != -EEXIST); XFS_STATS_INC(xs_ig_dup); error = EAGAIN; - goto out_unlock; + goto out_preload_end; } /* These values _must_ be set before releasing the radix tree lock! 
*/ @@ -193,9 +194,12 @@ xfs_iget_cache_miss( *ipp = ip; return 0; -out_unlock: +out_preload_end: write_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); +out_unlock: + if (lock_flags) + xfs_iunlock(ip, lock_flags); out_destroy: xfs_idestroy(ip); return error; -- cgit v0.10.2 From 455486b9ccdd0a1d7432a03302f549b1c917c181 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 18:03:14 +1100 Subject: [XFS] avoid all reclaimable inodes in xfs_sync_inodes_ag If we are syncing data in xfs_sync_inodes_ag(), the VFS inode must still be referencable as the dirty data state is carried on the VFS inode. hence if we can't get a reference via igrab(), the inode must be in reclaim which implies that it has no dirty data attached. Leave such inodes to the reclaim code to flush the dirty inode state to disk and so avoid attempting to access the VFS inode when it may not exist in xfs_sync_inodes_ag(). Version 4: o don't reference linux inode until after igrab() succeeds Version 3: o converted unlock/rele to an xfs_iput() call. Version 2: o change igrab logic to be more linear o remove initial reclaimable inode check now that we are using igrab() failure to find reclaimable inodes o assert that igrab failure occurs only on reclaimable inodes o clean up inode locking - only grab the iolock if we are doing a SYNC_DELWRI call and we have a dirty inode. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32391a Signed-off-by: David Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Peter Leckie Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index ee1648b..fb5cca3 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -63,25 +63,16 @@ xfs_sync_inodes_ag( int error = 0; int last_error = 0; int fflag = XFS_B_ASYNC; - int lock_flags = XFS_ILOCK_SHARED; if (flags & SYNC_DELWRI) fflag = XFS_B_DELWRI; if (flags & SYNC_WAIT) fflag = 0; /* synchronous overrides all */ - if (flags & SYNC_DELWRI) { - /* - * We need the I/O lock if we're going to call any of - * the flush/inval routines. - */ - lock_flags |= XFS_IOLOCK_SHARED; - } - do { struct inode *inode; - boolean_t inode_refed; xfs_inode_t *ip = NULL; + int lock_flags = XFS_ILOCK_SHARED; /* * use a gang lookup to find the next inode in the tree @@ -109,22 +100,6 @@ xfs_sync_inodes_ag( break; } - /* - * skip inodes in reclaim. Let xfs_syncsub do that for - * us so we don't need to worry. - */ - if (xfs_iflags_test(ip, (XFS_IRECLAIM|XFS_IRECLAIMABLE))) { - read_unlock(&pag->pag_ici_lock); - continue; - } - - /* bad inodes are dealt with elsewhere */ - inode = VFS_I(ip); - if (is_bad_inode(inode)) { - read_unlock(&pag->pag_ici_lock); - continue; - } - /* nothing to sync during shutdown */ if (XFS_FORCED_SHUTDOWN(mp)) { read_unlock(&pag->pag_ici_lock); @@ -132,42 +107,34 @@ xfs_sync_inodes_ag( } /* - * If we can't get a reference on the VFS_I, the inode must be - * in reclaim. If we can get the inode lock without blocking, - * it is safe to flush the inode because we hold the tree lock - * and xfs_iextract will block right now. Hence if we lock the - * inode while holding the tree lock, xfs_ireclaim() is - * guaranteed to block on the inode lock we now hold and hence - * it is safe to reference the inode until we drop the inode - * locks completely. + * If we can't get a reference on the inode, it must be + * in reclaim. Leave it for the reclaim code to flush. 
*/ - inode_refed = B_FALSE; - if (igrab(inode)) { - read_unlock(&pag->pag_ici_lock); - xfs_ilock(ip, lock_flags); - inode_refed = B_TRUE; - } else { - if (!xfs_ilock_nowait(ip, lock_flags)) { - /* leave it to reclaim */ - read_unlock(&pag->pag_ici_lock); - continue; - } + inode = VFS_I(ip); + if (!igrab(inode)) { read_unlock(&pag->pag_ici_lock); + continue; + } + read_unlock(&pag->pag_ici_lock); + + /* bad inodes are dealt with elsewhere */ + if (is_bad_inode(inode)) { + IRELE(ip); + continue; } /* * If we have to flush data or wait for I/O completion - * we need to drop the ilock that we currently hold. - * If we need to drop the lock, insert a marker if we - * have not already done so. + * we need to hold the iolock. */ if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) { - xfs_iunlock(ip, XFS_ILOCK_SHARED); + xfs_ilock(ip, XFS_IOLOCK_SHARED); + lock_flags |= XFS_IOLOCK_SHARED; error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); if (flags & SYNC_IOWAIT) vn_iowait(ip); - xfs_ilock(ip, XFS_ILOCK_SHARED); } + xfs_ilock(ip, XFS_ILOCK_SHARED); if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) { if (flags & SYNC_WAIT) { @@ -183,13 +150,7 @@ xfs_sync_inodes_ag( xfs_ifunlock(ip); } } - - if (lock_flags) - xfs_iunlock(ip, lock_flags); - - if (inode_refed) { - IRELE(ip); - } + xfs_iput(ip, lock_flags); if (error) last_error = error; -- cgit v0.10.2 From c679eef0520eb3c2c731fce505e61b8ef9469aac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 18:04:13 +1100 Subject: [XFS] stop using xfs_itobp in xfs_bulkstat xfs_bulkstat only wants the dinode, offset and buffer from a given inode number. Instead of using xfs_itobp on a fake inode which is complicated and currently leads to leaks of the security data just use xfs_inotobp which is designed to do exactly the kind of lookup xfs_bulkstat wants. The only thing that's missing in xfs_inotobp is a flags paramter that let's us pass down XFS_IMAP_BULKSTAT, but that can easily added. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32397a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c83f699..35e4191 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -222,25 +222,26 @@ xfs_imap_to_bp( * Use xfs_imap() to determine the size and location of the * buffer to read from disk. */ -STATIC int +int xfs_inotobp( xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, xfs_dinode_t **dipp, xfs_buf_t **bpp, - int *offset) + int *offset, + uint imap_flags) { xfs_imap_t imap; xfs_buf_t *bp; int error; imap.im_blkno = 0; - error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); + error = xfs_imap(mp, tp, ino, &imap, imap_flags | XFS_IMAP_LOOKUP); if (error) return error; - error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0); + error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); if (error) return error; @@ -792,7 +793,7 @@ xfs_dic2xflags( /* * Allocate and initialise an xfs_inode. */ -struct xfs_inode * +STATIC struct xfs_inode * xfs_inode_alloc( struct xfs_mount *mp, xfs_ino_t ino) @@ -2046,7 +2047,7 @@ xfs_iunlink_remove( } next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino); error = xfs_inotobp(mp, tp, next_ino, &last_dip, - &last_ibp, &last_offset); + &last_ibp, &last_offset, 0); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. 
Returning error.", diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index a5aeb9c..5d12cfe 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -158,7 +158,7 @@ typedef struct xfs_icdinode { #define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ /* - * Flags for xfs_itobp(), xfs_imap() and xfs_dilocate(). + * Flags for xfs_inotobp, xfs_itobp(), xfs_imap() and xfs_dilocate(). */ #define XFS_IMAP_LOOKUP 0x1 #define XFS_IMAP_BULKSTAT 0x2 @@ -514,7 +514,6 @@ int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, xfs_fsize_t, int, int); int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); -struct xfs_inode * xfs_inode_alloc(struct xfs_mount *, xfs_ino_t); void xfs_idestroy(xfs_inode_t *); void xfs_iextract(xfs_inode_t *); void xfs_iext_realloc(xfs_inode_t *, int, int); @@ -531,6 +530,9 @@ void xfs_mark_inode_dirty_sync(xfs_inode_t *); #endif /* __KERNEL__ */ +int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, + xfs_ino_t, struct xfs_dinode **, + struct xfs_buf **, int *, uint); int xfs_itobp(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, struct xfs_dinode **, struct xfs_buf **, xfs_daddr_t, uint, uint); diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 42a214b..3511803 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -359,7 +359,6 @@ xfs_bulkstat( int ubused; /* bytes used by formatter */ xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */ xfs_dinode_t *dip; /* ptr into bp for specific inode */ - xfs_inode_t *ip; /* ptr to in-core inode struct */ /* * Get the last inode value, see if there's nothing to do. @@ -585,6 +584,8 @@ xfs_bulkstat( if (flags & (BULKSTAT_FG_QUICK | BULKSTAT_FG_INLINE)) { + int offset; + ino = XFS_AGINO_TO_INO(mp, agno, agino); bno = XFS_AGB_TO_DADDR(mp, agno, @@ -595,19 +596,13 @@ xfs_bulkstat( */ if (bp) xfs_buf_relse(bp); - ip = xfs_inode_alloc(mp, ino); - if (!ip) { - bp = NULL; - rval = ENOMEM; - break; - } - error = xfs_itobp(mp, NULL, ip, - &dip, &bp, bno, - XFS_IMAP_BULKSTAT, - XFS_BUF_LOCK); + + error = xfs_inotobp(mp, NULL, ino, &dip, + &bp, &offset, + XFS_IMAP_BULKSTAT); + if (!error) - clustidx = ip->i_boffset / mp->m_sb.sb_inodesize; - xfs_idestroy(ip); + clustidx = offset / mp->m_sb.sb_inodesize; if (XFS_TEST_ERROR(error != 0, mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK, XFS_RANDOM_BULKSTAT_READ_CHUNK)) { -- cgit v0.10.2 From 087e3b0460c367d0f4a5b71d7b013968ae23b588 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 18:24:37 +1100 Subject: Inode: export symbol destroy_inode To make sure we free the security data inodes need to be freed using the proper VFS helper (which we also need to export for this). We mark these inodes bad so we can skip the flush path for them. Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: David Chinner diff --git a/fs/inode.c b/fs/inode.c index fbcf6c5..f84ba33 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -212,6 +212,7 @@ void destroy_inode(struct inode *inode) else kmem_cache_free(inode_cachep, (inode)); } +EXPORT_SYMBOL(destroy_inode); /* -- cgit v0.10.2 From 9ed0451ee0a13469f7b38e4ced8974036f6d114f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 18:26:04 +1100 Subject: [XFS] free partially initialized inodes using destroy_inode To make sure we free the security data inodes need to be freed using the proper VFS helper (which we also need to export for this). We mark these inodes bad so we can skip the flush path for them. 
SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32398a Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy Signed-off-by: David Chinner diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 377c0cd..837cae7 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -201,7 +201,7 @@ out_unlock: if (lock_flags) xfs_iunlock(ip, lock_flags); out_destroy: - xfs_idestroy(ip); + xfs_destroy_inode(ip); return error; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 35e4191..cd52282 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -898,18 +898,14 @@ xfs_iread( * know that this is a new incore inode. */ error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK); - if (error) { - xfs_idestroy(ip); - return error; - } + if (error) + goto out_destroy_inode; /* * If we got something that isn't an inode it means someone * (nfs or dmi) has a stale handle. */ if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) { - xfs_idestroy(ip); - xfs_trans_brelse(tp, bp); #ifdef DEBUG xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " "dip->di_core.di_magic (0x%x) != " @@ -917,7 +913,8 @@ xfs_iread( be16_to_cpu(dip->di_core.di_magic), XFS_DINODE_MAGIC); #endif /* DEBUG */ - return XFS_ERROR(EINVAL); + error = XFS_ERROR(EINVAL); + goto out_brelse; } /* @@ -931,14 +928,12 @@ xfs_iread( xfs_dinode_from_disk(&ip->i_d, &dip->di_core); error = xfs_iformat(ip, dip); if (error) { - xfs_idestroy(ip); - xfs_trans_brelse(tp, bp); #ifdef DEBUG xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " "xfs_iformat() returned error %d", error); #endif /* DEBUG */ - return error; + goto out_brelse; } } else { ip->i_d.di_magic = be16_to_cpu(dip->di_core.di_magic); @@ -1004,6 +999,12 @@ xfs_iread( xfs_trans_brelse(tp, bp); *ipp = ip; return 0; + + out_brelse: + xfs_trans_brelse(tp, bp); + out_destroy_inode: + xfs_destroy_inode(ip); + return error; } /* diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 5d12cfe..7f007ef 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -310,6 +310,23 @@ static inline struct inode *VFS_I(struct xfs_inode *ip) } /* + * Get rid of a partially initialized inode. + * + * We have to go through destroy_inode to make sure allocations + * from init_inode_always like the security data are undone. + * + * We mark the inode bad so that it takes the short cut in + * the reclaim path instead of going through the flush path + * which doesn't make sense for an inode that has never seen the + * light of day. + */ +static inline void xfs_destroy_inode(struct xfs_inode *ip) +{ + make_bad_inode(VFS_I(ip)); + return destroy_inode(VFS_I(ip)); +} + +/* * i_flags helper functions */ static inline void -- cgit v0.10.2 From 7ee49acfe54883f16d28d9486b789431a5804d18 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 18:26:51 +1100 Subject: [XFS] correctly select first log item to push Under heavy metadata load we are seeing log hangs. The AIL has items in it ready to be pushed, and they are within the push target window. However, we are not pushing them when the last pushed LSN is less than the LSN of the first log item on the AIL. This is a regression introduced by the AIL push cursor modifications. 
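The one-line change matters because of how list_for_each_entry() interacts with the code after the loop: a plain break lands on the unconditional lip = NULL, so the log item that was just found is discarded, whereas goto out skips that reset. The same control-flow shape, reduced to a plain array walk with a hypothetical find_first_ge() helper rather than the XFS function:

    #include <stdio.h>

    /*
     * Hypothetical stand-in for the AIL search: return the first element that
     * is >= target.  The cursor is the loop variable, just as lip is in the
     * kernel code; if the loop runs off the end the cursor is invalid and must
     * be reset to NULL.  A bare "break" on a match would fall straight into
     * that reset and throw the match away; "goto out" jumps over it.
     */
    static const int *find_first_ge(const int *items, int n, int target)
    {
            const int *cur;

            for (cur = items; cur < items + n; cur++) {
                    if (*cur >= target)
                            goto out;       /* keep the match */
            }
            cur = NULL;                     /* walked off the end: no match */
    out:
            return cur;
    }

    int main(void)
    {
            int lsns[] = { 10, 20, 30 };
            const int *hit = find_first_ge(lsns, 3, 15);

            printf("first item >= 15: %s\n", hit ? "found" : "none");
            return 0;
    }
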
SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32409a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy Signed-off-by: Tim Shimmin diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 67ee466..2d47f10 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -228,7 +228,7 @@ xfs_trans_ail_cursor_first( list_for_each_entry(lip, &ailp->xa_ail, li_ail) { if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) - break; + goto out; } lip = NULL; out: -- cgit v0.10.2 From ea5a3dc8356bf1cf27bab9a5a0da5dfbbb82013d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 30 Oct 2008 18:27:48 +1100 Subject: [XFS] kill sys_cred capable_cred has been unused for a while so we can kill it and sys_cred. That also means the cred argument to xfs_setattr and xfs_change_file_space can be removed now. SGI-PV: 988918 SGI-Modid: xfs-linux-melb:xfs-kern:32412a Signed-off-by: Christoph Hellwig Signed-off-by: Tim Shimmin Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h index 652721c..98da219 100644 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ b/fs/xfs/linux-2.6/xfs_cred.h @@ -27,12 +27,4 @@ typedef struct cred { /* EMPTY */ } cred_t; -extern struct cred *sys_cred; - -/* this is a hack.. (assumes sys_cred is the only cred_t in the system) */ -static inline int capable_cred(cred_t *cr, int cid) -{ - return (cr == sys_cred) ? 1 : capable(cid); -} - #endif /* __XFS_CRED_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index ef90e64..46e862b 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -43,10 +43,3 @@ xfs_param_t xfs_params = { .inherit_nodfrg = { 0, 1, 1 }, .fstrm_timer = { 1, 30*100, 3600*100}, }; - -/* - * Global system credential structure. 
- */ -static cred_t sys_cred_val; -cred_t *sys_cred = &sys_cred_val; - diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h index 2770b00..69f71ca 100644 --- a/fs/xfs/linux-2.6/xfs_globals.h +++ b/fs/xfs/linux-2.6/xfs_globals.h @@ -19,6 +19,5 @@ #define __XFS_GLOBALS_H__ extern uint64_t xfs_panic_mask; /* set to cause more panics */ -extern struct cred *sys_cred; #endif /* __XFS_GLOBALS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index d3438c7..b5ea3f2 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -691,8 +691,7 @@ xfs_ioc_space( if (ioflags & IO_INVIS) attr_flags |= XFS_ATTR_DMI; - error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, - NULL, attr_flags); + error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, attr_flags); return -error; } diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 37bb101..f78bc22 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -601,7 +601,7 @@ xfs_vn_setattr( struct dentry *dentry, struct iattr *iattr) { - return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL); + return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); } /* @@ -642,7 +642,7 @@ xfs_vn_fallocate( xfs_ilock(ip, XFS_IOLOCK_EXCL); error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, - 0, NULL, XFS_ATTR_NOLOCK); + 0, XFS_ATTR_NOLOCK); if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) new_size = offset + len; @@ -653,7 +653,7 @@ xfs_vn_fallocate( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; - error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL); + error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); } xfs_iunlock(ip, XFS_IOLOCK_EXCL); diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index b2f639a..8b3d1bd 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -758,7 +758,7 @@ xfs_acl_setmode( if (gap && nomask) iattr.ia_mode |= gap->ae_perm << 3; - return xfs_setattr(XFS_I(vp), &iattr, 0, sys_cred); + return xfs_setattr(XFS_I(vp), &iattr, 0); } /* diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 1c89011..34a1982 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -79,8 +79,7 @@ int xfs_setattr( struct xfs_inode *ip, struct iattr *iattr, - int flags, - cred_t *credp) + int flags) { xfs_mount_t *mp = ip->i_mount; struct inode *inode = VFS_I(ip); @@ -3362,7 +3361,6 @@ xfs_change_file_space( int cmd, xfs_flock64_t *bf, xfs_off_t offset, - cred_t *credp, int attr_flags) { xfs_mount_t *mp = ip->i_mount; @@ -3450,7 +3448,7 @@ xfs_change_file_space( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = startoffset; - error = xfs_setattr(ip, &iattr, attr_flags, credp); + error = xfs_setattr(ip, &iattr, attr_flags); if (error) return error; diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index e932a96..b1ae8e3 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -15,8 +15,7 @@ struct xfs_iomap; int xfs_open(struct xfs_inode *ip); -int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags, - struct cred *credp); +int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); #define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ @@ -44,8 +43,7 @@ int xfs_inode_flush(struct xfs_inode *ip, int flags); int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); int xfs_reclaim(struct xfs_inode 
*ip); int xfs_change_file_space(struct xfs_inode *ip, int cmd, - xfs_flock64_t *bf, xfs_off_t offset, - struct cred *credp, int attr_flags); + xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, struct xfs_inode *target_dp, struct xfs_name *target_name, struct xfs_inode *target_ip); -- cgit v0.10.2 From f3f0d7b026ae34d6ed5ae67cd4dd5909f9cd70a5 Mon Sep 17 00:00:00 2001 From: Tim Shimmin Date: Thu, 30 Oct 2008 18:30:09 +1100 Subject: [XFS] remove restricted chown parameter from xfs linux On Linux all filesystems are supposed to be operating under Posix' restricted chown. Restricted chown means it restricts chown to the owner unless you have CAP_FOWNER. NOTE: that 2 files outside of fs/xfs have been modified too for this change. Reviewed-by: Dave Chinner SGI-PV: 988919 SGI-Modid: 2.6.x-xfs-melb:linux:32413b Signed-off-by: Tim Shimmin Signed-off-by: Christoph Hellwig Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt index 0a1668b..9878f50 100644 --- a/Documentation/filesystems/xfs.txt +++ b/Documentation/filesystems/xfs.txt @@ -229,10 +229,6 @@ The following sysctls are available for the XFS filesystem: ISGID bit is cleared if the irix_sgid_inherit compatibility sysctl is set. - fs.xfs.restrict_chown (Min: 0 Default: 1 Max: 1) - Controls whether unprivileged users can use chown to "give away" - a file to another user. - fs.xfs.inherit_sync (Min: 0 Default: 1 Max: 1) Setting this to "1" will cause the "sync" flag set by the xfs_io(8) chattr command on a directory to be diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index c35da23a..fafeb48 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -730,7 +730,6 @@ static const struct trans_ctl_table trans_fs_quota_table[] = { }; static const struct trans_ctl_table trans_fs_xfs_table[] = { - { XFS_RESTRICT_CHOWN, "restrict_chown" }, { XFS_SGID_INHERIT, "irix_sgid_inherit" }, { XFS_SYMLINK_MODE, "irix_symlink_mode" }, { XFS_PANIC_MASK, "panic_mask" }, -- cgit v0.10.2 From e0b8e8b65d578f5d5538465dff8392cf02e1cc5d Mon Sep 17 00:00:00 2001 From: Tim Shimmin Date: Thu, 30 Oct 2008 18:30:48 +1100 Subject: [XFS] remove restricted chown parameter from xfs linux On Linux all filesystems are supposed to be operating under Posix' restricted chown. Restricted chown means it restricts chown to the owner unless you have CAP_FOWNER. NOTE: that 2 files outside of fs/xfs have been modified too for this change. Reviewed-by: Dave Chinner SGI-PV: 988919 SGI-Modid: xfs-linux-melb:xfs-kern:32413a Signed-off-by: Tim Shimmin Signed-off-by: Christoph Hellwig Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index 46e862b..2ae8b1c 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -26,7 +26,6 @@ */ xfs_param_t xfs_params = { /* MIN DFLT MAX */ - .restrict_chown = { 0, 1, 1 }, .sgid_inherit = { 0, 0, 1 }, .symlink_mode = { 0, 0, 1 }, .panic_mask = { 0, 0, 255 }, diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index b5ea3f2..d25694e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -1103,10 +1103,6 @@ xfs_ioctl_setattr( /* * Change file ownership. Must be the owner or privileged. 
- * If the system was configured with the "restricted_chown" - * option, the owner is not permitted to give away the file, - * and can change the group id only to a group of which he - * or she is a member. */ if (mask & FSX_PROJID) { /* diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index cc0f7b3..2147176 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -107,7 +107,6 @@ #undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ #endif -#define restricted_chown xfs_params.restrict_chown.val #define irix_sgid_inherit xfs_params.sgid_inherit.val #define irix_symlink_mode xfs_params.symlink_mode.val #define xfs_panic_mask xfs_params.panic_mask.val diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 7dacb5b..916c0ff 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -56,17 +56,6 @@ xfs_stats_clear_proc_handler( static ctl_table xfs_table[] = { { - .ctl_name = XFS_RESTRICT_CHOWN, - .procname = "restrict_chown", - .data = &xfs_params.restrict_chown.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &xfs_params.restrict_chown.min, - .extra2 = &xfs_params.restrict_chown.max - }, - { .ctl_name = XFS_SGID_INHERIT, .procname = "irix_sgid_inherit", .data = &xfs_params.sgid_inherit.val, diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h index 4aadb80..b9937d4 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.h +++ b/fs/xfs/linux-2.6/xfs_sysctl.h @@ -31,7 +31,6 @@ typedef struct xfs_sysctl_val { } xfs_sysctl_val_t; typedef struct xfs_param { - xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/ xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is * not a member of parent dir GID. */ xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */ @@ -68,7 +67,7 @@ typedef struct xfs_param { enum { /* XFS_REFCACHE_SIZE = 1 */ /* XFS_REFCACHE_PURGE = 2 */ - XFS_RESTRICT_CHOWN = 3, + /* XFS_RESTRICT_CHOWN = 3 */ XFS_SGID_INHERIT = 4, XFS_SYMLINK_MODE = 5, XFS_PANIC_MASK = 6, diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 34a1982..c45ea27 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -232,10 +232,6 @@ xfs_setattr( /* * Change file ownership. Must be the owner or privileged. - * If the system was configured with the "restricted_chown" - * option, the owner is not permitted to give away the file, - * and can change the group id only to a group of which he - * or she is a member. */ if (mask & (ATTR_UID|ATTR_GID)) { /* @@ -259,9 +255,8 @@ xfs_setattr( * shall be equal to either the group ID or one of the * supplementary group IDs of the calling process. */ - if (restricted_chown && - (iuid != uid || (igid != gid && - !in_group_p((gid_t)gid))) && + if ((iuid != uid || + (igid != gid && !in_group_p((gid_t)gid))) && !capable(CAP_CHOWN)) { code = XFS_ERROR(EPERM); goto error_return; @@ -455,10 +450,6 @@ xfs_setattr( /* * Change file ownership. Must be the owner or privileged. - * If the system was configured with the "restricted_chown" - * option, the owner is not permitted to give away the file, - * and can change the group id only to a group of which he - * or she is a member. 
*/ if (mask & (ATTR_UID|ATTR_GID)) { /* -- cgit v0.10.2 From 6bfb3d065f4c498c17a3a07f3dc08cedff53aff4 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 18:32:43 +1100 Subject: [XFS] Fix race when looking up reclaimable inodes If we get a race looking up a reclaimable inode, we can end up with the winner proceeding to use the inode before it has been completely re-initialised. This is a Bad Thing. Fix the race by checking whether we are still initialising the inode once we have a reference to it, and if so wait for the initialisation to complete before continuing. While there, fix a leaked reference count in the same code when encountering an unlinked inode and we are not doing a lookup for a create operation. SGI-PV: 987246 SGI-Modid: xfs-linux-melb:xfs-kern:32429a Signed-off-by: David Chinner Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 2147176..77d6ddc 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -77,6 +77,7 @@ #include #include #include +#include #include #include diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 837cae7..bf4dc5e 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -52,7 +52,7 @@ xfs_iget_cache_hit( int lock_flags) __releases(pag->pag_ici_lock) { struct xfs_mount *mp = ip->i_mount; - int error = 0; + int error = EAGAIN; /* * If INEW is set this inode is being set up * Pause and try again. */ if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { - error = EAGAIN; XFS_STATS_INC(xs_ig_frecycle); goto out_error; } @@ -73,7 +72,6 @@ xfs_iget_cache_hit( * error immediately so we don't remove it from the reclaim * list and potentially leak the inode. */ - if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { error = ENOENT; goto out_error; } @@ -91,27 +89,42 @@ xfs_iget_cache_hit( error = ENOMEM; goto out_error; } + + /* + * We must set the XFS_INEW flag before clearing the + * XFS_IRECLAIMABLE flag so that if a racing lookup does + * not find the XFS_IRECLAIMABLE above but has the igrab() + * below succeed we can safely check XFS_INEW to detect + * that this inode is still being initialised. + */ xfs_iflags_set(ip, XFS_INEW); xfs_iflags_clear(ip, XFS_IRECLAIMABLE); /* clear the radix tree reclaim flag as well. */ __xfs_inode_clear_reclaim_tag(mp, pag, ip); - read_unlock(&pag->pag_ici_lock); } else if (!igrab(VFS_I(ip))) { /* If the VFS inode is being torn down, pause and try again. */ - error = EAGAIN; XFS_STATS_INC(xs_ig_frecycle); goto out_error; - } else { - /* we've got a live one */ - read_unlock(&pag->pag_ici_lock); + } else if (xfs_iflags_test(ip, XFS_INEW)) { + /* + * We are racing with another cache hit that is + * currently recycling this inode out of the XFS_IRECLAIMABLE + * state. Wait for the initialisation to complete before + * continuing. + */ + wait_on_inode(VFS_I(ip)); } if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { error = ENOENT; - goto out; + iput(VFS_I(ip)); + goto out_error; } + /* We've got a live one.
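A minimal sketch of the ordering rule the race fix relies on, with hypothetical helper names (recycle_prepare() and lookup_hit() are illustrative only; the pag_ici_lock locking and retry logic of the real xfs_iget_cache_hit() are omitted):

static void
recycle_prepare(struct xfs_inode *ip)           /* recycling side */
{
        /* advertise "still initialising" before leaving the reclaim state */
        xfs_iflags_set(ip, XFS_INEW);
        xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
}

static void
lookup_hit(struct xfs_inode *ip)                /* racing lookup side */
{
        if (!igrab(VFS_I(ip)))
                return;                         /* VFS inode being torn down, retry */
        if (xfs_iflags_test(ip, XFS_INEW))
                wait_on_inode(VFS_I(ip));       /* wait for initialisation to finish */
        /* the inode is fully set up from here on */
}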
*/ + read_unlock(&pag->pag_ici_lock); + if (lock_flags != 0) xfs_ilock(ip, lock_flags); @@ -122,7 +135,6 @@ xfs_iget_cache_hit( out_error: read_unlock(&pag->pag_ici_lock); -out: return error; } -- cgit v0.10.2 From 91b777125175077fb74025608dba87f100586c62 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 31 Oct 2008 15:50:04 +1100 Subject: CRED: Wrap task credential accesses in the XFS filesystem Wrap access to task credentials so that they can be separated more easily from the task_struct during the introduction of COW creds. Change most current->(|e|s|fs)[ug]id to current_(|e|s|fs)[ug]id(). Change some task->e?[ug]id to task_e?[ug]id(). In some places it makes more sense to use RCU directly rather than a convenient wrapper; these will be addressed by later patches. Signed-off-by: David Howells Reviewed-by: James Morris Acked-by: Serge Hallyn diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h index 98da219..e279d00 100644 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ b/fs/xfs/linux-2.6/xfs_cred.h @@ -24,7 +24,7 @@ * Credentials */ typedef struct cred { - /* EMPTY */ + /* EMPTY */ } cred_t; #endif /* __XFS_CRED_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index d25694e..f1bd6c3 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -1006,7 +1006,7 @@ xfs_ioctl_setattr( * to the file owner ID, except in cases where the * CAP_FSETID capability is applicable. */ - if (current->fsuid != ip->i_d.di_uid && !capable(CAP_FOWNER)) { + if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) { code = XFS_ERROR(EPERM); goto error_return; } diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 8b3d1bd..a8cdd73 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -366,7 +366,7 @@ xfs_acl_allow_set( return ENOTDIR; if (vp->i_sb->s_flags & MS_RDONLY) return EROFS; - if (XFS_I(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER)) + if (XFS_I(vp)->i_d.di_uid != current_fsuid() && !capable(CAP_FOWNER)) return EPERM; return 0; } @@ -413,13 +413,13 @@ xfs_acl_access( switch (fap->acl_entry[i].ae_tag) { case ACL_USER_OBJ: seen_userobj = 1; - if (fuid != current->fsuid) + if (fuid != current_fsuid()) continue; matched.ae_tag = ACL_USER_OBJ; matched.ae_perm = allows; break; case ACL_USER: - if (fap->acl_entry[i].ae_id != current->fsuid) + if (fap->acl_entry[i].ae_id != current_fsuid()) continue; matched.ae_tag = ACL_USER; matched.ae_perm = allows; -- cgit v0.10.2 From 644c3567d16b7e53cf52ae98c4150d601c9eacfe Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 10 Nov 2008 16:50:24 +1100 Subject: [XFS] handle memory allocation failures during log initialisation When there is no memory left in the system, xfs_buf_get_noaddr() can fail. If this happens at mount time during xlog_alloc_log() we fail to catch the error and oops. Catch the error from xfs_buf_get_noaddr(), and allow other memory allocations to fail and catch those errors too. Report the error to the console and fail the mount with ENOMEM. Tested by manually injecting errors into xfs_buf_get_noaddr() and xlog_alloc_log(). 
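The fix described above takes the usual allocate-then-unwind shape; a compressed, hypothetical sketch under stated assumptions (struct foo, setup_foo() and the label are illustrative, not the real xlog_alloc_log() code, which frees several iclogs and buffers on the way out):

struct foo {
        void    *meta;
        void    *buf;
};

static struct foo *
setup_foo(void)
{
        struct foo      *f;

        f = kmem_zalloc(sizeof(*f), KM_MAYFAIL);        /* allow the allocation to fail */
        if (!f)
                return NULL;

        f->buf = kmem_zalloc(4096, KM_MAYFAIL);
        if (!f->buf)
                goto out_free_foo;                      /* unwind in reverse order */

        return f;

out_free_foo:
        kmem_free(f);
        return NULL;                                    /* caller reports ENOMEM */
}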
Version 2: o remove unnecessary casts of the returned pointer from kmem_zalloc() SGI-PV: 987246 Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 5184017..92c20a8 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -563,6 +563,11 @@ xfs_log_mount( } mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); + if (!mp->m_log) { + cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!"); + error = ENOMEM; + goto out; + } /* * Initialize the AIL now we have a log. @@ -601,6 +606,7 @@ xfs_log_mount( return 0; error: xfs_log_unmount_dealloc(mp); +out: return error; } /* xfs_log_mount */ @@ -1217,7 +1223,9 @@ xlog_alloc_log(xfs_mount_t *mp, int i; int iclogsize; - log = (xlog_t *)kmem_zalloc(sizeof(xlog_t), KM_SLEEP); + log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); + if (!log) + return NULL; log->l_mp = mp; log->l_targ = log_target; @@ -1249,6 +1257,8 @@ xlog_alloc_log(xfs_mount_t *mp, xlog_get_iclog_buffer_size(mp, log); bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); + if (!bp) + goto out_free_log; XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); @@ -1275,13 +1285,17 @@ xlog_alloc_log(xfs_mount_t *mp, iclogsize = log->l_iclog_size; ASSERT(log->l_iclog_size >= 4096); for (i=0; i < log->l_iclog_bufs; i++) { - *iclogp = (xlog_in_core_t *) - kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP); + *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL); + if (!*iclogp) + goto out_free_iclog; + iclog = *iclogp; iclog->ic_prev = prev_iclog; prev_iclog = iclog; bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp); + if (!bp) + goto out_free_iclog; if (!XFS_BUF_CPSEMA(bp)) ASSERT(0); XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); @@ -1323,6 +1337,25 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ return log; + +out_free_iclog: + for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { + prev_iclog = iclog->ic_next; + if (iclog->ic_bp) { + sv_destroy(&iclog->ic_force_wait); + sv_destroy(&iclog->ic_write_wait); + xfs_buf_free(iclog->ic_bp); + xlog_trace_iclog_dealloc(iclog); + } + kmem_free(iclog); + } + spinlock_destroy(&log->l_icloglock); + spinlock_destroy(&log->l_grant_lock); + xlog_trace_loggrant_dealloc(log); + xfs_buf_free(log->l_xbuf); +out_free_log: + kmem_free(log); + return NULL; } /* xlog_alloc_log */ -- cgit v0.10.2 From d44dab8d1cde8aeba1faf44a7654f90800feb7fc Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 10 Nov 2008 17:06:05 +1100 Subject: fs: xfs needs inode_wait to be exported Since wait_on_inode() references it. Signed-off-by: Stephen Rothwell Reviewed-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/inode.c b/fs/inode.c index f84ba33..098a244 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1334,6 +1334,7 @@ int inode_wait(void *word) schedule(); return 0; } +EXPORT_SYMBOL(inode_wait); /* * If we try to find an inode in the inode hash while it is being -- cgit v0.10.2 From cb4f0d1d4229f609f43c68acec69c7618ed72397 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 10 Nov 2008 17:11:18 +1100 Subject: [XFS] fix uninitialised variable bug in dquot release gcc on ARM warns about an using an uninitialised variable in xfs_qm_dqrele_all_inodes(). This is a real bug, but gcc on x86_64 is not reporting this warning so it went unnoticed. 
Fix the bug by bringing the inode radix tree walk code up to date with xfs_sync_inodes_ag(). SGI-PV: 987246 Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 9ff28e6..2c57a6e 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -1036,9 +1036,6 @@ xfs_qm_dqrele_inodes_ag( int nr_found; do { - boolean_t inode_refed; - struct inode *inode; - /* * use a gang lookup to find the next inode in the tree * as the tree is sparse and a gang lookup walks to find @@ -1053,27 +1050,37 @@ xfs_qm_dqrele_inodes_ag( break; } - /* update the index for the next lookup */ + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { + read_unlock(&pag->pag_ici_lock); + break; + } - /* skip quota inodes and those in reclaim */ - inode = VFS_I(ip); - if (!inode || ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { + /* skip quota inodes */ + if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) { ASSERT(ip->i_udquot == NULL); ASSERT(ip->i_gdquot == NULL); read_unlock(&pag->pag_ici_lock); continue; } - if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { - inode = igrab(inode); - read_unlock(&pag->pag_ici_lock); - if (!inode) - continue; - inode_refed = B_TRUE; - xfs_ilock(ip, XFS_ILOCK_EXCL); - } else { + + /* + * If we can't get a reference on the inode, it must be + * in reclaim. Leave it for the reclaim code to flush. + */ + if (!igrab(VFS_I(ip))) { read_unlock(&pag->pag_ici_lock); + continue; } + read_unlock(&pag->pag_ici_lock); + + xfs_ilock(ip, XFS_ILOCK_EXCL); if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { xfs_qm_dqrele(ip->i_udquot); ip->i_udquot = NULL; @@ -1083,9 +1090,8 @@ xfs_qm_dqrele_inodes_ag( xfs_qm_dqrele(ip->i_gdquot); ip->i_gdquot = NULL; } - xfs_iunlock(ip, XFS_ILOCK_EXCL); - if (inode_refed) - IRELE(ip); + xfs_iput(ip, XFS_ILOCK_EXCL); + } while (nr_found); } -- cgit v0.10.2 From 6307091fe69ae74747298bdcaf43119ad67bda3a Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 10 Nov 2008 17:13:23 +1100 Subject: [XFS] Avoid using inodes that haven't been completely initialised The radix tree walks in xfs_sync_inodes_ag and xfs_qm_dqrele_all_inodes() can find inodes that are still undergoing initialisation. Avoid them by checking for the XFS_INEW() flag once we have a reference on the inode. This flag is cleared once the inode is properly initialised.
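Both this change and the previous one adjust the same per-AG radix-tree walk; a condensed sketch of that pattern follows, with walk_ag_inodes() and the process() callback as hypothetical names (the gang lookup, index-wrap guard and igrab() handling are modelled on the diffs, everything else is simplified):

static void
walk_ag_inodes(
        struct xfs_mount        *mp,
        struct xfs_perag        *pag,
        void                    (*process)(struct xfs_inode *))
{
        uint32_t                first_index = 0;
        int                     nr_found;

        do {
                struct xfs_inode        *ip;

                read_lock(&pag->pag_ici_lock);
                nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
                                        (void **)&ip, first_index, 1);
                if (!nr_found) {
                        read_unlock(&pag->pag_ici_lock);
                        break;
                }

                /* advance, and stop if the AG-relative index wraps */
                first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
                if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
                        read_unlock(&pag->pag_ici_lock);
                        break;
                }

                /* if we can't get a reference, the inode is in reclaim */
                if (!igrab(VFS_I(ip))) {
                        read_unlock(&pag->pag_ici_lock);
                        continue;
                }
                read_unlock(&pag->pag_ici_lock);

                /* skip inodes that are still being initialised */
                if (xfs_iflags_test(ip, XFS_INEW)) {
                        IRELE(ip);
                        continue;
                }

                process(ip);
                IRELE(ip);
        } while (nr_found);
}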
SGI-PV: 987246 Signed-off-by: Dave Chinner Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index f78bc22..69c98cf 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -780,7 +780,6 @@ xfs_setup_inode( inode->i_ino = ip->i_ino; inode->i_state = I_NEW|I_LOCK; inode_add_to_lists(ip->i_mount->m_super, inode); - ASSERT(atomic_read(&inode->i_count) == 1); inode->i_mode = ip->i_d.di_mode; inode->i_nlink = ip->i_d.di_nlink; diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index fb5cca3..d12d31b 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -117,8 +117,9 @@ xfs_sync_inodes_ag( } read_unlock(&pag->pag_ici_lock); - /* bad inodes are dealt with elsewhere */ - if (is_bad_inode(inode)) { + /* avoid new or bad inodes */ + if (is_bad_inode(inode) || + xfs_iflags_test(ip, XFS_INEW)) { IRELE(ip); continue; } diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 2c57a6e..68139b3 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -1080,6 +1080,12 @@ xfs_qm_dqrele_inodes_ag( } read_unlock(&pag->pag_ici_lock); + /* avoid new inodes though we shouldn't find any here */ + if (xfs_iflags_test(ip, XFS_INEW)) { + IRELE(ip); + continue; + } + xfs_ilock(ip, XFS_ILOCK_EXCL); if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { xfs_qm_dqrele(ip->i_udquot); -- cgit v0.10.2 From cc09c0dc57de7f7d2ed89d480b5653e5f6a32f2c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 17 Nov 2008 17:37:10 +1100 Subject: [XFS] Fix double free of log tickets When an I/O error occurs during an intermediate commit on a rolling transaction, xfs_trans_commit() will free the transaction structure and the related ticket. However, the duplicate transaction that gets used as the transaction continues still contains a pointer to the ticket. Hence when the duplicate transaction is cancelled and freed, we free the ticket a second time. Add reference counting to the ticket so that we hold an extra reference to the ticket over the transaction commit. We drop the extra reference once we have checked that the transaction commit did not return an error, thus avoiding a double free on commit error. Credit to Nick Piggin for tripping over the problem. SGI-PV: 989741 Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index db28905..c391221 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4292,9 +4292,15 @@ xfs_bmap_finish( * We have a new transaction, so we should return committed=1, * even though we're returning an error. 
*/ - if (error) { + if (error) return error; - } + + /* + * transaction commit worked ok so we can drop the extra ticket + * reference that we gained in xfs_trans_dup() + */ + xfs_log_ticket_put(ntp->t_ticket); + if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES, logcount))) return error; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index cd52282..b977100 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1782,8 +1782,14 @@ xfs_itruncate_finish( xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_trans_ihold(ntp, ip); - if (!error) - error = xfs_trans_reserve(ntp, 0, + if (error) + return error; + /* + * transaction commit worked ok so we can drop the extra ticket + * reference that we gained in xfs_trans_dup() + */ + xfs_log_ticket_put(ntp->t_ticket); + error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 92c20a8..4bf44ae 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -100,12 +100,11 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, /* local ticket functions */ -STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log, +STATIC xlog_ticket_t *xlog_ticket_alloc(xlog_t *log, int unit_bytes, int count, char clientid, uint flags); -STATIC void xlog_ticket_put(xlog_t *log, xlog_ticket_t *ticket); #if defined(DEBUG) STATIC void xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr); @@ -360,7 +359,7 @@ xfs_log_done(xfs_mount_t *mp, */ xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); xlog_ungrant_log_space(log, ticket); - xlog_ticket_put(log, ticket); + xfs_log_ticket_put(ticket); } else { xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); xlog_regrant_reserve_log_space(log, ticket); @@ -514,7 +513,7 @@ xfs_log_reserve(xfs_mount_t *mp, retval = xlog_regrant_write_log_space(log, internal_ticket); } else { /* may sleep if need to allocate more tickets */ - internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, + internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt, client, flags); if (!internal_ticket) return XFS_ERROR(ENOMEM); @@ -749,7 +748,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) if (tic) { xlog_trace_loggrant(log, tic, "unmount rec"); xlog_ungrant_log_space(log, tic); - xlog_ticket_put(log, tic); + xfs_log_ticket_put(tic); } } else { /* @@ -3222,22 +3221,33 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) */ /* - * Free a used ticket. + * Free a used ticket when it's refcount falls to zero. */ -STATIC void -xlog_ticket_put(xlog_t *log, - xlog_ticket_t *ticket) +void +xfs_log_ticket_put( + xlog_ticket_t *ticket) { - sv_destroy(&ticket->t_wait); - kmem_zone_free(xfs_log_ticket_zone, ticket); -} /* xlog_ticket_put */ + ASSERT(atomic_read(&ticket->t_ref) > 0); + if (atomic_dec_and_test(&ticket->t_ref)) { + sv_destroy(&ticket->t_wait); + kmem_zone_free(xfs_log_ticket_zone, ticket); + } +} +xlog_ticket_t * +xfs_log_ticket_get( + xlog_ticket_t *ticket) +{ + ASSERT(atomic_read(&ticket->t_ref) > 0); + atomic_inc(&ticket->t_ref); + return ticket; +} /* * Allocate and initialise a new log ticket. 
*/ STATIC xlog_ticket_t * -xlog_ticket_get(xlog_t *log, +xlog_ticket_alloc(xlog_t *log, int unit_bytes, int cnt, char client, @@ -3308,6 +3318,7 @@ xlog_ticket_get(xlog_t *log, unit_bytes += 2*BBSIZE; } + atomic_set(&tic->t_ref, 1); tic->t_unit_res = unit_bytes; tic->t_curr_res = unit_bytes; tic->t_cnt = cnt; @@ -3323,7 +3334,7 @@ xlog_ticket_get(xlog_t *log, xlog_tic_reset_res(tic); return tic; -} /* xlog_ticket_get */ +} /****************************************************************************** diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index d47b91f..8a3e84e 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -134,6 +134,7 @@ typedef struct xfs_log_callback { #ifdef __KERNEL__ /* Log manager interfaces */ struct xfs_mount; +struct xlog_ticket; xfs_lsn_t xfs_log_done(struct xfs_mount *mp, xfs_log_ticket_t ticket, void **iclog, @@ -177,6 +178,9 @@ int xfs_log_need_covered(struct xfs_mount *mp); void xlog_iodone(struct xfs_buf *); +struct xlog_ticket * xfs_log_ticket_get(struct xlog_ticket *ticket); +void xfs_log_ticket_put(struct xlog_ticket *ticket); + #endif diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index de7ef6c..b39a198 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -245,6 +245,7 @@ typedef struct xlog_ticket { struct xlog_ticket *t_next; /* :4|8 */ struct xlog_ticket *t_prev; /* :4|8 */ xlog_tid_t t_tid; /* transaction identifier : 4 */ + atomic_t t_ref; /* ticket reference count : 4 */ int t_curr_res; /* current reservation in bytes : 4 */ int t_unit_res; /* unit reservation in bytes : 4 */ char t_ocnt; /* original count : 1 */ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index ad137ef..8570b82 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -290,7 +290,7 @@ xfs_trans_dup( ASSERT(tp->t_ticket != NULL); ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE); - ntp->t_ticket = tp->t_ticket; + ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; tp->t_blk_res = tp->t_blk_res_used; ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; @@ -1260,6 +1260,13 @@ xfs_trans_roll( trans = *tpp; /* + * transaction commit worked ok so we can drop the extra ticket + * reference that we gained in xfs_trans_dup() + */ + xfs_log_ticket_put(trans->t_ticket); + + + /* * Reserve space in the log for th next transaction. * This also pushes items in the "AIL", the list of logged items, * out to disk if they are taking up space at the tail of the log diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 35d4d41..77114493 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -172,6 +172,12 @@ xfs_dir_ialloc( *ipp = NULL; return code; } + + /* + * transaction commit worked ok so we can drop the extra ticket + * reference that we gained in xfs_trans_dup() + */ + xfs_log_ticket_put(tp->t_ticket); code = xfs_trans_reserve(tp, 0, log_res, 0, XFS_TRANS_PERM_LOG_RES, log_count); /* diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index c45ea27..0574aad 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1019,6 +1019,12 @@ xfs_inactive_symlink_rmt( goto error0; } /* + * transaction commit worked ok so we can drop the extra ticket + * reference that we gained in xfs_trans_dup() + */ + xfs_log_ticket_put(tp->t_ticket); + + /* * Remove the memory for extent descriptions (just bookkeeping). 
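A caller-side sketch of how the reference-counted ticket is meant to be used across a rolling transaction; roll_transaction() is a hypothetical condensation of the xfs_trans_roll()/xfs_itruncate_finish() pattern in the diffs, with error handling trimmed to the essentials:

static int
roll_transaction(
        struct xfs_trans        **tpp,
        struct xfs_mount        *mp)
{
        struct xfs_trans        *ntp;
        int                     error;

        ntp = xfs_trans_dup(*tpp);              /* dup takes an extra ticket ref */
        error = xfs_trans_commit(*tpp, 0);      /* may free the old transaction */
        *tpp = ntp;
        if (error)
                return error;                   /* cancel path drops the dup's ref */

        /* commit worked, so drop the extra ticket reference from the dup */
        xfs_log_ticket_put(ntp->t_ticket);

        return xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
                                 XFS_TRANS_PERM_LOG_RES,
                                 XFS_ITRUNCATE_LOG_COUNT);
}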
*/ if (ip->i_df.if_bytes) -- cgit v0.10.2 From bac8dca9f9b1dfcf9c4ecb4f9ca17185b828cc20 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:31 +1100 Subject: [XFS] fix NULL pointer dereference in xfs_log_force_umount xfs_log_force_umount may be called very early during log recovery where If we fail a buffer read in xlog_recover_do_inode_trans we abort the mount. But at that point log recovery has started delayed writeback of inode buffers. As part of the aborted mount we try to flush out all delwri buffers, but at that point we have already freed the superblock, and set mp->m_sb_bp to NULL, and xfs_log_force_umount which gets called after the inode buffer writeback trips over it. Make xfs_log_force_umount a little more careful when accessing mp->m_sb_bp to avoid this. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 4bf44ae..8a5b055 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3569,7 +3569,8 @@ xfs_log_force_umount( if (!log || log->l_flags & XLOG_ACTIVE_RECOVERY) { mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; - XFS_BUF_DONE(mp->m_sb_bp); + if (mp->m_sb_bp) + XFS_BUF_DONE(mp->m_sb_bp); return 0; } @@ -3590,7 +3591,9 @@ xfs_log_force_umount( spin_lock(&log->l_icloglock); spin_lock(&log->l_grant_lock); mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; - XFS_BUF_DONE(mp->m_sb_bp); + if (mp->m_sb_bp) + XFS_BUF_DONE(mp->m_sb_bp); + /* * This flag is sort of redundant because of the mount flag, but * it's good to maintain the separation between the log and the rest -- cgit v0.10.2 From f999a5bf3fa6b3d11334c3ba1e9dcfed5ff9f8a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:32 +1100 Subject: [XFS] wire up ->open for directories Currently there's no ->open method set for directories on XFS. That means we don't perform any check for opening too large directories without O_LARGEFILE, we don't check for shut down filesystems, and we don't actually do the readahead for the first block in the directory. Instead of just setting the directories open routine to xfs_file_open we merge the shutdown check directly into xfs_file_open and create a new xfs_dir_open that first calls xfs_file_open and then performs the readahead for block 0. (First sent on September 29th) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 3fee790..72fc8d8 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -38,6 +38,7 @@ #include "xfs_rw.h" #include "xfs_ioctl32.h" #include "xfs_vnodeops.h" +#include "xfs_da_btree.h" #include #include @@ -169,11 +170,37 @@ xfs_file_splice_write_invis( STATIC int xfs_file_open( struct inode *inode, - struct file *filp) + struct file *file) { - if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) + if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) return -EFBIG; - return -xfs_open(XFS_I(inode)); + if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb))) + return -EIO; + return 0; +} + +STATIC int +xfs_dir_open( + struct inode *inode, + struct file *file) +{ + struct xfs_inode *ip = XFS_I(inode); + int mode; + int error; + + error = xfs_file_open(inode, file); + if (error) + return error; + + /* + * If there are any blocks, read-ahead block 0 as we're almost + * certain to have the next operation be a read there. 
+ */ + mode = xfs_ilock_map_shared(ip); + if (ip->i_d.di_nextents > 0) + xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); + xfs_iunlock(ip, mode); + return 0; } STATIC int @@ -345,6 +372,7 @@ const struct file_operations xfs_invis_file_operations = { const struct file_operations xfs_dir_file_operations = { + .open = xfs_dir_open, .read = generic_read_dir, .readdir = xfs_file_readdir, .llseek = generic_file_llseek, diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 0574aad..c055bdb 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -54,28 +54,6 @@ #include "xfs_vnodeops.h" int -xfs_open( - xfs_inode_t *ip) -{ - int mode; - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return XFS_ERROR(EIO); - - /* - * If it's a directory with any blocks, read-ahead block 0 - * as we're almost certain to have the next operation be a read there. - */ - if (S_ISDIR(ip->i_d.di_mode) && ip->i_d.di_nextents > 0) { - mode = xfs_ilock_map_shared(ip); - if (ip->i_d.di_nextents > 0) - (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); - xfs_iunlock(ip, mode); - } - return 0; -} - -int xfs_setattr( struct xfs_inode *ip, struct iattr *iattr, diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index b1ae8e3..a559400 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -14,7 +14,6 @@ struct xfs_inode; struct xfs_iomap; -int xfs_open(struct xfs_inode *ip); int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); #define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ -- cgit v0.10.2 From 6c31b93a14a453c8756ffd228e24910ffdf30c5d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:32 +1100 Subject: [XFS] allow inode64 mount option on 32 bit systems Now that we've stopped using the Linux inode cache we can trivially support the inode64 mount option on 32bit architectures. As far as the kernel and most userspace is concerned this works perfectly, but applications still using really old stat and readdir interfaces will get an EOVERFLOW error when hitting an inode number not fitting into 32 bits (that problem of course also exists when using these applications on a 64bit kernel). Note that because inode64 is simply a mount option we can currently mount a filesystem having > 32 bit inode numbers and cause a variety of problems; all this is solved by this patch which enables XFS_BIG_INUMS, even when inode64 is not used. (First sent on October 18th) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 77d6ddc..cfe16a3 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -21,18 +21,12 @@ #include /* - * Some types are conditional depending on the target system. * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. - * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well - * as requiring XFS_BIG_BLKNOS to be set. + * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
*/ #if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) # define XFS_BIG_BLKNOS 1 -# if BITS_PER_LONG == 64 -# define XFS_BIG_INUMS 1 -# else -# define XFS_BIG_INUMS 0 -# endif +# define XFS_BIG_INUMS 1 #else # define XFS_BIG_BLKNOS 0 # define XFS_BIG_INUMS 0 -- cgit v0.10.2 From 65795910c1b798f8a47181b48cf6eb163a15e778 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:33 +1100 Subject: [XFS] fix spurious gcc warnings Some recent gcc warnings don't like passing string variables to printf-like functions without using at least a "%s" format string. Change the two occurances of that in xfs to please gcc. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c index 64f4ec9..c3526d4 100644 --- a/fs/xfs/linux-2.6/xfs_stats.c +++ b/fs/xfs/linux-2.6/xfs_stats.c @@ -61,7 +61,7 @@ xfs_read_xfsstats( /* Loop over all stats groups */ for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) { - len += sprintf(buffer + len, xstats[i].desc); + len += sprintf(buffer + len, "%s", xstats[i].desc); /* inner loop does each group */ while (j < xstats[i].endpoint) { val = 0; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index c3d004b..cc5e07e 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1848,10 +1848,9 @@ STATIC int __init init_xfs_fs(void) { int error; - static char message[] __initdata = KERN_INFO \ - XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n"; - printk(message); + printk(KERN_INFO XFS_VERSION_STRING " with " + XFS_BUILD_OPTIONS " enabled\n"); ktrace_init(64); vn_init(); -- cgit v0.10.2 From 2e6560929d8ab4b650fecc3a87013852b34f0922 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 28 Nov 2008 14:23:33 +1100 Subject: [XFS] fix error inversion problems with data flushing XFS gets the sign of the error wrong in several places when gathering the error from generic linux functions. These functions return negative error values, while the core XFS code returns positive error values. Hence when XFS inverts the error to be returned to the VFS, it can incorrectly invert a negative error and this error will be ignored by the syscall return. Fix all the problems related to calling filemap_* functions. Problem initially identified by Nick Piggin in xfs_fsync(). Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 36caa6d..5aeb777 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -24,6 +24,10 @@ int fs_noerr(void) { return 0; } int fs_nosys(void) { return ENOSYS; } void fs_noval(void) { return; } +/* + * note: all filemap functions return negative error codes. These + * need to be inverted before returning to the xfs core functions. 
+ */ void xfs_tosspages( xfs_inode_t *ip, @@ -53,7 +57,7 @@ xfs_flushinval_pages( if (!ret) truncate_inode_pages(mapping, first); } - return ret; + return -ret; } int @@ -72,10 +76,23 @@ xfs_flush_pages( xfs_iflags_clear(ip, XFS_ITRUNCATED); ret = filemap_fdatawrite(mapping); if (flags & XFS_B_ASYNC) - return ret; + return -ret; ret2 = filemap_fdatawait(mapping); if (!ret) ret = ret2; } - return ret; + return -ret; +} + +int +xfs_wait_on_pages( + xfs_inode_t *ip, + xfs_off_t first, + xfs_off_t last) +{ + struct address_space *mapping = VFS_I(ip)->i_mapping; + + if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) + return -filemap_fdatawait(mapping); + return 0; } diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 1957e535..4959c87 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -243,7 +243,7 @@ xfs_read( if (unlikely(ioflags & IO_ISDIRECT)) { if (inode->i_mapping->nrpages) - ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), + ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); mutex_unlock(&inode->i_mutex); if (ret) { diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index cc5e07e..ae92290 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -990,21 +990,26 @@ xfs_fs_write_inode( struct inode *inode, int sync) { + struct xfs_inode *ip = XFS_I(inode); int error = 0; int flags = 0; - xfs_itrace_entry(XFS_I(inode)); + xfs_itrace_entry(ip); if (sync) { - filemap_fdatawait(inode->i_mapping); + error = xfs_wait_on_pages(ip, 0, -1); + if (error) + goto out_error; flags |= FLUSH_SYNC; } - error = xfs_inode_flush(XFS_I(inode), flags); + error = xfs_inode_flush(ip, flags); + +out_error: /* * if we failed to write out the inode then mark * it dirty again so we'll try again later. */ if (error) - xfs_mark_inode_dirty_sync(XFS_I(inode)); + xfs_mark_inode_dirty_sync(ip); return -error; } diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index c055bdb..f26b038 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -681,7 +681,7 @@ xfs_fsync( return XFS_ERROR(EIO); /* capture size updates in I/O completion before writing the inode. */ - error = filemap_fdatawait(VFS_I(ip)->i_mapping); + error = xfs_wait_on_pages(ip, 0, -1); if (error) return XFS_ERROR(error); diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index a559400..2a45b00 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -75,5 +75,6 @@ int xfs_flushinval_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last, int fiopt); int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last, uint64_t flags, int fiopt); +int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last); #endif /* _XFS_VNODEOPS_H */ -- cgit v0.10.2 From 0924b585fc49bf371bc700c23e516a538bf589af Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 28 Nov 2008 14:23:34 +1100 Subject: [XFS] fix uninitialised variable bug in dquot release. gcc is warning about an uninitialised variable in xfs_growfs_rt(). This is a false positive. Fix it by changing the scope of the transaction pointer to wholly within the internal loop inside the function. While there, preemptively change xfs_growfs_rt_alloc() in the same way as it has exactly the same structure as xfs_growfs_rt() but gcc is not warning about it. Yet. 
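Structurally, the change below narrows the transaction pointer to one loop iteration so every exit path either commits or cancels it; a schematic sketch under stated assumptions (grow_loop() and grow_step() are hypothetical placeholders for the real per-iteration bitmap/summary work in xfs_growfs_rt()):

static int
grow_loop(
        struct xfs_mount        *mp,
        int                     nsteps)
{
        int                     error = 0;
        int                     i;

        for (i = 0; i < nsteps; i++) {
                xfs_trans_t     *tp;            /* scoped to this iteration */
                int             cancelflags = 0;

                tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_FREE);
                error = xfs_trans_reserve(tp, 0, XFS_GROWRTFREE_LOG_RES(mp),
                                          0, 0, 0);
                if (error)
                        goto error_cancel;      /* nothing joined or logged yet */

                error = grow_step(tp);          /* hypothetical per-iteration work */
                if (error) {
error_cancel:
                        xfs_trans_cancel(tp, cancelflags);
                        break;                  /* tp is provably initialised here */
                }

                error = xfs_trans_commit(tp, 0);
                if (error)
                        break;
        }
        return error;
}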
Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index e2f68de..f18b9b2 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -85,7 +85,6 @@ xfs_growfs_rt_alloc( { xfs_fileoff_t bno; /* block number in file */ xfs_buf_t *bp; /* temporary buffer for zeroing */ - int cancelflags; /* flags for xfs_trans_cancel */ int committed; /* transaction committed flag */ xfs_daddr_t d; /* disk block address */ int error; /* error return value */ @@ -96,15 +95,16 @@ xfs_growfs_rt_alloc( xfs_bmbt_irec_t map; /* block map output */ int nmap; /* number of block maps */ int resblks; /* space reservation */ - xfs_trans_t *tp; /* transaction pointer */ /* * Allocate space to the file, as necessary. */ while (oblocks < nblocks) { + int cancelflags = 0; + xfs_trans_t *tp; + tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC); resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks); - cancelflags = 0; /* * Reserve space & log for one extent added to the file. */ @@ -171,7 +171,9 @@ xfs_growfs_rt_alloc( mp->m_bsize, 0); if (bp == NULL) { error = XFS_ERROR(EIO); - goto error_cancel; +error_cancel: + xfs_trans_cancel(tp, cancelflags); + goto error; } memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize); xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); @@ -188,8 +190,6 @@ xfs_growfs_rt_alloc( oblocks = map.br_startoff + map.br_blockcount; } return 0; -error_cancel: - xfs_trans_cancel(tp, cancelflags); error: return error; } @@ -1856,7 +1856,6 @@ xfs_growfs_rt( { xfs_rtblock_t bmbno; /* bitmap block number */ xfs_buf_t *bp; /* temporary buffer */ - int cancelflags; /* flags for xfs_trans_cancel */ int error; /* error return value */ xfs_inode_t *ip; /* bitmap inode, used as lock */ xfs_mount_t *nmp; /* new (fake) mount structure */ @@ -1872,10 +1871,8 @@ xfs_growfs_rt( xfs_extlen_t rsumblocks; /* current number of rt summary blks */ xfs_sb_t *sbp; /* old superblock */ xfs_fsblock_t sumbno; /* summary block number */ - xfs_trans_t *tp; /* transaction pointer */ sbp = &mp->m_sb; - cancelflags = 0; /* * Initial error checking. */ @@ -1942,6 +1939,9 @@ xfs_growfs_rt( ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0); bmbno < nrbmblocks; bmbno++) { + xfs_trans_t *tp; + int cancelflags = 0; + *nmp = *mp; nsbp = &nmp->m_sb; /* @@ -1967,16 +1967,15 @@ xfs_growfs_rt( * Start a transaction, get the log reservation. */ tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_FREE); - cancelflags = 0; if ((error = xfs_trans_reserve(tp, 0, XFS_GROWRTFREE_LOG_RES(nmp), 0, 0, 0))) - break; + goto error_cancel; /* * Lock out other callers by grabbing the bitmap inode lock. */ if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, XFS_ILOCK_EXCL, &ip))) - break; + goto error_cancel; ASSERT(ip == mp->m_rbmip); /* * Update the bitmap inode's size. @@ -1990,7 +1989,7 @@ xfs_growfs_rt( */ if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, XFS_ILOCK_EXCL, &ip))) - break; + goto error_cancel; ASSERT(ip == mp->m_rsumip); /* * Update the summary inode's size. @@ -2005,7 +2004,7 @@ xfs_growfs_rt( mp->m_rsumlevels != nmp->m_rsumlevels) { error = xfs_rtcopy_summary(mp, nmp, tp); if (error) - break; + goto error_cancel; } /* * Update superblock fields. 
@@ -2031,8 +2030,11 @@ xfs_growfs_rt( bp = NULL; error = xfs_rtfree_range(nmp, tp, sbp->sb_rextents, nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno); - if (error) + if (error) { +error_cancel: + xfs_trans_cancel(tp, cancelflags); break; + } /* * Mark more blocks free in the superblock. */ @@ -2045,15 +2047,10 @@ xfs_growfs_rt( mp->m_rsumsize = nrsumsize; error = xfs_trans_commit(tp, 0); - if (error) { - tp = NULL; + if (error) break; - } } - if (error && tp) - xfs_trans_cancel(tp, cancelflags); - /* * Free the fake mp structure. */ -- cgit v0.10.2 From 8a7141a8b931d60d42830432b82078cd6dace83b Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 28 Nov 2008 14:23:35 +1100 Subject: [XFS] convert xfs_getbmap to take formatter functions Preliminary work to hook up fiemap, this allows us to pass in an arbitrary formatter to copy extent data back to userspace. The formatter takes info for 1 extent, a pointer to the user "thing*" and a pointer to a "filled" variable to indicate whether a userspace buffer did get filled in (for fiemap, hole "extents" are skipped). I'm just using the getbmapx struct as a "common denominator" because as far as I can see, it holds all info that any formatters will care about. ("*thing" because fiemap doesn't pass the user pointer around, but rather has a pointer to a fiemap info structure, and helpers associated with it) Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index f1bd6c3..d597059 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -1250,43 +1250,67 @@ xfs_ioc_setxflags( } STATIC int +xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full) +{ + struct getbmap __user *base = *ap; + + /* copy only getbmap portion (not getbmapx) */ + if (copy_to_user(base, bmv, sizeof(struct getbmap))) + return XFS_ERROR(EFAULT); + + *ap += sizeof(struct getbmap); + return 0; +} + +STATIC int xfs_ioc_getbmap( struct xfs_inode *ip, int ioflags, unsigned int cmd, void __user *arg) { - struct getbmap bm; - int iflags; + struct getbmapx bmx; int error; - if (copy_from_user(&bm, arg, sizeof(bm))) + if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) return -XFS_ERROR(EFAULT); - if (bm.bmv_count < 2) + if (bmx.bmv_count < 2) return -XFS_ERROR(EINVAL); - iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); + bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); if (ioflags & IO_INVIS) - iflags |= BMV_IF_NO_DMAPI_READ; + bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ; - error = xfs_getbmap(ip, &bm, (struct getbmap __user *)arg+1, iflags); + error = xfs_getbmap(ip, &bmx, xfs_getbmap_format, + (struct getbmap *)arg+1); if (error) return -error; - if (copy_to_user(arg, &bm, sizeof(bm))) + /* copy back header - only size of getbmap */ + if (copy_to_user(arg, &bmx, sizeof(struct getbmap))) return -XFS_ERROR(EFAULT); return 0; } STATIC int +xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full) +{ + struct getbmapx __user *base = *ap; + + if (copy_to_user(base, bmv, sizeof(struct getbmapx))) + return XFS_ERROR(EFAULT); + + *ap += sizeof(struct getbmapx); + return 0; +} + +STATIC int xfs_ioc_getbmapx( struct xfs_inode *ip, void __user *arg) { struct getbmapx bmx; - struct getbmap bm; - int iflags; int error; if (copy_from_user(&bmx, arg, sizeof(bmx))) @@ -1295,26 +1319,16 @@ xfs_ioc_getbmapx( if (bmx.bmv_count < 2) return -XFS_ERROR(EINVAL); - /* - * Map input getbmapx structure to a getbmap - * structure for xfs_getbmap. 
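The core of the refactor is the formatter calling convention: xfs_getbmap() no longer knows whether it is filling getbmap, getbmapx or (later) fiemap records, it just calls back. A stripped-down sketch of the consumer side, assuming a hypothetical emit_extents() in place of the real extent-mapping loop:

/* copy one record to the destination, advance the cursor in *ap,
 * and set *full when the destination cannot take any more records */
typedef int (*xfs_bmap_format_t)(void **ap, struct getbmapx *bmv, int *full);

static int
emit_extents(
        struct getbmapx         *recs,
        int                     nrecs,
        xfs_bmap_format_t       formatter,
        void                    *arg)
{
        int                     i;
        int                     error;
        int                     full = 0;

        for (i = 0; i < nrecs && !full; i++) {
                error = formatter(&arg, &recs[i], &full);
                if (error)
                        return error;           /* positive, XFS-style error code */
        }
        return 0;
}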
- */ - GETBMAP_CONVERT(bmx, bm); - - iflags = bmx.bmv_iflags; - - if (iflags & (~BMV_IF_VALID)) + if (bmx.bmv_iflags & (~BMV_IF_VALID)) return -XFS_ERROR(EINVAL); - iflags |= BMV_IF_EXTENDED; - - error = xfs_getbmap(ip, &bm, (struct getbmapx __user *)arg+1, iflags); + error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format, + (struct getbmapx *)arg+1); if (error) return -error; - GETBMAP_CONVERT(bm, bmx); - - if (copy_to_user(arg, &bmx, sizeof(bmx))) + /* copy back header */ + if (copy_to_user(arg, &bmx, sizeof(struct getbmapx))) return -XFS_ERROR(EFAULT); return 0; diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index c391221..8077580 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -5811,9 +5811,9 @@ error0: STATIC int xfs_getbmapx_fix_eof_hole( xfs_inode_t *ip, /* xfs incore inode pointer */ - struct getbmap *out, /* output structure */ + struct getbmapx *out, /* output structure */ int prealloced, /* this is a file with - * preallocated data space */ + * preallocated data space */ __int64_t end, /* last block requested */ xfs_fsblock_t startblock) { @@ -5839,14 +5839,18 @@ xfs_getbmapx_fix_eof_hole( } /* - * Fcntl interface to xfs_bmapi. + * Get inode's extents as described in bmv, and format for output. + * Calls formatter to fill the user's buffer until all extents + * are mapped, until the passed-in bmv->bmv_count slots have + * been filled, or until the formatter short-circuits the loop, + * if it is tracking filled-in extents on its own. */ int /* error code */ xfs_getbmap( xfs_inode_t *ip, - struct getbmap *bmv, /* user bmap structure */ - void __user *ap, /* pointer to user's array */ - int interface) /* interface flags */ + struct getbmapx *bmv, /* user bmap structure */ + xfs_bmap_format_t formatter, /* format to user */ + void *arg) /* formatter arg */ { __int64_t bmvend; /* last block requested */ int error; /* return value */ @@ -5859,19 +5863,20 @@ xfs_getbmap( int nexleft; /* # of user extents left */ int subnex; /* # of bmapi's can do */ int nmap; /* number of map entries */ - struct getbmap out; /* output structure */ + struct getbmapx out; /* output structure */ int whichfork; /* data or attr fork */ int prealloced; /* this is a file with * preallocated data space */ int sh_unwritten; /* true, if unwritten */ /* extents listed separately */ + int iflags; /* interface flags */ int bmapi_flags; /* flags for xfs_bmapi */ - __int32_t oflags; /* getbmapx bmv_oflags field */ mp = ip->i_mount; + iflags = bmv->bmv_iflags; - whichfork = interface & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; - sh_unwritten = (interface & BMV_IF_PREALLOC) != 0; + whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; + sh_unwritten = (iflags & BMV_IF_PREALLOC) != 0; /* If the BMV_IF_NO_DMAPI_READ interface bit specified, do not * generate a DMAPI read event. Otherwise, if the DM_EVENT_READ @@ -5886,7 +5891,7 @@ xfs_getbmap( * could misinterpret holes in a DMAPI file as true holes, * when in fact they may represent offline user data. */ - if ((interface & BMV_IF_NO_DMAPI_READ) == 0 && + if ((iflags & BMV_IF_NO_DMAPI_READ) == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_READ) && whichfork == XFS_DATA_FORK) { error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL); @@ -5993,52 +5998,35 @@ xfs_getbmap( ASSERT(nmap <= subnex); for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { - nexleft--; - oflags = (map[i].br_state == XFS_EXT_UNWRITTEN) ? + out.bmv_oflags = (map[i].br_state == XFS_EXT_UNWRITTEN) ? 
BMV_OF_PREALLOC : 0; out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff); out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount); + out.bmv_unused1 = out.bmv_unused2 = 0; ASSERT(map[i].br_startblock != DELAYSTARTBLOCK); if (map[i].br_startblock == HOLESTARTBLOCK && whichfork == XFS_ATTR_FORK) { /* came to the end of attribute fork */ goto unlock_and_return; } else { + int full = 0; /* user array is full */ + if (!xfs_getbmapx_fix_eof_hole(ip, &out, prealloced, bmvend, map[i].br_startblock)) { goto unlock_and_return; } - /* return either getbmap/getbmapx structure. */ - if (interface & BMV_IF_EXTENDED) { - struct getbmapx outx; - - GETBMAP_CONVERT(out,outx); - outx.bmv_oflags = oflags; - outx.bmv_unused1 = outx.bmv_unused2 = 0; - if (copy_to_user(ap, &outx, - sizeof(outx))) { - error = XFS_ERROR(EFAULT); - goto unlock_and_return; - } - } else { - if (copy_to_user(ap, &out, - sizeof(out))) { - error = XFS_ERROR(EFAULT); - goto unlock_and_return; - } - } + /* format results & advance arg */ + error = formatter(&arg, &out, &full); + if (error || full) + goto unlock_and_return; + nexleft--; bmv->bmv_offset = out.bmv_offset + out.bmv_length; bmv->bmv_length = MAX((__int64_t)0, (__int64_t)(bmvend - bmv->bmv_offset)); bmv->bmv_entries++; - ap = (interface & BMV_IF_EXTENDED) ? - (void __user *) - ((struct getbmapx __user *)ap + 1) : - (void __user *) - ((struct getbmap __user *)ap + 1); } } } while (nmap && nexleft && bmv->bmv_length); diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 7c9d12c..284571c 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -356,15 +356,18 @@ xfs_bmap_finish( xfs_bmap_free_t *flist, /* i/o: list extents to free */ int *committed); /* xact committed or not */ +/* bmap to userspace formatter - copy to user & advance pointer */ +typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *); + /* - * Fcntl interface to xfs_bmapi. + * Get inode's extents as described in bmv, and format for output. */ int /* error code */ xfs_getbmap( xfs_inode_t *ip, - struct getbmap *bmv, /* user bmap structure */ - void __user *ap, /* pointer to user's array */ - int iflags); /* interface flags */ + struct getbmapx *bmv, /* user bmap structure */ + xfs_bmap_format_t formatter, /* format to user */ + void *arg); /* formatter arg */ /* * Check if the endoff is outside the last extent. If so the caller will grow diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 01c0cc8..df859d6 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -114,22 +114,10 @@ struct getbmapx { #define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ #define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ #define BMV_IF_VALID (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC) -#ifdef __KERNEL__ -#define BMV_IF_EXTENDED 0x40000000 /* getpmapx if set */ -#endif /* bmv_oflags values - returned for for each non-header segment */ #define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ -/* Convert getbmap <-> getbmapx - move fields from p1 to p2. */ -#define GETBMAP_CONVERT(p1,p2) { \ - p2.bmv_offset = p1.bmv_offset; \ - p2.bmv_block = p1.bmv_block; \ - p2.bmv_length = p1.bmv_length; \ - p2.bmv_count = p1.bmv_count; \ - p2.bmv_entries = p1.bmv_entries; } - - /* * Structure for XFS_IOC_FSSETDM. * For use by backup and restore programs to set the XFS on-disk inode -- cgit v0.10.2 From 5af317c942aebc928ab244eb69581bd8e5333215 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 28 Nov 2008 14:23:35 +1100 Subject: [XFS] Add new getbmap flags. 
This adds a new output flag, BMV_OF_LAST to indicate if we've hit the last extent in the inode. This potentially saves an extra call from userspace to see when the whole mapping is done. It also adds BMV_IF_DELALLOC and BMV_OF_DELALLOC to request, and indicate, delayed-allocation extents. In this case bmv_block is set to -2 (-1 was already taken for HOLESTARTBLOCK; unfortunately these are the reverse of the in-kernel constants.) These new flags facilitate addition of the new fiemap interface. Rather than adding sh_delalloc, remove sh_unwritten & just test the flags directly. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 8077580..138308e 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -5819,6 +5819,9 @@ xfs_getbmapx_fix_eof_hole( { __int64_t fixlen; xfs_mount_t *mp; /* file system mount point */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_extnum_t lastx; /* last extent pointer */ + xfs_fileoff_t fileblock; if (startblock == HOLESTARTBLOCK) { mp = ip->i_mount; @@ -5832,7 +5835,15 @@ xfs_getbmapx_fix_eof_hole( out->bmv_length = fixlen; } } else { - out->bmv_block = XFS_FSB_TO_DB(ip, startblock); + if (startblock == DELAYSTARTBLOCK) + out->bmv_block = -2; + else + out->bmv_block = XFS_FSB_TO_DB(ip, startblock); + fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset); + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) && + (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1)) + out->bmv_oflags |= BMV_OF_LAST; } return 1; @@ -5867,8 +5878,6 @@ xfs_getbmap( int whichfork; /* data or attr fork */ int prealloced; /* this is a file with * preallocated data space */ - int sh_unwritten; /* true, if unwritten */ - /* extents listed separately */ int iflags; /* interface flags */ int bmapi_flags; /* flags for xfs_bmapi */ @@ -5876,7 +5885,6 @@ xfs_getbmap( iflags = bmv->bmv_iflags; whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; - sh_unwritten = (iflags & BMV_IF_PREALLOC) != 0; /* If the BMV_IF_NO_DMAPI_READ interface bit specified, do not * generate a DMAPI read event. Otherwise, if the DM_EVENT_READ @@ -5947,8 +5955,9 @@ xfs_getbmap( xfs_ilock(ip, XFS_IOLOCK_SHARED); - if (whichfork == XFS_DATA_FORK && - (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) { + if (((iflags & BMV_IF_DELALLOC) == 0) && + (whichfork == XFS_DATA_FORK) && + (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) { /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ error = xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); @@ -5958,7 +5967,8 @@ xfs_getbmap( } } - ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0); + ASSERT(whichfork == XFS_ATTR_FORK || (iflags & BMV_IF_DELALLOC) || + ip->i_delayed_blks == 0); lock = xfs_ilock_map_shared(ip); @@ -5970,7 +5980,7 @@ xfs_getbmap( nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; bmapi_flags = XFS_BMAPI_AFLAG(whichfork) | - ((sh_unwritten) ? 0 : XFS_BMAPI_IGSTATE); + ((iflags & BMV_IF_PREALLOC) ? 0 : XFS_BMAPI_IGSTATE); /* * Allocate enough space to handle "subnex" maps at a time. 
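A hedged userspace sketch of how the new flags are meant to be consumed; it assumes the getbmapx definitions come from the xfsprogs header xfs/xfs_fs.h (the header location is an assumption) and that fd refers to an open file on an XFS filesystem:

#include <xfs/xfs_fs.h>         /* assumed header providing getbmapx/XFS_IOC_GETBMAPX */
#include <sys/ioctl.h>
#include <stdio.h>
#include <string.h>

static int
dump_extents(int fd)
{
        struct getbmapx map[33];        /* map[0] is the header, map[1..] the results */
        int             i, last = 0;

        memset(map, 0, sizeof(map));
        map[0].bmv_length = -1LL;       /* to end of file */
        map[0].bmv_count = 33;
        map[0].bmv_iflags = BMV_IF_PREALLOC | BMV_IF_DELALLOC;

        while (!last) {
                if (ioctl(fd, XFS_IOC_GETBMAPX, map) < 0)
                        return -1;
                if (map[0].bmv_entries == 0)
                        break;
                for (i = 1; i <= map[0].bmv_entries; i++) {
                        printf("offset %lld block %lld len %lld oflags 0x%x\n",
                               (long long)map[i].bmv_offset,
                               (long long)map[i].bmv_block,    /* -2 == delalloc */
                               (long long)map[i].bmv_length,
                               (unsigned int)map[i].bmv_oflags);
                        if (map[i].bmv_oflags & BMV_OF_LAST)
                                last = 1;
                }
                /* continue the next batch after the last extent returned */
                map[0].bmv_offset = map[map[0].bmv_entries].bmv_offset +
                                    map[map[0].bmv_entries].bmv_length;
                map[0].bmv_length = -1LL;
        }
        return 0;
}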
@@ -5980,9 +5990,12 @@ xfs_getbmap( bmv->bmv_entries = 0; - if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0) { - error = 0; - goto unlock_and_return; + if ((XFS_IFORK_NEXTENTS(ip, whichfork) == 0)) { + if (((iflags & BMV_IF_DELALLOC) == 0) || + whichfork == XFS_ATTR_FORK) { + error = 0; + goto unlock_and_return; + } } nexleft = nex; @@ -5998,15 +6011,20 @@ xfs_getbmap( ASSERT(nmap <= subnex); for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { - out.bmv_oflags = (map[i].br_state == XFS_EXT_UNWRITTEN) ? - BMV_OF_PREALLOC : 0; + out.bmv_oflags = 0; + if (map[i].br_state == XFS_EXT_UNWRITTEN) + out.bmv_oflags |= BMV_OF_PREALLOC; + else if (map[i].br_startblock == DELAYSTARTBLOCK) + out.bmv_oflags |= BMV_OF_DELALLOC; out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff); out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount); out.bmv_unused1 = out.bmv_unused2 = 0; - ASSERT(map[i].br_startblock != DELAYSTARTBLOCK); + ASSERT(((iflags & BMV_IF_DELALLOC) != 0) || + (map[i].br_startblock != DELAYSTARTBLOCK)); if (map[i].br_startblock == HOLESTARTBLOCK && whichfork == XFS_ATTR_FORK) { /* came to the end of attribute fork */ + out.bmv_oflags |= BMV_OF_LAST; goto unlock_and_return; } else { int full = 0; /* user array is full */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index df859d6..4ae03ba 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -113,10 +113,14 @@ struct getbmapx { #define BMV_IF_ATTRFORK 0x1 /* return attr fork rather than data */ #define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ #define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ -#define BMV_IF_VALID (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC) +#define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */ +#define BMV_IF_VALID \ + (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC) /* bmv_oflags values - returned for for each non-header segment */ #define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ +#define BMV_OF_DELALLOC 0x2 /* segment = delayed allocation */ +#define BMV_OF_LAST 0x4 /* segment is the last in the file */ /* * Structure for XFS_IOC_FSSETDM. -- cgit v0.10.2 From f35642e2f89f2b0379e929bd9027342365abc839 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 28 Nov 2008 14:23:35 +1100 Subject: [XFS] Hook up the fiemap ioctl. This adds the fiemap inode_operation, which for us converts the fiemap values & flags into a getbmapx structure which can be sent to xfs_getbmap. The formatter then copies the bmv array back into the user's fiemap buffer via the fiemap helpers. If we wanted to be more clever, we could also return mapping data for in-inode attributes, but I'm not terribly motivated to do that just yet. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 69c98cf..2903faf 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -53,6 +53,7 @@ #include #include #include +#include /* * Bring the atime in the XFS inode uptodate. @@ -661,6 +662,88 @@ out_error: return error; } +#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) + +/* + * Call fiemap helper to fill in user data. + * Returns positive errors to xfs_getbmap. 
+ */ +STATIC int +xfs_fiemap_format( + void **arg, + struct getbmapx *bmv, + int *full) +{ + int error; + struct fiemap_extent_info *fieinfo = *arg; + u32 fiemap_flags = 0; + u64 logical, physical, length; + + /* Do nothing for a hole */ + if (bmv->bmv_block == -1LL) + return 0; + + logical = BBTOB(bmv->bmv_offset); + physical = BBTOB(bmv->bmv_block); + length = BBTOB(bmv->bmv_length); + + if (bmv->bmv_oflags & BMV_OF_PREALLOC) + fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN; + else if (bmv->bmv_oflags & BMV_OF_DELALLOC) { + fiemap_flags |= FIEMAP_EXTENT_DELALLOC; + physical = 0; /* no block yet */ + } + if (bmv->bmv_oflags & BMV_OF_LAST) + fiemap_flags |= FIEMAP_EXTENT_LAST; + + error = fiemap_fill_next_extent(fieinfo, logical, physical, + length, fiemap_flags); + if (error > 0) { + error = 0; + *full = 1; /* user array now full */ + } + + return -error; +} + +STATIC int +xfs_vn_fiemap( + struct inode *inode, + struct fiemap_extent_info *fieinfo, + u64 start, + u64 length) +{ + xfs_inode_t *ip = XFS_I(inode); + struct getbmapx bm; + int error; + + error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS); + if (error) + return error; + + /* Set up bmap header for xfs internal routine */ + bm.bmv_offset = BTOBB(start); + /* Special case for whole file */ + if (length == FIEMAP_MAX_OFFSET) + bm.bmv_length = -1LL; + else + bm.bmv_length = BTOBB(length); + + /* our formatter will tell xfs_getbmap when to stop. */ + bm.bmv_count = MAXEXTNUM; + bm.bmv_iflags = BMV_IF_PREALLOC; + if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) + bm.bmv_iflags |= BMV_IF_ATTRFORK; + if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) + bm.bmv_iflags |= BMV_IF_DELALLOC; + + error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo); + if (error) + return -error; + + return 0; +} + static const struct inode_operations xfs_inode_operations = { .permission = xfs_vn_permission, .truncate = xfs_vn_truncate, @@ -671,6 +754,7 @@ static const struct inode_operations xfs_inode_operations = { .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, .fallocate = xfs_vn_fallocate, + .fiemap = xfs_vn_fiemap, }; static const struct inode_operations xfs_dir_inode_operations = { -- cgit v0.10.2 From 00dd4029e9afa642c2b26dc3aac834322ac29b4a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:36 +1100 Subject: [XFS] remove bhv_statvfs_t typedef Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 0ab60bc..8de5c4c 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h @@ -31,8 +31,6 @@ struct xfs_inode; struct xfs_mount; struct xfs_mount_args; -typedef struct kstatfs bhv_statvfs_t; - #define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */ #define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */ #define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */ diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index 9556df9..bc6c5cc 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -50,7 +50,7 @@ STATIC void xfs_fill_statvfs_from_dquot( - bhv_statvfs_t *statp, + struct kstatfs *statp, xfs_disk_dquot_t *dp) { __uint64_t limit; @@ -87,7 +87,7 @@ xfs_fill_statvfs_from_dquot( STATIC void xfs_qm_statvfs( xfs_inode_t *ip, - bhv_statvfs_t *statp) + struct kstatfs *statp) { xfs_mount_t *mp = ip->i_mount; xfs_dquot_t *dqp; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index e3f618c..d703143 100644 --- 
a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -134,7 +134,7 @@ typedef struct xfs_dquot * (*xfs_dqvopchown_t)( struct xfs_dquot **, struct xfs_dquot *); typedef int (*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *, struct xfs_dquot *, struct xfs_dquot *, uint); -typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, bhv_statvfs_t *); +typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *); typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags); typedef int (*xfs_quotactl_t)(struct xfs_mount *, int, int, xfs_caddr_t); -- cgit v0.10.2 From 2b5decd09e9f98c4e361f97f3e32d80164774f75 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:36 +1100 Subject: [XFS] remove xfs_vfs.h The only thing left are the forced shutdown flags and freeze macros which fit into xfs_mount.h much better. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index a4b254e..6ce9c9c 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -36,7 +36,6 @@ #include "xfs_bmap_btree.h" #include "xfs_attr_sf.h" #include "xfs_dir2_sf.h" -#include "xfs_vfs.h" #include "xfs_vnode.h" #include "xfs_dinode.h" #include "xfs_inode.h" diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index cfe16a3..507492d 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -80,7 +80,6 @@ #include #include -#include #include #include #include diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h deleted file mode 100644 index 8de5c4c..0000000 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_VFS_H__ -#define __XFS_VFS_H__ - -#include -#include "xfs_fs.h" - -struct inode; - -struct fid; -struct cred; -struct seq_file; -struct super_block; -struct xfs_inode; -struct xfs_mount; -struct xfs_mount_args; - -#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */ -#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */ -#define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */ -#define SHUTDOWN_CORRUPT_INCORE 0x0008 /* corrupt in-memory data structures */ -#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */ -#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */ - -#define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen) -#define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l)) - -#endif /* __XFS_VFS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index bf89e41..fb49e0f 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -18,7 +18,10 @@ #ifndef __XFS_VNODE_H__ #define __XFS_VNODE_H__ +#include "xfs_fs.h" + struct file; +struct xfs_inode; struct xfs_iomap; struct attrlist_cursor_kern; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index d703143..4fce22a 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -428,6 +428,16 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, #define xfs_force_shutdown(m,f) \ xfs_do_force_shutdown(m, f, __FILE__, __LINE__) +#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */ +#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */ +#define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */ +#define SHUTDOWN_CORRUPT_INCORE 0x0008 /* corrupt in-memory data structures */ +#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */ +#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */ + +#define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen) +#define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l)) + /* * Flags for xfs_mountfs */ -- cgit v0.10.2 From 207fcfad58482c7c9f92939a1f6df9f7e8873a34 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:37 +1100 Subject: [XFS] remove xfs_vfsops.h The only thing left is xfs_do_force_shutdown which already has a defintion in xfs_mount.h. 
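For illustration only, a minimal caller sketch (the helper name is hypothetical and not part of the patch series; only xfs_force_shutdown(), xfs_test_for_freeze(), xfs_do_force_shutdown() and the SHUTDOWN_* flags come from the hunks above) showing that, once the flags and freeze macros live in xfs_mount.h and xfs_do_force_shutdown() is declared there, nothing from the removed headers is needed to trigger a shutdown:

/* hypothetical caller, for illustration only -- not from the patches */
static void
example_shutdown_on_log_error(
	struct xfs_mount	*mp)
{
	/* freeze check via the macro that now lives in xfs_mount.h */
	if (xfs_test_for_freeze(mp))
		return;

	/* expands to xfs_do_force_shutdown(mp, flags, __FILE__, __LINE__) */
	xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
}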
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 7f7abec..595751f 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -29,7 +29,6 @@ #include "xfs_vnodeops.h" #include "xfs_bmap_btree.h" #include "xfs_inode.h" -#include "xfs_vfsops.h" /* * Note that we only accept fileids which are long enough rather than allow diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index ae92290..5389f07 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -48,7 +48,6 @@ #include "xfs_buf_item.h" #include "xfs_utils.h" #include "xfs_vnodeops.h" -#include "xfs_vfsops.h" #include "xfs_version.h" #include "xfs_log_priv.h" #include "xfs_trans_priv.h" diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 305d9f3..cad3da3 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -53,7 +53,6 @@ #include "xfs_filestream.h" #include "xfs_fsops.h" #include "xfs_vnodeops.h" -#include "xfs_vfsops.h" #include "xfs_utils.h" #include "xfs_sync.h" diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h deleted file mode 100644 index 6b8e0b5..0000000 --- a/fs/xfs/xfs_vfsops.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _XFS_VFSOPS_H -#define _XFS_VFSOPS_H 1 - -struct cred; -struct xfs_fid; -struct inode; -struct kstatfs; -struct xfs_mount; -struct xfs_mount_args; - -void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, - int lnnum); - -#endif /* _XFS_VFSOPS_H */ -- cgit v0.10.2 From 26c5295135d10fc90cbf160adfda392d91f58279 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 28 Nov 2008 14:23:37 +1100 Subject: [XFS] remove i_gen from incore inode i_gen is incremented in directory operations when the directory is changed. It is never read or otherwise used so it should be removed to help reduce the size of the struct xfs_inode. The patch also removes a duplicate logging of the directory inode core. We only need to do this once per transaction so kill the one associated with the i_gen increment. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 7f007ef..ea691c7 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -261,7 +261,6 @@ typedef struct xfs_inode { unsigned short i_flags; /* see defined flags below */ unsigned char i_update_core; /* timestamps/size is dirty */ unsigned char i_update_size; /* di_size field is dirty */ - unsigned int i_gen; /* generation count */ unsigned int i_delayed_blks; /* count of delay alloc blks */ xfs_icdinode_t i_d; /* most of ondisk inode */ diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index d700dac..02f0e8f 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -367,19 +367,11 @@ xfs_rename( &first_block, &free_list, spaceres); if (error) goto abort_return; - xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - /* - * Update the generation counts on all the directory inodes - * that we're modifying. 
- */ - src_dp->i_gen++; + xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); - - if (new_parent) { - target_dp->i_gen++; + if (new_parent) xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); - } /* * If this is a synchronous mount, make sure that the diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index f26b038..b29a0eb 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1599,8 +1599,6 @@ xfs_create( xfs_trans_set_sync(tp); } - dp->i_gen++; - /* * Attach the dquot(s) to the inodes and modify them incore. * These ids of the inode couldn't have changed since the new @@ -1967,13 +1965,6 @@ xfs_remove( } xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - /* - * Bump the in memory generation count on the parent - * directory so that other can know that it has changed. - */ - dp->i_gen++; - xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); - if (is_dir) { /* * Drop the link from ip's "..". @@ -1991,8 +1982,8 @@ xfs_remove( } else { /* * When removing a non-directory we need to log the parent - * inode here for the i_gen update. For a directory this is - * done implicitly by the xfs_droplink call for the ".." entry. + * inode here. For a directory this is done implicitly + * by the xfs_droplink call for the ".." entry. */ xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); } @@ -2152,7 +2143,6 @@ xfs_link( if (error) goto abort_return; xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - tdp->i_gen++; xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); error = xfs_bumplink(tp, sip); @@ -2329,18 +2319,10 @@ xfs_mkdir( } xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - /* - * Bump the in memory version number of the parent directory - * so that other processes accessing it will recognize that - * the directory has changed. - */ - dp->i_gen++; - error = xfs_dir_init(tp, cdp, dp); if (error) goto error2; - cdp->i_gen = 1; error = xfs_bumplink(tp, dp); if (error) goto error2; @@ -2627,13 +2609,6 @@ xfs_symlink( xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); /* - * Bump the in memory version number of the parent directory - * so that other processes accessing it will recognize that - * the directory has changed. - */ - dp->i_gen++; - - /* * If this is a synchronous mount, make sure that the * symlink transaction goes to disk before returning to * the user. -- cgit v0.10.2 From 5e1be0fb1a3950597aeda448698e85b0595a2e92 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:37 +1100 Subject: [XFS] factor out xfs_read_agi helper Add a helper to read the AGI header and perform basic verification. Based on hunks from a larger patch from Dave Chinner. (First sent on Juli 23rd) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 2bfd863..f4a3153 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -142,6 +142,9 @@ typedef struct xfs_agi { #define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp)) #define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)XFS_BUF_PTR(bp)) +extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_agnumber_t agno, struct xfs_buf **bpp); + /* * The third a.g. block contains the a.g. freelist, an array * of block pointers to blocks owned by the allocation btree code. 
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index c8a56c52..efb65fe 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1462,70 +1462,95 @@ xfs_ialloc_log_agi( xfs_trans_log_buf(tp, bp, first, last); } +#ifdef DEBUG +STATIC void +xfs_check_agi_unlinked( + struct xfs_agi *agi) +{ + int i; + + for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) + ASSERT(agi->agi_unlinked[i]); +} +#else +#define xfs_check_agi_unlinked(agi) +#endif + /* * Read in the allocation group header (inode allocation section) */ int -xfs_ialloc_read_agi( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_buf_t **bpp) /* allocation group hdr buf */ +xfs_read_agi( + struct xfs_mount *mp, /* file system mount structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + struct xfs_buf **bpp) /* allocation group hdr buf */ { - xfs_agi_t *agi; /* allocation group header */ - int agi_ok; /* agi is consistent */ - xfs_buf_t *bp; /* allocation group hdr buf */ - xfs_perag_t *pag; /* per allocation group data */ - int error; + struct xfs_agi *agi; /* allocation group header */ + int agi_ok; /* agi is consistent */ + int error; ASSERT(agno != NULLAGNUMBER); - error = xfs_trans_read_buf( - mp, tp, mp->m_ddev_targp, + + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0, &bp); + XFS_FSS_TO_BB(mp, 1), 0, bpp); if (error) return error; - ASSERT(bp && !XFS_BUF_GETERROR(bp)); + + ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp)); + agi = XFS_BUF_TO_AGI(*bpp); /* * Validate the magic number of the agi block. */ - agi = XFS_BUF_TO_AGI(bp); - agi_ok = - be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && - XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)); + agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && + XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) && + be32_to_cpu(agi->agi_seqno) == agno; if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, XFS_RANDOM_IALLOC_READ_AGI))) { - XFS_CORRUPTION_ERROR("xfs_ialloc_read_agi", XFS_ERRLEVEL_LOW, + XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW, mp, agi); - xfs_trans_brelse(tp, bp); + xfs_trans_brelse(tp, *bpp); return XFS_ERROR(EFSCORRUPTED); } + + XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGI, XFS_AGI_REF); + + xfs_check_agi_unlinked(agi); + return 0; +} + +int +xfs_ialloc_read_agi( + struct xfs_mount *mp, /* file system mount structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + struct xfs_buf **bpp) /* allocation group hdr buf */ +{ + struct xfs_agi *agi; /* allocation group header */ + struct xfs_perag *pag; /* per allocation group data */ + int error; + + error = xfs_read_agi(mp, tp, agno, bpp); + if (error) + return error; + + agi = XFS_BUF_TO_AGI(*bpp); pag = &mp->m_perag[agno]; + if (!pag->pagi_init) { pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); pag->pagi_count = be32_to_cpu(agi->agi_count); pag->pagi_init = 1; - } else { - /* - * It's possible for these to be out of sync if - * we are in the middle of a forced shutdown. 
- */ - ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || - XFS_FORCED_SHUTDOWN(mp)); - } - -#ifdef DEBUG - { - int i; - - for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) - ASSERT(agi->agi_unlinked[i]); } -#endif - XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGI, XFS_AGI_REF); - *bpp = bp; + /* + * It's possible for these to be out of sync if + * we are in the middle of a forced shutdown. + */ + ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || + XFS_FORCED_SHUTDOWN(mp)); return 0; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b977100..46b0526 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1843,13 +1843,10 @@ xfs_iunlink( xfs_dinode_t *dip; xfs_buf_t *agibp; xfs_buf_t *ibp; - xfs_agnumber_t agno; - xfs_daddr_t agdaddr; xfs_agino_t agino; short bucket_index; int offset; int error; - int agi_ok; ASSERT(ip->i_d.di_nlink == 0); ASSERT(ip->i_d.di_mode != 0); @@ -1857,31 +1854,15 @@ xfs_iunlink( mp = tp->t_mountp; - agno = XFS_INO_TO_AGNO(mp, ip->i_ino); - agdaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)); - /* * Get the agi buffer first. It ensures lock ordering * on the list. */ - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr, - XFS_FSS_TO_BB(mp, 1), 0, &agibp); + error = xfs_read_agi(mp, tp, XFS_INO_TO_AGNO(mp, ip->i_ino), &agibp); if (error) return error; - - /* - * Validate the magic number of the agi block. - */ agi = XFS_BUF_TO_AGI(agibp); - agi_ok = - be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && - XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)); - if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IUNLINK, - XFS_RANDOM_IUNLINK))) { - XFS_CORRUPTION_ERROR("xfs_iunlink", XFS_ERRLEVEL_LOW, mp, agi); - xfs_trans_brelse(tp, agibp); - return XFS_ERROR(EFSCORRUPTED); - } + /* * Get the index into the agi hash table for the * list this inode will go on. @@ -1941,7 +1922,6 @@ xfs_iunlink_remove( xfs_buf_t *agibp; xfs_buf_t *ibp; xfs_agnumber_t agno; - xfs_daddr_t agdaddr; xfs_agino_t agino; xfs_agino_t next_agino; xfs_buf_t *last_ibp; @@ -1949,45 +1929,20 @@ xfs_iunlink_remove( short bucket_index; int offset, last_offset = 0; int error; - int agi_ok; - /* - * First pull the on-disk inode from the AGI unlinked list. - */ mp = tp->t_mountp; - agno = XFS_INO_TO_AGNO(mp, ip->i_ino); - agdaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)); /* * Get the agi buffer first. It ensures lock ordering * on the list. */ - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, agdaddr, - XFS_FSS_TO_BB(mp, 1), 0, &agibp); - if (error) { - cmn_err(CE_WARN, - "xfs_iunlink_remove: xfs_trans_read_buf() returned an error %d on %s. Returning error.", - error, mp->m_fsname); + error = xfs_read_agi(mp, tp, agno, &agibp); + if (error) return error; - } - /* - * Validate the magic number of the agi block. - */ + agi = XFS_BUF_TO_AGI(agibp); - agi_ok = - be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && - XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)); - if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IUNLINK_REMOVE, - XFS_RANDOM_IUNLINK_REMOVE))) { - XFS_CORRUPTION_ERROR("xfs_iunlink_remove", XFS_ERRLEVEL_LOW, - mp, agi); - xfs_trans_brelse(tp, agibp); - cmn_err(CE_WARN, - "xfs_iunlink_remove: XFS_TEST_ERROR() returned an error on %s. Returning EFSCORRUPTED.", - mp->m_fsname); - return XFS_ERROR(EFSCORRUPTED); - } + /* * Get the index into the agi hash table for the * list this inode will go on. 
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b411d494..b552676 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3117,19 +3117,16 @@ xlog_recover_clear_agi_bucket( int error; tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); - error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0); - if (!error) - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0, &agibp); + error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), + 0, 0, 0); if (error) goto out_abort; - error = EINVAL; - agi = XFS_BUF_TO_AGI(agibp); - if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC) + error = xfs_read_agi(mp, tp, agno, &agibp); + if (error) goto out_abort; + agi = XFS_BUF_TO_AGI(agibp); agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket); @@ -3190,16 +3187,17 @@ xlog_recover_process_iunlinks( /* * Find the agi for this ag. */ - agibp = xfs_buf_read(mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0); - if (XFS_BUF_ISERROR(agibp)) { - xfs_ioerror_alert("xlog_recover_process_iunlinks(#1)", - log->l_mp, agibp, - XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp))); + error = xfs_read_agi(mp, NULL, agno, &agibp); + if (error) { + /* + * AGI is b0rked. Don't process it. + * + * We should probably mark the filesystem as corrupt + * after we've recovered all the ag's we can.... + */ + continue; } agi = XFS_BUF_TO_AGI(agibp); - ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agi->agi_magicnum)); for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { @@ -3278,22 +3276,12 @@ xlog_recover_process_iunlinks( /* * Reacquire the agibuffer and continue around - * the loop. + * the loop. This should never fail as we know + * the buffer was good earlier on. 
*/ - agibp = xfs_buf_read(mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, - XFS_AGI_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0); - if (XFS_BUF_ISERROR(agibp)) { - xfs_ioerror_alert( - "xlog_recover_process_iunlinks(#2)", - log->l_mp, agibp, - XFS_AG_DADDR(mp, agno, - XFS_AGI_DADDR(mp))); - } + error = xfs_read_agi(mp, NULL, agno, &agibp); + ASSERT(error == 0); agi = XFS_BUF_TO_AGI(agibp); - ASSERT(XFS_AGI_MAGIC == be32_to_cpu( - agi->agi_magicnum)); } } @@ -3980,11 +3968,9 @@ xlog_recover_check_summary( { xfs_mount_t *mp; xfs_agf_t *agfp; - xfs_agi_t *agip; xfs_buf_t *agfbp; xfs_buf_t *agibp; xfs_daddr_t agfdaddr; - xfs_daddr_t agidaddr; xfs_buf_t *sbbp; #ifdef XFS_LOUD_RECOVERY xfs_sb_t *sbp; @@ -3993,6 +3979,7 @@ xlog_recover_check_summary( __uint64_t freeblks; __uint64_t itotal; __uint64_t ifree; + int error; mp = log->l_mp; @@ -4016,21 +4003,14 @@ xlog_recover_check_summary( be32_to_cpu(agfp->agf_flcount); xfs_buf_relse(agfbp); - agidaddr = XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)); - agibp = xfs_buf_read(mp->m_ddev_targp, agidaddr, - XFS_FSS_TO_BB(mp, 1), 0); - if (XFS_BUF_ISERROR(agibp)) { - xfs_ioerror_alert("xlog_recover_check_summary(agi)", - mp, agibp, agidaddr); - } - agip = XFS_BUF_TO_AGI(agibp); - ASSERT(XFS_AGI_MAGIC == be32_to_cpu(agip->agi_magicnum)); - ASSERT(XFS_AGI_GOOD_VERSION(be32_to_cpu(agip->agi_versionnum))); - ASSERT(be32_to_cpu(agip->agi_seqno) == agno); + error = xfs_read_agi(mp, NULL, agno, &agibp); + if (!error) { + struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); - itotal += be32_to_cpu(agip->agi_count); - ifree += be32_to_cpu(agip->agi_freecount); - xfs_buf_relse(agibp); + itotal += be32_to_cpu(agi->agi_count); + ifree += be32_to_cpu(agi->agi_freecount); + xfs_buf_relse(agibp); + } } sbbp = xfs_getsb(mp, 0); -- cgit v0.10.2 From 4805621a37d9b2b16641b5c68597651419e9e252 Mon Sep 17 00:00:00 2001 From: "From: Christoph Hellwig" Date: Fri, 28 Nov 2008 14:23:38 +1100 Subject: [XFS] factor out xfs_read_agf helper Add a helper to read the AGF header and perform basic verification. Based on hunks from a larger patch from Dave Chinner. (First sent on Juli 23rd) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index f4a3153..f2e2181 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -91,6 +91,8 @@ typedef struct xfs_agf { #define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp)) #define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)XFS_BUF_PTR(bp)) +extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); /* * Size of the unlinked inode hash table in the agi. diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index c47ce90..028e44e 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -2233,44 +2233,41 @@ xfs_alloc_put_freelist( * Read in the allocation group header (free/alloc section). */ int /* error */ -xfs_alloc_read_agf( - xfs_mount_t *mp, /* mount point structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - int flags, /* XFS_ALLOC_FLAG_... 
*/ - xfs_buf_t **bpp) /* buffer for the ag freelist header */ +xfs_read_agf( + struct xfs_mount *mp, /* mount point structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags, /* XFS_BUF_ */ + struct xfs_buf **bpp) /* buffer for the ag freelist header */ { - xfs_agf_t *agf; /* ag freelist header */ + struct xfs_agf *agf; /* ag freelist header */ int agf_ok; /* set if agf is consistent */ - xfs_buf_t *bp; /* return value */ - xfs_perag_t *pag; /* per allocation group data */ int error; ASSERT(agno != NULLAGNUMBER); error = xfs_trans_read_buf( mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), - (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U, - &bp); + XFS_FSS_TO_BB(mp, 1), flags, bpp); if (error) return error; - ASSERT(!bp || !XFS_BUF_GETERROR(bp)); - if (!bp) { - *bpp = NULL; + if (!*bpp) return 0; - } + + ASSERT(!XFS_BUF_GETERROR(*bpp)); + agf = XFS_BUF_TO_AGF(*bpp); + /* * Validate the magic number of the agf block. */ - agf = XFS_BUF_TO_AGF(bp); agf_ok = be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC && XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && - be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp); + be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp) && + be32_to_cpu(agf->agf_seqno) == agno; if (xfs_sb_version_haslazysbcount(&mp->m_sb)) agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <= be32_to_cpu(agf->agf_length); @@ -2278,9 +2275,41 @@ xfs_alloc_read_agf( XFS_RANDOM_ALLOC_READ_AGF))) { XFS_CORRUPTION_ERROR("xfs_alloc_read_agf", XFS_ERRLEVEL_LOW, mp, agf); - xfs_trans_brelse(tp, bp); + xfs_trans_brelse(tp, *bpp); return XFS_ERROR(EFSCORRUPTED); } + + XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF); + return 0; +} + +/* + * Read in the allocation group header (free/alloc section). + */ +int /* error */ +xfs_alloc_read_agf( + struct xfs_mount *mp, /* mount point structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags, /* XFS_ALLOC_FLAG_... */ + struct xfs_buf **bpp) /* buffer for the ag freelist header */ +{ + struct xfs_agf *agf; /* ag freelist header */ + struct xfs_perag *pag; /* per allocation group data */ + int error; + + ASSERT(agno != NULLAGNUMBER); + + error = xfs_read_agf(mp, tp, agno, + (flags & XFS_ALLOC_FLAG_TRYLOCK) ? 
XFS_BUF_TRYLOCK : 0, + bpp); + if (error) + return error; + if (!*bpp) + return 0; + ASSERT(!XFS_BUF_GETERROR(*bpp)); + + agf = XFS_BUF_TO_AGF(*bpp); pag = &mp->m_perag[agno]; if (!pag->pagf_init) { pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); @@ -2308,8 +2337,6 @@ xfs_alloc_read_agf( be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); } #endif - XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF); - *bpp = bp; return 0; } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b552676..d949879 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3970,7 +3970,6 @@ xlog_recover_check_summary( xfs_agf_t *agfp; xfs_buf_t *agfbp; xfs_buf_t *agibp; - xfs_daddr_t agfdaddr; xfs_buf_t *sbbp; #ifdef XFS_LOUD_RECOVERY xfs_sb_t *sbp; @@ -3987,21 +3986,18 @@ xlog_recover_check_summary( itotal = 0LL; ifree = 0LL; for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { - agfdaddr = XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)); - agfbp = xfs_buf_read(mp->m_ddev_targp, agfdaddr, - XFS_FSS_TO_BB(mp, 1), 0); - if (XFS_BUF_ISERROR(agfbp)) { - xfs_ioerror_alert("xlog_recover_check_summary(agf)", - mp, agfbp, agfdaddr); + error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); + if (error) { + xfs_fs_cmn_err(CE_ALERT, mp, + "xlog_recover_check_summary(agf)" + "agf read failed agno %d error %d", + agno, error); + } else { + agfp = XFS_BUF_TO_AGF(agfbp); + freeblks += be32_to_cpu(agfp->agf_freeblks) + + be32_to_cpu(agfp->agf_flcount); + xfs_buf_relse(agfbp); } - agfp = XFS_BUF_TO_AGF(agfbp); - ASSERT(XFS_AGF_MAGIC == be32_to_cpu(agfp->agf_magicnum)); - ASSERT(XFS_AGF_GOOD_VERSION(be32_to_cpu(agfp->agf_versionnum))); - ASSERT(be32_to_cpu(agfp->agf_seqno) == agno); - - freeblks += be32_to_cpu(agfp->agf_freeblks) + - be32_to_cpu(agfp->agf_flcount); - xfs_buf_relse(agfbp); error = xfs_read_agi(mp, NULL, agno, &agibp); if (!error) { -- cgit v0.10.2 From b28708d6a0a3ed65a68f0dcd8e6d1c09f14e5cf3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:38 +1100 Subject: [XFS] sanitize xlog_in_core_t definition Move all fields from xlog_iclog_fields_t into xlog_in_core_t instead of having them in a substructure and the using #defines to make it look like they were directly in xlog_in_core_t. Also document that xlog_in_core_2_t is grossly misnamed, and make all references to it typesafe. (First sent on Semptember 15th) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 8a5b055..aadaa14 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1029,12 +1029,6 @@ xlog_iodone(xfs_buf_t *bp) ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); aborted = 0; - - /* - * Some versions of cpp barf on the recursive definition of - * ic_log -> hic_fields.ic_log and expand ic_log twice when - * it is passed through two macros. Workaround broken cpp. 
- */ l = iclog->ic_log; /* @@ -1301,7 +1295,7 @@ xlog_alloc_log(xfs_mount_t *mp, XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb); XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); iclog->ic_bp = bp; - iclog->hic_data = bp->b_addr; + iclog->ic_data = bp->b_addr; #ifdef DEBUG log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); #endif @@ -1321,7 +1315,7 @@ xlog_alloc_log(xfs_mount_t *mp, atomic_set(&iclog->ic_refcnt, 0); spin_lock_init(&iclog->ic_callback_lock); iclog->ic_callback_tail = &(iclog->ic_callback); - iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize; + iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); @@ -3463,7 +3457,7 @@ xlog_verify_iclog(xlog_t *log, ptr = iclog->ic_datap; base_ptr = ptr; ophead = (xlog_op_header_t *)ptr; - xhdr = (xlog_in_core_2_t *)&iclog->ic_header; + xhdr = iclog->ic_data; for (i = 0; i < len; i++) { ophead = (xlog_op_header_t *)ptr; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index b39a198..654167b 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -310,6 +310,16 @@ typedef struct xlog_rec_ext_header { } xlog_rec_ext_header_t; #ifdef __KERNEL__ + +/* + * Quite misnamed, because this union lays out the actual on-disk log buffer. + */ +typedef union xlog_in_core2 { + xlog_rec_header_t hic_header; + xlog_rec_ext_header_t hic_xheader; + char hic_sector[XLOG_HEADER_SIZE]; +} xlog_in_core_2_t; + /* * - A log record header is 512 bytes. There is plenty of room to grow the * xlog_rec_header_t into the reserved space. @@ -339,7 +349,7 @@ typedef struct xlog_rec_ext_header { * We'll put all the read-only and l_icloglock fields in the first cacheline, * and move everything else out to subsequent cachelines. */ -typedef struct xlog_iclog_fields { +typedef struct xlog_in_core { sv_t ic_force_wait; sv_t ic_write_wait; struct xlog_in_core *ic_next; @@ -362,41 +372,11 @@ typedef struct xlog_iclog_fields { /* reference counts need their own cacheline */ atomic_t ic_refcnt ____cacheline_aligned_in_smp; -} xlog_iclog_fields_t; - -typedef union xlog_in_core2 { - xlog_rec_header_t hic_header; - xlog_rec_ext_header_t hic_xheader; - char hic_sector[XLOG_HEADER_SIZE]; -} xlog_in_core_2_t; - -typedef struct xlog_in_core { - xlog_iclog_fields_t hic_fields; - xlog_in_core_2_t *hic_data; + xlog_in_core_2_t *ic_data; +#define ic_header ic_data->hic_header } xlog_in_core_t; /* - * Defines to save our code from this glop. - */ -#define ic_force_wait hic_fields.ic_force_wait -#define ic_write_wait hic_fields.ic_write_wait -#define ic_next hic_fields.ic_next -#define ic_prev hic_fields.ic_prev -#define ic_bp hic_fields.ic_bp -#define ic_log hic_fields.ic_log -#define ic_callback hic_fields.ic_callback -#define ic_callback_lock hic_fields.ic_callback_lock -#define ic_callback_tail hic_fields.ic_callback_tail -#define ic_trace hic_fields.ic_trace -#define ic_size hic_fields.ic_size -#define ic_offset hic_fields.ic_offset -#define ic_refcnt hic_fields.ic_refcnt -#define ic_bwritecnt hic_fields.ic_bwritecnt -#define ic_state hic_fields.ic_state -#define ic_datap hic_fields.ic_datap -#define ic_header hic_data->hic_header - -/* * The reservation head lsn is not made up of a cycle number and block number. * Instead, it uses a cycle number and byte number. 
Logs don't expect to * overflow 31 bits worth of byte offset, so using a byte number will mean diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index d949879..9abb96a 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3332,7 +3332,6 @@ xlog_pack_data( int size = iclog->ic_offset + roundoff; __be32 cycle_lsn; xfs_caddr_t dp; - xlog_in_core_2_t *xhdr; xlog_pack_data_checksum(log, iclog, size); @@ -3347,7 +3346,8 @@ xlog_pack_data( } if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { - xhdr = (xlog_in_core_2_t *)&iclog->ic_header; + xlog_in_core_2_t *xhdr = iclog->ic_data; + for ( ; i < BTOBB(size); i++) { j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); @@ -3405,7 +3405,6 @@ xlog_unpack_data( xlog_t *log) { int i, j, k; - xlog_in_core_2_t *xhdr; for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { @@ -3414,7 +3413,7 @@ xlog_unpack_data( } if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { - xhdr = (xlog_in_core_2_t *)rhead; + xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead; for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); -- cgit v0.10.2 From d42f08f61c5e7f0ed4c6b6df4c9987ddb85ec66e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:38 +1100 Subject: [XFS] kill xfs_ialloc_log_di xfs_ialloc_log_di is only used to log the full inode core + di_next_unlinked. That means all the offset magic is not nessecary and we can simply use xfs_trans_log_buf directly. Also add a comment describing what we should do here instead. (First sent on October 7th) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index d7cf392..10f9204 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -106,40 +106,6 @@ typedef struct xfs_dinode #define XFS_MAXLINK_1 65535U /* - * Bit names for logging disk inodes only - */ -#define XFS_DI_MAGIC 0x0000001 -#define XFS_DI_MODE 0x0000002 -#define XFS_DI_VERSION 0x0000004 -#define XFS_DI_FORMAT 0x0000008 -#define XFS_DI_ONLINK 0x0000010 -#define XFS_DI_UID 0x0000020 -#define XFS_DI_GID 0x0000040 -#define XFS_DI_NLINK 0x0000080 -#define XFS_DI_PROJID 0x0000100 -#define XFS_DI_PAD 0x0000200 -#define XFS_DI_ATIME 0x0000400 -#define XFS_DI_MTIME 0x0000800 -#define XFS_DI_CTIME 0x0001000 -#define XFS_DI_SIZE 0x0002000 -#define XFS_DI_NBLOCKS 0x0004000 -#define XFS_DI_EXTSIZE 0x0008000 -#define XFS_DI_NEXTENTS 0x0010000 -#define XFS_DI_NAEXTENTS 0x0020000 -#define XFS_DI_FORKOFF 0x0040000 -#define XFS_DI_AFORMAT 0x0080000 -#define XFS_DI_DMEVMASK 0x0100000 -#define XFS_DI_DMSTATE 0x0200000 -#define XFS_DI_FLAGS 0x0400000 -#define XFS_DI_GEN 0x0800000 -#define XFS_DI_NEXT_UNLINKED 0x1000000 -#define XFS_DI_U 0x2000000 -#define XFS_DI_A 0x4000000 -#define XFS_DI_NUM_BITS 27 -#define XFS_DI_ALL_BITS ((1 << XFS_DI_NUM_BITS) - 1) -#define XFS_DI_CORE_BITS (XFS_DI_ALL_BITS & ~(XFS_DI_U|XFS_DI_A)) - -/* * Values for di_format */ typedef enum xfs_dinode_fmt diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index efb65fe..86d463e 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -41,68 +41,6 @@ #include "xfs_error.h" #include "xfs_bmap.h" -/* - * Log specified fields for the inode given by bp and off. 
- */ -STATIC void -xfs_ialloc_log_di( - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *bp, /* inode buffer */ - int off, /* index of inode in buffer */ - int fields) /* bitmask of fields to log */ -{ - int first; /* first byte number */ - int ioffset; /* off in bytes */ - int last; /* last byte number */ - xfs_mount_t *mp; /* mount point structure */ - static const short offsets[] = { /* field offsets */ - /* keep in sync with bits */ - offsetof(xfs_dinode_core_t, di_magic), - offsetof(xfs_dinode_core_t, di_mode), - offsetof(xfs_dinode_core_t, di_version), - offsetof(xfs_dinode_core_t, di_format), - offsetof(xfs_dinode_core_t, di_onlink), - offsetof(xfs_dinode_core_t, di_uid), - offsetof(xfs_dinode_core_t, di_gid), - offsetof(xfs_dinode_core_t, di_nlink), - offsetof(xfs_dinode_core_t, di_projid), - offsetof(xfs_dinode_core_t, di_pad), - offsetof(xfs_dinode_core_t, di_atime), - offsetof(xfs_dinode_core_t, di_mtime), - offsetof(xfs_dinode_core_t, di_ctime), - offsetof(xfs_dinode_core_t, di_size), - offsetof(xfs_dinode_core_t, di_nblocks), - offsetof(xfs_dinode_core_t, di_extsize), - offsetof(xfs_dinode_core_t, di_nextents), - offsetof(xfs_dinode_core_t, di_anextents), - offsetof(xfs_dinode_core_t, di_forkoff), - offsetof(xfs_dinode_core_t, di_aformat), - offsetof(xfs_dinode_core_t, di_dmevmask), - offsetof(xfs_dinode_core_t, di_dmstate), - offsetof(xfs_dinode_core_t, di_flags), - offsetof(xfs_dinode_core_t, di_gen), - offsetof(xfs_dinode_t, di_next_unlinked), - offsetof(xfs_dinode_t, di_u), - offsetof(xfs_dinode_t, di_a), - sizeof(xfs_dinode_t) - }; - - - ASSERT(offsetof(xfs_dinode_t, di_core) == 0); - ASSERT((fields & (XFS_DI_U|XFS_DI_A)) == 0); - mp = tp->t_mountp; - /* - * Get the inode-relative first and last bytes for these fields - */ - xfs_btree_offsets(fields, offsets, XFS_DI_NUM_BITS, &first, &last); - /* - * Convert to buffer offsets and log it. - */ - ioffset = off << mp->m_sb.sb_inodelog; - first += ioffset; - last += ioffset; - xfs_trans_log_buf(tp, bp, first, last); -} /* * Allocation group level functions. @@ -406,18 +344,25 @@ xfs_ialloc_ag_alloc( XFS_BUF_LOCK); ASSERT(fbuf); ASSERT(!XFS_BUF_GETERROR(fbuf)); + /* - * Set initial values for the inodes in this buffer. + * Initialize all inodes in this buffer and then log them. + * + * XXX: It would be much better if we had just one transaction to + * log a whole cluster of inodes instead of all the indivdual + * transactions causing a lot of log traffic. */ xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); for (i = 0; i < ninodes; i++) { + int ioffset = i << args.mp->m_sb.sb_inodelog; + uint isize = sizeof(xfs_dinode_t) + sizeof(__be32); + free = XFS_MAKE_IPTR(args.mp, fbuf, i); free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC); free->di_core.di_version = version; free->di_core.di_gen = cpu_to_be32(gen); free->di_next_unlinked = cpu_to_be32(NULLAGINO); - xfs_ialloc_log_di(tp, fbuf, i, - XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED); + xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); } xfs_trans_inode_alloc_buf(tp, fbuf); } -- cgit v0.10.2 From 81591fe2db19d0fc1ec2aaaa6a790a5ab97ac3ab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:39 +1100 Subject: [XFS] kill xfs_dinode_core_t Now that we have a separate xfs_icdinode_t for the in-core inode which gets logged there is no need anymore for the xfs_dinode vs xfs_dinode_core split - the fact that part of the structure gets logged through the inode log item and a small part not can better be described in a comment. 
All sizeof operations on the dinode_core either really wanted the icdinode and are switched to that one, or had already added the size of the agi unlinked list pointer. Later both will be replaced with helpers once we get the larger CRC-enabled dinode. Removing the data and attribute fork unions also has the advantage that xfs_dinode.h doesn't need to pull in every header under the sun. While we're at it also add some more comments describing the dinode structure. (First sent on October 7th) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index 10f9204..0b7ebf9 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -18,32 +18,32 @@ #ifndef __XFS_DINODE_H__ #define __XFS_DINODE_H__ -struct xfs_buf; -struct xfs_mount; - #define XFS_DINODE_VERSION_1 1 #define XFS_DINODE_VERSION_2 2 #define XFS_DINODE_GOOD_VERSION(v) \ (((v) == XFS_DINODE_VERSION_1 || (v) == XFS_DINODE_VERSION_2)) #define XFS_DINODE_MAGIC 0x494e /* 'IN' */ -/* - * Disk inode structure. - * This is just the header; the inode is expanded to fill a variable size - * with the last field expanding. It is split into the core and "other" - * because we only need the core part in the in-core inode. - */ typedef struct xfs_timestamp { __be32 t_sec; /* timestamp seconds */ __be32 t_nsec; /* timestamp nanoseconds */ } xfs_timestamp_t; /* - * Note: Coordinate changes to this structure with the XFS_DI_* #defines - * below, the offsets table in xfs_ialloc_log_di() and struct xfs_icdinode - * in xfs_inode.h. + * On-disk inode structure. + * + * This is just the header or "dinode core", the inode is expanded to fill a + * variable size the leftover area split into a data and an attribute fork. + * The format of the data and attribute fork depends on the format of the + * inode as indicated by di_format and di_aformat. To access the data and + * attribute use the XFS_DFORK_PTR, XFS_DFORK_DPTR, and XFS_DFORK_PTR macros + * below. + * + * There is a very similar struct icdinode in xfs_inode which matches the + * layout of the first 96 bytes of this structure, but is kept in native + * format instead of big endian. */ -typedef struct xfs_dinode_core { +typedef struct xfs_dinode { __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ __be16 di_mode; /* mode and type of file */ __u8 di_version; /* inode version */ @@ -69,33 +69,12 @@ typedef struct xfs_dinode_core { __be16 di_dmstate; /* DMIG state info */ __be16 di_flags; /* random flags, XFS_DIFLAG_... */ __be32 di_gen; /* generation number */ -} xfs_dinode_core_t; -#define DI_MAX_FLUSH 0xffff + /* di_next_unlinked is the only non-core field in the old dinode */ + __be32 di_next_unlinked;/* agi unlinked list ptr */ +} __attribute__((packed)) xfs_dinode_t; -typedef struct xfs_dinode -{ - xfs_dinode_core_t di_core; - /* - * In adding anything between the core and the union, be - * sure to update the macros like XFS_LITINO below. 
- */ - __be32 di_next_unlinked;/* agi unlinked list ptr */ - union { - xfs_bmdr_block_t di_bmbt; /* btree root block */ - xfs_bmbt_rec_32_t di_bmx[1]; /* extent list */ - xfs_dir2_sf_t di_dir2sf; /* shortform directory v2 */ - char di_c[1]; /* local contents */ - __be32 di_dev; /* device for S_IFCHR/S_IFBLK */ - uuid_t di_muuid; /* mount point value */ - char di_symlink[1]; /* local symbolic link */ - } di_u; - union { - xfs_bmdr_block_t di_abmbt; /* btree root block */ - xfs_bmbt_rec_32_t di_abmx[1]; /* extent list */ - xfs_attr_shortform_t di_attrsf; /* shortform attribute list */ - } di_a; -} xfs_dinode_t; +#define DI_MAX_FLUSH 0xffff /* * The 32 bit link count in the inode theoretically maxes out at UINT_MAX. @@ -108,14 +87,12 @@ typedef struct xfs_dinode /* * Values for di_format */ -typedef enum xfs_dinode_fmt -{ - XFS_DINODE_FMT_DEV, /* CHR, BLK: di_dev */ - XFS_DINODE_FMT_LOCAL, /* DIR, REG: di_c */ - /* LNK: di_symlink */ - XFS_DINODE_FMT_EXTENTS, /* DIR, REG, LNK: di_bmx */ - XFS_DINODE_FMT_BTREE, /* DIR, REG, LNK: di_bmbt */ - XFS_DINODE_FMT_UUID /* MNT: di_uuid */ +typedef enum xfs_dinode_fmt { + XFS_DINODE_FMT_DEV, /* xfs_dev_t */ + XFS_DINODE_FMT_LOCAL, /* bulk data */ + XFS_DINODE_FMT_EXTENTS, /* struct xfs_bmbt_rec */ + XFS_DINODE_FMT_BTREE, /* struct xfs_bmdr_block */ + XFS_DINODE_FMT_UUID /* uuid_t */ } xfs_dinode_fmt_t; /* @@ -136,8 +113,8 @@ typedef enum xfs_dinode_fmt /* * Inode data & attribute fork sizes, per inode. */ -#define XFS_DFORK_Q(dip) ((dip)->di_core.di_forkoff != 0) -#define XFS_DFORK_BOFF(dip) ((int)((dip)->di_core.di_forkoff << 3)) +#define XFS_DFORK_Q(dip) ((dip)->di_forkoff != 0) +#define XFS_DFORK_BOFF(dip) ((int)((dip)->di_forkoff << 3)) #define XFS_DFORK_DSIZE(dip,mp) \ (XFS_DFORK_Q(dip) ? \ @@ -152,23 +129,42 @@ typedef enum xfs_dinode_fmt XFS_DFORK_DSIZE(dip, mp) : \ XFS_DFORK_ASIZE(dip, mp)) -#define XFS_DFORK_DPTR(dip) ((dip)->di_u.di_c) +/* + * Return pointers to the data or attribute forks. + */ +#define XFS_DFORK_DPTR(dip) \ + ((char *)(dip) + sizeof(struct xfs_dinode)) #define XFS_DFORK_APTR(dip) \ - ((dip)->di_u.di_c + XFS_DFORK_BOFF(dip)) + (XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip)) #define XFS_DFORK_PTR(dip,w) \ ((w) == XFS_DATA_FORK ? XFS_DFORK_DPTR(dip) : XFS_DFORK_APTR(dip)) + #define XFS_DFORK_FORMAT(dip,w) \ ((w) == XFS_DATA_FORK ? \ - (dip)->di_core.di_format : \ - (dip)->di_core.di_aformat) + (dip)->di_format : \ + (dip)->di_aformat) #define XFS_DFORK_NEXTENTS(dip,w) \ ((w) == XFS_DATA_FORK ? \ - be32_to_cpu((dip)->di_core.di_nextents) : \ - be16_to_cpu((dip)->di_core.di_anextents)) + be32_to_cpu((dip)->di_nextents) : \ + be16_to_cpu((dip)->di_anextents)) #define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp)) /* + * For block and character special files the 32bit dev_t is stored at the + * beginning of the data fork. + */ +static inline xfs_dev_t xfs_dinode_get_rdev(struct xfs_dinode *dip) +{ + return be32_to_cpu(*(__be32 *)XFS_DFORK_DPTR(dip)); +} + +static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) +{ + *(__be32 *)XFS_DFORK_DPTR(dip) = cpu_to_be32(rdev); +} + +/* * Values for di_flags * There should be a one-to-one correspondence between these flags and the * XFS_XFLAG_s. diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h index deecc9d..6ac44b5 100644 --- a/fs/xfs/xfs_dir2_sf.h +++ b/fs/xfs/xfs_dir2_sf.h @@ -34,13 +34,6 @@ struct xfs_mount; struct xfs_trans; /* - * Maximum size of a shortform directory. 
- */ -#define XFS_DIR2_SF_MAX_SIZE \ - (XFS_DINODE_MAX_SIZE - (uint)sizeof(xfs_dinode_core_t) - \ - (uint)sizeof(xfs_agino_t)) - -/* * Inode number stored as 8 8-bit values. */ typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t; diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 86d463e..47e9428 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -355,12 +355,12 @@ xfs_ialloc_ag_alloc( xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog); for (i = 0; i < ninodes; i++) { int ioffset = i << args.mp->m_sb.sb_inodelog; - uint isize = sizeof(xfs_dinode_t) + sizeof(__be32); + uint isize = sizeof(struct xfs_dinode); free = XFS_MAKE_IPTR(args.mp, fbuf, i); - free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC); - free->di_core.di_version = version; - free->di_core.di_gen = cpu_to_be32(gen); + free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); + free->di_version = version; + free->di_gen = cpu_to_be32(gen); free->di_next_unlinked = cpu_to_be32(NULLAGINO); xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 46b0526..1d2912d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -174,8 +174,8 @@ xfs_imap_to_bp( dip = (xfs_dinode_t *)xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); - di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && - XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); + di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC && + XFS_DINODE_GOOD_VERSION(dip->di_version); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, XFS_RANDOM_ITOBP_INOTOBP))) { @@ -191,7 +191,7 @@ xfs_imap_to_bp( "daddr %lld #%d (magic=%x)", XFS_BUFTARG_NAME(mp->m_ddev_targp), (unsigned long long)imap->im_blkno, i, - be16_to_cpu(dip->di_core.di_magic)); + be16_to_cpu(dip->di_magic)); #endif xfs_trans_brelse(tp, bp); return XFS_ERROR(EFSCORRUPTED); @@ -350,26 +350,26 @@ xfs_iformat( XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); error = 0; - if (unlikely(be32_to_cpu(dip->di_core.di_nextents) + - be16_to_cpu(dip->di_core.di_anextents) > - be64_to_cpu(dip->di_core.di_nblocks))) { + if (unlikely(be32_to_cpu(dip->di_nextents) + + be16_to_cpu(dip->di_anextents) > + be64_to_cpu(dip->di_nblocks))) { xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", (unsigned long long)ip->i_ino, - (int)(be32_to_cpu(dip->di_core.di_nextents) + - be16_to_cpu(dip->di_core.di_anextents)), + (int)(be32_to_cpu(dip->di_nextents) + + be16_to_cpu(dip->di_anextents)), (unsigned long long) - be64_to_cpu(dip->di_core.di_nblocks)); + be64_to_cpu(dip->di_nblocks)); XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); return XFS_ERROR(EFSCORRUPTED); } - if (unlikely(dip->di_core.di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { + if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.", (unsigned long long)ip->i_ino, - dip->di_core.di_forkoff); + dip->di_forkoff); XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); return XFS_ERROR(EFSCORRUPTED); @@ -380,25 +380,25 @@ xfs_iformat( case S_IFCHR: case S_IFBLK: case S_IFSOCK: - if (unlikely(dip->di_core.di_format != XFS_DINODE_FMT_DEV)) { + if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) { XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW, ip->i_mount, dip); return XFS_ERROR(EFSCORRUPTED); } ip->i_d.di_size = 0; ip->i_size = 0; - ip->i_df.if_u2.if_rdev = 
be32_to_cpu(dip->di_u.di_dev); + ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); break; case S_IFREG: case S_IFLNK: case S_IFDIR: - switch (dip->di_core.di_format) { + switch (dip->di_format) { case XFS_DINODE_FMT_LOCAL: /* * no local regular files yet */ - if (unlikely((be16_to_cpu(dip->di_core.di_mode) & S_IFMT) == S_IFREG)) { + if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, "corrupt inode %Lu " "(local format for regular file).", @@ -409,7 +409,7 @@ xfs_iformat( return XFS_ERROR(EFSCORRUPTED); } - di_size = be64_to_cpu(dip->di_core.di_size); + di_size = be64_to_cpu(dip->di_size); if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, "corrupt inode %Lu " @@ -451,7 +451,7 @@ xfs_iformat( ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); ip->i_afp->if_ext_max = XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); - switch (dip->di_core.di_aformat) { + switch (dip->di_aformat) { case XFS_DINODE_FMT_LOCAL: atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); size = be16_to_cpu(atp->hdr.totsize); @@ -663,7 +663,7 @@ xfs_iformat_btree( void xfs_dinode_from_disk( xfs_icdinode_t *to, - xfs_dinode_core_t *from) + xfs_dinode_t *from) { to->di_magic = be16_to_cpu(from->di_magic); to->di_mode = be16_to_cpu(from->di_mode); @@ -697,7 +697,7 @@ xfs_dinode_from_disk( void xfs_dinode_to_disk( - xfs_dinode_core_t *to, + xfs_dinode_t *to, xfs_icdinode_t *from) { to->di_magic = cpu_to_be16(from->di_magic); @@ -784,9 +784,7 @@ uint xfs_dic2xflags( xfs_dinode_t *dip) { - xfs_dinode_core_t *dic = &dip->di_core; - - return _xfs_dic2xflags(be16_to_cpu(dic->di_flags)) | + return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) | (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0); } @@ -905,12 +903,12 @@ xfs_iread( * If we got something that isn't an inode it means someone * (nfs or dmi) has a stale handle. */ - if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC) { + if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) { #ifdef DEBUG xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " - "dip->di_core.di_magic (0x%x) != " + "dip->di_magic (0x%x) != " "XFS_DINODE_MAGIC (0x%x)", - be16_to_cpu(dip->di_core.di_magic), + be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC); #endif /* DEBUG */ error = XFS_ERROR(EINVAL); @@ -924,8 +922,8 @@ xfs_iread( * specific information. * Otherwise, just get the truly permanent information. */ - if (dip->di_core.di_mode) { - xfs_dinode_from_disk(&ip->i_d, &dip->di_core); + if (dip->di_mode) { + xfs_dinode_from_disk(&ip->i_d, dip); error = xfs_iformat(ip, dip); if (error) { #ifdef DEBUG @@ -936,10 +934,10 @@ xfs_iread( goto out_brelse; } } else { - ip->i_d.di_magic = be16_to_cpu(dip->di_core.di_magic); - ip->i_d.di_version = dip->di_core.di_version; - ip->i_d.di_gen = be32_to_cpu(dip->di_core.di_gen); - ip->i_d.di_flushiter = be16_to_cpu(dip->di_core.di_flushiter); + ip->i_d.di_magic = be16_to_cpu(dip->di_magic); + ip->i_d.di_version = dip->di_version; + ip->i_d.di_gen = be32_to_cpu(dip->di_gen); + ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); /* * Make sure to pull in the mode here as well in * case the inode is released without being used. @@ -2295,7 +2293,7 @@ xfs_ifree( * This is a temporary hack that would require a proper fix * in the future. 
*/ - dip->di_core.di_mode = 0; + dip->di_mode = 0; if (delete) { xfs_ifree_cluster(ip, tp, first_ino); @@ -2909,15 +2907,16 @@ xfs_iflush_fork( case XFS_DINODE_FMT_DEV: if (iip->ili_format.ilf_fields & XFS_ILOG_DEV) { ASSERT(whichfork == XFS_DATA_FORK); - dip->di_u.di_dev = cpu_to_be32(ip->i_df.if_u2.if_rdev); + xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev); } break; case XFS_DINODE_FMT_UUID: if (iip->ili_format.ilf_fields & XFS_ILOG_UUID) { ASSERT(whichfork == XFS_DATA_FORK); - memcpy(&dip->di_u.di_muuid, &ip->i_df.if_u2.if_uuid, - sizeof(uuid_t)); + memcpy(XFS_DFORK_DPTR(dip), + &ip->i_df.if_u2.if_uuid, + sizeof(uuid_t)); } break; @@ -3295,11 +3294,11 @@ xfs_iflush_int( */ xfs_synchronize_atime(ip); - if (XFS_TEST_ERROR(be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC, + if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p", - ip->i_ino, be16_to_cpu(dip->di_core.di_magic), dip); + ip->i_ino, be16_to_cpu(dip->di_magic), dip); goto corrupt_out; } if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, @@ -3362,7 +3361,7 @@ xfs_iflush_int( * because if the inode is dirty at all the core must * be. */ - xfs_dinode_to_disk(&dip->di_core, &ip->i_d); + xfs_dinode_to_disk(dip, &ip->i_d); /* Wrap, we never let the log put out DI_MAX_FLUSH */ if (ip->i_d.di_flushiter == DI_MAX_FLUSH) @@ -3382,7 +3381,7 @@ xfs_iflush_int( * Convert it back. */ ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); - dip->di_core.di_onlink = cpu_to_be16(ip->i_d.di_nlink); + dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink); } else { /* * The superblock version has already been bumped, @@ -3390,12 +3389,12 @@ xfs_iflush_int( * format permanent. */ ip->i_d.di_version = XFS_DINODE_VERSION_2; - dip->di_core.di_version = XFS_DINODE_VERSION_2; + dip->di_version = XFS_DINODE_VERSION_2; ip->i_d.di_onlink = 0; - dip->di_core.di_onlink = 0; + dip->di_onlink = 0; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); - memset(&(dip->di_core.di_pad[0]), 0, - sizeof(dip->di_core.di_pad)); + memset(&(dip->di_pad[0]), 0, + sizeof(dip->di_pad)); ASSERT(ip->i_d.di_projid == 0); } } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ea691c7..705083a8 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -19,7 +19,6 @@ #define __XFS_INODE_H__ struct xfs_dinode; -struct xfs_dinode_core; struct xfs_inode; /* @@ -112,7 +111,7 @@ typedef struct xfs_ictimestamp { } xfs_ictimestamp_t; /* - * NOTE: This structure must be kept identical to struct xfs_dinode_core + * NOTE: This structure must be kept identical to struct xfs_dinode * in xfs_dinode.h except for the endianess annotations. 
*/ typedef struct xfs_icdinode { @@ -553,8 +552,8 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, struct xfs_dinode **, struct xfs_buf **, xfs_daddr_t, uint, uint); void xfs_dinode_from_disk(struct xfs_icdinode *, - struct xfs_dinode_core *); -void xfs_dinode_to_disk(struct xfs_dinode_core *, + struct xfs_dinode *); +void xfs_dinode_to_disk(struct xfs_dinode *, struct xfs_icdinode *); void xfs_idestroy_fork(struct xfs_inode *, int); void xfs_idata_realloc(struct xfs_inode *, int, int); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index aa9bf05..27f18c1 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -281,7 +281,7 @@ xfs_inode_item_format( xfs_mark_inode_dirty_sync(ip); vecp->i_addr = (xfs_caddr_t)&ip->i_d; - vecp->i_len = sizeof(xfs_dinode_core_t); + vecp->i_len = sizeof(struct xfs_icdinode); XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE); vecp++; nvecs++; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 3511803..b3578cd 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -125,13 +125,9 @@ STATIC void xfs_bulkstat_one_dinode( xfs_mount_t *mp, /* mount point for filesystem */ xfs_ino_t ino, /* inode number to get data for */ - xfs_dinode_t *dip, /* dinode inode pointer */ + xfs_dinode_t *dic, /* dinode inode pointer */ xfs_bstat_t *buf) /* return buffer */ { - xfs_dinode_core_t *dic; /* dinode core info pointer */ - - dic = &dip->di_core; - /* * The inode format changed when we moved the link count and * made it 32 bits long. If this is an old format inode, @@ -162,7 +158,7 @@ xfs_bulkstat_one_dinode( buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec); buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec); buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec); - buf->bs_xflags = xfs_dic2xflags(dip); + buf->bs_xflags = xfs_dic2xflags(dic); buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog; buf->bs_extents = be32_to_cpu(dic->di_nextents); buf->bs_gen = be32_to_cpu(dic->di_gen); @@ -173,7 +169,7 @@ xfs_bulkstat_one_dinode( switch (dic->di_format) { case XFS_DINODE_FMT_DEV: - buf->bs_rdev = be32_to_cpu(dip->di_u.di_dev); + buf->bs_rdev = xfs_dinode_get_rdev(dic); buf->bs_blksize = BLKDEV_IOSIZE; buf->bs_blocks = 0; break; @@ -287,19 +283,19 @@ xfs_bulkstat_use_dinode( * to disk yet. This is a temporary hack that would require a proper * fix in the future. */ - if (be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC || - !XFS_DINODE_GOOD_VERSION(dip->di_core.di_version) || - !dip->di_core.di_mode) + if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC || + !XFS_DINODE_GOOD_VERSION(dip->di_version) || + !dip->di_mode) return 0; if (flags & BULKSTAT_FG_QUICK) { *dipp = dip; return 1; } /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */ - aformat = dip->di_core.di_aformat; + aformat = dip->di_aformat; if ((XFS_DFORK_Q(dip) == 0) || (aformat == XFS_DINODE_FMT_LOCAL) || - (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_core.di_anextents)) { + (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) { *dipp = dip; return 1; } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 9abb96a..4099618 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2320,7 +2320,7 @@ xlog_recover_do_inode_trans( * Make sure the place we're flushing out to really looks * like an inode! 
*/ - if (unlikely(be16_to_cpu(dip->di_core.di_magic) != XFS_DINODE_MAGIC)) { + if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { xfs_buf_relse(bp); xfs_fs_cmn_err(CE_ALERT, mp, "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", @@ -2343,12 +2343,12 @@ xlog_recover_do_inode_trans( } /* Skip replay when the on disk inode is newer than the log one */ - if (dicp->di_flushiter < be16_to_cpu(dip->di_core.di_flushiter)) { + if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { /* * Deal with the wrap case, DI_MAX_FLUSH is less * than smaller numbers */ - if (be16_to_cpu(dip->di_core.di_flushiter) == DI_MAX_FLUSH && + if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) { /* do nothing */ } else { @@ -2408,7 +2408,7 @@ xlog_recover_do_inode_trans( error = EFSCORRUPTED; goto error; } - if (unlikely(item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t))) { + if (unlikely(item->ri_buf[1].i_len > sizeof(struct xfs_icdinode))) { XFS_CORRUPTION_ERROR("xlog_recover_do_inode_trans(7)", XFS_ERRLEVEL_LOW, mp, dicp); xfs_buf_relse(bp); @@ -2420,23 +2420,24 @@ xlog_recover_do_inode_trans( } /* The core is in in-core format */ - xfs_dinode_to_disk(&dip->di_core, - (xfs_icdinode_t *)item->ri_buf[1].i_addr); + xfs_dinode_to_disk(dip, (xfs_icdinode_t *)item->ri_buf[1].i_addr); /* the rest is in on-disk format */ - if (item->ri_buf[1].i_len > sizeof(xfs_dinode_core_t)) { - memcpy((xfs_caddr_t) dip + sizeof(xfs_dinode_core_t), - item->ri_buf[1].i_addr + sizeof(xfs_dinode_core_t), - item->ri_buf[1].i_len - sizeof(xfs_dinode_core_t)); + if (item->ri_buf[1].i_len > sizeof(struct xfs_icdinode)) { + memcpy((xfs_caddr_t) dip + sizeof(struct xfs_icdinode), + item->ri_buf[1].i_addr + sizeof(struct xfs_icdinode), + item->ri_buf[1].i_len - sizeof(struct xfs_icdinode)); } fields = in_f->ilf_fields; switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) { case XFS_ILOG_DEV: - dip->di_u.di_dev = cpu_to_be32(in_f->ilf_u.ilfu_rdev); + xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev); break; case XFS_ILOG_UUID: - dip->di_u.di_muuid = in_f->ilf_u.ilfu_uuid; + memcpy(XFS_DFORK_DPTR(dip), + &in_f->ilf_u.ilfu_uuid, + sizeof(uuid_t)); break; } @@ -2452,12 +2453,12 @@ xlog_recover_do_inode_trans( switch (fields & XFS_ILOG_DFORK) { case XFS_ILOG_DDATA: case XFS_ILOG_DEXT: - memcpy(&dip->di_u, src, len); + memcpy(XFS_DFORK_DPTR(dip), src, len); break; case XFS_ILOG_DBROOT: xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len, - &dip->di_u.di_bmbt, + (xfs_bmdr_block_t *)XFS_DFORK_DPTR(dip), XFS_DFORK_DSIZE(dip, mp)); break; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 177976d..3f999b1 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -575,8 +575,7 @@ xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; - mp->m_litino = sbp->sb_inodesize - - ((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t)); + mp->m_litino = sbp->sb_inodesize - sizeof(struct xfs_dinode); mp->m_blockmask = sbp->sb_blocksize - 1; mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; mp->m_blockwmask = mp->m_blockwsize - 1; -- cgit v0.10.2 From 51ce16d519da0bc3c548e0facef7cb3aab1ac8cc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:39 +1100 Subject: [XFS] kill XFS_DINODE_VERSION_ defines These names don't add any value at all over just using the numerical values. 
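As a concrete illustration of the cleanup (a sketch, not an excerpt from the diff), a typical call site simply goes from the macro spelling to the literal on-disk version number; the hunks below make exactly this substitution throughout the tree:

        /* before */
        if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
                xfs_bump_ino_vers2(tp, ip);

        /* after: 1 and 2 are the values actually stored in di_version */
        if (ip->i_d.di_version == 1)
                xfs_bump_ino_vers2(tp, ip);
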
(First sent on October 9th) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index d597059..534b175 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -1131,7 +1131,7 @@ xfs_ioctl_setattr( * the superblock version number since projids didn't * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. */ - if (ip->i_d.di_version == XFS_DINODE_VERSION_1) + if (ip->i_d.di_version == 1) xfs_bump_ino_vers2(tp, ip); } diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index 0b7ebf9..162e872 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -18,11 +18,8 @@ #ifndef __XFS_DINODE_H__ #define __XFS_DINODE_H__ -#define XFS_DINODE_VERSION_1 1 -#define XFS_DINODE_VERSION_2 2 -#define XFS_DINODE_GOOD_VERSION(v) \ - (((v) == XFS_DINODE_VERSION_1 || (v) == XFS_DINODE_VERSION_2)) -#define XFS_DINODE_MAGIC 0x494e /* 'IN' */ +#define XFS_DINODE_MAGIC 0x494e /* 'IN' */ +#define XFS_DINODE_GOOD_VERSION(v) (((v) == 1 || (v) == 2)) typedef struct xfs_timestamp { __be32 t_sec; /* timestamp seconds */ diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 47e9428..16eda31 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -321,9 +321,9 @@ xfs_ialloc_ag_alloc( * able to use the file system. */ if (xfs_sb_version_hasnlink(&args.mp->m_sb)) - version = XFS_DINODE_VERSION_2; + version = 2; else - version = XFS_DINODE_VERSION_1; + version = 1; /* * Seed the new inode cluster with a random generation number. This diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 1d2912d..083395c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -965,7 +965,7 @@ xfs_iread( * the new format. We don't change the version number so that we * can distinguish this from a real new format inode. */ - if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { + if (ip->i_d.di_version == 1) { ip->i_d.di_nlink = ip->i_d.di_onlink; ip->i_d.di_onlink = 0; ip->i_d.di_projid = 0; @@ -1139,8 +1139,8 @@ xfs_ialloc( * here rather than here and in the flush/logging code. */ if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) && - ip->i_d.di_version == XFS_DINODE_VERSION_1) { - ip->i_d.di_version = XFS_DINODE_VERSION_2; + ip->i_d.di_version == 1) { + ip->i_d.di_version = 2; /* * We've already zeroed the old link count, the projid field, * and the pad field. @@ -1150,7 +1150,7 @@ xfs_ialloc( /* * Project ids won't be stored on disk if we are using a version 1 inode. */ - if ((prid != 0) && (ip->i_d.di_version == XFS_DINODE_VERSION_1)) + if ((prid != 0) && (ip->i_d.di_version == 1)) xfs_bump_ino_vers2(tp, ip); if (pip && XFS_INHERIT_GID(pip)) { @@ -3373,9 +3373,8 @@ xfs_iflush_int( * convert back to the old inode format. If the superblock version * has been updated, then make the conversion permanent. */ - ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || - xfs_sb_version_hasnlink(&mp->m_sb)); - if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { + ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); + if (ip->i_d.di_version == 1) { if (!xfs_sb_version_hasnlink(&mp->m_sb)) { /* * Convert it back. @@ -3388,8 +3387,8 @@ xfs_iflush_int( * so just make the conversion to the new inode * format permanent. 
*/ - ip->i_d.di_version = XFS_DINODE_VERSION_2; - dip->di_version = XFS_DINODE_VERSION_2; + ip->i_d.di_version = 2; + dip->di_version = 2; ip->i_d.di_onlink = 0; dip->di_onlink = 0; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 27f18c1..c431181 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -296,9 +296,8 @@ xfs_inode_item_format( * has a new version number, then we don't bother converting back. */ mp = ip->i_mount; - ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || - xfs_sb_version_hasnlink(&mp->m_sb)); - if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { + ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); + if (ip->i_d.di_version == 1) { if (!xfs_sb_version_hasnlink(&mp->m_sb)) { /* * Convert it back. @@ -311,7 +310,7 @@ xfs_inode_item_format( * so just make the conversion to the new inode * format permanent. */ - ip->i_d.di_version = XFS_DINODE_VERSION_2; + ip->i_d.di_version = 2; ip->i_d.di_onlink = 0; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); } diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index b3578cd..2cf16f4 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -139,7 +139,7 @@ xfs_bulkstat_one_dinode( * the new format. We don't change the version number so that we * can distinguish this from a real new format inode. */ - if (dic->di_version == XFS_DINODE_VERSION_1) { + if (dic->di_version == 1) { buf->bs_nlink = be16_to_cpu(dic->di_onlink); buf->bs_projid = 0; } else { diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 77114493..fcc2285 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -274,9 +274,9 @@ xfs_bump_ino_vers2( xfs_mount_t *mp; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1); + ASSERT(ip->i_d.di_version == 1); - ip->i_d.di_version = XFS_DINODE_VERSION_2; + ip->i_d.di_version = 2; ip->i_d.di_onlink = 0; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); mp = tp->t_mountp; @@ -308,7 +308,7 @@ xfs_bumplink( ASSERT(ip->i_d.di_nlink > 0); ip->i_d.di_nlink++; inc_nlink(VFS_I(ip)); - if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) && + if ((ip->i_d.di_version == 1) && (ip->i_d.di_nlink > XFS_MAXLINK_1)) { /* * The inode has increased its number of links beyond -- cgit v0.10.2 From 23fac50f959a87febf7ce4ae9d47525121f10c7a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:40 +1100 Subject: [XFS] split up xlog_recover_process_iunlinks Split out the body of the main loop into a separate helper to make the code readable. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 4099618..841398d 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3147,6 +3147,70 @@ out_error: return; } +STATIC xfs_agino_t +xlog_recover_process_one_iunlink( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_agino_t agino, + int bucket) +{ + struct xfs_buf *ibp; + struct xfs_dinode *dip; + struct xfs_inode *ip; + xfs_ino_t ino; + int error; + + ino = XFS_AGINO_TO_INO(mp, agno, agino); + error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); + if (error) + goto fail; + + /* + * Get the on disk inode to find the next inode in the bucket. 
+ */ + ASSERT(ip != NULL); + error = xfs_itobp(mp, NULL, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); + if (error) + goto fail; + + ASSERT(dip != NULL); + ASSERT(ip->i_d.di_nlink == 0); + + /* setup for the next pass */ + agino = be32_to_cpu(dip->di_next_unlinked); + xfs_buf_relse(ibp); + + /* + * Prevent any DMAPI event from being sent when the reference on + * the inode is dropped. + */ + ip->i_d.di_dmevmask = 0; + + /* + * If this is a new inode, handle it specially. Otherwise, just + * drop our reference to the inode. If there are no other + * references, this will send the inode to xfs_inactive() which + * will truncate the file and free the inode. + */ + if (ip->i_d.di_mode == 0) + xfs_iput_new(ip, 0); + else + IRELE(ip); + return agino; + + fail: + /* + * We can't read in the inode this bucket points to, or this inode + * is messed up. Just ditch this bucket of inodes. We will lose + * some inodes and space, but at least we won't hang. + * + * Call xlog_recover_clear_agi_bucket() to perform a transaction to + * clear the inode pointer in the bucket. + */ + xlog_recover_clear_agi_bucket(mp, agno, bucket); + return NULLAGINO; +} + /* * xlog_iunlink_recover * @@ -3167,11 +3231,7 @@ xlog_recover_process_iunlinks( xfs_agnumber_t agno; xfs_agi_t *agi; xfs_buf_t *agibp; - xfs_buf_t *ibp; - xfs_dinode_t *dip; - xfs_inode_t *ip; xfs_agino_t agino; - xfs_ino_t ino; int bucket; int error; uint mp_dmevmask; @@ -3201,10 +3261,8 @@ xlog_recover_process_iunlinks( agi = XFS_BUF_TO_AGI(agibp); for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { - agino = be32_to_cpu(agi->agi_unlinked[bucket]); while (agino != NULLAGINO) { - /* * Release the agi buffer so that it can * be acquired in the normal course of the @@ -3212,68 +3270,8 @@ xlog_recover_process_iunlinks( */ xfs_buf_relse(agibp); - ino = XFS_AGINO_TO_INO(mp, agno, agino); - error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); - ASSERT(error || (ip != NULL)); - - if (!error) { - /* - * Get the on disk inode to find the - * next inode in the bucket. - */ - error = xfs_itobp(mp, NULL, ip, &dip, - &ibp, 0, 0, - XFS_BUF_LOCK); - ASSERT(error || (dip != NULL)); - } - - if (!error) { - ASSERT(ip->i_d.di_nlink == 0); - - /* setup for the next pass */ - agino = be32_to_cpu( - dip->di_next_unlinked); - xfs_buf_relse(ibp); - /* - * Prevent any DMAPI event from - * being sent when the - * reference on the inode is - * dropped. - */ - ip->i_d.di_dmevmask = 0; - - /* - * If this is a new inode, handle - * it specially. Otherwise, - * just drop our reference to the - * inode. If there are no - * other references, this will - * send the inode to - * xfs_inactive() which will - * truncate the file and free - * the inode. - */ - if (ip->i_d.di_mode == 0) - xfs_iput_new(ip, 0); - else - IRELE(ip); - } else { - /* - * We can't read in the inode - * this bucket points to, or - * this inode is messed up. Just - * ditch this bucket of inodes. We - * will lose some inodes and space, - * but at least we won't hang. Call - * xlog_recover_clear_agi_bucket() - * to perform a transaction to clear - * the inode pointer in the bucket. 
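The effect of the split on the caller is easiest to see as a sketch of the resulting bucket walk (simplified; error handling and the AGI buffer reacquisition are elided, the real change is in the hunk that follows):

        for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
                agino = be32_to_cpu(agi->agi_unlinked[bucket]);
                while (agino != NULLAGINO) {
                        /*
                         * Release the AGI buffer so xfs_iget() can take it,
                         * process one unlinked inode, then reacquire the
                         * AGI before the next iteration.
                         */
                        xfs_buf_relse(agibp);
                        agino = xlog_recover_process_one_iunlink(mp, agno,
                                                                 agino, bucket);
                        /* ... reacquire agibp here ... */
                }
        }
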
- */ - xlog_recover_clear_agi_bucket(mp, agno, - bucket); - - agino = NULLAGINO; - } + agino = xlog_recover_process_one_iunlink(mp, + agno, agino, bucket); /* * Reacquire the agibuffer and continue around -- cgit v0.10.2 From 76d8b277f7b715f78ee3cb09ee112563639693a5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:40 +1100 Subject: [XFS] stop using xfs_itobp in xfs_iread The only caller of xfs_itobp that doesn't have i_blkno setup is now the initial inode read. It needs access to the whole xfs_imap so using xfs_inotobp is not an option. Instead opencode the buffer lookup in xfs_iread and kill all the functionality for the initial map from xfs_itobp. (First sent on October 21st) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 083395c..1d5c28b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -262,15 +262,11 @@ xfs_inotobp( * If a non-zero error is returned, then the contents of bpp and * dipp are undefined. * - * If the inode is new and has not yet been initialized, use xfs_imap() - * to determine the size and location of the buffer to read from disk. - * If the inode has already been mapped to its buffer and read in once, - * then use the mapping information stored in the inode rather than - * calling xfs_imap(). This allows us to avoid the overhead of looking - * at the inode btree for small block file systems (see xfs_dilocate()). - * We can tell whether the inode has been mapped in before by comparing - * its disk block address to 0. Only uninitialized inodes will have - * 0 for the disk block address. + * The inode is expected to already been mapped to its buffer and read + * in once, thus we can use the mapping information stored in the inode + * rather than calling xfs_imap(). This allows us to avoid the overhead + * of looking at the inode btree for small block file systems + * (see xfs_dilocate()). */ int xfs_itobp( @@ -279,40 +275,19 @@ xfs_itobp( xfs_inode_t *ip, xfs_dinode_t **dipp, xfs_buf_t **bpp, - xfs_daddr_t bno, - uint imap_flags, uint buf_flags) { xfs_imap_t imap; xfs_buf_t *bp; int error; - if (ip->i_blkno == (xfs_daddr_t)0) { - imap.im_blkno = bno; - error = xfs_imap(mp, tp, ip->i_ino, &imap, - XFS_IMAP_LOOKUP | imap_flags); - if (error) - return error; + ASSERT(ip->i_blkno != 0); - /* - * Fill in the fields in the inode that will be used to - * map the inode to its buffer from now on. - */ - ip->i_blkno = imap.im_blkno; - ip->i_len = imap.im_len; - ip->i_boffset = imap.im_boffset; - } else { - /* - * We've already mapped the inode once, so just use the - * mapping that we saved the first time. - */ - imap.im_blkno = ip->i_blkno; - imap.im_len = ip->i_len; - imap.im_boffset = ip->i_boffset; - } - ASSERT(bno == 0 || bno == imap.im_blkno); + imap.im_blkno = ip->i_blkno; + imap.im_len = ip->i_len; + imap.im_boffset = ip->i_boffset; - error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags); + error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, 0); if (error) return error; @@ -882,6 +857,7 @@ xfs_iread( xfs_buf_t *bp; xfs_dinode_t *dip; xfs_inode_t *ip; + xfs_imap_t imap; int error; ip = xfs_inode_alloc(mp, ino); @@ -889,15 +865,27 @@ xfs_iread( return ENOMEM; /* - * Get pointer's to the on-disk inode and the buffer containing it. - * If the inode number refers to a block outside the file system - * then xfs_itobp() will return NULL. In this case we should - * return NULL as well. 
Set i_blkno to 0 so that xfs_itobp() will - * know that this is a new incore inode. + * Get pointers to the on-disk inode and the buffer containing it. */ - error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK); + imap.im_blkno = bno; + error = xfs_imap(mp, tp, ip->i_ino, &imap, + XFS_IMAP_LOOKUP | imap_flags); + if (error) + goto out_destroy_inode; + + /* + * Fill in the fields in the inode that will be used to + * map the inode to its buffer from now on. + */ + ip->i_blkno = imap.im_blkno; + ip->i_len = imap.im_len; + ip->i_boffset = imap.im_boffset; + ASSERT(bno == 0 || bno == imap.im_blkno); + + error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); if (error) goto out_destroy_inode; + dip = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); /* * If we got something that isn't an inode it means someone @@ -1878,7 +1866,7 @@ xfs_iunlink( * Here we put the head pointer into our next pointer, * and then we fall through to point the head at us. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); if (error) return error; @@ -1960,7 +1948,7 @@ xfs_iunlink_remove( * of dealing with the buffer when there is no need to * change it. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -2022,7 +2010,7 @@ xfs_iunlink_remove( * Now last_ibp points to the buffer previous to us on * the unlinked list. Pull us from the list. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, XFS_BUF_LOCK); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -2277,7 +2265,7 @@ xfs_ifree( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); + error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, XFS_BUF_LOCK); if (error) return error; @@ -3191,7 +3179,7 @@ xfs_iflush( /* * Get the buffer containing the on-disk inode. */ - error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0, + error = xfs_itobp(mp, NULL, ip, &dip, &bp, noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); if (error || !bp) { xfs_ifunlock(ip); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 705083a8..ec8b539 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -157,7 +157,7 @@ typedef struct xfs_icdinode { #define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ /* - * Flags for xfs_inotobp, xfs_itobp(), xfs_imap() and xfs_dilocate(). + * Flags for xfs_inotobp, xfs_imap() and xfs_dilocate(). */ #define XFS_IMAP_LOOKUP 0x1 #define XFS_IMAP_BULKSTAT 0x2 @@ -550,7 +550,7 @@ int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, struct xfs_buf **, int *, uint); int xfs_itobp(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, struct xfs_dinode **, - struct xfs_buf **, xfs_daddr_t, uint, uint); + struct xfs_buf **, uint); void xfs_dinode_from_disk(struct xfs_icdinode *, struct xfs_dinode *); void xfs_dinode_to_disk(struct xfs_dinode *, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 841398d..48bdfa4 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3169,7 +3169,7 @@ xlog_recover_process_one_iunlink( * Get the on disk inode to find the next inode in the bucket. 
*/ ASSERT(ip != NULL); - error = xfs_itobp(mp, NULL, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); + error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK); if (error) goto fail; -- cgit v0.10.2 From a1941895034cda2bffa23ba845607c82138ccf52 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:40 +1100 Subject: [XFS] remove dead code for old inode item recovery We have removed the support for old-style inode items a while ago and xlog_recover_do_inode_trans is now only called for XFS_LI_INODE items. That means we can remove the call to xfs_imap there and with it the XFS_IMAP_LOOKUP that is set by all other callers. We can also mark xfs_imap static now. (First sent on October 21st) Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 16eda31..0ce56d6 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1262,8 +1262,7 @@ xfs_dilocate( #endif /* DEBUG */ return XFS_ERROR(EINVAL); } - if ((mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) || - !(flags & XFS_IMAP_LOOKUP)) { + if ((mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp))) { offset = XFS_INO_TO_OFFSET(mp, ino); ASSERT(offset < mp->m_sb.sb_inopblock); *bno = XFS_AGB_TO_FSB(mp, agno, agbno); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 1d5c28b..663ff9e 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -237,7 +237,7 @@ xfs_inotobp( int error; imap.im_blkno = 0; - error = xfs_imap(mp, tp, ino, &imap, imap_flags | XFS_IMAP_LOOKUP); + error = xfs_imap(mp, tp, ino, &imap, imap_flags); if (error) return error; @@ -868,8 +868,7 @@ xfs_iread( * Get pointers to the on-disk inode and the buffer containing it. */ imap.im_blkno = bno; - error = xfs_imap(mp, tp, ip->i_ino, &imap, - XFS_IMAP_LOOKUP | imap_flags); + error = xfs_imap(mp, tp, ip->i_ino, &imap, imap_flags); if (error) goto out_destroy_inode; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ec8b539..db1f688 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -159,8 +159,7 @@ typedef struct xfs_icdinode { /* * Flags for xfs_inotobp, xfs_imap() and xfs_dilocate(). */ -#define XFS_IMAP_LOOKUP 0x1 -#define XFS_IMAP_BULKSTAT 0x2 +#define XFS_IMAP_BULKSTAT 0x1 /* * Fork handling. diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 48bdfa4..bf8573b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2245,7 +2245,6 @@ xlog_recover_do_inode_trans( xfs_inode_log_format_t *in_f; xfs_mount_t *mp; xfs_buf_t *bp; - xfs_imap_t imap; xfs_dinode_t *dip; xfs_ino_t ino; int len; @@ -2273,48 +2272,29 @@ xlog_recover_do_inode_trans( } ino = in_f->ilf_ino; mp = log->l_mp; - if (ITEM_TYPE(item) == XFS_LI_INODE) { - imap.im_blkno = (xfs_daddr_t)in_f->ilf_blkno; - imap.im_len = in_f->ilf_len; - imap.im_boffset = in_f->ilf_boffset; - } else { - /* - * It's an old inode format record. We don't know where - * its cluster is located on disk, and we can't allow - * xfs_imap() to figure it out because the inode btrees - * are not ready to be used. Therefore do not pass the - * XFS_IMAP_LOOKUP flag to xfs_imap(). This will give - * us only the single block in which the inode lives - * rather than its cluster, so we must make sure to - * invalidate the buffer when we write it out below. - */ - imap.im_blkno = 0; - error = xfs_imap(log->l_mp, NULL, ino, &imap, 0); - if (error) - goto error; - } /* * Inode buffers can be freed, look out for it, * and do not replay the inode. 
*/ - if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0)) { + if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, + in_f->ilf_len, 0)) { error = 0; goto error; } - bp = xfs_buf_read_flags(mp->m_ddev_targp, imap.im_blkno, imap.im_len, - XFS_BUF_LOCK); + bp = xfs_buf_read_flags(mp->m_ddev_targp, in_f->ilf_blkno, + in_f->ilf_len, XFS_BUF_LOCK); if (XFS_BUF_ISERROR(bp)) { xfs_ioerror_alert("xlog_recover_do..(read#2)", mp, - bp, imap.im_blkno); + bp, in_f->ilf_blkno); error = XFS_BUF_GETERROR(bp); xfs_buf_relse(bp); goto error; } error = 0; ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); - dip = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); + dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); /* * Make sure the place we're flushing out to really looks -- cgit v0.10.2 From 94e1b69d1abd108d306e926c3012ec89e481c0da Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:41 +1100 Subject: [XFS] merge xfs_imap into xfs_dilocate xfs_imap is the only caller of xfs_dilocate and doesn't add any significant value. Merge the two functions and document the various cases we have for inode cluster lookup in the new xfs_imap. Also remove the unused im_agblkno and im_ioffset fields from struct xfs_imap while we're at it. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 0ce56d6..348ac30 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -40,6 +40,7 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_bmap.h" +#include "xfs_imap.h" /* @@ -1196,36 +1197,28 @@ error0: } /* - * Return the location of the inode in bno/off, for mapping it into a buffer. + * Return the location of the inode in imap, for mapping it into a buffer. */ -/*ARGSUSED*/ int -xfs_dilocate( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ +xfs_imap( + xfs_mount_t *mp, /* file system mount structure */ + xfs_trans_t *tp, /* transaction pointer */ xfs_ino_t ino, /* inode to locate */ - xfs_fsblock_t *bno, /* output: block containing inode */ - int *len, /* output: num blocks in inode cluster */ - int *off, /* output: index in block of inode */ - uint flags) /* flags concerning inode lookup */ + struct xfs_imap *imap, /* location map structure */ + uint flags) /* flags for inode btree lookup */ { xfs_agblock_t agbno; /* block number of inode in the alloc group */ - xfs_buf_t *agbp; /* agi buffer */ xfs_agino_t agino; /* inode number within alloc group */ xfs_agnumber_t agno; /* allocation group number */ int blks_per_cluster; /* num blocks per inode cluster */ xfs_agblock_t chunk_agbno; /* first block in inode chunk */ - xfs_agino_t chunk_agino; /* first agino in inode chunk */ - __int32_t chunk_cnt; /* count of free inodes in chunk */ - xfs_inofree_t chunk_free; /* mask of free inodes in chunk */ xfs_agblock_t cluster_agbno; /* first block in inode cluster */ - xfs_btree_cur_t *cur; /* inode btree cursor */ int error; /* error code */ - int i; /* temp state */ int offset; /* index of inode in its buffer */ int offset_agbno; /* blks from chunk start to inode */ ASSERT(ino != NULLFSINO); + /* * Split up the inode number into its parts. 
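For reference, the decomposition referred to here amounts to the following (illustrative fragment using the standard conversion macros; it is not part of the patch):

        agno  = XFS_INO_TO_AGNO(mp, ino);       /* allocation group */
        agino = XFS_INO_TO_AGINO(mp, ino);      /* inode number within the AG */
        agbno = XFS_AGINO_TO_AGBNO(mp, agino);  /* AG block holding the inode */
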
*/ @@ -1240,20 +1233,20 @@ xfs_dilocate( return XFS_ERROR(EINVAL); if (agno >= mp->m_sb.sb_agcount) { xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_dilocate: agno (%d) >= " + "xfs_imap: agno (%d) >= " "mp->m_sb.sb_agcount (%d)", agno, mp->m_sb.sb_agcount); } if (agbno >= mp->m_sb.sb_agblocks) { xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_dilocate: agbno (0x%llx) >= " + "xfs_imap: agbno (0x%llx) >= " "mp->m_sb.sb_agblocks (0x%lx)", (unsigned long long) agbno, (unsigned long) mp->m_sb.sb_agblocks); } if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { xfs_fs_cmn_err(CE_ALERT, mp, - "xfs_dilocate: ino (0x%llx) != " + "xfs_imap: ino (0x%llx) != " "XFS_AGINO_TO_INO(mp, agno, agino) " "(0x%llx)", ino, XFS_AGINO_TO_INO(mp, agno, agino)); @@ -1262,63 +1255,89 @@ xfs_dilocate( #endif /* DEBUG */ return XFS_ERROR(EINVAL); } - if ((mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp))) { + + /* + * If the inode cluster size is the same as the blocksize or + * smaller we get to the buffer by simple arithmetics. + */ + if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) { offset = XFS_INO_TO_OFFSET(mp, ino); ASSERT(offset < mp->m_sb.sb_inopblock); - *bno = XFS_AGB_TO_FSB(mp, agno, agbno); - *off = offset; - *len = 1; + + imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); + imap->im_len = XFS_FSB_TO_BB(mp, 1); + imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); return 0; } + blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; - if (*bno != NULLFSBLOCK) { + + /* + * If we get a block number passed from bulkstat we can use it to + * find the buffer easily. + */ + if (imap->im_blkno) { offset = XFS_INO_TO_OFFSET(mp, ino); ASSERT(offset < mp->m_sb.sb_inopblock); - cluster_agbno = XFS_FSB_TO_AGBNO(mp, *bno); - *off = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + - offset; - *len = blks_per_cluster; + + cluster_agbno = XFS_DADDR_TO_AGBNO(mp, imap->im_blkno); + offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock; + + imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); + imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); return 0; } + + /* + * If the inode chunks are aligned then use simple maths to + * find the location. Otherwise we have to do a btree + * lookup to find the location. 
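To give a feel for the simple-maths case, take a hypothetical layout (numbers chosen purely for illustration): 4096-byte blocks with 256-byte inodes (16 inodes per block), 8192-byte inode clusters (blks_per_cluster = 2), and chunks aligned on 4 blocks (m_inoalign_mask = 3). For an inode at agbno 102, slot 11 within its block, the computations below work out to:

        offset_agbno  = 102 & 3;                /* = 2   */
        chunk_agbno   = 102 - 2;                /* = 100 */
        cluster_agbno = 100 + (2 / 2) * 2;      /* = 102 */
        offset        = (102 - 102) * 16 + 11;  /* = 11  */
        /* im_blkno   -> disk address of AG block 102
         * im_len     -> 2 filesystem blocks worth of basic blocks
         * im_boffset  = 11 << 8 = 2816 bytes into the cluster buffer */
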
+ */ if (mp->m_inoalign_mask) { offset_agbno = agbno & mp->m_inoalign_mask; chunk_agbno = agbno - offset_agbno; } else { + xfs_btree_cur_t *cur; /* inode btree cursor */ + xfs_agino_t chunk_agino; /* first agino in inode chunk */ + __int32_t chunk_cnt; /* count of free inodes in chunk */ + xfs_inofree_t chunk_free; /* mask of free inodes in chunk */ + xfs_buf_t *agbp; /* agi buffer */ + int i; /* temp state */ + down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); up_read(&mp->m_peraglock); if (error) { -#ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " "xfs_ialloc_read_agi() returned " "error %d, agno %d", error, agno); -#endif /* DEBUG */ return error; } + cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); - if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) { -#ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " + error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i); + if (error) { + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " "xfs_inobt_lookup_le() failed"); -#endif /* DEBUG */ goto error0; } - if ((error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, - &chunk_free, &i))) { -#ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " + + error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt, + &chunk_free, &i); + if (error) { + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " "xfs_inobt_get_rec() failed"); -#endif /* DEBUG */ goto error0; } if (i == 0) { #ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: " + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " "xfs_inobt_get_rec() failed"); #endif /* DEBUG */ error = XFS_ERROR(EINVAL); } + error0: xfs_trans_brelse(tp, agbp); xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); if (error) @@ -1326,19 +1345,35 @@ xfs_dilocate( chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino); offset_agbno = agbno - chunk_agbno; } + ASSERT(agbno >= chunk_agbno); cluster_agbno = chunk_agbno + ((offset_agbno / blks_per_cluster) * blks_per_cluster); offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + XFS_INO_TO_OFFSET(mp, ino); - *bno = XFS_AGB_TO_FSB(mp, agno, cluster_agbno); - *off = offset; - *len = blks_per_cluster; + + imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); + imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster); + imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog); + + /* + * If the inode number maps to a block outside the bounds + * of the file system then return NULL rather than calling + * read_buf and panicing when we get an error from the + * driver. + */ + if ((imap->im_blkno + imap->im_len) > + XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " + "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " + " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", + (unsigned long long) imap->im_blkno, + (unsigned long long) imap->im_len, + XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); + return XFS_ERROR(EINVAL); + } + return 0; -error0: - xfs_trans_brelse(tp, agbp); - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - return error; } /* diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index ccf554a..50f558a 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -20,6 +20,7 @@ struct xfs_buf; struct xfs_dinode; +struct xfs_imap; struct xfs_mount; struct xfs_trans; @@ -104,17 +105,14 @@ xfs_difree( xfs_ino_t *first_ino); /* first inode in deleted cluster */ /* - * Return the location of the inode in bno/len/off, - * for mapping it into a buffer. 
+ * Return the location of the inode in imap, for mapping it into a buffer. */ int -xfs_dilocate( +xfs_imap( struct xfs_mount *mp, /* file system mount structure */ struct xfs_trans *tp, /* transaction pointer */ xfs_ino_t ino, /* inode to locate */ - xfs_fsblock_t *bno, /* output: block containing inode */ - int *len, /* output: num blocks in cluster*/ - int *off, /* output: index in block of inode */ + struct xfs_imap *imap, /* location map structure */ uint flags); /* flags for inode btree lookup */ /* diff --git a/fs/xfs/xfs_imap.h b/fs/xfs/xfs_imap.h index f9ce6289..0869000 100644 --- a/fs/xfs/xfs_imap.h +++ b/fs/xfs/xfs_imap.h @@ -25,14 +25,7 @@ typedef struct xfs_imap { xfs_daddr_t im_blkno; /* starting BB of inode chunk */ uint im_len; /* length in BBs of inode chunk */ - xfs_agblock_t im_agblkno; /* logical block of inode chunk in ag */ - ushort im_ioffset; /* inode offset in block in "inodes" */ ushort im_boffset; /* inode offset in block in bytes */ } xfs_imap_t; -struct xfs_mount; -struct xfs_trans; -int xfs_imap(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, - xfs_imap_t *, uint); - #endif /* __XFS_IMAP_H__ */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 663ff9e..bf528b7 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -266,7 +266,7 @@ xfs_inotobp( * in once, thus we can use the mapping information stored in the inode * rather than calling xfs_imap(). This allows us to avoid the overhead * of looking at the inode btree for small block file systems - * (see xfs_dilocate()). + * (see xfs_imap()). */ int xfs_itobp( @@ -2508,64 +2508,6 @@ xfs_idata_realloc( ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork)); } - - - -/* - * Map inode to disk block and offset. - * - * mp -- the mount point structure for the current file system - * tp -- the current transaction - * ino -- the inode number of the inode to be located - * imap -- this structure is filled in with the information necessary - * to retrieve the given inode from disk - * flags -- flags to pass to xfs_dilocate indicating whether or not - * lookups in the inode btree were OK or not - */ -int -xfs_imap( - xfs_mount_t *mp, - xfs_trans_t *tp, - xfs_ino_t ino, - xfs_imap_t *imap, - uint flags) -{ - xfs_fsblock_t fsbno; - int len; - int off; - int error; - - fsbno = imap->im_blkno ? - XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; - error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); - if (error) - return error; - - imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); - imap->im_len = XFS_FSB_TO_BB(mp, len); - imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); - imap->im_ioffset = (ushort)off; - imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); - - /* - * If the inode number maps to a block outside the bounds - * of the file system then return NULL rather than calling - * read_buf and panicing when we get an error from the - * driver. 
- */ - if ((imap->im_blkno + imap->im_len) > - XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " - "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " - " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", - (unsigned long long) imap->im_blkno, - (unsigned long long) imap->im_len, - XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); - return EINVAL; - } - return 0; -} - void xfs_idestroy_fork( xfs_inode_t *ip, diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index db1f688..0a9ad1c 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -157,7 +157,7 @@ typedef struct xfs_icdinode { #define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ /* - * Flags for xfs_inotobp, xfs_imap() and xfs_dilocate(). + * Flags for xfs_inotobp and xfs_imap(). */ #define XFS_IMAP_BULKSTAT 0x1 -- cgit v0.10.2 From 92bfc6e7c4eabbbd15e7d6d49123b296d05dcfd1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:41 +1100 Subject: [XFS] embededd struct xfs_imap into xfs_inode Most uses of struct xfs_imap are to map and inode to a buffer. To avoid copying around the inode location information we should just embedd a strcut xfs_imap into the xfs_inode. To make sure it doesn't bloat an inode the im_len is changed to a ushort, which is fine as that's what the users exepect anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 348ac30..4f45572 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -40,7 +40,6 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_bmap.h" -#include "xfs_imap.h" /* diff --git a/fs/xfs/xfs_imap.h b/fs/xfs/xfs_imap.h deleted file mode 100644 index 0869000..0000000 --- a/fs/xfs/xfs_imap.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2000,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_IMAP_H__ -#define __XFS_IMAP_H__ - -/* - * This is the structure passed to xfs_imap() to map - * an inode number to its on disk location. 
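(The structure itself is not going away; it moves into xfs_inode.h and is embedded in the in-core inode as i_imap, as the hunks below show.) A minimal sketch of mapping an in-core inode to its on-disk buffer once the map is cached in the inode, mirroring the xfs_itobp() hunk further down:

        ASSERT(ip->i_imap.im_blkno != 0);       /* filled in by xfs_iread() */
        error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, XFS_BUF_LOCK, 0);
        if (error)
                return error;
        dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
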
- */ -typedef struct xfs_imap { - xfs_daddr_t im_blkno; /* starting BB of inode chunk */ - uint im_len; /* length in BBs of inode chunk */ - ushort im_boffset; /* inode offset in block in bytes */ -} xfs_imap_t; - -#endif /* __XFS_IMAP_H__ */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bf528b7..72dc7a8 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -23,7 +23,6 @@ #include "xfs_bit.h" #include "xfs_log.h" #include "xfs_inum.h" -#include "xfs_imap.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_sb.h" @@ -134,7 +133,7 @@ STATIC int xfs_imap_to_bp( xfs_mount_t *mp, xfs_trans_t *tp, - xfs_imap_t *imap, + struct xfs_imap *imap, xfs_buf_t **bpp, uint buf_flags, uint imap_flags) @@ -232,7 +231,7 @@ xfs_inotobp( int *offset, uint imap_flags) { - xfs_imap_t imap; + struct xfs_imap imap; xfs_buf_t *bp; int error; @@ -277,17 +276,12 @@ xfs_itobp( xfs_buf_t **bpp, uint buf_flags) { - xfs_imap_t imap; xfs_buf_t *bp; int error; - ASSERT(ip->i_blkno != 0); + ASSERT(ip->i_imap.im_blkno != 0); - imap.im_blkno = ip->i_blkno; - imap.im_len = ip->i_len; - imap.im_boffset = ip->i_boffset; - - error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, 0); + error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, buf_flags, 0); if (error) return error; @@ -298,7 +292,7 @@ xfs_itobp( return EAGAIN; } - *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); + *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); *bpp = bp; return 0; } @@ -799,9 +793,7 @@ xfs_inode_alloc( /* initialise the xfs inode */ ip->i_ino = ino; ip->i_mount = mp; - ip->i_blkno = 0; - ip->i_len = 0; - ip->i_boffset =0; + memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); ip->i_afp = NULL; memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); ip->i_flags = 0; @@ -857,7 +849,6 @@ xfs_iread( xfs_buf_t *bp; xfs_dinode_t *dip; xfs_inode_t *ip; - xfs_imap_t imap; int error; ip = xfs_inode_alloc(mp, ino); @@ -865,26 +856,22 @@ xfs_iread( return ENOMEM; /* - * Get pointers to the on-disk inode and the buffer containing it. + * Fill in the location information in the in-core inode. */ - imap.im_blkno = bno; - error = xfs_imap(mp, tp, ip->i_ino, &imap, imap_flags); + ip->i_imap.im_blkno = bno; + error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, imap_flags); if (error) goto out_destroy_inode; + ASSERT(bno == 0 || bno == ip->i_imap.im_blkno); /* - * Fill in the fields in the inode that will be used to - * map the inode to its buffer from now on. + * Get pointers to the on-disk inode and the buffer containing it. 
*/ - ip->i_blkno = imap.im_blkno; - ip->i_len = imap.im_len; - ip->i_boffset = imap.im_boffset; - ASSERT(bno == 0 || bno == imap.im_blkno); - - error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, imap_flags); + error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, + XFS_BUF_LOCK, imap_flags); if (error) goto out_destroy_inode; - dip = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); + dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); /* * If we got something that isn't an inode it means someone @@ -1872,7 +1859,7 @@ xfs_iunlink( ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO); /* both on-disk, don't endian flip twice */ dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; - offset = ip->i_boffset + + offset = ip->i_imap.im_boffset + offsetof(xfs_dinode_t, di_next_unlinked); xfs_trans_inode_buf(tp, ibp); xfs_trans_log_buf(tp, ibp, offset, @@ -1958,7 +1945,7 @@ xfs_iunlink_remove( ASSERT(next_agino != 0); if (next_agino != NULLAGINO) { dip->di_next_unlinked = cpu_to_be32(NULLAGINO); - offset = ip->i_boffset + + offset = ip->i_imap.im_boffset + offsetof(xfs_dinode_t, di_next_unlinked); xfs_trans_inode_buf(tp, ibp); xfs_trans_log_buf(tp, ibp, offset, @@ -2021,7 +2008,7 @@ xfs_iunlink_remove( ASSERT(next_agino != agino); if (next_agino != NULLAGINO) { dip->di_next_unlinked = cpu_to_be32(NULLAGINO); - offset = ip->i_boffset + + offset = ip->i_imap.im_boffset + offsetof(xfs_dinode_t, di_next_unlinked); xfs_trans_inode_buf(tp, ibp); xfs_trans_log_buf(tp, ibp, offset, @@ -3201,7 +3188,7 @@ xfs_iflush_int( } /* set *dip = inode's place in the buffer */ - dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_boffset); + dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); /* * Clear i_update_core before copying out the data. diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 0a9ad1c..d5c3aa1 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -83,6 +83,16 @@ typedef struct xfs_ifork { } xfs_ifork_t; /* + * Inode location information. Stored in the inode and passed to + * xfs_imap_to_bp() to get a buffer and dinode for a given inode. + */ +struct xfs_imap { + xfs_daddr_t im_blkno; /* starting BB of inode chunk */ + ushort im_len; /* length in BBs of inode chunk */ + ushort im_boffset; /* inode offset in block in bytes */ +}; + +/* * This is the xfs in-core inode structure. * Most of the on-disk inode is embedded in the i_d field. * @@ -238,9 +248,7 @@ typedef struct xfs_inode { /* Inode location stuff */ xfs_ino_t i_ino; /* inode number (agno/agino)*/ - xfs_daddr_t i_blkno; /* blkno of inode buffer */ - ushort i_len; /* len of inode buffer */ - ushort i_boffset; /* off of inode in buffer */ + struct xfs_imap i_imap; /* location for xfs_imap() */ /* Extent information. 
*/ xfs_ifork_t *i_afp; /* attribute fork pointer */ diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index c431181..977c4ae 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -942,9 +942,9 @@ xfs_inode_item_init( iip->ili_format.ilf_type = XFS_LI_INODE; iip->ili_format.ilf_ino = ip->i_ino; - iip->ili_format.ilf_blkno = ip->i_blkno; - iip->ili_format.ilf_len = ip->i_len; - iip->ili_format.ilf_boffset = ip->i_boffset; + iip->ili_format.ilf_blkno = ip->i_imap.im_blkno; + iip->ili_format.ilf_len = ip->i_imap.im_len; + iip->ili_format.ilf_boffset = ip->i_imap.im_boffset; } /* diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 2cf16f4..4315ce6 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -69,7 +69,7 @@ xfs_bulkstat_one_iget( } ASSERT(ip != NULL); - ASSERT(ip->i_blkno != (xfs_daddr_t)0); + ASSERT(ip->i_imap.im_blkno != 0); dic = &ip->i_d; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index bf8573b..ce6e907 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -36,7 +36,6 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" -#include "xfs_imap.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_log_priv.h" -- cgit v0.10.2 From b48d8d64377f39913663a06f4757f3b8c6fc6d87 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:41 +1100 Subject: [XFS] kill the XFS_IMAP_BULKSTAT flag Just pass down the XFS_IGET_* flags all the way down to xfs_imap instead of translating them mid-way. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 4f45572..e6ebbaeb 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1228,7 +1228,7 @@ xfs_imap( ino != XFS_AGINO_TO_INO(mp, agno, agino)) { #ifdef DEBUG /* no diagnostics for bulkstat, ino comes from userspace */ - if (flags & XFS_IMAP_BULKSTAT) + if (flags & XFS_IGET_BULKSTAT) return XFS_ERROR(EINVAL); if (agno >= mp->m_sb.sb_agcount) { xfs_fs_cmn_err(CE_ALERT, mp, diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index bf4dc5e..d60522a 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -159,8 +159,7 @@ xfs_iget_cache_miss( * Read the disk inode attributes into a new inode structure and get * a new vnode for it. This should also initialize i_ino and i_mount. */ - error = xfs_iread(mp, tp, ino, &ip, bno, - (flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0); + error = xfs_iread(mp, tp, ino, &ip, bno, flags); if (error) return error; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 72dc7a8..4290760 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -136,7 +136,7 @@ xfs_imap_to_bp( struct xfs_imap *imap, xfs_buf_t **bpp, uint buf_flags, - uint imap_flags) + uint iget_flags) { int error; int i; @@ -178,7 +178,7 @@ xfs_imap_to_bp( if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, XFS_RANDOM_ITOBP_INOTOBP))) { - if (imap_flags & XFS_IMAP_BULKSTAT) { + if (iget_flags & XFS_IGET_BULKSTAT) { xfs_trans_brelse(tp, bp); return XFS_ERROR(EINVAL); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index d5c3aa1..49e609c 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -167,11 +167,6 @@ typedef struct xfs_icdinode { #define XFS_IFEXTIREC 0x08 /* Indirection array of extent blocks */ /* - * Flags for xfs_inotobp and xfs_imap(). - */ -#define XFS_IMAP_BULKSTAT 0x1 - -/* * Fork handling. 
*/ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 4315ce6..d4c0de8 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -595,7 +595,7 @@ xfs_bulkstat( error = xfs_inotobp(mp, NULL, ino, &dip, &bp, &offset, - XFS_IMAP_BULKSTAT); + XFS_IGET_BULKSTAT); if (!error) clustidx = offset / mp->m_sb.sb_inodesize; -- cgit v0.10.2 From 24f211bad09a31f19dda0c3faffe0244f4f235f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:42 +1100 Subject: [XFS] move inode allocation out xfs_iread Allocate the inode in xfs_iget_cache_miss and pass it into xfs_iread. This simplifies the error handling and allows xfs_iread to be shared with userspace which already uses these semantics. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index d60522a..27ec241 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -40,6 +40,82 @@ #include "xfs_utils.h" #include "xfs_trans_priv.h" #include "xfs_inode_item.h" +#include "xfs_bmap.h" +#include "xfs_btree_trace.h" +#include "xfs_dir2_trace.h" + + +/* + * Allocate and initialise an xfs_inode. + */ +STATIC struct xfs_inode * +xfs_inode_alloc( + struct xfs_mount *mp, + xfs_ino_t ino) +{ + struct xfs_inode *ip; + + /* + * if this didn't occur in transactions, we could use + * KM_MAYFAIL and return NULL here on ENOMEM. Set the + * code up to do this anyway. + */ + ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); + if (!ip) + return NULL; + + ASSERT(atomic_read(&ip->i_iocount) == 0); + ASSERT(atomic_read(&ip->i_pincount) == 0); + ASSERT(!spin_is_locked(&ip->i_flags_lock)); + ASSERT(completion_done(&ip->i_flush)); + + /* + * initialise the VFS inode here to get failures + * out of the way early. + */ + if (!inode_init_always(mp->m_super, VFS_I(ip))) { + kmem_zone_free(xfs_inode_zone, ip); + return NULL; + } + + /* initialise the xfs inode */ + ip->i_ino = ino; + ip->i_mount = mp; + memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); + ip->i_afp = NULL; + memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); + ip->i_flags = 0; + ip->i_update_core = 0; + ip->i_update_size = 0; + ip->i_delayed_blks = 0; + memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); + ip->i_size = 0; + ip->i_new_size = 0; + + /* + * Initialize inode's trace buffers. + */ +#ifdef XFS_INODE_TRACE + ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_BMAP_TRACE + ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_BTREE_TRACE + ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_RW_TRACE + ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_ILOCK_TRACE + ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS); +#endif +#ifdef XFS_DIR2_TRACE + ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); +#endif + + return ip; +} /* * Check the validity of the inode we just found it the cache @@ -155,13 +231,13 @@ xfs_iget_cache_miss( unsigned long first_index, mask; xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino); - /* - * Read the disk inode attributes into a new inode structure and get - * a new vnode for it. This should also initialize i_ino and i_mount. 
- */ - error = xfs_iread(mp, tp, ino, &ip, bno, flags); + ip = xfs_inode_alloc(mp, ino); + if (!ip) + return ENOMEM; + + error = xfs_iread(mp, tp, ip, bno, flags); if (error) - return error; + goto out_destroy; xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4290760..872191b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -758,119 +758,36 @@ xfs_dic2xflags( } /* - * Allocate and initialise an xfs_inode. - */ -STATIC struct xfs_inode * -xfs_inode_alloc( - struct xfs_mount *mp, - xfs_ino_t ino) -{ - struct xfs_inode *ip; - - /* - * if this didn't occur in transactions, we could use - * KM_MAYFAIL and return NULL here on ENOMEM. Set the - * code up to do this anyway. - */ - ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); - if (!ip) - return NULL; - - ASSERT(atomic_read(&ip->i_iocount) == 0); - ASSERT(atomic_read(&ip->i_pincount) == 0); - ASSERT(!spin_is_locked(&ip->i_flags_lock)); - ASSERT(completion_done(&ip->i_flush)); - - /* - * initialise the VFS inode here to get failures - * out of the way early. - */ - if (!inode_init_always(mp->m_super, VFS_I(ip))) { - kmem_zone_free(xfs_inode_zone, ip); - return NULL; - } - - /* initialise the xfs inode */ - ip->i_ino = ino; - ip->i_mount = mp; - memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); - ip->i_afp = NULL; - memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); - ip->i_flags = 0; - ip->i_update_core = 0; - ip->i_update_size = 0; - ip->i_delayed_blks = 0; - memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); - ip->i_size = 0; - ip->i_new_size = 0; - - /* - * Initialize inode's trace buffers. - */ -#ifdef XFS_INODE_TRACE - ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_BMAP_TRACE - ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_BTREE_TRACE - ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_RW_TRACE - ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_ILOCK_TRACE - ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS); -#endif -#ifdef XFS_DIR2_TRACE - ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); -#endif - - return ip; -} - -/* - * Given a mount structure and an inode number, return a pointer - * to a newly allocated in-core inode corresponding to the given - * inode number. - * - * Initialize the inode's attributes and extent pointers if it - * already has them (it will not if the inode has no links). + * Read the disk inode attributes into the in-core inode structure. */ int xfs_iread( xfs_mount_t *mp, xfs_trans_t *tp, - xfs_ino_t ino, - xfs_inode_t **ipp, + xfs_inode_t *ip, xfs_daddr_t bno, - uint imap_flags) + uint iget_flags) { xfs_buf_t *bp; xfs_dinode_t *dip; - xfs_inode_t *ip; int error; - ip = xfs_inode_alloc(mp, ino); - if (!ip) - return ENOMEM; - /* * Fill in the location information in the in-core inode. */ ip->i_imap.im_blkno = bno; - error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, imap_flags); + error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); if (error) - goto out_destroy_inode; + return error; ASSERT(bno == 0 || bno == ip->i_imap.im_blkno); /* * Get pointers to the on-disk inode and the buffer containing it. 
*/ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, - XFS_BUF_LOCK, imap_flags); + XFS_BUF_LOCK, iget_flags); if (error) - goto out_destroy_inode; + return error; dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); /* @@ -968,14 +885,8 @@ xfs_iread( * to worry about the inode being changed just because we released * the buffer. */ - xfs_trans_brelse(tp, bp); - *ipp = ip; - return 0; - out_brelse: xfs_trans_brelse(tp, bp); - out_destroy_inode: - xfs_destroy_inode(ip); return error; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 49e609c..ae5800e 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -516,8 +516,8 @@ void xfs_ireclaim(xfs_inode_t *); /* * xfs_inode.c prototypes. */ -int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, - xfs_inode_t **, xfs_daddr_t, uint); +int xfs_iread(struct xfs_mount *, struct xfs_trans *, + struct xfs_inode *, xfs_daddr_t, uint); int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, int, struct xfs_buf **, boolean_t *, xfs_inode_t **); -- cgit v0.10.2 From 0e446673a15a4e9c336b67c1a638eb12c21d0993 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Nov 2008 14:23:42 +1100 Subject: [XFS] fix error handling in xlog_recover_process_one_iunlink If we fail after xfs_iget we have to drop the reference count, spotted by Dave Chinner. Also remove some useless asserts and stop trying to deal with di_mode == 0 inodes because never gets those without passing the IGET_CREATE flag to xfs_iget. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index ce6e907..51412cc 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3147,13 +3147,12 @@ xlog_recover_process_one_iunlink( /* * Get the on disk inode to find the next inode in the bucket. */ - ASSERT(ip != NULL); error = xfs_itobp(mp, NULL, ip, &dip, &ibp, XFS_BUF_LOCK); if (error) - goto fail; + goto fail_iput; - ASSERT(dip != NULL); ASSERT(ip->i_d.di_nlink == 0); + ASSERT(ip->i_d.di_mode != 0); /* setup for the next pass */ agino = be32_to_cpu(dip->di_next_unlinked); @@ -3165,18 +3164,11 @@ xlog_recover_process_one_iunlink( */ ip->i_d.di_dmevmask = 0; - /* - * If this is a new inode, handle it specially. Otherwise, just - * drop our reference to the inode. If there are no other - * references, this will send the inode to xfs_inactive() which - * will truncate the file and free the inode. - */ - if (ip->i_d.di_mode == 0) - xfs_iput_new(ip, 0); - else - IRELE(ip); + IRELE(ip); return agino; + fail_iput: + IRELE(ip); fail: /* * We can't read in the inode this bucket points to, or this inode -- cgit v0.10.2 From 743bb4650da9e2595d6cedd01c680b5b9398c74a Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:06 -0600 Subject: [XFS] Move copy_from_user calls out of ioctl helpers into ioctl switch. Moving the copy_from_user out of some of the ioctl helpers will make it easier for the compat ioctl switch to copy in the right struct, then just pass to the underlying helper. Also, move common access checks into the helpers themselves, and out of the native ioctl switch code, to reduce code duplication between native & compat ioctl callers. 
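A sketch of the resulting calling convention (illustrative only; the exact case grouping in the native switch is not shown in this excerpt): the switch copies the user structure once and hands the helper a kernel pointer, so the compat switch can copy in its own layout and call the same helper:

        case XFS_IOC_PATH_TO_FSHANDLE:
        case XFS_IOC_PATH_TO_HANDLE:
        case XFS_IOC_FD_TO_HANDLE: {
                xfs_fsop_handlereq_t    hreq;

                if (copy_from_user(&hreq, arg, sizeof(hreq)))
                        return -XFS_ERROR(EFAULT);
                return xfs_find_handle(cmd, &hreq);
        }
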
Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 534b175..03c55b8 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -71,23 +71,19 @@ STATIC int xfs_find_handle( unsigned int cmd, - void __user *arg) + xfs_fsop_handlereq_t *hreq) { int hsize; xfs_handle_t handle; - xfs_fsop_handlereq_t hreq; struct inode *inode; - if (copy_from_user(&hreq, arg, sizeof(hreq))) - return -XFS_ERROR(EFAULT); - memset((char *)&handle, 0, sizeof(handle)); switch (cmd) { case XFS_IOC_PATH_TO_FSHANDLE: case XFS_IOC_PATH_TO_HANDLE: { struct path path; - int error = user_lpath((const char __user *)hreq.path, &path); + int error = user_lpath((const char __user *)hreq->path, &path); if (error) return error; @@ -101,7 +97,7 @@ xfs_find_handle( case XFS_IOC_FD_TO_HANDLE: { struct file *file; - file = fget(hreq.fd); + file = fget(hreq->fd); if (!file) return -EBADF; @@ -158,8 +154,8 @@ xfs_find_handle( } /* now copy our handle into the user buffer & write out the size */ - if (copy_to_user(hreq.ohandle, &handle, hsize) || - copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) { + if (copy_to_user(hreq->ohandle, &handle, hsize) || + copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) { iput(inode); return -XFS_ERROR(EFAULT); } @@ -252,7 +248,7 @@ xfs_vget_fsop_handlereq( STATIC int xfs_open_by_handle( xfs_mount_t *mp, - void __user *arg, + xfs_fsop_handlereq_t *hreq, struct file *parfilp, struct inode *parinode) { @@ -262,14 +258,11 @@ xfs_open_by_handle( struct file *filp; struct inode *inode; struct dentry *dentry; - xfs_fsop_handlereq_t hreq; if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); - if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode); + error = xfs_vget_fsop_handlereq(mp, parinode, hreq, &inode); if (error) return -error; @@ -280,10 +273,10 @@ xfs_open_by_handle( } #if BITS_PER_LONG != 32 - hreq.oflags |= O_LARGEFILE; + hreq->oflags |= O_LARGEFILE; #endif /* Put open permission in namei format. */ - permflag = hreq.oflags; + permflag = hreq->oflags; if ((permflag+1) & O_ACCMODE) permflag++; if (permflag & O_TRUNC) @@ -321,7 +314,7 @@ xfs_open_by_handle( mntget(parfilp->f_path.mnt); /* Create file pointer. 
*/ - filp = dentry_open(dentry, parfilp->f_path.mnt, hreq.oflags); + filp = dentry_open(dentry, parfilp->f_path.mnt, hreq->oflags); if (IS_ERR(filp)) { put_unused_fd(new_fd); return -XFS_ERROR(-PTR_ERR(filp)); @@ -365,21 +358,18 @@ do_readlink( STATIC int xfs_readlink_by_handle( xfs_mount_t *mp, - void __user *arg, + xfs_fsop_handlereq_t *hreq, struct inode *parinode) { struct inode *inode; - xfs_fsop_handlereq_t hreq; __u32 olen; void *link; int error; if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); - if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode); + error = xfs_vget_fsop_handlereq(mp, parinode, hreq, &inode); if (error) return -error; @@ -389,7 +379,7 @@ xfs_readlink_by_handle( goto out_iput; } - if (copy_from_user(&olen, hreq.ohandlen, sizeof(__u32))) { + if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) { error = -XFS_ERROR(EFAULT); goto out_iput; } @@ -401,7 +391,7 @@ xfs_readlink_by_handle( error = -xfs_readlink(XFS_I(inode), link); if (error) goto out_kfree; - error = do_readlink(hreq.ohandle, olen, link); + error = do_readlink(hreq->ohandle, olen, link); if (error) goto out_kfree; @@ -668,12 +658,19 @@ xfs_ioc_space( struct file *filp, int ioflags, unsigned int cmd, - void __user *arg) + xfs_flock64_t *bf) { - xfs_flock64_t bf; int attr_flags = 0; int error; + /* + * Only allow the sys admin to reserve space unless + * unwritten extents are enabled. + */ + if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) && + !capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) return -XFS_ERROR(EPERM); @@ -683,15 +680,12 @@ xfs_ioc_space( if (!S_ISREG(inode->i_mode)) return -XFS_ERROR(EINVAL); - if (copy_from_user(&bf, arg, sizeof(bf))) - return -XFS_ERROR(EFAULT); - if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) attr_flags |= XFS_ATTR_NONBLOCK; if (ioflags & IO_INVIS) attr_flags |= XFS_ATTR_DMI; - error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, attr_flags); + error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags); return -error; } @@ -1356,17 +1350,13 @@ xfs_ioctl( case XFS_IOC_ALLOCSP64: case XFS_IOC_FREESP64: case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP64: - /* - * Only allow the sys admin to reserve space unless - * unwritten extents are enabled. 
- */ - if (!xfs_sb_version_hasextflgbit(&mp->m_sb) && - !capable(CAP_SYS_ADMIN)) - return -EPERM; - - return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg); + case XFS_IOC_UNRESVSP64: { + xfs_flock64_t bf; + if (copy_from_user(&bf, arg, sizeof(bf))) + return -XFS_ERROR(EFAULT); + return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); + } case XFS_IOC_DIOINFO: { struct dioattr da; xfs_buftarg_t *target = @@ -1426,18 +1416,30 @@ xfs_ioctl( case XFS_IOC_FD_TO_HANDLE: case XFS_IOC_PATH_TO_HANDLE: - case XFS_IOC_PATH_TO_FSHANDLE: - return xfs_find_handle(cmd, arg); + case XFS_IOC_PATH_TO_FSHANDLE: { + xfs_fsop_handlereq_t hreq; - case XFS_IOC_OPEN_BY_HANDLE: - return xfs_open_by_handle(mp, arg, filp, inode); + if (copy_from_user(&hreq, arg, sizeof(hreq))) + return -XFS_ERROR(EFAULT); + return xfs_find_handle(cmd, &hreq); + } + case XFS_IOC_OPEN_BY_HANDLE: { + xfs_fsop_handlereq_t hreq; + if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + return xfs_open_by_handle(mp, &hreq, filp, inode); + } case XFS_IOC_FSSETDM_BY_HANDLE: return xfs_fssetdm_by_handle(mp, arg, inode); - case XFS_IOC_READLINK_BY_HANDLE: - return xfs_readlink_by_handle(mp, arg, inode); + case XFS_IOC_READLINK_BY_HANDLE: { + xfs_fsop_handlereq_t hreq; + if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + return xfs_readlink_by_handle(mp, &hreq, inode); + } case XFS_IOC_ATTRLIST_BY_HANDLE: return xfs_attrlist_by_handle(mp, arg, inode); @@ -1445,7 +1447,11 @@ xfs_ioctl( return xfs_attrmulti_by_handle(mp, arg, filp, inode); case XFS_IOC_SWAPEXT: { - error = xfs_swapext((struct xfs_swapext __user *)arg); + struct xfs_swapext sxp; + + if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) + return -XFS_ERROR(EFAULT); + error = xfs_swapext(&sxp); return -error; } @@ -1501,9 +1507,6 @@ xfs_ioctl( case XFS_IOC_FSGROWFSDATA: { xfs_growfs_data_t in; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); @@ -1514,9 +1517,6 @@ xfs_ioctl( case XFS_IOC_FSGROWFSLOG: { xfs_growfs_log_t in; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); @@ -1527,9 +1527,6 @@ xfs_ioctl( case XFS_IOC_FSGROWFSRT: { xfs_growfs_rt_t in; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 75b0cd4..b4c1ee7 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -49,9 +49,8 @@ */ int xfs_swapext( - xfs_swapext_t __user *sxu) + xfs_swapext_t *sxp) { - xfs_swapext_t *sxp; xfs_inode_t *ip, *tip; struct file *file, *target_file; int error = 0; @@ -62,11 +61,6 @@ xfs_swapext( goto out; } - if (copy_from_user(sxp, sxu, sizeof(xfs_swapext_t))) { - error = XFS_ERROR(EFAULT); - goto out_free_sxp; - } - /* Pull information for the target fd */ file = fget((int)sxp->sx_fdtarget); if (!file) { diff --git a/fs/xfs/xfs_dfrag.h b/fs/xfs/xfs_dfrag.h index da17820..4f55a63 100644 --- a/fs/xfs/xfs_dfrag.h +++ b/fs/xfs/xfs_dfrag.h @@ -46,7 +46,7 @@ typedef struct xfs_swapext /* * Syscall interface for xfs_swapext */ -int xfs_swapext(struct xfs_swapext __user *sx); +int xfs_swapext(struct xfs_swapext *sx); int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip, struct xfs_swapext *sxp); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index f1d0585..852b6d3 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -435,6 +435,9 
@@ xfs_growfs_data( xfs_growfs_data_t *in) { int error; + + if (!capable(CAP_SYS_ADMIN)) + return XFS_ERROR(EPERM); if (!mutex_trylock(&mp->m_growlock)) return XFS_ERROR(EWOULDBLOCK); error = xfs_growfs_data_private(mp, in); @@ -448,6 +451,9 @@ xfs_growfs_log( xfs_growfs_log_t *in) { int error; + + if (!capable(CAP_SYS_ADMIN)) + return XFS_ERROR(EPERM); if (!mutex_trylock(&mp->m_growlock)) return XFS_ERROR(EWOULDBLOCK); error = xfs_growfs_log_private(mp, in); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index f18b9b2..edf12c7 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1876,6 +1876,8 @@ xfs_growfs_rt( /* * Initial error checking. */ + if (!capable(CAP_SYS_ADMIN)) + return XFS_ERROR(EPERM); if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL || (nrblocks = in->newblocks) <= sbp->sb_rblocks || (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize))) -- cgit v0.10.2 From ffae263a640b736a7206a0d7bd14ab44eb58cd28 Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:07 -0600 Subject: [XFS] Move compat ioctl structs & numbers into xfs_ioctl32.h This makes the c file less cluttered and a bit more readable. Consistently name the ioctl number macros with "_32" and the compatibility stuctures with "_compat." Rename the helpers which simply copy in the arg with "_copyin" for easy identification. Finally, for a few of the existing helpers, modify them so that they directly call the native ioctl helper after userspace argument fixup. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 6ce9c9c..620bb2d 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -48,36 +48,12 @@ #define _NATIVE_IOC(cmd, type) \ _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) -#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) -#define BROKEN_X86_ALIGNMENT -#define _PACKED __attribute__((packed)) -/* on ia32 l_start is on a 32-bit boundary */ -typedef struct xfs_flock64_32 { - __s16 l_type; - __s16 l_whence; - __s64 l_start __attribute__((packed)); - /* len == 0 means until end of file */ - __s64 l_len __attribute__((packed)); - __s32 l_sysid; - __u32 l_pid; - __s32 l_pad[4]; /* reserve area */ -} xfs_flock64_32_t; - -#define XFS_IOC_ALLOCSP_32 _IOW ('X', 10, struct xfs_flock64_32) -#define XFS_IOC_FREESP_32 _IOW ('X', 11, struct xfs_flock64_32) -#define XFS_IOC_ALLOCSP64_32 _IOW ('X', 36, struct xfs_flock64_32) -#define XFS_IOC_FREESP64_32 _IOW ('X', 37, struct xfs_flock64_32) -#define XFS_IOC_RESVSP_32 _IOW ('X', 40, struct xfs_flock64_32) -#define XFS_IOC_UNRESVSP_32 _IOW ('X', 41, struct xfs_flock64_32) -#define XFS_IOC_RESVSP64_32 _IOW ('X', 42, struct xfs_flock64_32) -#define XFS_IOC_UNRESVSP64_32 _IOW ('X', 43, struct xfs_flock64_32) - -/* just account for different alignment */ +#ifdef BROKEN_X86_ALIGNMENT STATIC unsigned long xfs_ioctl32_flock( unsigned long arg) { - xfs_flock64_32_t __user *p32 = (void __user *)arg; + compat_xfs_flock64_t __user *p32 = (void __user *)arg; xfs_flock64_t __user *p = compat_alloc_user_space(sizeof(*p)); if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) || @@ -92,32 +68,6 @@ xfs_ioctl32_flock( return (unsigned long)p; } -typedef struct compat_xfs_fsop_geom_v1 { - __u32 blocksize; /* filesystem (data) block size */ - __u32 rtextsize; /* realtime extent size */ - __u32 agblocks; /* fsblocks in an AG */ - __u32 agcount; /* number of allocation groups */ - __u32 
logblocks; /* fsblocks in the log */ - __u32 sectsize; /* (data) sector size, bytes */ - __u32 inodesize; /* inode size in bytes */ - __u32 imaxpct; /* max allowed inode space(%) */ - __u64 datablocks; /* fsblocks in data subvolume */ - __u64 rtblocks; /* fsblocks in realtime subvol */ - __u64 rtextents; /* rt extents in realtime subvol*/ - __u64 logstart; /* starting fsblock of the log */ - unsigned char uuid[16]; /* unique id of the filesystem */ - __u32 sunit; /* stripe unit, fsblocks */ - __u32 swidth; /* stripe width, fsblocks */ - __s32 version; /* structure version */ - __u32 flags; /* superblock version flags */ - __u32 logsectsize; /* log sector size, bytes */ - __u32 rtsectsize; /* realtime sector size, bytes */ - __u32 dirblocksize; /* directory block size, bytes */ -} __attribute__((packed)) compat_xfs_fsop_geom_v1_t; - -#define XFS_IOC_FSGEOMETRY_V1_32 \ - _IOR ('X', 100, struct compat_xfs_fsop_geom_v1) - STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg) { compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg; @@ -128,12 +78,6 @@ STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg) return (unsigned long)p; } -typedef struct compat_xfs_inogrp { - __u64 xi_startino; /* starting inode number */ - __s32 xi_alloccount; /* # bits set in allocmask */ - __u64 xi_allocmask; /* mask of allocated inodes */ -} __attribute__((packed)) compat_xfs_inogrp_t; - STATIC int xfs_inumbers_fmt_compat( void __user *ubuffer, const xfs_inogrp_t *buffer, @@ -154,19 +98,11 @@ STATIC int xfs_inumbers_fmt_compat( } #else - #define xfs_inumbers_fmt_compat xfs_inumbers_fmt -#define _PACKED - #endif /* XFS_IOC_FSBULKSTAT and friends */ -typedef struct compat_xfs_bstime { - __s32 tv_sec; /* seconds */ - __s32 tv_nsec; /* and nanoseconds */ -} compat_xfs_bstime_t; - STATIC int xfs_bstime_store_compat( compat_xfs_bstime_t __user *p32, const xfs_bstime_t *p) @@ -180,30 +116,6 @@ STATIC int xfs_bstime_store_compat( return 0; } -typedef struct compat_xfs_bstat { - __u64 bs_ino; /* inode number */ - __u16 bs_mode; /* type and mode */ - __u16 bs_nlink; /* number of links */ - __u32 bs_uid; /* user id */ - __u32 bs_gid; /* group id */ - __u32 bs_rdev; /* device value */ - __s32 bs_blksize; /* block size */ - __s64 bs_size; /* file size */ - compat_xfs_bstime_t bs_atime; /* access time */ - compat_xfs_bstime_t bs_mtime; /* modify time */ - compat_xfs_bstime_t bs_ctime; /* inode change time */ - int64_t bs_blocks; /* number of blocks */ - __u32 bs_xflags; /* extended flags */ - __s32 bs_extsize; /* extent size */ - __s32 bs_extents; /* number of extents */ - __u32 bs_gen; /* generation count */ - __u16 bs_projid; /* project id */ - unsigned char bs_pad[14]; /* pad space, unused */ - __u32 bs_dmevmask; /* DMIG event mask */ - __u16 bs_dmstate; /* DMIG state info */ - __u16 bs_aextents; /* attribute number of extents */ -} _PACKED compat_xfs_bstat_t; - STATIC int xfs_bulkstat_one_fmt_compat( void __user *ubuffer, const xfs_bstat_t *buffer) @@ -234,22 +146,6 @@ STATIC int xfs_bulkstat_one_fmt_compat( return sizeof(*p32); } - - -typedef struct compat_xfs_fsop_bulkreq { - compat_uptr_t lastip; /* last inode # pointer */ - __s32 icount; /* count of entries in buffer */ - compat_uptr_t ubuffer; /* user buffer for inode desc. 
*/ - compat_uptr_t ocount; /* output count pointer */ -} compat_xfs_fsop_bulkreq_t; - -#define XFS_IOC_FSBULKSTAT_32 \ - _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) -#define XFS_IOC_FSBULKSTAT_SINGLE_32 \ - _IOWR('X', 102, struct compat_xfs_fsop_bulkreq) -#define XFS_IOC_FSINUMBERS_32 \ - _IOWR('X', 103, struct compat_xfs_fsop_bulkreq) - /* copied from xfs_ioctl.c */ STATIC int xfs_ioc_bulkstat_compat( @@ -320,29 +216,6 @@ xfs_ioc_bulkstat_compat( return 0; } - - -typedef struct compat_xfs_fsop_handlereq { - __u32 fd; /* fd for FD_TO_HANDLE */ - compat_uptr_t path; /* user pathname */ - __u32 oflags; /* open flags */ - compat_uptr_t ihandle; /* user supplied handle */ - __u32 ihandlen; /* user supplied length */ - compat_uptr_t ohandle; /* user buffer for handle */ - compat_uptr_t ohandlen; /* user buffer length */ -} compat_xfs_fsop_handlereq_t; - -#define XFS_IOC_PATH_TO_FSHANDLE_32 \ - _IOWR('X', 104, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_PATH_TO_HANDLE_32 \ - _IOWR('X', 105, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_FD_TO_HANDLE_32 \ - _IOWR('X', 106, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_OPEN_BY_HANDLE_32 \ - _IOWR('X', 107, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_READLINK_BY_HANDLE_32 \ - _IOWR('X', 108, struct compat_xfs_fsop_handlereq) - STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg) { compat_xfs_fsop_handlereq_t __user *p32 = (void __user *)arg; @@ -365,7 +238,6 @@ STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg) return (unsigned long)p; } - STATIC long xfs_compat_ioctl( int mode, @@ -404,9 +276,9 @@ xfs_compat_ioctl( case XFS_IOC_ERROR_CLEARALL: break; - case XFS_IOC32_GETXFLAGS: - case XFS_IOC32_SETXFLAGS: - case XFS_IOC32_GETVERSION: + case XFS_IOC_GETXFLAGS_32: + case XFS_IOC_SETXFLAGS_32: + case XFS_IOC_GETVERSION_32: cmd = _NATIVE_IOC(cmd, long); break; #ifdef BROKEN_X86_ALIGNMENT @@ -426,7 +298,6 @@ xfs_compat_ioctl( arg = xfs_ioctl32_geom_v1(arg); cmd = _NATIVE_IOC(cmd, struct xfs_fsop_geom_v1); break; - #else /* These are handled fine if no alignment issues */ case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: @@ -464,7 +335,6 @@ xfs_compat_ioctl( error = xfs_ioctl(XFS_I(inode), file, mode, cmd, (void __user *)arg); xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED); - return error; } diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 02de6e6..003a10e6 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -18,7 +18,152 @@ #ifndef __XFS_IOCTL32_H__ #define __XFS_IOCTL32_H__ +#include + extern long xfs_file_compat_ioctl(struct file *, unsigned, unsigned long); extern long xfs_file_compat_invis_ioctl(struct file *, unsigned, unsigned long); +/* + * on 32-bit arches, ioctl argument structures may have different sizes + * and/or alignment. We define compat structures which match the + * 32-bit sizes/alignments here, and their associated ioctl numbers. + * + * xfs_ioctl32.c contains routines to copy these structures in and out. + */ + +/* stock kernel-level ioctls we support */ +#define XFS_IOC_GETXFLAGS_32 FS_IOC32_GETFLAGS +#define XFS_IOC_SETXFLAGS_32 FS_IOC32_SETFLAGS +#define XFS_IOC_GETVERSION_32 FS_IOC32_GETVERSION + +/* + * On intel, even if sizes match, alignment and/or padding may differ. 
+ */ +#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) +#define BROKEN_X86_ALIGNMENT +#define __compat_packed __attribute__((packed)) +#else +#define __compat_packed +#endif + +typedef struct compat_xfs_bstime { + compat_time_t tv_sec; /* seconds */ + __s32 tv_nsec; /* and nanoseconds */ +} compat_xfs_bstime_t; + +typedef struct compat_xfs_bstat { + __u64 bs_ino; /* inode number */ + __u16 bs_mode; /* type and mode */ + __u16 bs_nlink; /* number of links */ + __u32 bs_uid; /* user id */ + __u32 bs_gid; /* group id */ + __u32 bs_rdev; /* device value */ + __s32 bs_blksize; /* block size */ + __s64 bs_size; /* file size */ + compat_xfs_bstime_t bs_atime; /* access time */ + compat_xfs_bstime_t bs_mtime; /* modify time */ + compat_xfs_bstime_t bs_ctime; /* inode change time */ + int64_t bs_blocks; /* number of blocks */ + __u32 bs_xflags; /* extended flags */ + __s32 bs_extsize; /* extent size */ + __s32 bs_extents; /* number of extents */ + __u32 bs_gen; /* generation count */ + __u16 bs_projid; /* project id */ + unsigned char bs_pad[14]; /* pad space, unused */ + __u32 bs_dmevmask; /* DMIG event mask */ + __u16 bs_dmstate; /* DMIG state info */ + __u16 bs_aextents; /* attribute number of extents */ +} __compat_packed compat_xfs_bstat_t; + +typedef struct compat_xfs_fsop_bulkreq { + compat_uptr_t lastip; /* last inode # pointer */ + __s32 icount; /* count of entries in buffer */ + compat_uptr_t ubuffer; /* user buffer for inode desc. */ + compat_uptr_t ocount; /* output count pointer */ +} compat_xfs_fsop_bulkreq_t; + +#define XFS_IOC_FSBULKSTAT_32 \ + _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) +#define XFS_IOC_FSBULKSTAT_SINGLE_32 \ + _IOWR('X', 102, struct compat_xfs_fsop_bulkreq) +#define XFS_IOC_FSINUMBERS_32 \ + _IOWR('X', 103, struct compat_xfs_fsop_bulkreq) + +typedef struct compat_xfs_fsop_handlereq { + __u32 fd; /* fd for FD_TO_HANDLE */ + compat_uptr_t path; /* user pathname */ + __u32 oflags; /* open flags */ + compat_uptr_t ihandle; /* user supplied handle */ + __u32 ihandlen; /* user supplied length */ + compat_uptr_t ohandle; /* user buffer for handle */ + compat_uptr_t ohandlen; /* user buffer length */ +} compat_xfs_fsop_handlereq_t; + +#define XFS_IOC_PATH_TO_FSHANDLE_32 \ + _IOWR('X', 104, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_PATH_TO_HANDLE_32 \ + _IOWR('X', 105, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_FD_TO_HANDLE_32 \ + _IOWR('X', 106, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_OPEN_BY_HANDLE_32 \ + _IOWR('X', 107, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_READLINK_BY_HANDLE_32 \ + _IOWR('X', 108, struct compat_xfs_fsop_handlereq) + +#ifdef BROKEN_X86_ALIGNMENT +/* on ia32 l_start is on a 32-bit boundary */ +typedef struct compat_xfs_flock64 { + __s16 l_type; + __s16 l_whence; + __s64 l_start __attribute__((packed)); + /* len == 0 means until end of file */ + __s64 l_len __attribute__((packed)); + __s32 l_sysid; + __u32 l_pid; + __s32 l_pad[4]; /* reserve area */ +} compat_xfs_flock64_t; + +#define XFS_IOC_ALLOCSP_32 _IOW('X', 10, struct compat_xfs_flock64) +#define XFS_IOC_FREESP_32 _IOW('X', 11, struct compat_xfs_flock64) +#define XFS_IOC_ALLOCSP64_32 _IOW('X', 36, struct compat_xfs_flock64) +#define XFS_IOC_FREESP64_32 _IOW('X', 37, struct compat_xfs_flock64) +#define XFS_IOC_RESVSP_32 _IOW('X', 40, struct compat_xfs_flock64) +#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) +#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) +#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct 
compat_xfs_flock64) + +typedef struct compat_xfs_fsop_geom_v1 { + __u32 blocksize; /* filesystem (data) block size */ + __u32 rtextsize; /* realtime extent size */ + __u32 agblocks; /* fsblocks in an AG */ + __u32 agcount; /* number of allocation groups */ + __u32 logblocks; /* fsblocks in the log */ + __u32 sectsize; /* (data) sector size, bytes */ + __u32 inodesize; /* inode size in bytes */ + __u32 imaxpct; /* max allowed inode space(%) */ + __u64 datablocks; /* fsblocks in data subvolume */ + __u64 rtblocks; /* fsblocks in realtime subvol */ + __u64 rtextents; /* rt extents in realtime subvol*/ + __u64 logstart; /* starting fsblock of the log */ + unsigned char uuid[16]; /* unique id of the filesystem */ + __u32 sunit; /* stripe unit, fsblocks */ + __u32 swidth; /* stripe width, fsblocks */ + __s32 version; /* structure version */ + __u32 flags; /* superblock version flags */ + __u32 logsectsize; /* log sector size, bytes */ + __u32 rtsectsize; /* realtime sector size, bytes */ + __u32 dirblocksize; /* directory block size, bytes */ +} __attribute__((packed)) compat_xfs_fsop_geom_v1_t; + +#define XFS_IOC_FSGEOMETRY_V1_32 \ + _IOR('X', 100, struct compat_xfs_fsop_geom_v1) + +typedef struct compat_xfs_inogrp { + __u64 xi_startino; /* starting inode number */ + __s32 xi_alloccount; /* # bits set in allocmask */ + __u64 xi_allocmask; /* mask of allocated inodes */ +} __attribute__((packed)) compat_xfs_inogrp_t; + +#endif /* BROKEN_X86_ALIGNMENT */ + #endif /* __XFS_IOCTL32_H__ */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 4ae03ba..589c41c 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -418,10 +418,6 @@ typedef struct xfs_handle { #define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS #define XFS_IOC_SETXFLAGS FS_IOC_SETFLAGS #define XFS_IOC_GETVERSION FS_IOC_GETVERSION -/* 32-bit compat counterparts */ -#define XFS_IOC32_GETXFLAGS FS_IOC32_GETFLAGS -#define XFS_IOC32_SETXFLAGS FS_IOC32_SETFLAGS -#define XFS_IOC32_GETVERSION FS_IOC32_GETVERSION /* * ioctl commands that replace IRIX fcntl()'s -- cgit v0.10.2 From d5547f9feea459dfc9e7313bd1d561394e2c129f Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:08 -0600 Subject: [XFS] Clean up some existing compat ioctl calls Create a new xfs_ioctl.h file which has prototypes for ioctl helpers that may be called in compat mode. Change several compat ioctl cases which are IOW to simply copy in the userspace argument, then call the common ioctl helper. This also fixes xfs_compat_ioc_fsgeometry_v1(), which had it backwards before; it copied in an (empty) arg, then copied out the native result, which probably corrupted userspace. It should be translating on the copyout. Also, a bit of formatting cleanup for consistency, and conversion of all error returns to use XFS_ERROR(). 
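To illustrate the corrected direction of translation described above (a sketch with hypothetical names, not the patch text itself): the native helper fills a kernel-native structure, and only the final copyout has to know about the 32-bit layout, here assumed to be a prefix of the native one:

	STATIC int
	xfs_compat_ioc_example_query(
		struct xfs_mount		*mp,
		struct compat_xfs_example __user *arg32)	/* hypothetical */
	{
		struct xfs_example	out;	/* native layout */
		int			error;

		error = xfs_example_query(mp, &out);	/* hypothetical native helper */
		if (error)
			return -error;
		/* translate on the copyout, not on the copyin */
		if (copy_to_user(arg32, &out, sizeof(struct compat_xfs_example)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}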
Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 03c55b8..498d3e1 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -68,7 +68,7 @@ * XFS_IOC_PATH_TO_HANDLE * returns full handle for a path */ -STATIC int +int xfs_find_handle( unsigned int cmd, xfs_fsop_handlereq_t *hreq) @@ -245,7 +245,7 @@ xfs_vget_fsop_handlereq( return 0; } -STATIC int +int xfs_open_by_handle( xfs_mount_t *mp, xfs_fsop_handlereq_t *hreq, @@ -355,7 +355,7 @@ do_readlink( } -STATIC int +int xfs_readlink_by_handle( xfs_mount_t *mp, xfs_fsop_handlereq_t *hreq, @@ -651,7 +651,7 @@ xfs_attrmulti_by_handle( return -error; } -STATIC int +int xfs_ioc_space( struct xfs_inode *ip, struct inode *inode, diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h new file mode 100644 index 0000000..2df849f --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_ioctl.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_IOCTL_H__ +#define __XFS_IOCTL_H__ + +extern int +xfs_ioc_space( + struct xfs_inode *ip, + struct inode *inode, + struct file *filp, + int ioflags, + unsigned int cmd, + xfs_flock64_t *bf); + +extern int +xfs_find_handle( + unsigned int cmd, + xfs_fsop_handlereq_t *hreq); + +extern int +xfs_open_by_handle( + xfs_mount_t *mp, + xfs_fsop_handlereq_t *hreq, + struct file *parfilp, + struct inode *parinode); + +extern int +xfs_readlink_by_handle( + xfs_mount_t *mp, + xfs_fsop_handlereq_t *hreq, + struct inode *parinode); +#endif diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 620bb2d..35edc05 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -43,55 +43,62 @@ #include "xfs_error.h" #include "xfs_dfrag.h" #include "xfs_vnodeops.h" +#include "xfs_fsops.h" +#include "xfs_ioctl.h" #include "xfs_ioctl32.h" #define _NATIVE_IOC(cmd, type) \ _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) #ifdef BROKEN_X86_ALIGNMENT -STATIC unsigned long -xfs_ioctl32_flock( - unsigned long arg) +STATIC int +xfs_compat_flock64_copyin( + xfs_flock64_t *bf, + compat_xfs_flock64_t __user *arg32) { - compat_xfs_flock64_t __user *p32 = (void __user *)arg; - xfs_flock64_t __user *p = compat_alloc_user_space(sizeof(*p)); - - if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) || - copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) || - copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) || - copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) || - copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) || - copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) || - copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32))) - return -EFAULT; - - return (unsigned long)p; + if (get_user(bf->l_type, &arg32->l_type) || + 
get_user(bf->l_whence, &arg32->l_whence) || + get_user(bf->l_start, &arg32->l_start) || + get_user(bf->l_len, &arg32->l_len) || + get_user(bf->l_sysid, &arg32->l_sysid) || + get_user(bf->l_pid, &arg32->l_pid) || + copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32))) + return -XFS_ERROR(EFAULT); + return 0; } -STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg) +STATIC int +xfs_compat_ioc_fsgeometry_v1( + struct xfs_mount *mp, + compat_xfs_fsop_geom_v1_t __user *arg32) { - compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg; - xfs_fsop_geom_v1_t __user *p = compat_alloc_user_space(sizeof(*p)); + xfs_fsop_geom_t fsgeo; + int error; - if (copy_in_user(p, p32, sizeof(*p32))) - return -EFAULT; - return (unsigned long)p; + error = xfs_fs_geometry(mp, &fsgeo, 3); + if (error) + return -error; + /* The 32-bit variant simply has some padding at the end */ + if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1))) + return -XFS_ERROR(EFAULT); + return 0; } -STATIC int xfs_inumbers_fmt_compat( - void __user *ubuffer, - const xfs_inogrp_t *buffer, - long count, - long *written) +STATIC int +xfs_inumbers_fmt_compat( + void __user *ubuffer, + const xfs_inogrp_t *buffer, + long count, + long *written) { - compat_xfs_inogrp_t __user *p32 = ubuffer; - long i; + compat_xfs_inogrp_t __user *p32 = ubuffer; + long i; for (i = 0; i < count; i++) { if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) - return -EFAULT; + return -XFS_ERROR(EFAULT); } *written = count * sizeof(*p32); return 0; @@ -103,24 +110,26 @@ STATIC int xfs_inumbers_fmt_compat( /* XFS_IOC_FSBULKSTAT and friends */ -STATIC int xfs_bstime_store_compat( - compat_xfs_bstime_t __user *p32, - const xfs_bstime_t *p) +STATIC int +xfs_bstime_store_compat( + compat_xfs_bstime_t __user *p32, + const xfs_bstime_t *p) { - __s32 sec32; + __s32 sec32; sec32 = p->tv_sec; if (put_user(sec32, &p32->tv_sec) || put_user(p->tv_nsec, &p32->tv_nsec)) - return -EFAULT; + return -XFS_ERROR(EFAULT); return 0; } -STATIC int xfs_bulkstat_one_fmt_compat( +STATIC int +xfs_bulkstat_one_fmt_compat( void __user *ubuffer, const xfs_bstat_t *buffer) { - compat_xfs_bstat_t __user *p32 = ubuffer; + compat_xfs_bstat_t __user *p32 = ubuffer; if (put_user(buffer->bs_ino, &p32->bs_ino) || put_user(buffer->bs_mode, &p32->bs_mode) || @@ -142,7 +151,7 @@ STATIC int xfs_bulkstat_one_fmt_compat( put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || put_user(buffer->bs_dmstate, &p32->bs_dmstate) || put_user(buffer->bs_aextents, &p32->bs_aextents)) - return -EFAULT; + return -XFS_ERROR(EFAULT); return sizeof(*p32); } @@ -165,20 +174,20 @@ xfs_ioc_bulkstat_compat( /* should be called again (unused here, but used in dmapi) */ if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + return -XFS_ERROR(EPERM); if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); if (get_user(addr, &p32->lastip)) - return -EFAULT; + return -XFS_ERROR(EFAULT); bulkreq.lastip = compat_ptr(addr); if (get_user(bulkreq.icount, &p32->icount) || get_user(addr, &p32->ubuffer)) - return -EFAULT; + return -XFS_ERROR(EFAULT); bulkreq.ubuffer = compat_ptr(addr); if (get_user(addr, &p32->ocount)) - return -EFAULT; + return -XFS_ERROR(EFAULT); bulkreq.ocount = compat_ptr(addr); if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) @@ -216,38 +225,40 @@ xfs_ioc_bulkstat_compat( return 0; } -STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg) +STATIC int 
+xfs_compat_handlereq_copyin( + xfs_fsop_handlereq_t *hreq, + compat_xfs_fsop_handlereq_t __user *arg32) { - compat_xfs_fsop_handlereq_t __user *p32 = (void __user *)arg; - xfs_fsop_handlereq_t __user *p = compat_alloc_user_space(sizeof(*p)); - u32 addr; - - if (copy_in_user(&p->fd, &p32->fd, sizeof(__u32)) || - get_user(addr, &p32->path) || - put_user(compat_ptr(addr), &p->path) || - copy_in_user(&p->oflags, &p32->oflags, sizeof(__u32)) || - get_user(addr, &p32->ihandle) || - put_user(compat_ptr(addr), &p->ihandle) || - copy_in_user(&p->ihandlen, &p32->ihandlen, sizeof(__u32)) || - get_user(addr, &p32->ohandle) || - put_user(compat_ptr(addr), &p->ohandle) || - get_user(addr, &p32->ohandlen) || - put_user(compat_ptr(addr), &p->ohandlen)) - return -EFAULT; - - return (unsigned long)p; + compat_xfs_fsop_handlereq_t hreq32; + + if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + + hreq->fd = hreq32.fd; + hreq->path = compat_ptr(hreq32.path); + hreq->oflags = hreq32.oflags; + hreq->ihandle = compat_ptr(hreq32.ihandle); + hreq->ihandlen = hreq32.ihandlen; + hreq->ohandle = compat_ptr(hreq32.ohandle); + hreq->ohandlen = compat_ptr(hreq32.ohandlen); + + return 0; } STATIC long xfs_compat_ioctl( - int mode, - struct file *file, + xfs_inode_t *ip, + struct file *filp, + int ioflags, unsigned cmd, - unsigned long arg) + void __user *arg) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = filp->f_path.dentry->d_inode; + xfs_mount_t *mp = ip->i_mount; int error; + xfs_itrace_entry(XFS_I(inode)); switch (cmd) { case XFS_IOC_DIOINFO: case XFS_IOC_FSGEOMETRY: @@ -290,14 +301,16 @@ xfs_compat_ioctl( case XFS_IOC_RESVSP_32: case XFS_IOC_UNRESVSP_32: case XFS_IOC_RESVSP64_32: - case XFS_IOC_UNRESVSP64_32: - arg = xfs_ioctl32_flock(arg); + case XFS_IOC_UNRESVSP64_32: { + struct xfs_flock64 bf; + + if (xfs_compat_flock64_copyin(&bf, arg)) + return -XFS_ERROR(EFAULT); cmd = _NATIVE_IOC(cmd, struct xfs_flock64); - break; + return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); + } case XFS_IOC_FSGEOMETRY_V1_32: - arg = xfs_ioctl32_geom_v1(arg); - cmd = _NATIVE_IOC(cmd, struct xfs_fsop_geom_v1); - break; + return xfs_compat_ioc_fsgeometry_v1(mp, arg); #else /* These are handled fine if no alignment issues */ case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: @@ -323,35 +336,55 @@ xfs_compat_ioctl( cmd, (void __user*)arg); case XFS_IOC_FD_TO_HANDLE_32: case XFS_IOC_PATH_TO_HANDLE_32: - case XFS_IOC_PATH_TO_FSHANDLE_32: - case XFS_IOC_OPEN_BY_HANDLE_32: - case XFS_IOC_READLINK_BY_HANDLE_32: - arg = xfs_ioctl32_fshandle(arg); + case XFS_IOC_PATH_TO_FSHANDLE_32: { + struct xfs_fsop_handlereq hreq; + + if (xfs_compat_handlereq_copyin(&hreq, arg)) + return -XFS_ERROR(EFAULT); cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq); - break; + return xfs_find_handle(cmd, &hreq); + } + case XFS_IOC_OPEN_BY_HANDLE_32: { + struct xfs_fsop_handlereq hreq; + + if (xfs_compat_handlereq_copyin(&hreq, arg)) + return -XFS_ERROR(EFAULT); + return xfs_open_by_handle(mp, &hreq, filp, inode); + } + case XFS_IOC_READLINK_BY_HANDLE_32: { + struct xfs_fsop_handlereq hreq; + + if (xfs_compat_handlereq_copyin(&hreq, arg)) + return -XFS_ERROR(EFAULT); + return xfs_readlink_by_handle(mp, &hreq, inode); + } default: - return -ENOIOCTLCMD; + return -XFS_ERROR(ENOIOCTLCMD); } - error = xfs_ioctl(XFS_I(inode), file, mode, cmd, (void __user *)arg); - xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED); + error = xfs_ioctl(ip, filp, ioflags, cmd, arg); return error; } long 
xfs_file_compat_ioctl( - struct file *file, - unsigned cmd, - unsigned long arg) + struct file *filp, + unsigned int cmd, + unsigned long p) { - return xfs_compat_ioctl(0, file, cmd, arg); + struct inode *inode = filp->f_path.dentry->d_inode; + + return xfs_compat_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p); } long xfs_file_compat_invis_ioctl( - struct file *file, - unsigned cmd, - unsigned long arg) + struct file *filp, + unsigned int cmd, + unsigned long p) { - return xfs_compat_ioctl(IO_INVIS, file, cmd, arg); + struct inode *inode = filp->f_path.dentry->d_inode; + + return xfs_compat_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, + (void __user *)p); } -- cgit v0.10.2 From e94fc4a43e5c39f689e83caf6d2f0939081f5e6b Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:09 -0600 Subject: [XFS] Add compat handlers for swapext ioctl The big hitter here was the bstat field, which contains different sized time_t on 32 vs. 64 bit. Add a copyin function to translate the 32-bit arg to 64-bit, and call the swapext ioctl helper. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 35edc05..132f3dd 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -108,6 +108,50 @@ xfs_inumbers_fmt_compat( #define xfs_inumbers_fmt_compat xfs_inumbers_fmt #endif +STATIC int +xfs_ioctl32_bstime_copyin( + xfs_bstime_t *bstime, + compat_xfs_bstime_t __user *bstime32) +{ + compat_time_t sec32; /* tv_sec differs on 64 vs. 32 */ + + if (get_user(sec32, &bstime32->tv_sec) || + get_user(bstime->tv_nsec, &bstime32->tv_nsec)) + return -XFS_ERROR(EFAULT); + bstime->tv_sec = sec32; + return 0; +} + +/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */ +STATIC int +xfs_ioctl32_bstat_copyin( + xfs_bstat_t *bstat, + compat_xfs_bstat_t __user *bstat32) +{ + if (get_user(bstat->bs_ino, &bstat32->bs_ino) || + get_user(bstat->bs_mode, &bstat32->bs_mode) || + get_user(bstat->bs_nlink, &bstat32->bs_nlink) || + get_user(bstat->bs_uid, &bstat32->bs_uid) || + get_user(bstat->bs_gid, &bstat32->bs_gid) || + get_user(bstat->bs_rdev, &bstat32->bs_rdev) || + get_user(bstat->bs_blksize, &bstat32->bs_blksize) || + get_user(bstat->bs_size, &bstat32->bs_size) || + xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) || + xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) || + xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) || + get_user(bstat->bs_blocks, &bstat32->bs_size) || + get_user(bstat->bs_xflags, &bstat32->bs_size) || + get_user(bstat->bs_extsize, &bstat32->bs_extsize) || + get_user(bstat->bs_extents, &bstat32->bs_extents) || + get_user(bstat->bs_gen, &bstat32->bs_gen) || + get_user(bstat->bs_projid, &bstat32->bs_projid) || + get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || + get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || + get_user(bstat->bs_aextents, &bstat32->bs_aextents)) + return -XFS_ERROR(EFAULT); + return 0; +} + /* XFS_IOC_FSBULKSTAT and friends */ STATIC int @@ -292,6 +336,18 @@ xfs_compat_ioctl( case XFS_IOC_GETVERSION_32: cmd = _NATIVE_IOC(cmd, long); break; + case XFS_IOC_SWAPEXT: { + struct xfs_swapext sxp; + struct compat_xfs_swapext __user *sxu = arg; + + /* Bulk copy in up to the sx_stat field, then grab bstat */ + if (copy_from_user(&sxp, sxu, + offsetof(xfs_swapext_t, sx_stat)) || + xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) + return -XFS_ERROR(EFAULT); 
+ error = xfs_swapext(&sxp); + return -error; + } #ifdef BROKEN_X86_ALIGNMENT /* xfs_flock_t has wrong u32 vs u64 alignment */ case XFS_IOC_ALLOCSP_32: @@ -322,11 +378,6 @@ xfs_compat_ioctl( case XFS_IOC_UNRESVSP64: case XFS_IOC_FSGEOMETRY_V1: break; - - /* xfs_bstat_t still has wrong u32 vs u64 alignment */ - case XFS_IOC_SWAPEXT: - break; - #endif case XFS_IOC_FSBULKSTAT_32: case XFS_IOC_FSBULKSTAT_SINGLE_32: diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 003a10e6..bf08ab1 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -110,6 +110,19 @@ typedef struct compat_xfs_fsop_handlereq { #define XFS_IOC_READLINK_BY_HANDLE_32 \ _IOWR('X', 108, struct compat_xfs_fsop_handlereq) +/* The bstat field in the swapext struct needs translation */ +typedef struct compat_xfs_swapext { + __int64_t sx_version; /* version */ + __int64_t sx_fdtarget; /* fd of target file */ + __int64_t sx_fdtmp; /* fd of tmp file */ + xfs_off_t sx_offset; /* offset into file */ + xfs_off_t sx_length; /* leng from offset */ + char sx_pad[16]; /* pad space, unused */ + compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */ +} __compat_packed compat_xfs_swapext_t; + +#define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext) + #ifdef BROKEN_X86_ALIGNMENT /* on ia32 l_start is on a 32-bit boundary */ typedef struct compat_xfs_flock64 { -- cgit v0.10.2 From 471d59103167c84f17b9bcfee22ed10b44ff206e Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:10 -0600 Subject: [XFS] Add compat handlers for data & rt growfs ioctls The args for XFS_IOC_FSGROWFSDATA and XFS_IOC_FSGROWFSRT have padding on the end on intel, so add arg copyin functions, and then just call the growfs ioctl helpers. 
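For illustration (hypothetical structure and field names, not part of the patch), the copyin pattern used for arguments whose 32-bit layout differs only by packing or trailing padding is simply a per-field get_user():

	/* packed 32-bit layout as userspace sees it; names are made up */
	typedef struct compat_xfs_example_args {
		__u64	newblocks;
		__u32	flags;
	} __attribute__((packed)) compat_xfs_example_args_t;

	STATIC int
	xfs_compat_example_copyin(
		struct xfs_example_args		*in,	/* native, padded layout */
		compat_xfs_example_args_t __user *arg32)
	{
		/* copying field by field sidesteps the size/padding mismatch */
		if (get_user(in->newblocks, &arg32->newblocks) ||
		    get_user(in->flags, &arg32->flags))
			return -XFS_ERROR(EFAULT);
		return 0;
	}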
Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 132f3dd..d1ac5d5 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -44,6 +44,8 @@ #include "xfs_dfrag.h" #include "xfs_vnodeops.h" #include "xfs_fsops.h" +#include "xfs_alloc.h" +#include "xfs_rtalloc.h" #include "xfs_ioctl.h" #include "xfs_ioctl32.h" @@ -85,6 +87,28 @@ xfs_compat_ioc_fsgeometry_v1( } STATIC int +xfs_compat_growfs_data_copyin( + struct xfs_growfs_data *in, + compat_xfs_growfs_data_t __user *arg32) +{ + if (get_user(in->newblocks, &arg32->newblocks) || + get_user(in->imaxpct, &arg32->imaxpct)) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_compat_growfs_rt_copyin( + struct xfs_growfs_rt *in, + compat_xfs_growfs_rt_t __user *arg32) +{ + if (get_user(in->newblocks, &arg32->newblocks) || + get_user(in->extsize, &arg32->extsize)) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int xfs_inumbers_fmt_compat( void __user *ubuffer, const xfs_inogrp_t *buffer, @@ -367,6 +391,22 @@ xfs_compat_ioctl( } case XFS_IOC_FSGEOMETRY_V1_32: return xfs_compat_ioc_fsgeometry_v1(mp, arg); + case XFS_IOC_FSGROWFSDATA_32: { + struct xfs_growfs_data in; + + if (xfs_compat_growfs_data_copyin(&in, arg)) + return -XFS_ERROR(EFAULT); + error = xfs_growfs_data(mp, &in); + return -error; + } + case XFS_IOC_FSGROWFSRT_32: { + struct xfs_growfs_rt in; + + if (xfs_compat_growfs_rt_copyin(&in, arg)) + return -XFS_ERROR(EFAULT); + error = xfs_growfs_rt(mp, &in); + return -error; + } #else /* These are handled fine if no alignment issues */ case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index bf08ab1..8b2a12a 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -177,6 +177,20 @@ typedef struct compat_xfs_inogrp { __u64 xi_allocmask; /* mask of allocated inodes */ } __attribute__((packed)) compat_xfs_inogrp_t; +/* These growfs input structures have padding on the end, so must translate */ +typedef struct compat_xfs_growfs_data { + __u64 newblocks; /* new data subvol size, fsblocks */ + __u32 imaxpct; /* new inode space percentage limit */ +} __attribute__((packed)) compat_xfs_growfs_data_t; + +typedef struct compat_xfs_growfs_rt { + __u64 newblocks; /* new realtime size, fsblocks */ + __u32 extsize; /* new realtime extent size, fsblocks */ +} __attribute__((packed)) compat_xfs_growfs_rt_t; + +#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data) +#define XFS_IOC_FSGROWFSRT_32 _IOW('X', 112, struct compat_xfs_growfs_rt) + #endif /* BROKEN_X86_ALIGNMENT */ #endif /* __XFS_IOCTL32_H__ */ -- cgit v0.10.2 From 2ee4fa5cb716eba104a4ef8efe159e1007a2aef6 Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:11 -0600 Subject: [XFS] Make the bulkstat_one compat ioctl handling more sane Currently the compat formatter was handled by passing in "private_data" for the xfs_bulkstat_one formatter, which was really just another formatter... IMHO this got confusing. Instead, just make a new xfs_bulkstat_one_compat formatter for xfs_bulkstat, and call it via a wrapper. Also, don't translate the ioctl nrs into their native counterparts, that just clouds the issue; we're in a compat handler anyway, just switch on the 32-bit cmds. 
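A condensed sketch of the wrapper shape this introduces (all names here are hypothetical; the real routines appear in the diff below): one internal routine takes the copyout formatter as an explicit argument, and trivial wrappers bind either the native or the compat formatter:

	/* hypothetical formatter type matching the native/compat copyout helpers */
	typedef int (*example_fmt_pf)(void __user *ubuffer, const xfs_bstat_t *buf);

	STATIC int
	xfs_example_one_int(
		xfs_mount_t		*mp,
		xfs_ino_t		ino,
		void __user		*ubuffer,
		example_fmt_pf		formatter)
	{
		xfs_bstat_t	bs;

		memset(&bs, 0, sizeof(bs));	/* gather stat data here */
		return formatter(ubuffer, &bs);	/* only the copyout differs */
	}

	STATIC int
	xfs_example_one_compat(
		xfs_mount_t	*mp,
		xfs_ino_t	ino,
		void __user	*ubuffer)
	{
		/* example_fmt_compat: hypothetical 32-bit copyout formatter */
		return xfs_example_one_int(mp, ino, ubuffer, example_fmt_compat);
	}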
Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index d1ac5d5..a97022f 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -223,14 +223,30 @@ xfs_bulkstat_one_fmt_compat( return sizeof(*p32); } +STATIC int +xfs_bulkstat_one_compat( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode number to get data for */ + void __user *buffer, /* buffer to place output in */ + int ubsize, /* size of buffer */ + void *private_data, /* my private data */ + xfs_daddr_t bno, /* starting bno of inode cluster */ + int *ubused, /* bytes used by me */ + void *dibuff, /* on-disk inode buffer */ + int *stat) /* BULKSTAT_RV_... */ +{ + return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, + xfs_bulkstat_one_fmt_compat, bno, + ubused, dibuff, stat); +} + /* copied from xfs_ioctl.c */ STATIC int -xfs_ioc_bulkstat_compat( - xfs_mount_t *mp, - unsigned int cmd, - void __user *arg) +xfs_compat_ioc_bulkstat( + xfs_mount_t *mp, + unsigned int cmd, + compat_xfs_fsop_bulkreq_t __user *p32) { - compat_xfs_fsop_bulkreq_t __user *p32 = (void __user *)arg; u32 addr; xfs_fsop_bulkreq_t bulkreq; int count; /* # of records returned */ @@ -267,14 +283,12 @@ xfs_ioc_bulkstat_compat( if (bulkreq.ubuffer == NULL) return -XFS_ERROR(EINVAL); - if (cmd == XFS_IOC_FSINUMBERS) + if (cmd == XFS_IOC_FSINUMBERS_32) error = xfs_inumbers(mp, &inlast, &count, bulkreq.ubuffer, xfs_inumbers_fmt_compat); else { - /* declare a var to get a warning in case the type changes */ - bulkstat_one_fmt_pf formatter = xfs_bulkstat_one_fmt_compat; error = xfs_bulkstat(mp, &inlast, &count, - xfs_bulkstat_one, formatter, + xfs_bulkstat_one_compat, NULL, sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, BULKSTAT_FG_QUICK, &done); } @@ -422,9 +436,7 @@ xfs_compat_ioctl( case XFS_IOC_FSBULKSTAT_32: case XFS_IOC_FSBULKSTAT_SINGLE_32: case XFS_IOC_FSINUMBERS_32: - cmd = _NATIVE_IOC(cmd, struct xfs_fsop_bulkreq); - return xfs_ioc_bulkstat_compat(XFS_I(inode)->i_mount, - cmd, (void __user*)arg); + return xfs_compat_ioc_bulkstat(mp, cmd, arg); case XFS_IOC_FD_TO_HANDLE_32: case XFS_IOC_PATH_TO_HANDLE_32: case XFS_IOC_PATH_TO_FSHANDLE_32: { diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index d4c0de8..7bd49b8 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -202,13 +202,13 @@ xfs_bulkstat_one_fmt( * Return stat information for one inode. * Return 0 if ok, else errno. */ -int /* error status */ -xfs_bulkstat_one( +int /* error status */ +xfs_bulkstat_one_int( xfs_mount_t *mp, /* mount point for filesystem */ xfs_ino_t ino, /* inode number to get data for */ void __user *buffer, /* buffer to place output in */ int ubsize, /* size of buffer */ - void *private_data, /* my private data */ + bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ xfs_daddr_t bno, /* starting bno of inode cluster */ int *ubused, /* bytes used by me */ void *dibuff, /* on-disk inode buffer */ @@ -217,7 +217,6 @@ xfs_bulkstat_one( xfs_bstat_t *buf; /* return buffer */ int error = 0; /* error value */ xfs_dinode_t *dip; /* dinode inode pointer */ - bulkstat_one_fmt_pf formatter = private_data ? 
: xfs_bulkstat_one_fmt; dip = (xfs_dinode_t *)dibuff; *stat = BULKSTAT_RV_NOTHING; @@ -255,6 +254,23 @@ xfs_bulkstat_one( return error; } +int +xfs_bulkstat_one( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode number to get data for */ + void __user *buffer, /* buffer to place output in */ + int ubsize, /* size of buffer */ + void *private_data, /* my private data */ + xfs_daddr_t bno, /* starting bno of inode cluster */ + int *ubused, /* bytes used by me */ + void *dibuff, /* on-disk inode buffer */ + int *stat) /* BULKSTAT_RV_... */ +{ + return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, + xfs_bulkstat_one_fmt, bno, + ubused, dibuff, stat); +} + /* * Test to see whether we can use the ondisk inode directly, based * on the given bulkstat flags, filling in dipp accordingly. diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index a1f18fc..e210a4c 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -74,6 +74,18 @@ typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ const xfs_bstat_t *buffer); /* buffer to read from */ int +xfs_bulkstat_one_int( + xfs_mount_t *mp, + xfs_ino_t ino, + void __user *buffer, + int ubsize, + bulkstat_one_fmt_pf formatter, + xfs_daddr_t bno, + int *ubused, + void *dibuff, + int *stat); + +int xfs_bulkstat_one( xfs_mount_t *mp, xfs_ino_t ino, -- cgit v0.10.2 From 65fbaf2489c667bf79ae1f20403f30c66568d445 Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:12 -0600 Subject: [XFS] Fix xfs_bulkstat_one size checks & error handling The 32-bit xfs_bulkstat_one handler was failing because a size check checked whether the remaining (32-bit) user buffer was less than the (64-bit) bulkstat buffer, and failed with ENOMEM if so. Move this check into the respective handlers so that they check the correct sizes. Also, the formatters were returning negative errors or positive bytes copied; this was odd in the positive error value world of xfs, and handled wrong by at least some of the callers, which treated the bytes returned as an error value. Move the bytes-used assignment into the formatters. 
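To make the revised contract concrete, a minimal formatter now looks like this (the name is hypothetical, but the body mirrors the reworked native xfs_bulkstat_one_fmt in the diff below): it checks the remaining user buffer itself, returns 0 or a positive XFS-style error, and reports the bytes it consumed through *ubused:

	STATIC int
	example_bstat_fmt(
		void __user		*ubuffer,
		int			ubsize,
		int			*ubused,
		const xfs_bstat_t	*buffer)
	{
		if (ubsize < sizeof(*buffer))
			return XFS_ERROR(ENOMEM);	/* positive error, not -ENOMEM */
		if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
			return XFS_ERROR(EFAULT);
		if (ubused)
			*ubused = sizeof(*buffer);	/* bytes consumed on success */
		return 0;
	}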
Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index a97022f..2d33606 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -192,35 +192,43 @@ xfs_bstime_store_compat( return 0; } +/* Return 0 on success or positive error (to xfs_bulkstat()) */ STATIC int xfs_bulkstat_one_fmt_compat( void __user *ubuffer, + int ubsize, + int *ubused, const xfs_bstat_t *buffer) { compat_xfs_bstat_t __user *p32 = ubuffer; - if (put_user(buffer->bs_ino, &p32->bs_ino) || - put_user(buffer->bs_mode, &p32->bs_mode) || - put_user(buffer->bs_nlink, &p32->bs_nlink) || - put_user(buffer->bs_uid, &p32->bs_uid) || - put_user(buffer->bs_gid, &p32->bs_gid) || - put_user(buffer->bs_rdev, &p32->bs_rdev) || - put_user(buffer->bs_blksize, &p32->bs_blksize) || - put_user(buffer->bs_size, &p32->bs_size) || + if (ubsize < sizeof(*p32)) + return XFS_ERROR(ENOMEM); + + if (put_user(buffer->bs_ino, &p32->bs_ino) || + put_user(buffer->bs_mode, &p32->bs_mode) || + put_user(buffer->bs_nlink, &p32->bs_nlink) || + put_user(buffer->bs_uid, &p32->bs_uid) || + put_user(buffer->bs_gid, &p32->bs_gid) || + put_user(buffer->bs_rdev, &p32->bs_rdev) || + put_user(buffer->bs_blksize, &p32->bs_blksize) || + put_user(buffer->bs_size, &p32->bs_size) || xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) || xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) || xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) || - put_user(buffer->bs_blocks, &p32->bs_blocks) || - put_user(buffer->bs_xflags, &p32->bs_xflags) || - put_user(buffer->bs_extsize, &p32->bs_extsize) || - put_user(buffer->bs_extents, &p32->bs_extents) || - put_user(buffer->bs_gen, &p32->bs_gen) || - put_user(buffer->bs_projid, &p32->bs_projid) || - put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || - put_user(buffer->bs_dmstate, &p32->bs_dmstate) || + put_user(buffer->bs_blocks, &p32->bs_blocks) || + put_user(buffer->bs_xflags, &p32->bs_xflags) || + put_user(buffer->bs_extsize, &p32->bs_extsize) || + put_user(buffer->bs_extents, &p32->bs_extents) || + put_user(buffer->bs_gen, &p32->bs_gen) || + put_user(buffer->bs_projid, &p32->bs_projid) || + put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || + put_user(buffer->bs_dmstate, &p32->bs_dmstate) || put_user(buffer->bs_aextents, &p32->bs_aextents)) - return -XFS_ERROR(EFAULT); - return sizeof(*p32); + return XFS_ERROR(EFAULT); + if (ubused) + *ubused = sizeof(*p32); + return 0; } STATIC int diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 7bd49b8..e19d0a8 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -188,14 +188,21 @@ xfs_bulkstat_one_dinode( } } +/* Return 0 on success or positive error */ STATIC int xfs_bulkstat_one_fmt( void __user *ubuffer, + int ubsize, + int *ubused, const xfs_bstat_t *buffer) { + if (ubsize < sizeof(*buffer)) + return XFS_ERROR(ENOMEM); if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) - return -EFAULT; - return sizeof(*buffer); + return XFS_ERROR(EFAULT); + if (ubused) + *ubused = sizeof(*buffer); + return 0; } /* @@ -223,8 +230,6 @@ xfs_bulkstat_one_int( if (!buffer || xfs_internal_inum(mp, ino)) return XFS_ERROR(EINVAL); - if (ubsize < sizeof(*buf)) - return XFS_ERROR(ENOMEM); buf = kmem_alloc(sizeof(*buf), KM_SLEEP); @@ -239,15 +244,11 @@ xfs_bulkstat_one_int( xfs_bulkstat_one_dinode(mp, ino, dip, buf); } - error = formatter(buffer, buf); - if (error < 0) { - error = EFAULT; + error = formatter(buffer, ubsize, 
ubused, buf); + if (error) goto out_free; - } *stat = BULKSTAT_RV_DIDONE; - if (ubused) - *ubused = error; out_free: kmem_free(buf); diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index e210a4c..1fb04e7 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -71,6 +71,8 @@ xfs_bulkstat_single( typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ void __user *ubuffer, /* buffer to write to */ + int ubsize, /* remaining user buffer sz */ + int *ubused, /* bytes used by formatter */ const xfs_bstat_t *buffer); /* buffer to read from */ int -- cgit v0.10.2 From af819d27637119105213433881f158931e29620b Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:13 -0600 Subject: [XFS] Fix compat XFS_IOC_FSBULKSTAT_SINGLE ioctl The XFS_IOC_FSBULKSTAT_SINGLE ioctl passes in the desired inode number, while XFS_IOC_FSBULKSTAT passes in the previous/last-stat'd inode number. The compat handler wasn't differentiating these, so when a XFS_IOC_FSBULKSTAT_SINGLE request for inode 128 was sent in, stat information for 131 was sent out. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 2d33606..cc1fe96 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -291,15 +291,22 @@ xfs_compat_ioc_bulkstat( if (bulkreq.ubuffer == NULL) return -XFS_ERROR(EINVAL); - if (cmd == XFS_IOC_FSINUMBERS_32) + if (cmd == XFS_IOC_FSINUMBERS_32) { error = xfs_inumbers(mp, &inlast, &count, bulkreq.ubuffer, xfs_inumbers_fmt_compat); - else { + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { + int res; + + error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, + sizeof(compat_xfs_bstat_t), + NULL, 0, NULL, NULL, &res); + } else if (cmd == XFS_IOC_FSBULKSTAT_32) { error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one_compat, NULL, sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, BULKSTAT_FG_QUICK, &done); - } + } else + error = XFS_ERROR(EINVAL); if (error) return -error; -- cgit v0.10.2 From ebeecd2b04645a4b79e1bc00d69cf4f98e03a684 Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:14 -0600 Subject: [XFS] Hook up compat XFS_IOC_ATTRLIST_BY_HANDLE ioctl handler Add a compat handler for XFS_IOC_ATTRLIST_BY_HANDLE Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index cc1fe96..e7eb0d7 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -46,6 +46,7 @@ #include "xfs_fsops.h" #include "xfs_alloc.h" #include "xfs_rtalloc.h" +#include "xfs_attr.h" #include "xfs_ioctl.h" #include "xfs_ioctl32.h" @@ -343,6 +344,138 @@ xfs_compat_handlereq_copyin( return 0; } +/* + * Convert userspace handle data into inode. + * + * We use the fact that all the fsop_handlereq ioctl calls have a data + * structure argument whose first component is always a xfs_fsop_handlereq_t, + * so we can pass that sub structure into this handy, shared routine. + * + * If no error, caller must always iput the returned inode. 
+ */ +STATIC int +xfs_vget_fsop_handlereq_compat( + xfs_mount_t *mp, + struct inode *parinode, /* parent inode pointer */ + compat_xfs_fsop_handlereq_t *hreq, + struct inode **inode) +{ + void __user *hanp; + size_t hlen; + xfs_fid_t *xfid; + xfs_handle_t *handlep; + xfs_handle_t handle; + xfs_inode_t *ip; + xfs_ino_t ino; + __u32 igen; + int error; + + /* + * Only allow handle opens under a directory. + */ + if (!S_ISDIR(parinode->i_mode)) + return XFS_ERROR(ENOTDIR); + + hanp = compat_ptr(hreq->ihandle); + hlen = hreq->ihandlen; + handlep = &handle; + + if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep)) + return XFS_ERROR(EINVAL); + if (copy_from_user(handlep, hanp, hlen)) + return XFS_ERROR(EFAULT); + if (hlen < sizeof(*handlep)) + memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen); + if (hlen > sizeof(handlep->ha_fsid)) { + if (handlep->ha_fid.fid_len != + (hlen - sizeof(handlep->ha_fsid) - + sizeof(handlep->ha_fid.fid_len)) || + handlep->ha_fid.fid_pad) + return XFS_ERROR(EINVAL); + } + + /* + * Crack the handle, obtain the inode # & generation # + */ + xfid = (struct xfs_fid *)&handlep->ha_fid; + if (xfid->fid_len == sizeof(*xfid) - sizeof(xfid->fid_len)) { + ino = xfid->fid_ino; + igen = xfid->fid_gen; + } else { + return XFS_ERROR(EINVAL); + } + + /* + * Get the XFS inode, building a Linux inode to go with it. + */ + error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0); + if (error) + return error; + if (ip == NULL) + return XFS_ERROR(EIO); + if (ip->i_d.di_gen != igen) { + xfs_iput_new(ip, XFS_ILOCK_SHARED); + return XFS_ERROR(ENOENT); + } + + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + *inode = VFS_I(ip); + return 0; +} + +STATIC int +xfs_compat_attrlist_by_handle( + xfs_mount_t *mp, + void __user *arg, + struct inode *parinode) +{ + int error; + attrlist_cursor_kern_t *cursor; + compat_xfs_fsop_attrlist_handlereq_t al_hreq; + struct inode *inode; + char *kbuf; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&al_hreq, arg, + sizeof(compat_xfs_fsop_attrlist_handlereq_t))) + return -XFS_ERROR(EFAULT); + if (al_hreq.buflen > XATTR_LIST_MAX) + return -XFS_ERROR(EINVAL); + + /* + * Reject flags, only allow namespaces. 
+ */ + if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) + return -XFS_ERROR(EINVAL); + + error = xfs_vget_fsop_handlereq_compat(mp, parinode, &al_hreq.hreq, + &inode); + if (error) + goto out; + + kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); + if (!kbuf) + goto out_vn_rele; + + cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; + error = xfs_attr_list(XFS_I(inode), kbuf, al_hreq.buflen, + al_hreq.flags, cursor); + if (error) + goto out_kfree; + + if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen)) + error = -EFAULT; + + out_kfree: + kfree(kbuf); + out_vn_rele: + iput(inode); + out: + return -error; +} + STATIC long xfs_compat_ioctl( xfs_inode_t *ip, @@ -368,7 +501,6 @@ xfs_compat_ioctl( case XFS_IOC_GETBMAPX: /* not handled case XFS_IOC_FSSETDM_BY_HANDLE: - case XFS_IOC_ATTRLIST_BY_HANDLE: case XFS_IOC_ATTRMULTI_BY_HANDLE: */ case XFS_IOC_FSCOUNTS: @@ -476,6 +608,8 @@ xfs_compat_ioctl( return -XFS_ERROR(EFAULT); return xfs_readlink_by_handle(mp, &hreq, inode); } + case XFS_IOC_ATTRLIST_BY_HANDLE_32: + return xfs_compat_attrlist_by_handle(mp, arg, inode); default: return -XFS_ERROR(ENOIOCTLCMD); } diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 8b2a12a..bbf3a2c 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -123,6 +123,18 @@ typedef struct compat_xfs_swapext { #define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext) +typedef struct compat_xfs_fsop_attrlist_handlereq { + struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */ + struct xfs_attrlist_cursor pos; /* opaque cookie, list offset */ + __u32 flags; /* which namespace to use */ + __u32 buflen; /* length of buffer supplied */ + compat_uptr_t buffer; /* returned names */ +} __compat_packed compat_xfs_fsop_attrlist_handlereq_t; + +/* Note: actually this is read/write */ +#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \ + _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq) + #ifdef BROKEN_X86_ALIGNMENT /* on ia32 l_start is on a 32-bit boundary */ typedef struct compat_xfs_flock64 { -- cgit v0.10.2 From 28750975ace79c547407a84d3969cbed516be8f8 Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:15 -0600 Subject: [XFS] Hook up compat XFS_IOC_ATTRMULTI_BY_HANDLE ioctl handler Add a compat handler for XFS_IOC_ATTRMULTI_BY_HANDLE Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 498d3e1..c8f1e63 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -490,7 +490,7 @@ xfs_attrlist_by_handle( return -error; } -STATIC int +int xfs_attrmulti_attr_get( struct inode *inode, char *name, @@ -519,7 +519,7 @@ xfs_attrmulti_attr_get( return error; } -STATIC int +int xfs_attrmulti_attr_set( struct inode *inode, char *name, @@ -549,7 +549,7 @@ xfs_attrmulti_attr_set( return error; } -STATIC int +int xfs_attrmulti_attr_remove( struct inode *inode, char *name, diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h index 2df849f..f67dc69 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.h +++ b/fs/xfs/linux-2.6/xfs_ioctl.h @@ -44,4 +44,27 @@ xfs_readlink_by_handle( xfs_mount_t *mp, xfs_fsop_handlereq_t *hreq, struct inode *parinode); + +extern int +xfs_attrmulti_attr_get( + struct inode *inode, + char *name, + char __user *ubuf, + __uint32_t *len, + __uint32_t flags); + +extern int + xfs_attrmulti_attr_set( + struct inode *inode, + char *name, + const char __user *ubuf, + 
__uint32_t len, + __uint32_t flags); + +extern int +xfs_attrmulti_attr_remove( + struct inode *inode, + char *name, + __uint32_t flags); + #endif diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index e7eb0d7..17993352a 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -476,6 +476,93 @@ xfs_compat_attrlist_by_handle( return -error; } +STATIC int +xfs_compat_attrmulti_by_handle( + xfs_mount_t *mp, + void __user *arg, + struct inode *parinode) +{ + int error; + compat_xfs_attr_multiop_t *ops; + compat_xfs_fsop_attrmulti_handlereq_t am_hreq; + struct inode *inode; + unsigned int i, size; + char *attr_name; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&am_hreq, arg, + sizeof(compat_xfs_fsop_attrmulti_handlereq_t))) + return -XFS_ERROR(EFAULT); + + error = xfs_vget_fsop_handlereq_compat(mp, parinode, &am_hreq.hreq, + &inode); + if (error) + goto out; + + error = E2BIG; + size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t); + if (!size || size > 16 * PAGE_SIZE) + goto out_vn_rele; + + error = ENOMEM; + ops = kmalloc(size, GFP_KERNEL); + if (!ops) + goto out_vn_rele; + + error = EFAULT; + if (copy_from_user(ops, compat_ptr(am_hreq.ops), size)) + goto out_kfree_ops; + + attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); + if (!attr_name) + goto out_kfree_ops; + + + error = 0; + for (i = 0; i < am_hreq.opcount; i++) { + ops[i].am_error = strncpy_from_user(attr_name, + compat_ptr(ops[i].am_attrname), + MAXNAMELEN); + if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) + error = -ERANGE; + if (ops[i].am_error < 0) + break; + + switch (ops[i].am_opcode) { + case ATTR_OP_GET: + ops[i].am_error = xfs_attrmulti_attr_get(inode, + attr_name, + compat_ptr(ops[i].am_attrvalue), + &ops[i].am_length, ops[i].am_flags); + break; + case ATTR_OP_SET: + ops[i].am_error = xfs_attrmulti_attr_set(inode, + attr_name, + compat_ptr(ops[i].am_attrvalue), + ops[i].am_length, ops[i].am_flags); + break; + case ATTR_OP_REMOVE: + ops[i].am_error = xfs_attrmulti_attr_remove(inode, + attr_name, ops[i].am_flags); + break; + default: + ops[i].am_error = EINVAL; + } + } + + if (copy_to_user(compat_ptr(am_hreq.ops), ops, size)) + error = XFS_ERROR(EFAULT); + + kfree(attr_name); + out_kfree_ops: + kfree(ops); + out_vn_rele: + iput(inode); + out: + return -error; +} + STATIC long xfs_compat_ioctl( xfs_inode_t *ip, @@ -499,10 +586,7 @@ xfs_compat_ioctl( case XFS_IOC_GETBMAP: case XFS_IOC_GETBMAPA: case XFS_IOC_GETBMAPX: -/* not handled - case XFS_IOC_FSSETDM_BY_HANDLE: - case XFS_IOC_ATTRMULTI_BY_HANDLE: -*/ +/* case XFS_IOC_FSSETDM_BY_HANDLE: not handled */ case XFS_IOC_FSCOUNTS: case XFS_IOC_SET_RESBLKS: case XFS_IOC_GET_RESBLKS: @@ -610,6 +694,8 @@ xfs_compat_ioctl( } case XFS_IOC_ATTRLIST_BY_HANDLE_32: return xfs_compat_attrlist_by_handle(mp, arg, inode); + case XFS_IOC_ATTRMULTI_BY_HANDLE_32: + return xfs_compat_attrmulti_by_handle(mp, arg, inode); default: return -XFS_ERROR(ENOIOCTLCMD); } diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index bbf3a2c..785e7ec 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -135,6 +135,26 @@ typedef struct compat_xfs_fsop_attrlist_handlereq { #define XFS_IOC_ATTRLIST_BY_HANDLE_32 \ _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq) +/* am_opcodes defined in xfs_fs.h */ +typedef struct compat_xfs_attr_multiop { + __u32 am_opcode; + __s32 am_error; + compat_uptr_t am_attrname; + compat_uptr_t am_attrvalue; + __u32 
am_length; + __u32 am_flags; +} compat_xfs_attr_multiop_t; + +typedef struct compat_xfs_fsop_attrmulti_handlereq { + struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */ + __u32 opcount;/* count of following multiop */ + /* ptr to compat_xfs_attr_multiop */ + compat_uptr_t ops; /* attr_multi data */ +} compat_xfs_fsop_attrmulti_handlereq_t; + +#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \ + _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq) + #ifdef BROKEN_X86_ALIGNMENT /* on ia32 l_start is on a 32-bit boundary */ typedef struct compat_xfs_flock64 { -- cgit v0.10.2 From 710d62aaaf17c841b8bdbc7a775f8910a7160248 Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:16 -0600 Subject: [XFS] Hook up compat XFS_IOC_FSSETDM_BY_HANDLE ioctl handler Add a compat handler for XFS_IOC_FSSETDM_BY_HANDLE. I haven't tested this, lacking dmapi tools to do so (unless xfsqa magically gets this somehow?) Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 17993352a..9234360 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -563,6 +563,46 @@ xfs_compat_attrmulti_by_handle( return -error; } +STATIC int +xfs_compat_fssetdm_by_handle( + xfs_mount_t *mp, + void __user *arg, + struct inode *parinode) +{ + int error; + struct fsdmidata fsd; + compat_xfs_fsop_setdm_handlereq_t dmhreq; + struct inode *inode; + + if (!capable(CAP_MKNOD)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&dmhreq, arg, + sizeof(compat_xfs_fsop_setdm_handlereq_t))) + return -XFS_ERROR(EFAULT); + + error = xfs_vget_fsop_handlereq_compat(mp, parinode, &dmhreq.hreq, + &inode); + if (error) + return -error; + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + error = -XFS_ERROR(EPERM); + goto out; + } + + if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) { + error = -XFS_ERROR(EFAULT); + goto out; + } + + error = -xfs_set_dmattrs(XFS_I(inode), fsd.fsd_dmevmask, + fsd.fsd_dmstate); + +out: + iput(inode); + return error; +} + STATIC long xfs_compat_ioctl( xfs_inode_t *ip, @@ -586,7 +626,6 @@ xfs_compat_ioctl( case XFS_IOC_GETBMAP: case XFS_IOC_GETBMAPA: case XFS_IOC_GETBMAPX: -/* case XFS_IOC_FSSETDM_BY_HANDLE: not handled */ case XFS_IOC_FSCOUNTS: case XFS_IOC_SET_RESBLKS: case XFS_IOC_GET_RESBLKS: @@ -696,6 +735,8 @@ xfs_compat_ioctl( return xfs_compat_attrlist_by_handle(mp, arg, inode); case XFS_IOC_ATTRMULTI_BY_HANDLE_32: return xfs_compat_attrmulti_by_handle(mp, arg, inode); + case XFS_IOC_FSSETDM_BY_HANDLE_32: + return xfs_compat_fssetdm_by_handle(mp, arg, inode); default: return -XFS_ERROR(ENOIOCTLCMD); } diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 785e7ec..af91874 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -155,6 +155,15 @@ typedef struct compat_xfs_fsop_attrmulti_handlereq { #define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \ _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq) +typedef struct compat_xfs_fsop_setdm_handlereq { + struct compat_xfs_fsop_handlereq hreq; /* handle information */ + /* ptr to struct fsdmidata */ + compat_uptr_t data; /* DMAPI data */ +} compat_xfs_fsop_setdm_handlereq_t; + +#define XFS_IOC_FSSETDM_BY_HANDLE_32 \ + _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq) + #ifdef BROKEN_X86_ALIGNMENT /* on ia32 l_start is on a 32-bit boundary */ typedef struct compat_xfs_flock64 { -- cgit v0.10.2 From 
e5d412f17846b0aea9e5250926f994ab2e4e1006 Mon Sep 17 00:00:00 2001 From: "sandeen@sandeen.net" Date: Tue, 25 Nov 2008 21:20:17 -0600 Subject: [XFS] Reorder xfs_ioctl32.c for some tidiness Put things in IMHO a more readable order, now that it's all done; add some comments. Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 9234360..b34b3d8 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -16,11 +16,7 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include -#include #include -#include -#include -#include #include #include "xfs.h" #include "xfs_fs.h" @@ -131,7 +127,7 @@ xfs_inumbers_fmt_compat( #else #define xfs_inumbers_fmt_compat xfs_inumbers_fmt -#endif +#endif /* BROKEN_X86_ALIGNMENT */ STATIC int xfs_ioctl32_bstime_copyin( @@ -617,6 +613,7 @@ xfs_compat_ioctl( xfs_itrace_entry(XFS_I(inode)); switch (cmd) { + /* No size or alignment issues on any arch */ case XFS_IOC_DIOINFO: case XFS_IOC_FSGEOMETRY: case XFS_IOC_FSGETXATTR: @@ -629,35 +626,28 @@ xfs_compat_ioctl( case XFS_IOC_FSCOUNTS: case XFS_IOC_SET_RESBLKS: case XFS_IOC_GET_RESBLKS: - case XFS_IOC_FSGROWFSDATA: case XFS_IOC_FSGROWFSLOG: - case XFS_IOC_FSGROWFSRT: case XFS_IOC_FREEZE: case XFS_IOC_THAW: case XFS_IOC_GOINGDOWN: case XFS_IOC_ERROR_INJECTION: case XFS_IOC_ERROR_CLEARALL: - break; - - case XFS_IOC_GETXFLAGS_32: - case XFS_IOC_SETXFLAGS_32: - case XFS_IOC_GETVERSION_32: - cmd = _NATIVE_IOC(cmd, long); - break; - case XFS_IOC_SWAPEXT: { - struct xfs_swapext sxp; - struct compat_xfs_swapext __user *sxu = arg; - - /* Bulk copy in up to the sx_stat field, then grab bstat */ - if (copy_from_user(&sxp, sxu, - offsetof(xfs_swapext_t, sx_stat)) || - xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) - return -XFS_ERROR(EFAULT); - error = xfs_swapext(&sxp); - return -error; - } -#ifdef BROKEN_X86_ALIGNMENT - /* xfs_flock_t has wrong u32 vs u64 alignment */ + return xfs_ioctl(ip, filp, ioflags, cmd, arg); +#ifndef BROKEN_X86_ALIGNMENT + /* These are handled fine if no alignment issues */ + case XFS_IOC_ALLOCSP: + case XFS_IOC_FREESP: + case XFS_IOC_RESVSP: + case XFS_IOC_UNRESVSP: + case XFS_IOC_ALLOCSP64: + case XFS_IOC_FREESP64: + case XFS_IOC_RESVSP64: + case XFS_IOC_UNRESVSP64: + case XFS_IOC_FSGEOMETRY_V1: + case XFS_IOC_FSGROWFSDATA: + case XFS_IOC_FSGROWFSRT: + return xfs_ioctl(ip, filp, ioflags, cmd, arg); +#else case XFS_IOC_ALLOCSP_32: case XFS_IOC_FREESP_32: case XFS_IOC_ALLOCSP64_32: @@ -691,18 +681,25 @@ xfs_compat_ioctl( error = xfs_growfs_rt(mp, &in); return -error; } -#else /* These are handled fine if no alignment issues */ - case XFS_IOC_ALLOCSP: - case XFS_IOC_FREESP: - case XFS_IOC_RESVSP: - case XFS_IOC_UNRESVSP: - case XFS_IOC_ALLOCSP64: - case XFS_IOC_FREESP64: - case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP64: - case XFS_IOC_FSGEOMETRY_V1: - break; #endif + /* long changes size, but xfs only copiese out 32 bits */ + case XFS_IOC_GETXFLAGS_32: + case XFS_IOC_SETXFLAGS_32: + case XFS_IOC_GETVERSION_32: + cmd = _NATIVE_IOC(cmd, long); + return xfs_ioctl(ip, filp, ioflags, cmd, arg); + case XFS_IOC_SWAPEXT: { + struct xfs_swapext sxp; + struct compat_xfs_swapext __user *sxu = arg; + + /* Bulk copy in up to the sx_stat field, then copy bstat */ + if (copy_from_user(&sxp, sxu, + offsetof(struct xfs_swapext, sx_stat)) || + xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) + return -XFS_ERROR(EFAULT); + error = xfs_swapext(&sxp); + 
return -error; + } case XFS_IOC_FSBULKSTAT_32: case XFS_IOC_FSBULKSTAT_SINGLE_32: case XFS_IOC_FSINUMBERS_32: @@ -740,9 +737,6 @@ xfs_compat_ioctl( default: return -XFS_ERROR(ENOIOCTLCMD); } - - error = xfs_ioctl(ip, filp, ioflags, cmd, arg); - return error; } long -- cgit v0.10.2 From ddcd856d81861a523d79d077facd875da1f66792 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 07:55:34 -0500 Subject: [XFS] fix compile on 32 bit systems The recent compat patches make xfs_file.c include xfs_ioctl32.h unconditional, which breaks the build on 32 bit systems which don't have the various compat defintions. Remove the include and move the defintion of xfs_file_compat_ioctl to xfs_ioctl.h so that we can avoid including all the compat defintions in xfs_file.c Signed-off-by: Christoph Hellwig Tested-by: Kamalesh Babulal Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 72fc8d8..d377db0 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -36,9 +36,9 @@ #include "xfs_inode.h" #include "xfs_error.h" #include "xfs_rw.h" -#include "xfs_ioctl32.h" #include "xfs_vnodeops.h" #include "xfs_da_btree.h" +#include "xfs_ioctl.h" #include #include diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h index f67dc69..a3446aa 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.h +++ b/fs/xfs/linux-2.6/xfs_ioctl.h @@ -67,4 +67,16 @@ xfs_attrmulti_attr_remove( char *name, __uint32_t flags); +extern long +xfs_file_compat_ioctl( + struct file *file, + unsigned int cmd, + unsigned long arg); + +extern long +xfs_file_compat_ioctl_invis( + struct file *file, + unsigned int cmd, + unsigned long arg); + #endif diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index af91874..1024c4f 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -20,9 +20,6 @@ #include -extern long xfs_file_compat_ioctl(struct file *, unsigned, unsigned long); -extern long xfs_file_compat_invis_ioctl(struct file *, unsigned, unsigned long); - /* * on 32-bit arches, ioctl argument structures may have different sizes * and/or alignment. We define compat structures which match the -- cgit v0.10.2 From 2234d54d3d855d6ffae88a24772a9389d6755e0c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:22 +0100 Subject: remove useless mnt_want_write call in xfs_write When mnt_want_write was introduced a call to it was added around xfs_ichgtime, but there is no need for this because a file can't be open read/write on a r/o mount, and a mount can't degrade r/o while we still have files open for writing. As the mnt_want_write changes were never merged into the CVS tree this patch is for mainline only. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 4959c87..59b7d5f 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -51,7 +51,6 @@ #include "xfs_vnodeops.h" #include -#include #include @@ -668,15 +667,8 @@ start: if (new_size > xip->i_size) xip->i_new_size = new_size; - /* - * We're not supposed to change timestamps in readonly-mounted - * filesystems. Throw it away if anyone asks us. 
- */ - if (likely(!(ioflags & IO_INVIS) && - !mnt_want_write(file->f_path.mnt))) { + if (likely(!(ioflags & IO_INVIS))) xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - mnt_drop_write(file->f_path.mnt); - } /* * If the offset is beyond the size of the file, we have a couple -- cgit v0.10.2 From 73e6335c14209e508bec8ca7985d1fbde183bd1f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:23 +0100 Subject: remove unused behvavior cruft in xfs_super.h Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 56dc48a..d5d776d 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -20,24 +20,12 @@ #include -#ifdef CONFIG_XFS_DMAPI -# define vfs_insertdmapi(vfs) vfs_insertops(vfsp, &xfs_dmops) -# define vfs_initdmapi() dmapi_init() -# define vfs_exitdmapi() dmapi_uninit() -#else -# define vfs_insertdmapi(vfs) do { } while (0) -# define vfs_initdmapi() do { } while (0) -# define vfs_exitdmapi() do { } while (0) -#endif - #ifdef CONFIG_XFS_QUOTA -# define vfs_insertquota(vfs) vfs_insertops(vfsp, &xfs_qmops) extern void xfs_qm_init(void); extern void xfs_qm_exit(void); # define vfs_initquota() xfs_qm_init() # define vfs_exitquota() xfs_qm_exit() #else -# define vfs_insertquota(vfs) do { } while (0) # define vfs_initquota() do { } while (0) # define vfs_exitquota() do { } while (0) #endif -- cgit v0.10.2 From ccd0be6cfc6943c4e0b3e3cdb598e0b7354a2d78 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:24 +0100 Subject: remove unused prototypes for xfs_ihash_init / xfs_ihash_free Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ae5800e..c38d450 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -496,8 +496,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) /* * xfs_iget.c prototypes. */ -void xfs_ihash_init(struct xfs_mount *); -void xfs_ihash_free(struct xfs_mount *); xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, struct xfs_trans *); int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, -- cgit v0.10.2 From 5cafdeb2891a415a5dbf0ad80f0afedf8369e6bb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:25 +0100 Subject: cleanup the inode reclaim path Merge xfs_iextract and xfs_idestroy into xfs_ireclaim as they are never called individually. Also rewrite most comments in this area as they were severly out of date. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 27ec241..f58e5e0 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -450,65 +450,109 @@ xfs_iput_new( IRELE(ip); } - /* - * This routine embodies the part of the reclaim code that pulls - * the inode from the inode hash table and the mount structure's - * inode list. - * This should only be called from xfs_reclaim(). + * This is called free all the memory associated with an inode. + * It must free the inode itself and any buffers allocated for + * if_extents/if_data and if_broot. It must also free the lock + * associated with the inode. + * + * Note: because we don't initialise everything on reallocation out + * of the zone, we must ensure we nullify everything correctly before + * freeing the structure. 
*/ void -xfs_ireclaim(xfs_inode_t *ip) +xfs_ireclaim( + struct xfs_inode *ip) { - /* - * Remove from old hash list and mount list. - */ - XFS_STATS_INC(xs_ig_reclaims); + struct xfs_mount *mp = ip->i_mount; + struct xfs_perag *pag; - xfs_iextract(ip); + XFS_STATS_INC(xs_ig_reclaims); /* - * Here we do a spurious inode lock in order to coordinate with inode - * cache radix tree lookups. This is because the lookup can reference - * the inodes in the cache without taking references. We make that OK - * here by ensuring that we wait until the inode is unlocked after the - * lookup before we go ahead and free it. We get both the ilock and - * the iolock because the code may need to drop the ilock one but will - * still hold the iolock. + * Remove the inode from the per-AG radix tree. It doesn't matter + * if it was never added to it because radix_tree_delete can deal + * with that case just fine. */ - xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + pag = xfs_get_perag(mp, ip->i_ino); + write_lock(&pag->pag_ici_lock); + radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); + write_unlock(&pag->pag_ici_lock); + xfs_put_perag(mp, pag); /* - * Release dquots (and their references) if any. An inode may escape - * xfs_inactive and get here via vn_alloc->vn_reclaim path. + * Here we do an (almost) spurious inode lock in order to coordinate + * with inode cache radix tree lookups. This is because the lookup + * can reference the inodes in the cache without taking references. + * + * We make that OK here by ensuring that we wait until the inode is + * unlocked after the lookup before we go ahead and free it. We get + * both the ilock and the iolock because the code may need to drop the + * ilock one but will still hold the iolock. */ - XFS_QM_DQDETACH(ip->i_mount, ip); - + xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); /* - * Free all memory associated with the inode. + * Release dquots (and their references) if any. */ + XFS_QM_DQDETACH(ip->i_mount, ip); xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_idestroy(ip); -} -/* - * This routine removes an about-to-be-destroyed inode from - * all of the lists in which it is located with the exception - * of the behavior chain. - */ -void -xfs_iextract( - xfs_inode_t *ip) -{ - xfs_mount_t *mp = ip->i_mount; - xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + switch (ip->i_d.di_mode & S_IFMT) { + case S_IFREG: + case S_IFDIR: + case S_IFLNK: + xfs_idestroy_fork(ip, XFS_DATA_FORK); + break; + } - write_lock(&pag->pag_ici_lock); - radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); - write_unlock(&pag->pag_ici_lock); - xfs_put_perag(mp, pag); + if (ip->i_afp) + xfs_idestroy_fork(ip, XFS_ATTR_FORK); - mp->m_ireclaims++; +#ifdef XFS_INODE_TRACE + ktrace_free(ip->i_trace); +#endif +#ifdef XFS_BMAP_TRACE + ktrace_free(ip->i_xtrace); +#endif +#ifdef XFS_BTREE_TRACE + ktrace_free(ip->i_btrace); +#endif +#ifdef XFS_RW_TRACE + ktrace_free(ip->i_rwtrace); +#endif +#ifdef XFS_ILOCK_TRACE + ktrace_free(ip->i_lock_trace); +#endif +#ifdef XFS_DIR2_TRACE + ktrace_free(ip->i_dir_trace); +#endif + if (ip->i_itemp) { + /* + * Only if we are shutting down the fs will we see an + * inode still in the AIL. If it is there, we should remove + * it to prevent a use-after-free from occurring. 
+ */ + xfs_log_item_t *lip = &ip->i_itemp->ili_item; + struct xfs_ail *ailp = lip->li_ailp; + + ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || + XFS_FORCED_SHUTDOWN(ip->i_mount)); + if (lip->li_flags & XFS_LI_IN_AIL) { + spin_lock(&ailp->xa_lock); + if (lip->li_flags & XFS_LI_IN_AIL) + xfs_trans_ail_delete(ailp, lip); + else + spin_unlock(&ailp->xa_lock); + } + xfs_inode_item_destroy(ip); + ip->i_itemp = NULL; + } + /* asserts to verify all state is correct here */ + ASSERT(atomic_read(&ip->i_iocount) == 0); + ASSERT(atomic_read(&ip->i_pincount) == 0); + ASSERT(!spin_is_locked(&ip->i_flags_lock)); + ASSERT(completion_done(&ip->i_flush)); + kmem_zone_free(xfs_inode_zone, ip); } /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 872191b..4e664f5 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2450,78 +2450,6 @@ xfs_idestroy_fork( } /* - * This is called free all the memory associated with an inode. - * It must free the inode itself and any buffers allocated for - * if_extents/if_data and if_broot. It must also free the lock - * associated with the inode. - * - * Note: because we don't initialise everything on reallocation out - * of the zone, we must ensure we nullify everything correctly before - * freeing the structure. - */ -void -xfs_idestroy( - xfs_inode_t *ip) -{ - switch (ip->i_d.di_mode & S_IFMT) { - case S_IFREG: - case S_IFDIR: - case S_IFLNK: - xfs_idestroy_fork(ip, XFS_DATA_FORK); - break; - } - if (ip->i_afp) - xfs_idestroy_fork(ip, XFS_ATTR_FORK); - -#ifdef XFS_INODE_TRACE - ktrace_free(ip->i_trace); -#endif -#ifdef XFS_BMAP_TRACE - ktrace_free(ip->i_xtrace); -#endif -#ifdef XFS_BTREE_TRACE - ktrace_free(ip->i_btrace); -#endif -#ifdef XFS_RW_TRACE - ktrace_free(ip->i_rwtrace); -#endif -#ifdef XFS_ILOCK_TRACE - ktrace_free(ip->i_lock_trace); -#endif -#ifdef XFS_DIR2_TRACE - ktrace_free(ip->i_dir_trace); -#endif - if (ip->i_itemp) { - /* - * Only if we are shutting down the fs will we see an - * inode still in the AIL. If it is there, we should remove - * it to prevent a use-after-free from occurring. - */ - xfs_log_item_t *lip = &ip->i_itemp->ili_item; - struct xfs_ail *ailp = lip->li_ailp; - - ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || - XFS_FORCED_SHUTDOWN(ip->i_mount)); - if (lip->li_flags & XFS_LI_IN_AIL) { - spin_lock(&ailp->xa_lock); - if (lip->li_flags & XFS_LI_IN_AIL) - xfs_trans_ail_delete(ailp, lip); - else - spin_unlock(&ailp->xa_lock); - } - xfs_inode_item_destroy(ip); - ip->i_itemp = NULL; - } - /* asserts to verify all state is correct here */ - ASSERT(atomic_read(&ip->i_iocount) == 0); - ASSERT(atomic_read(&ip->i_pincount) == 0); - ASSERT(!spin_is_locked(&ip->i_flags_lock)); - ASSERT(completion_done(&ip->i_flush)); - kmem_zone_free(xfs_inode_zone, ip); -} - - -/* * Increment the pin count of the given buffer. * This value is protected by ipinlock spinlock in the mount structure. 
*/ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index c38d450..03ae96b 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -529,8 +529,6 @@ int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, xfs_fsize_t, int, int); int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); -void xfs_idestroy(xfs_inode_t *); -void xfs_iextract(xfs_inode_t *); void xfs_iext_realloc(xfs_inode_t *, int, int); void xfs_ipin(xfs_inode_t *); void xfs_iunpin(xfs_inode_t *); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 4fce22a..3825255 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -241,7 +241,6 @@ typedef struct xfs_mount { xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ spinlock_t m_agirotor_lock;/* .. and lock protecting it */ xfs_agnumber_t m_maxagi; /* highest inode alloc group */ - uint m_ireclaims; /* count of calls to reclaim*/ uint m_readio_log; /* min read size log bytes */ uint m_readio_blocks; /* min read size blocks */ uint m_writeio_log; /* min write size log bytes */ -- cgit v0.10.2 From 5d765b976c3a41faf9a73718fb8cc5833990a8ef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:26 +0100 Subject: kill xfs_buf_iostart xfs_buf_iostart is a "shared" helper for xfs_buf_read_flags, xfs_bawrite, and xfs_bdwrite - except that there isn't much shared code but rather special cases for each caller. So remove this function and move the functionality to the caller. xfs_bawrite and xfs_bdwrite are now big enough to be moved out of line and the xfs_buf_read_flags is moved into a new helper called _xfs_buf_read. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 36d5fcd..cd89c56 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -630,6 +630,29 @@ xfs_buf_get_flags( return NULL; } +STATIC int +_xfs_buf_read( + xfs_buf_t *bp, + xfs_buf_flags_t flags) +{ + int status; + + XB_TRACE(bp, "_xfs_buf_read", (unsigned long)flags); + + ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); + ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); + + bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ + XBF_READ_AHEAD | _XBF_RUN_QUEUES); + bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \ + XBF_READ_AHEAD | _XBF_RUN_QUEUES); + + status = xfs_buf_iorequest(bp); + if (!status && !(flags & XBF_ASYNC)) + status = xfs_buf_iowait(bp); + return status; +} + xfs_buf_t * xfs_buf_read_flags( xfs_buftarg_t *target, @@ -646,7 +669,7 @@ xfs_buf_read_flags( if (!XFS_BUF_ISDONE(bp)) { XB_TRACE(bp, "read", (unsigned long)flags); XFS_STATS_INC(xb_get_read); - xfs_buf_iostart(bp, flags); + _xfs_buf_read(bp, flags); } else if (flags & XBF_ASYNC) { XB_TRACE(bp, "read_async", (unsigned long)flags); /* @@ -1048,50 +1071,39 @@ xfs_buf_ioerror( XB_TRACE(bp, "ioerror", (unsigned long)error); } -/* - * Initiate I/O on a buffer, based on the flags supplied. - * The b_iodone routine in the buffer supplied will only be called - * when all of the subsidiary I/O requests, if any, have been completed. 
- */ int -xfs_buf_iostart( - xfs_buf_t *bp, - xfs_buf_flags_t flags) +xfs_bawrite( + void *mp, + struct xfs_buf *bp) { - int status = 0; + XB_TRACE(bp, "bawrite", 0); - XB_TRACE(bp, "iostart", (unsigned long)flags); + ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); - if (flags & XBF_DELWRI) { - bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC); - bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC); - xfs_buf_delwri_queue(bp, 1); - return 0; - } + xfs_buf_delwri_dequeue(bp); - bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ - XBF_READ_AHEAD | _XBF_RUN_QUEUES); - bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \ - XBF_READ_AHEAD | _XBF_RUN_QUEUES); + bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD); + bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); + + bp->b_fspriv3 = mp; + bp->b_strat = xfs_bdstrat_cb; + return xfs_bdstrat_cb(bp); +} - BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL); +void +xfs_bdwrite( + void *mp, + struct xfs_buf *bp) +{ + XB_TRACE(bp, "bdwrite", 0); - /* For writes allow an alternate strategy routine to precede - * the actual I/O request (which may not be issued at all in - * a shutdown situation, for example). - */ - status = (flags & XBF_WRITE) ? - xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp); + bp->b_strat = xfs_bdstrat_cb; + bp->b_fspriv3 = mp; - /* Wait for I/O if we are not an async request. - * Note: async I/O request completion will release the buffer, - * and that can already be done by this point. So using the - * buffer pointer from here on, after async I/O, is invalid. - */ - if (!status && !(flags & XBF_ASYNC)) - status = xfs_buf_iowait(bp); + bp->b_flags &= ~XBF_READ; + bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); - return status; + xfs_buf_delwri_queue(bp, 1); } STATIC_INLINE void diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 456519a..0e2aa16 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -214,9 +214,10 @@ extern void xfs_buf_lock(xfs_buf_t *); extern void xfs_buf_unlock(xfs_buf_t *); /* Buffer Read and Write Routines */ +extern int xfs_bawrite(void *mp, xfs_buf_t *bp); +extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); extern void xfs_buf_ioend(xfs_buf_t *, int); extern void xfs_buf_ioerror(xfs_buf_t *, int); -extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t); extern int xfs_buf_iorequest(xfs_buf_t *); extern int xfs_buf_iowait(xfs_buf_t *); extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t, @@ -366,14 +367,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); #define XFS_BUF_TARGET(bp) ((bp)->b_target) #define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target) -static inline int xfs_bawrite(void *mp, xfs_buf_t *bp) -{ - bp->b_fspriv3 = mp; - bp->b_strat = xfs_bdstrat_cb; - xfs_buf_delwri_dequeue(bp); - return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); -} - static inline void xfs_buf_relse(xfs_buf_t *bp) { if (!bp->b_relse) @@ -414,17 +407,6 @@ static inline int XFS_bwrite(xfs_buf_t *bp) return error; } -/* - * No error can be returned from xfs_buf_iostart for delwri - * buffers as they are queued and no I/O is issued. 
- */ -static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp) -{ - bp->b_strat = xfs_bdstrat_cb; - bp->b_fspriv3 = mp; - (void)xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC); -} - #define XFS_bdstrat(bp) xfs_buf_iorequest(bp) #define xfs_iowait(bp) xfs_buf_iowait(bp) -- cgit v0.10.2 From d9424b3c4a1e96f87c6cfd4d8dd2f8d9bbb4dcc5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:27 +0100 Subject: stop using igrab in xfs_vn_link ->link is guranteed to get an already reference inode passed so we can do a simple increment of i_count instead of using igrab and thus avoid banging on the global inode_lock. This is what most filesystems already do. Also move the increment after the call to xfs_link to simplify error handling. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 2903faf..76b570d 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -367,21 +367,18 @@ xfs_vn_link( struct inode *dir, struct dentry *dentry) { - struct inode *inode; /* inode of guy being linked to */ + struct inode *inode = old_dentry->d_inode; struct xfs_name name; int error; - inode = old_dentry->d_inode; xfs_dentry_to_name(&name, dentry); - igrab(inode); error = xfs_link(XFS_I(dir), XFS_I(inode), &name); - if (unlikely(error)) { - iput(inode); + if (unlikely(error)) return -error; - } xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); + atomic_inc(&inode->i_count); d_instantiate(dentry, inode); return 0; } -- cgit v0.10.2 From 39e2defe73106ca2e1c85e5286038a0a13f49513 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:28 +0100 Subject: reduce l_icloglock roundtrips All but one caller of xlog_state_want_sync drop and re-acquire l_icloglock around the call to it, just so that xlog_state_want_sync can acquire and drop it. Move all lock operation out of l_icloglock and assert that the lock is held when it is called. Note that it would make sense to extende this scheme to xlog_state_release_iclog, but the locking in there is more complicated and we'd like to keep the atomic_dec_and_lock optmization for those callers not having l_icloglock yet. 
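As an aside, the pattern applied here is the usual lock-hoisting one: the caller takes l_icloglock once around the whole sequence and the callee only asserts that the lock is held instead of acquiring and dropping it itself. A minimal user-space sketch of that pattern follows; all names in it are invented for illustration and are not taken from the XFS sources.

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t icloglock = PTHREAD_MUTEX_INITIALIZER;
static bool icloglock_held;		/* stand-in for spin_is_locked() */
static int  iclog_state;

/* Callee no longer takes the lock itself; it only asserts it is held. */
static void state_want_sync_locked(void)
{
	assert(icloglock_held);
	iclog_state = 1;		/* e.g. switch to WANT_SYNC */
}

int main(void)
{
	pthread_mutex_lock(&icloglock);
	icloglock_held = true;
	state_want_sync_locked();	/* no drop/re-acquire round trip */
	icloglock_held = false;
	pthread_mutex_unlock(&icloglock);
	return 0;
}

The diff below does exactly this for xlog_state_want_sync: the spin_lock()/spin_unlock() pair moves out to the call sites and the function itself gains an ASSERT(spin_is_locked(&log->l_icloglock)).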
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index aadaa14..f4726f7 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -729,8 +729,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) spin_lock(&log->l_icloglock); iclog = log->l_iclog; atomic_inc(&iclog->ic_refcnt); - spin_unlock(&log->l_icloglock); xlog_state_want_sync(log, iclog); + spin_unlock(&log->l_icloglock); error = xlog_state_release_iclog(log, iclog); spin_lock(&log->l_icloglock); @@ -767,9 +767,9 @@ xfs_log_unmount_write(xfs_mount_t *mp) spin_lock(&log->l_icloglock); iclog = log->l_iclog; atomic_inc(&iclog->ic_refcnt); - spin_unlock(&log->l_icloglock); xlog_state_want_sync(log, iclog); + spin_unlock(&log->l_icloglock); error = xlog_state_release_iclog(log, iclog); spin_lock(&log->l_icloglock); @@ -1984,7 +1984,9 @@ xlog_write(xfs_mount_t * mp, if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) { xlog_state_finish_copy(log, iclog, record_cnt, data_cnt); record_cnt = data_cnt = 0; + spin_lock(&log->l_icloglock); xlog_state_want_sync(log, iclog); + spin_unlock(&log->l_icloglock); if (commit_iclog) { ASSERT(flags & XLOG_COMMIT_TRANS); *commit_iclog = iclog; @@ -3193,7 +3195,7 @@ try_again: STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) { - spin_lock(&log->l_icloglock); + ASSERT(spin_is_locked(&log->l_icloglock)); if (iclog->ic_state == XLOG_STATE_ACTIVE) { xlog_state_switch_iclogs(log, iclog, 0); @@ -3201,10 +3203,7 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) ASSERT(iclog->ic_state & (XLOG_STATE_WANT_SYNC|XLOG_STATE_IOERROR)); } - - spin_unlock(&log->l_icloglock); -} /* xlog_state_want_sync */ - +} /***************************************************************************** -- cgit v0.10.2 From 63ad2a5c4cf37e3242142eee8a8dcd4a8515302e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:29 +0100 Subject: remove dead code from sv_t implementation Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h index 351a8f4..4dfc7c3 100644 --- a/fs/xfs/linux-2.6/sv.h +++ b/fs/xfs/linux-2.6/sv.h @@ -32,23 +32,15 @@ typedef struct sv_s { wait_queue_head_t waiters; } sv_t; -#define SV_FIFO 0x0 /* sv_t is FIFO type */ -#define SV_LIFO 0x2 /* sv_t is LIFO type */ -#define SV_PRIO 0x4 /* sv_t is PRIO type */ -#define SV_KEYED 0x6 /* sv_t is KEYED type */ -#define SV_DEFAULT SV_FIFO - - -static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state, - unsigned long timeout) +static inline void _sv_wait(sv_t *sv, spinlock_t *lock) { DECLARE_WAITQUEUE(wait, current); add_wait_queue_exclusive(&sv->waiters, &wait); - __set_current_state(state); + __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock(lock); - schedule_timeout(timeout); + schedule(); remove_wait_queue(&sv->waiters, &wait); } @@ -58,13 +50,7 @@ static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state, #define sv_destroy(sv) \ /*NOTHING*/ #define sv_wait(sv, pri, lock, s) \ - _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) -#define sv_wait_sig(sv, pri, lock, s) \ - _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) -#define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \ - _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts)) -#define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \ - _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts)) + _sv_wait(sv, lock) #define sv_signal(sv) \ 
wake_up(&(sv)->waiters) #define sv_broadcast(sv) \ -- cgit v0.10.2 From df6771bde14551eceeacf331666a92735e0773ac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:30 +0100 Subject: kill dead quota flags Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 12c4ec7..48965ec 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -84,11 +84,9 @@ typedef struct xfs_dqblk { #define XFS_DQ_USER 0x0001 /* a user quota */ #define XFS_DQ_PROJ 0x0002 /* project quota */ #define XFS_DQ_GROUP 0x0004 /* a group quota */ -#define XFS_DQ_FLOCKED 0x0008 /* flush lock taken */ -#define XFS_DQ_DIRTY 0x0010 /* dquot is dirty */ -#define XFS_DQ_WANT 0x0020 /* for lookup/reclaim race */ -#define XFS_DQ_INACTIVE 0x0040 /* dq off mplist & hashlist */ -#define XFS_DQ_MARKER 0x0080 /* sentinel */ +#define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */ +#define XFS_DQ_WANT 0x0010 /* for lookup/reclaim race */ +#define XFS_DQ_INACTIVE 0x0020 /* dq off mplist & hashlist */ #define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) -- cgit v0.10.2 From 5efcbb853bc2f051d720a191268f8dd901fea9c2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:31 +0100 Subject: cleanup xfs_sb.h feature flag helpers The various inlines in xfs_sb.h that deal with the superblock version and fature flags were converted from macros a while ago, and this show by the odd coding style full of useless braces and backslashes and the avoidance of conditionals. Clean these up to look like normal C code. Signed-off-by: Christoph Hellwig Reviewed-by: Donald Douwsma Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 3f8cf15..e123c14 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -296,30 +296,34 @@ typedef enum { #define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS) -#ifdef __KERNEL__ static inline int xfs_sb_good_version(xfs_sb_t *sbp) { - return (((sbp->sb_versionnum >= XFS_SB_VERSION_1) && \ - (sbp->sb_versionnum <= XFS_SB_VERSION_3)) || \ - ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - !((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) || \ - ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && \ - (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS))) && \ - (sbp->sb_shared_vn <= XFS_SB_MAX_SHARED_VN))); -} + /* We always support version 1-3 */ + if (sbp->sb_versionnum >= XFS_SB_VERSION_1 && + sbp->sb_versionnum <= XFS_SB_VERSION_3) + return 1; + + /* We support version 4 if all feature bits are supported */ + if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) { + if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) || + ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && + (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS))) + return 0; + +#ifdef __KERNEL__ + if (sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN) + return 0; #else -static inline int xfs_sb_good_version(xfs_sb_t *sbp) -{ - return (((sbp->sb_versionnum >= XFS_SB_VERSION_1) && \ - (sbp->sb_versionnum <= XFS_SB_VERSION_3)) || \ - ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - !((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) || \ - ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && \ - (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS))) && \ - (!(sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT) || \ - (sbp->sb_shared_vn <= XFS_SB_MAX_SHARED_VN)))); + if ((sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT) && + sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN) + return 0; +#endif + + 
return 1; + } + + return 0; } -#endif /* __KERNEL__ */ /* * Detect a mismatched features2 field. Older kernels read/wrote @@ -332,123 +336,127 @@ static inline int xfs_sb_has_mismatched_features2(xfs_sb_t *sbp) static inline unsigned xfs_sb_version_tonew(unsigned v) { - return ((((v) == XFS_SB_VERSION_1) ? \ - 0 : \ - (((v) == XFS_SB_VERSION_2) ? \ - XFS_SB_VERSION_ATTRBIT : \ - (XFS_SB_VERSION_ATTRBIT | XFS_SB_VERSION_NLINKBIT))) | \ - XFS_SB_VERSION_4); + if (v == XFS_SB_VERSION_1) + return XFS_SB_VERSION_4; + + if (v == XFS_SB_VERSION_2) + return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT; + + return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT | + XFS_SB_VERSION_NLINKBIT; } static inline unsigned xfs_sb_version_toold(unsigned v) { - return (((v) & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) ? \ - 0 : \ - (((v) & XFS_SB_VERSION_NLINKBIT) ? \ - XFS_SB_VERSION_3 : \ - (((v) & XFS_SB_VERSION_ATTRBIT) ? \ - XFS_SB_VERSION_2 : \ - XFS_SB_VERSION_1))); + if (v & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) + return 0; + if (v & XFS_SB_VERSION_NLINKBIT) + return XFS_SB_VERSION_3; + if (v & XFS_SB_VERSION_ATTRBIT) + return XFS_SB_VERSION_2; + return XFS_SB_VERSION_1; } static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp) { - return ((sbp)->sb_versionnum == XFS_SB_VERSION_2) || \ - ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \ - ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_ATTRBIT)); + return sbp->sb_versionnum == XFS_SB_VERSION_2 || + sbp->sb_versionnum == XFS_SB_VERSION_3 || + (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT)); } static inline void xfs_sb_version_addattr(xfs_sb_t *sbp) { - (sbp)->sb_versionnum = (((sbp)->sb_versionnum == XFS_SB_VERSION_1) ? \ - XFS_SB_VERSION_2 : \ - ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) ? \ - ((sbp)->sb_versionnum | XFS_SB_VERSION_ATTRBIT) : \ - (XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT))); + if (sbp->sb_versionnum == XFS_SB_VERSION_1) + sbp->sb_versionnum = XFS_SB_VERSION_2; + else if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) + sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT; + else + sbp->sb_versionnum = XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT; } static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp) { - return ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \ - ((XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_NLINKBIT)); + return sbp->sb_versionnum == XFS_SB_VERSION_3 || + (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT)); } static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp) { - (sbp)->sb_versionnum = ((sbp)->sb_versionnum <= XFS_SB_VERSION_2 ? \ - XFS_SB_VERSION_3 : \ - ((sbp)->sb_versionnum | XFS_SB_VERSION_NLINKBIT)); + if (sbp->sb_versionnum <= XFS_SB_VERSION_2) + sbp->sb_versionnum = XFS_SB_VERSION_3; + else + sbp->sb_versionnum |= XFS_SB_VERSION_NLINKBIT; } static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_QUOTABIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT); } static inline void xfs_sb_version_addquota(xfs_sb_t *sbp) { - (sbp)->sb_versionnum = \ - (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 ? 
\ - ((sbp)->sb_versionnum | XFS_SB_VERSION_QUOTABIT) : \ - (xfs_sb_version_tonew((sbp)->sb_versionnum) | \ - XFS_SB_VERSION_QUOTABIT)); + if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) + sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT; + else + sbp->sb_versionnum = xfs_sb_version_tonew(sbp->sb_versionnum) | + XFS_SB_VERSION_QUOTABIT; } static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_ALIGNBIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT); } static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); } static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT); } static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_DIRV2BIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT); } static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_LOGV2BIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT); } static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT); } static inline int xfs_sb_version_hassector(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT); } static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT); } static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) { - return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ - ((sbp)->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && + (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); } /* @@ -463,22 +471,20 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp) { - return (xfs_sb_version_hasmorebits(sbp) && \ - ((sbp)->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT)); + return xfs_sb_version_hasmorebits(sbp) && + (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT); } static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp) { - return (xfs_sb_version_hasmorebits(sbp)) && \ - ((sbp)->sb_features2 & XFS_SB_VERSION2_ATTR2BIT); + return xfs_sb_version_hasmorebits(sbp) && + (sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT); } static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp) { - ((sbp)->sb_versionnum = \ - ((sbp)->sb_versionnum | 
XFS_SB_VERSION_MOREBITSBIT), \ - ((sbp)->sb_features2 = \ - ((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT))); + sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT; + sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT; } static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp) -- cgit v0.10.2 From 6bd16ff27060819d16b3e7abe59b6644b349aea3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:32 +0100 Subject: kill dead inode flags There are a few inode flags around that aren't used anywhere, so remove them. Also update xfsidbg to display all used inode flags correctly. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 8fbc97d..bb224d0 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -317,14 +317,9 @@ xfs_map_blocks( xfs_iomap_t *mapp, int flags) { - xfs_inode_t *ip = XFS_I(inode); - int error, nmaps = 1; - - error = xfs_iomap(ip, offset, count, - flags, mapp, &nmaps); - if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE))) - xfs_iflags_set(ip, XFS_IMODIFIED); - return -error; + int nmaps = 1; + + return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps); } STATIC_INLINE int diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index d377db0..f999d20 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -281,11 +281,8 @@ xfs_file_ioctl( unsigned int cmd, unsigned long p) { - int error; struct inode *inode = filp->f_path.dentry->d_inode; - error = xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p); - xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED); /* NOTE: some of the ioctl's return positive #'s as a * byte count indicating success, such as @@ -293,7 +290,7 @@ xfs_file_ioctl( * like most other routines. This means true * errors need to be returned as a negative value. */ - return error; + return xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p); } STATIC long @@ -302,11 +299,8 @@ xfs_file_ioctl_invis( unsigned int cmd, unsigned long p) { - int error; struct inode *inode = filp->f_path.dentry->d_inode; - error = xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p); - xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED); /* NOTE: some of the ioctl's return positive #'s as a * byte count indicating success, such as @@ -314,7 +308,7 @@ xfs_file_ioctl_invis( * like most other routines. This means true * errors need to be returned as a negative value. 
*/ - return error; + return xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p); } /* diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 76b570d..7aa53fe 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -159,8 +159,6 @@ xfs_init_security( } error = xfs_attr_set(ip, name, value, length, ATTR_SECURE); - if (!error) - xfs_iflags_set(ip, XFS_IMODIFIED); kfree(name); kfree(value); @@ -261,7 +259,6 @@ xfs_vn_mknod( error = _ACL_INHERIT(inode, mode, default_acl); if (unlikely(error)) goto out_cleanup_inode; - xfs_iflags_set(ip, XFS_IMODIFIED); _ACL_FREE(default_acl); } @@ -377,7 +374,6 @@ xfs_vn_link( if (unlikely(error)) return -error; - xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); atomic_inc(&inode->i_count); d_instantiate(dentry, inode); return 0; @@ -888,7 +884,6 @@ xfs_setup_inode( inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; xfs_diflags_to_iflags(inode, ip); - xfs_iflags_clear(ip, XFS_IMODIFIED); switch (inode->i_mode & S_IFMT) { case S_IFREG: diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 5389f07..37f2d11 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1025,7 +1025,6 @@ xfs_fs_clear_inode( XFS_STATS_DEC(vn_active); xfs_inactive(ip); - xfs_iflags_clear(ip, XFS_IMODIFIED); } STATIC void diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index f58e5e0..82eb8ed 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -362,7 +362,6 @@ again: } xfs_put_perag(mp, pag); - xfs_iflags_set(ip, XFS_IMODIFIED); *ipp = ip; ASSERT(ip->i_df.if_ext_max == diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 03ae96b..ddac1b8 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -403,17 +403,12 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) /* * In-core inode flags. */ -#define XFS_IGRIO 0x0001 /* inode used for guaranteed rate i/o */ -#define XFS_IUIOSZ 0x0002 /* inode i/o sizes have been explicitly set */ -#define XFS_IQUIESCE 0x0004 /* we have started quiescing for this inode */ -#define XFS_IRECLAIM 0x0008 /* we have started reclaiming this inode */ -#define XFS_ISTALE 0x0010 /* inode has been staled */ -#define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */ -#define XFS_INEW 0x0040 -#define XFS_IFILESTREAM 0x0080 /* inode is in a filestream directory */ -#define XFS_IMODIFIED 0x0100 /* XFS inode state possibly differs */ - /* to the Linux inode state. */ -#define XFS_ITRUNCATED 0x0200 /* truncated down so flush-on-close */ +#define XFS_IRECLAIM 0x0001 /* we have started reclaiming this inode */ +#define XFS_ISTALE 0x0002 /* inode has been staled */ +#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ +#define XFS_INEW 0x0008 /* inode has just been allocated */ +#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ +#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ /* * Flags for inode locking. 
-- cgit v0.10.2 From e88f11abe09d14718b82a991db118c5e485aa897 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:33 +0100 Subject: remove unused m_inode_quiesce member from struct xfs_mount Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 3825255..cfdac80 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -302,7 +302,6 @@ typedef struct xfs_mount { int m_attr_magicpct;/* 37% of the blocksize */ int m_dir_magicpct; /* 37% of the dir blocksize */ __uint8_t m_mk_sharedro; /* mark shared ro on unmount */ - __uint8_t m_inode_quiesce;/* call quiesce on new inodes. */ __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ int m_dirblksize; /* directory block sz--bytes */ -- cgit v0.10.2 From b56757becf8bc62292263a24a23cf55edb4be55f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:34 +0100 Subject: remove leftovers of shared read-only support We never supported shared read-only filesystems, so remove the dead code left over from IRIX for it. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 37f2d11..c4f788e 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1388,35 +1388,6 @@ xfs_finish_flags( return XFS_ERROR(EROFS); } -#if 0 /* shared mounts were never supported on Linux */ - /* - * check for shared mount. - */ - if (ap->flags & XFSMNT_SHARED) { - if (!xfs_sb_version_hasshared(&mp->m_sb)) - return XFS_ERROR(EINVAL); - - /* - * For IRIX 6.5, shared mounts must have the shared - * version bit set, have the persistent readonly - * field set, must be version 0 and can only be mounted - * read-only. - */ - if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) || - (mp->m_sb.sb_shared_vn != 0)) - return XFS_ERROR(EINVAL); - - mp->m_flags |= XFS_MOUNT_SHARED; - - /* - * Shared XFS V0 can't deal with DMI. Return EINVAL. - */ - if (mp->m_sb.sb_shared_vn == 0 && - (mp->m_flags & XFS_MOUNT_DMAPI)) - return XFS_ERROR(EINVAL); - } -#endif - return 0; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 3f999b1..1672ff5 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1352,24 +1352,6 @@ xfs_log_sbcount( return error; } -STATIC void -xfs_mark_shared_ro( - xfs_mount_t *mp, - xfs_buf_t *bp) -{ - xfs_dsb_t *sb = XFS_BUF_TO_SBP(bp); - __uint16_t version; - - if (!(sb->sb_flags & XFS_SBF_READONLY)) - sb->sb_flags |= XFS_SBF_READONLY; - - version = be16_to_cpu(sb->sb_versionnum); - if ((version & XFS_SB_VERSION_NUMBITS) != XFS_SB_VERSION_4 || - !(version & XFS_SB_VERSION_SHAREDBIT)) - version |= XFS_SB_VERSION_SHAREDBIT; - sb->sb_versionnum = cpu_to_be16(version); -} - int xfs_unmountfs_writesb(xfs_mount_t *mp) { @@ -1385,12 +1367,6 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) sbp = xfs_getsb(mp, 0); - /* - * mark shared-readonly if desired - */ - if (mp->m_mk_sharedro) - xfs_mark_shared_ro(mp, sbp); - XFS_BUF_UNDONE(sbp); XFS_BUF_UNREAD(sbp); XFS_BUF_UNDELAYWRITE(sbp); @@ -1402,8 +1378,6 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) if (error) xfs_ioerror_alert("xfs_unmountfs_writesb", mp, sbp, XFS_BUF_ADDR(sbp)); - if (error && mp->m_mk_sharedro) - xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. 
Filesystem may not be marked shared readonly"); xfs_buf_relse(sbp); } return error; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index cfdac80..2ab2df5 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -301,7 +301,6 @@ typedef struct xfs_mount { int m_sinoalign; /* stripe unit inode alignment */ int m_attr_magicpct;/* 37% of the blocksize */ int m_dir_magicpct; /* 37% of the dir blocksize */ - __uint8_t m_mk_sharedro; /* mark shared ro on unmount */ __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ int m_dirblksize; /* directory block sz--bytes */ @@ -349,7 +348,6 @@ typedef struct xfs_mount { #define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */ #define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */ #define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */ -#define XFS_MOUNT_SHARED (1ULL << 11) /* shared mount */ #define XFS_MOUNT_DFLT_IOSIZE (1ULL << 12) /* set default i/o size */ #define XFS_MOUNT_OSYNCISOSYNC (1ULL << 13) /* o_sync is REALLY o_sync */ /* osyncisdsync is now default*/ -- cgit v0.10.2 From 070c4616ec62fc207e2aeef9d0f28af294c651d0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:35 +0100 Subject: use xfs_trans_ijoin in xfs_trans_iget Use xfs_trans_ijoin in xfs_trans_iget in case we need to join an inode into a transaction instead of opencoding it. Based on a discussion with and an incomplete patch from Niv Sardi. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 2a1c0f0..23d276a 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -85,7 +85,6 @@ xfs_trans_iget( { int error; xfs_inode_t *ip; - xfs_inode_log_item_t *iip; /* * If the transaction pointer is NULL, just call the normal @@ -138,34 +137,7 @@ xfs_trans_iget( } ASSERT(ip != NULL); - /* - * Get a log_item_desc to point at the new item. - */ - if (ip->i_itemp == NULL) - xfs_inode_item_init(ip, mp); - iip = ip->i_itemp; - (void) xfs_trans_add_item(tp, (xfs_log_item_t *)(iip)); - - xfs_trans_inode_broot_debug(ip); - - /* - * If the IO lock has been acquired, mark that in - * the inode log item so we'll know to unlock it - * when the transaction commits. - */ - ASSERT(iip->ili_flags == 0); - if (lock_flags & XFS_IOLOCK_EXCL) { - iip->ili_flags |= XFS_ILI_IOLOCKED_EXCL; - } else if (lock_flags & XFS_IOLOCK_SHARED) { - iip->ili_flags |= XFS_ILI_IOLOCKED_SHARED; - } - - /* - * Initialize i_transp so we can find it with xfs_inode_incore() - * above. - */ - ip->i_transp = tp; - + xfs_trans_ijoin(tp, ip, lock_flags); *ipp = ip; return 0; } -- cgit v0.10.2 From e57481dc269cd3773b22f53bfb869308780a7bf1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:36 +0100 Subject: no explicit xfs_iflush for special inodes during unmount Currently we explicitly call xfs_iflush on the quota, real-time and root inodes from xfs_unmount_flush. But we just called xfs_sync_inodes with SYNC_ATTR and do an XFS_bflush aka xfs_flush_buftarg to make sure all inodes are on disk already, so there is no need for these special cases. 
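For readability, the net effect on xfs_qm_unmount_quotas() is shown here pulled out of the interleaved hunk below as a plain sketch (not a patch hunk; names are taken from the diff that follows). The function now only drops the quota inode references, because the earlier xfs_sync_inodes(SYNC_ATTR) plus XFS_bflush() already wrote the inodes back:

	/*
	 * Release the quota inodes; no explicit xfs_iflush() is needed
	 * here any more, the sync path has already written them out.
	 */
	if (mp->m_quotainfo) {
		if (mp->m_quotainfo->qi_uquotaip) {
			IRELE(mp->m_quotainfo->qi_uquotaip);
			mp->m_quotainfo->qi_uquotaip = NULL;
		}
		if (mp->m_quotainfo->qi_gquotaip) {
			IRELE(mp->m_quotainfo->qi_gquotaip);
			mp->m_quotainfo->qi_gquotaip = NULL;
		}
	}
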
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index fb49e0f..314ca18 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -65,11 +65,6 @@ extern void vn_iowait(struct xfs_inode *ip); extern void vn_iowake(struct xfs_inode *ip); extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l); -static inline int vn_count(struct inode *vp) -{ - return atomic_read(&vp->i_count); -} - #define IHOLD(ip) \ do { \ ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 5b198d1..6b13960 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -395,13 +395,10 @@ xfs_qm_mount_quotas( /* * Called from the vfsops layer. */ -int +void xfs_qm_unmount_quotas( xfs_mount_t *mp) { - xfs_inode_t *uqp, *gqp; - int error = 0; - /* * Release the dquots that root inode, et al might be holding, * before we flush quotas and blow away the quotainfo structure. @@ -414,43 +411,18 @@ xfs_qm_unmount_quotas( xfs_qm_dqdetach(mp->m_rsumip); /* - * Flush out the quota inodes. + * Release the quota inodes. */ - uqp = gqp = NULL; if (mp->m_quotainfo) { - if ((uqp = mp->m_quotainfo->qi_uquotaip) != NULL) { - xfs_ilock(uqp, XFS_ILOCK_EXCL); - xfs_iflock(uqp); - error = xfs_iflush(uqp, XFS_IFLUSH_SYNC); - xfs_iunlock(uqp, XFS_ILOCK_EXCL); - if (unlikely(error == EFSCORRUPTED)) { - XFS_ERROR_REPORT("xfs_qm_unmount_quotas(1)", - XFS_ERRLEVEL_LOW, mp); - goto out; - } + if (mp->m_quotainfo->qi_uquotaip) { + IRELE(mp->m_quotainfo->qi_uquotaip); + mp->m_quotainfo->qi_uquotaip = NULL; } - if ((gqp = mp->m_quotainfo->qi_gquotaip) != NULL) { - xfs_ilock(gqp, XFS_ILOCK_EXCL); - xfs_iflock(gqp); - error = xfs_iflush(gqp, XFS_IFLUSH_SYNC); - xfs_iunlock(gqp, XFS_ILOCK_EXCL); - if (unlikely(error == EFSCORRUPTED)) { - XFS_ERROR_REPORT("xfs_qm_unmount_quotas(2)", - XFS_ERRLEVEL_LOW, mp); - goto out; - } + if (mp->m_quotainfo->qi_gquotaip) { + IRELE(mp->m_quotainfo->qi_gquotaip); + mp->m_quotainfo->qi_gquotaip = NULL; } } - if (uqp) { - IRELE(uqp); - mp->m_quotainfo->qi_uquotaip = NULL; - } - if (gqp) { - IRELE(gqp); - mp->m_quotainfo->qi_gquotaip = NULL; - } -out: - return XFS_ERROR(error); } /* diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index 4f2de977..ddf0916 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -167,7 +167,7 @@ extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); extern void xfs_qm_mount_quotas(xfs_mount_t *); extern int xfs_qm_quotacheck(xfs_mount_t *); extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); -extern int xfs_qm_unmount_quotas(xfs_mount_t *); +extern void xfs_qm_unmount_quotas(xfs_mount_t *); extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); extern int xfs_qm_sync(xfs_mount_t *, int); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 2ab2df5..4f64fd1 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -117,7 +117,7 @@ struct xfs_quotainfo; typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *); typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint); -typedef int (*xfs_qmunmount_t)(struct xfs_mount *); +typedef void (*xfs_qmunmount_t)(struct xfs_mount *); typedef void (*xfs_qmdone_t)(struct xfs_mount *); typedef void (*xfs_dqrele_t)(struct xfs_dquot *); typedef int (*xfs_dqattach_t)(struct xfs_inode *, uint); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index cad3da3..559fb8d 100644 --- a/fs/xfs/xfs_vfsops.c +++ 
b/fs/xfs/xfs_vfsops.c @@ -68,74 +68,16 @@ xfs_unmount_flush( rid of. */ int relocation) /* Called from vfs relocation. */ { - xfs_inode_t *rip = mp->m_rootip; - xfs_inode_t *rbmip; - xfs_inode_t *rsumip = NULL; - int error; - - xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); - xfs_iflock(rip); - - /* - * Flush out the real time inodes. - */ - if ((rbmip = mp->m_rbmip) != NULL) { - xfs_ilock(rbmip, XFS_ILOCK_EXCL); - xfs_iflock(rbmip); - error = xfs_iflush(rbmip, XFS_IFLUSH_SYNC); - xfs_iunlock(rbmip, XFS_ILOCK_EXCL); - - if (error == EFSCORRUPTED) - goto fscorrupt_out; - - ASSERT(vn_count(VFS_I(rbmip)) == 1); - - rsumip = mp->m_rsumip; - xfs_ilock(rsumip, XFS_ILOCK_EXCL); - xfs_iflock(rsumip); - error = xfs_iflush(rsumip, XFS_IFLUSH_SYNC); - xfs_iunlock(rsumip, XFS_ILOCK_EXCL); - - if (error == EFSCORRUPTED) - goto fscorrupt_out; - - ASSERT(vn_count(VFS_I(rsumip)) == 1); - } - - /* - * Synchronously flush root inode to disk - */ - error = xfs_iflush(rip, XFS_IFLUSH_SYNC); - if (error == EFSCORRUPTED) - goto fscorrupt_out2; - - if (vn_count(VFS_I(rip)) != 1 && !relocation) { - xfs_iunlock(rip, XFS_ILOCK_EXCL); - return XFS_ERROR(EBUSY); - } - /* * Release dquot that rootinode, rbmino and rsumino might be holding, * flush and purge the quota inodes. */ - error = XFS_QM_UNMOUNT(mp); - if (error == EFSCORRUPTED) - goto fscorrupt_out2; + XFS_QM_UNMOUNT(mp); - if (rbmip) { - IRELE(rbmip); - IRELE(rsumip); - } + if (mp->m_rbmip) + IRELE(mp->m_rbmip); + if (mp->m_rsumip) + IRELE(mp->m_rsumip); - xfs_iunlock(rip, XFS_ILOCK_EXCL); return 0; - -fscorrupt_out: - xfs_ifunlock(rip); - -fscorrupt_out2: - xfs_iunlock(rip, XFS_ILOCK_EXCL); - - return XFS_ERROR(EFSCORRUPTED); } - -- cgit v0.10.2 From f95099ba5ae06b96a9c17ef93cc655f686d79077 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:37 +0100 Subject: kill xfs_unmount_flush There's almost nothing left in this function, instead remove the IRELE on the real times inodes and the call to XFS_QM_UNMOUNT into xfs_unmountfs. For the regular unmount case that means it now also happenes after dmapi notification, but otherwise there is no difference in behaviour. 
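Condensed, the code that xfs_unmountfs() gains looks like this (a flattened sketch of the xfs_mount.c hunk below, not an additional patch):

	/*
	 * Release dquot that rootinode, rbmino and rsumino might be holding,
	 * and release the quota inodes.
	 */
	XFS_QM_UNMOUNT(mp);

	if (mp->m_rbmip)
		IRELE(mp->m_rbmip);
	if (mp->m_rsumip)
		IRELE(mp->m_rsumip);
	IRELE(mp->m_rootip);
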
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 51b87de..a5a9ef0 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -85,7 +85,6 @@ xfs-y += xfs_alloc.o \ xfs_trans_inode.o \ xfs_trans_item.o \ xfs_utils.o \ - xfs_vfsops.o \ xfs_vnodeops.o \ xfs_rw.o \ xfs_dmops.o \ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index c4f788e..4ebbd68 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1043,7 +1043,6 @@ xfs_fs_put_super( struct xfs_mount *mp = XFS_M(sb); struct xfs_inode *rip = mp->m_rootip; int unmount_event_flags = 0; - int error; xfs_syncd_stop(mp); xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI); @@ -1071,8 +1070,6 @@ xfs_fs_put_super( xfs_filestream_unmount(mp); XFS_bflush(mp->m_ddev_targp); - error = xfs_unmount_flush(mp, 0); - WARN_ON(error); if (mp->m_flags & XFS_MOUNT_DMAPI) { XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0, @@ -1535,8 +1532,6 @@ xfs_fs_fill_super( xfs_filestream_unmount(mp); XFS_bflush(mp->m_ddev_targp); - error = xfs_unmount_flush(mp, 0); - WARN_ON(error); xfs_unmountfs(mp); goto out_free_sb; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 1672ff5..3c97c64 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1220,6 +1220,16 @@ xfs_unmountfs( __uint64_t resblks; int error; + /* + * Release dquot that rootinode, rbmino and rsumino might be holding, + * and release the quota inodes. + */ + XFS_QM_UNMOUNT(mp); + + if (mp->m_rbmip) + IRELE(mp->m_rbmip); + if (mp->m_rsumip) + IRELE(mp->m_rsumip); IRELE(mp->m_rootip); /* diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 4f64fd1..ae5da88 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -509,7 +509,6 @@ extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); extern void xfs_unmountfs(xfs_mount_t *); extern int xfs_unmountfs_writesb(xfs_mount_t *); -extern int xfs_unmount_flush(xfs_mount_t *, int); extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t, int64_t, int); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c deleted file mode 100644 index 559fb8d..0000000 --- a/fs/xfs/xfs_vfsops.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" -#include "xfs_mount.h" -#include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_inode_item.h" -#include "xfs_btree.h" -#include "xfs_alloc.h" -#include "xfs_ialloc.h" -#include "xfs_quota.h" -#include "xfs_error.h" -#include "xfs_bmap.h" -#include "xfs_rw.h" -#include "xfs_buf_item.h" -#include "xfs_log_priv.h" -#include "xfs_dir2_trace.h" -#include "xfs_extfree_item.h" -#include "xfs_acl.h" -#include "xfs_attr.h" -#include "xfs_mru_cache.h" -#include "xfs_filestream.h" -#include "xfs_fsops.h" -#include "xfs_vnodeops.h" -#include "xfs_utils.h" -#include "xfs_sync.h" - - -/* - * xfs_unmount_flush implements a set of flush operation on special - * inodes, which are needed as a separate set of operations so that - * they can be called as part of relocation process. - */ -int -xfs_unmount_flush( - xfs_mount_t *mp, /* Mount structure we are getting - rid of. */ - int relocation) /* Called from vfs relocation. */ -{ - /* - * Release dquot that rootinode, rbmino and rsumino might be holding, - * flush and purge the quota inodes. - */ - XFS_QM_UNMOUNT(mp); - - if (mp->m_rbmip) - IRELE(mp->m_rbmip); - if (mp->m_rsumip) - IRELE(mp->m_rsumip); - - return 0; -} -- cgit v0.10.2 From 583fa586f0e4a8222dd091ce971b85c1364f3d92 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:38 +0100 Subject: kill vn_ioerror There's just one caller of this helper, and it's much cleaner to just merge the xfs_do_force_shutdown call into it. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index bb224d0..f35dba9 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -146,16 +146,25 @@ xfs_destroy_ioend( xfs_ioend_t *ioend) { struct buffer_head *bh, *next; + struct xfs_inode *ip = XFS_I(ioend->io_inode); for (bh = ioend->io_buffer_head; bh; bh = next) { next = bh->b_private; bh->b_end_io(bh, !ioend->io_error); } - if (unlikely(ioend->io_error)) { - vn_ioerror(XFS_I(ioend->io_inode), ioend->io_error, - __FILE__,__LINE__); + + /* + * Volume managers supporting multiple paths can send back ENODEV + * when the final path disappears. In this case continuing to fill + * the page cache with dirty data which cannot be written out is + * evil, so prevent that. + */ + if (unlikely(ioend->io_error == -ENODEV)) { + xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, + __FILE__, __LINE__); } - vn_iowake(XFS_I(ioend->io_inode)); + + vn_iowake(ip); mempool_free(ioend, xfs_ioend_pool); } diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index ad18262..a8cf97a 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -66,22 +66,6 @@ vn_iowake( wake_up(vptosync(ip)); } -/* - * Volume managers supporting multiple paths can send back ENODEV when the - * final path disappears. 
In this case continuing to fill the page cache - * with dirty data which cannot be written out is evil, so prevent that. - */ -void -vn_ioerror( - xfs_inode_t *ip, - int error, - char *f, - int l) -{ - if (unlikely(error == -ENODEV)) - xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l); -} - #ifdef XFS_INODE_TRACE #define KTRACE_ENTER(ip, vk, s, line, ra) \ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 314ca18..07fed88 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -63,7 +63,6 @@ extern void vn_init(void); */ extern void vn_iowait(struct xfs_inode *ip); extern void vn_iowake(struct xfs_inode *ip); -extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l); #define IHOLD(ip) \ do { \ -- cgit v0.10.2 From 25e41b3d521f52771354a718042a753a3e77df0a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:39 +0100 Subject: move vn_iowait / vn_iowake into xfs_aops.c The whole machinery to wait on I/O completion is related to the I/O path and should be there instead of in xfs_vnode.c. Also give the functions more descriptive names. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index f35dba9..de3a198 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -42,6 +42,40 @@ #include #include + +/* + * Prime number of hash buckets since address is used as the key. + */ +#define NVSYNC 37 +#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC]) +static wait_queue_head_t xfs_ioend_wq[NVSYNC]; + +void __init +xfs_ioend_init(void) +{ + int i; + + for (i = 0; i < NVSYNC; i++) + init_waitqueue_head(&xfs_ioend_wq[i]); +} + +void +xfs_ioend_wait( + xfs_inode_t *ip) +{ + wait_queue_head_t *wq = to_ioend_wq(ip); + + wait_event(*wq, (atomic_read(&ip->i_iocount) == 0)); +} + +STATIC void +xfs_ioend_wake( + xfs_inode_t *ip) +{ + if (atomic_dec_and_test(&ip->i_iocount)) + wake_up(to_ioend_wq(ip)); +} + STATIC void xfs_count_page_state( struct page *page, @@ -164,7 +198,7 @@ xfs_destroy_ioend( __FILE__, __LINE__); } - vn_iowake(ip); + xfs_ioend_wake(ip); mempool_free(ioend, xfs_ioend_pool); } @@ -516,7 +550,7 @@ xfs_cancel_ioend( unlock_buffer(bh); } while ((bh = next_bh) != NULL); - vn_iowake(XFS_I(ioend->io_inode)); + xfs_ioend_wake(XFS_I(ioend->io_inode)); mempool_free(ioend, xfs_ioend_pool); } while ((ioend = next) != NULL); } diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 3ba0631..7b26f5f 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -43,4 +43,7 @@ typedef struct xfs_ioend { extern const struct address_space_operations xfs_address_space_operations; extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); +extern void xfs_ioend_init(void); +extern void xfs_ioend_wait(struct xfs_inode *); + #endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 4ebbd68..36f6cc7 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1822,7 +1822,7 @@ init_xfs_fs(void) XFS_BUILD_OPTIONS " enabled\n"); ktrace_init(64); - vn_init(); + xfs_ioend_init(); xfs_dir_startup(); error = xfs_init_zones(); diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index d12d31b..ca5bd29 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -133,7 +133,7 @@ xfs_sync_inodes_ag( lock_flags |= 
XFS_IOLOCK_SHARED; error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); if (flags & SYNC_IOWAIT) - vn_iowait(ip); + xfs_ioend_wait(ip); } xfs_ilock(ip, XFS_ILOCK_SHARED); diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index a8cf97a..f6d1411 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -32,40 +32,6 @@ #include "xfs_mount.h" -/* - * Dedicated vnode inactive/reclaim sync wait queues. - * Prime number of hash buckets since address is used as the key. - */ -#define NVSYNC 37 -#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) -static wait_queue_head_t vsync[NVSYNC]; - -void __init -vn_init(void) -{ - int i; - - for (i = 0; i < NVSYNC; i++) - init_waitqueue_head(&vsync[i]); -} - -void -vn_iowait( - xfs_inode_t *ip) -{ - wait_queue_head_t *wq = vptosync(ip); - - wait_event(*wq, (atomic_read(&ip->i_iocount) == 0)); -} - -void -vn_iowake( - xfs_inode_t *ip) -{ - if (atomic_dec_and_test(&ip->i_iocount)) - wake_up(vptosync(ip)); -} - #ifdef XFS_INODE_TRACE #define KTRACE_ENTER(ip, vk, s, line, ra) \ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 07fed88..bd3e05c 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -54,16 +54,6 @@ struct attrlist_cursor_kern; Prevent VM access to the pages until the operation completes. */ - -extern void vn_init(void); - -/* - * Yeah, these don't take vnode anymore at all, all this should be - * cleaned up at some point. - */ -extern void vn_iowait(struct xfs_inode *ip); -extern void vn_iowake(struct xfs_inode *ip); - #define IHOLD(ip) \ do { \ ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4e664f5..063da34 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1322,8 +1322,8 @@ xfs_itrunc_trace( * direct I/O with the truncate operation. Also, because we hold * the IOLOCK in exclusive mode, we prevent new direct I/Os from being * started until the truncate completes and drops the lock. Essentially, - * the vn_iowait() call forms an I/O barrier that provides strict ordering - * between direct I/Os and the truncate operation. + * the xfs_ioend_wait() call forms an I/O barrier that provides strict + * ordering between direct I/Os and the truncate operation. * * The flags parameter can have either the value XFS_ITRUNC_DEFINITE * or XFS_ITRUNC_MAYBE. 
The XFS_ITRUNC_MAYBE value should be used @@ -1354,7 +1354,7 @@ xfs_itruncate_start( /* wait for the completion of any pending DIOs */ if (new_size == 0 || new_size < ip->i_size) - vn_iowait(ip); + xfs_ioend_wait(ip); /* * Call toss_pages or flushinval_pages to get rid of pages diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index b29a0eb..2d57aae 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -338,7 +338,7 @@ xfs_setattr( } /* wait for all I/O to complete */ - vn_iowait(ip); + xfs_ioend_wait(ip); if (!code) code = xfs_itruncate_data(ip, iattr->ia_size); @@ -2758,7 +2758,7 @@ xfs_reclaim( return 0; } - vn_iowait(ip); + xfs_ioend_wait(ip); ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); @@ -3149,7 +3149,8 @@ xfs_free_file_space( need_iolock = 0; if (need_iolock) { xfs_ilock(ip, XFS_IOLOCK_EXCL); - vn_iowait(ip); /* wait for the completion of any pending DIOs */ + /* wait for the completion of any pending DIOs */ + xfs_ioend_wait(ip); } rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); -- cgit v0.10.2 From 5a8d0f3c7af801c7263fbba39952504d6fc7ff60 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Dec 2008 12:20:40 +0100 Subject: move inode tracing out of xfs_vnode. Move the inode tracing into xfs_iget.c / xfs_inode.h and kill xfs_vnode.c now that it's empty. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Niv Sardi diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index a5a9ef0..c3dc491 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -107,7 +107,6 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \ xfs_lrw.o \ xfs_super.o \ xfs_sync.o \ - xfs_vnode.o \ xfs_xattr.o) # Objects in support/ diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c deleted file mode 100644 index f6d1411..0000000 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_vnodeops.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" - -/* - * And this gunk is needed for xfs_mount.h" - */ -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_dmapi.h" -#include "xfs_inum.h" -#include "xfs_ag.h" -#include "xfs_mount.h" - - -#ifdef XFS_INODE_TRACE - -#define KTRACE_ENTER(ip, vk, s, line, ra) \ - ktrace_enter( (ip)->i_trace, \ -/* 0 */ (void *)(__psint_t)(vk), \ -/* 1 */ (void *)(s), \ -/* 2 */ (void *)(__psint_t) line, \ -/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \ -/* 4 */ (void *)(ra), \ -/* 5 */ NULL, \ -/* 6 */ (void *)(__psint_t)current_cpu(), \ -/* 7 */ (void *)(__psint_t)current_pid(), \ -/* 8 */ (void *)__return_address, \ -/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL) - -/* - * Vnode tracing code. 
- */ -void -_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra); -} - -void -_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra); -} - -void -xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra); -} - -void -_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra); -} - -void -xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra); -} -#endif /* XFS_INODE_TRACE */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index bd3e05c..f65983a 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -54,19 +54,6 @@ struct attrlist_cursor_kern; Prevent VM access to the pages until the operation completes. */ -#define IHOLD(ip) \ -do { \ - ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ - atomic_inc(&(VFS_I(ip)->i_count)); \ - xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ -} while (0) - -#define IRELE(ip) \ -do { \ - xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ - iput(VFS_I(ip)); \ -} while (0) - /* * Dealing with bad inodes */ @@ -103,39 +90,4 @@ static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt) PAGECACHE_TAG_DIRTY) -/* - * Tracking vnode activity. - */ -#if defined(XFS_INODE_TRACE) - -#define INODE_TRACE_SIZE 16 /* number of trace entries */ -#define INODE_KTRACE_ENTRY 1 -#define INODE_KTRACE_EXIT 2 -#define INODE_KTRACE_HOLD 3 -#define INODE_KTRACE_REF 4 -#define INODE_KTRACE_RELE 5 - -extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *); -extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *); -extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *); -extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *); -extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *); -#define xfs_itrace_entry(ip) \ - _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address) -#define xfs_itrace_exit(ip) \ - _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address) -#define xfs_itrace_exit_tag(ip, tag) \ - _xfs_itrace_exit(ip, tag, (inst_t *)__return_address) -#define xfs_itrace_ref(ip) \ - _xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address) - -#else -#define xfs_itrace_entry(a) -#define xfs_itrace_exit(a) -#define xfs_itrace_exit_tag(a, b) -#define xfs_itrace_hold(a, b, c, d) -#define xfs_itrace_ref(a) -#define xfs_itrace_rele(a, b, c, d) -#endif - #endif /* __XFS_VNODE_H__ */ diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 82eb8ed..e2fb621 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -805,3 +805,51 @@ xfs_isilocked( } #endif +#ifdef XFS_INODE_TRACE + +#define KTRACE_ENTER(ip, vk, s, line, ra) \ + ktrace_enter((ip)->i_trace, \ +/* 0 */ (void *)(__psint_t)(vk), \ +/* 1 */ (void *)(s), \ +/* 2 */ (void *)(__psint_t) line, \ +/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \ +/* 4 */ (void *)(ra), \ +/* 5 */ NULL, \ +/* 6 */ (void *)(__psint_t)current_cpu(), \ +/* 7 */ (void *)(__psint_t)current_pid(), \ +/* 8 */ (void *)__return_address, \ +/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL) + +/* + * Vnode tracing code. 
+ */ +void +_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra) +{ + KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra); +} + +void +_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra) +{ + KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra); +} + +void +xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra) +{ + KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra); +} + +void +_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra) +{ + KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra); +} + +void +xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra) +{ + KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra); +} +#endif /* XFS_INODE_TRACE */ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ddac1b8..558253e 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -536,6 +536,51 @@ void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); void xfs_synchronize_atime(xfs_inode_t *); void xfs_mark_inode_dirty_sync(xfs_inode_t *); +#if defined(XFS_INODE_TRACE) + +#define INODE_TRACE_SIZE 16 /* number of trace entries */ +#define INODE_KTRACE_ENTRY 1 +#define INODE_KTRACE_EXIT 2 +#define INODE_KTRACE_HOLD 3 +#define INODE_KTRACE_REF 4 +#define INODE_KTRACE_RELE 5 + +extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *); +extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *); +extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *); +extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *); +extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *); +#define xfs_itrace_entry(ip) \ + _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address) +#define xfs_itrace_exit(ip) \ + _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address) +#define xfs_itrace_exit_tag(ip, tag) \ + _xfs_itrace_exit(ip, tag, (inst_t *)__return_address) +#define xfs_itrace_ref(ip) \ + _xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address) + +#else +#define xfs_itrace_entry(a) +#define xfs_itrace_exit(a) +#define xfs_itrace_exit_tag(a, b) +#define xfs_itrace_hold(a, b, c, d) +#define xfs_itrace_ref(a) +#define xfs_itrace_rele(a, b, c, d) +#endif + +#define IHOLD(ip) \ +do { \ + ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ + atomic_inc(&(VFS_I(ip)->i_count)); \ + xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ +} while (0) + +#define IRELE(ip) \ +do { \ + xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ + iput(VFS_I(ip)); \ +} while (0) + #endif /* __KERNEL__ */ int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, -- cgit v0.10.2 From 8bb57320f3f5dd8c2373c0b66e4950391e037109 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 4 Dec 2008 14:23:27 +0100 Subject: [XFS] Fix compile with CONFIG_COMPAT enabled Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h index a3446aa..d92131c 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.h +++ b/fs/xfs/linux-2.6/xfs_ioctl.h @@ -74,7 +74,7 @@ xfs_file_compat_ioctl( unsigned long arg); extern long -xfs_file_compat_ioctl_invis( +xfs_file_compat_invis_ioctl( struct file *file, unsigned int cmd, unsigned long arg); -- cgit v0.10.2 From 6a0775a991d5597ce98f1e15373288ea133cc793 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 4 Dec 2008 09:09:34 +1100 Subject: [XFS] Fix hang after disallowed rename across directory quota domains When project quota is active and is being used for 
directory tree quota control, we disallow rename outside the current directory tree. This requires a check to be made after all the inodes involved in the rename are locked. We fail to unlock the inodes correctly if we disallow the rename when the target is outside the current directory tree. This results in a hang on the next access to the inodes involved in failed rename. Reported-by: Arkadiusz Miskiewicz Signed-off-by: Dave Chinner Tested-by: Arkadiusz Miskiewicz Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 02f0e8f..10642fc 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -212,7 +212,7 @@ xfs_rename( if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { error = XFS_ERROR(EXDEV); - xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); + xfs_rename_unlock4(inodes, XFS_ILOCK_EXCL); xfs_trans_cancel(tp, cancel_flags); goto std_return; } -- cgit v0.10.2 From c6422617a1c0d7787e515748b01f594fe43aea98 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Fri, 5 Dec 2008 13:16:15 +1100 Subject: [XFS] Check return value of xfs_buf_get_noaddr() We check the return value of all other calls to xfs_buf_get_noaddr(). Make sense to do it here too. Signed-off-by: Lachlan McIlroy Reviewed-by: Christoph Hellwig Reviewed-by: Eric Sandeen diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 2d57aae..4547608 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -3034,6 +3034,8 @@ xfs_zero_remaining_bytes( bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp); + if (!bp) + return XFS_ERROR(ENOMEM); for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { offset_fsb = XFS_B_TO_FSBT(mp, offset); -- cgit v0.10.2 From a5b429d41fede3a90deb532f5c2318393ed3a17b Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Fri, 5 Dec 2008 13:31:51 +1100 Subject: [XFS] Remove unused variable in ktrace_free() entries_size is probably left over from when we used to pass the size to kmem_free(). Signed-off-by: Lachlan McIlroy Reviewed-by: Christoph Hellwig Reviewed-by: Eric Sandeen diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c index a34ef05..2d494c2 100644 --- a/fs/xfs/support/ktrace.c +++ b/fs/xfs/support/ktrace.c @@ -113,21 +113,16 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep) void ktrace_free(ktrace_t *ktp) { - int entries_size; - if (ktp == (ktrace_t *)NULL) return; /* * Special treatment for the Vnode trace buffer. */ - if (ktp->kt_nentries == ktrace_zentries) { + if (ktp->kt_nentries == ktrace_zentries) kmem_zone_free(ktrace_ent_zone, ktp->kt_entries); - } else { - entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t)); - + else kmem_free(ktp->kt_entries); - } kmem_zone_free(ktrace_hdr_zone, ktp); } -- cgit v0.10.2 From 797eaed40e1df4a3b9ece6894a71ce2b568bca38 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Fri, 5 Dec 2008 14:15:49 +1100 Subject: [XFS] Remove unnecessary assertion Hit this assert because an inode was tagged with XFS_ICI_RECLAIM_TAG but not XFS_IRECLAIMABLE|XFS_IRECLAIM. This is because xfs_iget_cache_hit() first clears XFS_IRECLAIMABLE and then calls __xfs_inode_clear_reclaim_tag() while only holding the pag_ici_lock in read mode so we can race with xfs_reclaim_inodes_ag(). Looks like xfs_reclaim_inodes_ag() will do the right thing anyway so just remove the assert. Thanks to Christoph for pointing out where the problem was. 
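A sketch of the window described above, together with the existing check in the reclaim walker that already tolerates it (the interleaving is assumed from this description, not re-derived from the full sources; the skip path is elided):

	/*
	 *   xfs_iget_cache_hit()                xfs_reclaim_inodes_ag()
	 *   --------------------                -----------------------
	 *   clears XFS_IRECLAIMABLE
	 *                                        finds ip via XFS_ICI_RECLAIM_TAG
	 *                                        ASSERT(IRECLAIMABLE|IRECLAIM) fires
	 *   __xfs_inode_clear_reclaim_tag()
	 *   (pag_ici_lock held only for reading)
	 */

	/* the guard that makes the assert redundant: */
	if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
		/* already under reclaim elsewhere; drop the lock and skip it */
		read_unlock(&pag->pag_ici_lock);
	}
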
Signed-off-by: Lachlan McIlroy Reviewed-by: Christoph Hellwig diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index ca5bd29..2ed0353 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -707,8 +707,6 @@ restart: break; } - ASSERT(xfs_iflags_test(ip, (XFS_IRECLAIMABLE|XFS_IRECLAIM))); - /* ignore if already under reclaim */ if (xfs_iflags_test(ip, XFS_IRECLAIM)) { read_unlock(&pag->pag_ici_lock); -- cgit v0.10.2 From e055f13a6d8448d4f23121b7b11340c3fb55cce6 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Wed, 10 Dec 2008 11:51:54 +1100 Subject: [XFS] Remove unused tracing code None of this code appears to be used anywhere so remove it. Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 063da34..f6b4c0f 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3232,8 +3232,6 @@ corrupt_out: #ifdef XFS_ILOCK_TRACE -ktrace_t *xfs_ilock_trace_buf; - void xfs_ilock_trace(xfs_inode_t *ip, int lock, unsigned int lockflags, inst_t *ra) { diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 558253e..64222e2 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -223,7 +223,6 @@ struct xfs_dquot; #if defined(XFS_ILOCK_TRACE) #define XFS_ILOCK_KTRACE_SIZE 32 -extern ktrace_t *xfs_ilock_trace_buf; extern void xfs_ilock_trace(struct xfs_inode *, int, unsigned int, inst_t *); #else #define xfs_ilock_trace(i,n,f,ra) -- cgit v0.10.2 From 15ac08a8b2c129abccf1be47b6ab09491e013db2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Dec 2008 04:47:30 -0500 Subject: [XFS] replace b_fspriv with b_mount Replace the b_fspriv pointer and it's ugly accessors with a properly types xfs_mount pointer. Also switch log reocvery over to it instead of using b_fspriv for the mount pointer. 
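The shape of the change, condensed from the hunks below (a sketch, not a patch; only the buffer field and its accessors are shown):

	/* before: an untyped slot plus casting accessor macros */
	void			*b_fspriv3;
	#define XFS_BUF_FSPRIVATE3(bp, type)	((type)(bp)->b_fspriv3)
	#define XFS_BUF_SET_FSPRIVATE3(bp, val)	((bp)->b_fspriv3 = (void*)(val))

	mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *);

	/* after: a properly typed member, no casts and no macros */
	struct xfs_mount	*b_mount;

	bp->b_mount = mp;
	if (XFS_FORCED_SHUTDOWN(bp->b_mount))
		/* ...error handling as in xfs_bdstrat_cb() below... */
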
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index cd89c56..b9c304a 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1085,7 +1085,7 @@ xfs_bawrite( bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD); bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); - bp->b_fspriv3 = mp; + bp->b_mount = mp; bp->b_strat = xfs_bdstrat_cb; return xfs_bdstrat_cb(bp); } @@ -1098,7 +1098,7 @@ xfs_bdwrite( XB_TRACE(bp, "bdwrite", 0); bp->b_strat = xfs_bdstrat_cb; - bp->b_fspriv3 = mp; + bp->b_mount = mp; bp->b_flags &= ~XBF_READ; bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 0e2aa16..af8764e 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -168,7 +168,7 @@ typedef struct xfs_buf { struct completion b_iowait; /* queue for I/O waiters */ void *b_fspriv; void *b_fspriv2; - void *b_fspriv3; + struct xfs_mount *b_mount; unsigned short b_error; /* error code on I/O */ unsigned int b_page_count; /* size of page array */ unsigned int b_offset; /* page offset in first page */ @@ -335,8 +335,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); #define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val)) #define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) #define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) -#define XFS_BUF_FSPRIVATE3(bp, type) ((type)(bp)->b_fspriv3) -#define XFS_BUF_SET_FSPRIVATE3(bp, val) ((bp)->b_fspriv3 = (void*)(val)) #define XFS_BUF_SET_START(bp) do { } while (0) #define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func)) diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 59b7d5f..92ce34b 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -847,13 +847,7 @@ retry: int xfs_bdstrat_cb(struct xfs_buf *bp) { - xfs_mount_t *mp; - - mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); - if (!XFS_FORCED_SHUTDOWN(mp)) { - xfs_buf_iorequest(bp); - return 0; - } else { + if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { xfs_buftrace("XFS__BDSTRAT IOERROR", bp); /* * Metadata write that didn't get logged but @@ -866,6 +860,9 @@ xfs_bdstrat_cb(struct xfs_buf *bp) else return (xfs_bioerror(bp)); } + + xfs_buf_iorequest(bp); + return 0; } /* diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index d245d04..d74ce11 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -707,8 +707,8 @@ xfs_buf_item_init( * the first. If we do already have one, there is * nothing to do here so return. */ - if (XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *) != mp) - XFS_BUF_SET_FSPRIVATE3(bp, mp); + if (bp->b_mount != mp) + bp->b_mount = mp; XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 51412cc..35cca98 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -267,21 +267,16 @@ STATIC void xlog_recover_iodone( struct xfs_buf *bp) { - xfs_mount_t *mp; - - ASSERT(XFS_BUF_FSPRIVATE(bp, void *)); - if (XFS_BUF_GETERROR(bp)) { /* * We're not going to bother about retrying * this during recovery. One strike! 
*/ - mp = XFS_BUF_FSPRIVATE(bp, xfs_mount_t *); xfs_ioerror_alert("xlog_recover_iodone", - mp, bp, XFS_BUF_ADDR(bp)); - xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + bp->b_mount, bp, XFS_BUF_ADDR(bp)); + xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); } - XFS_BUF_SET_FSPRIVATE(bp, NULL); + bp->b_mount = NULL; XFS_BUF_CLR_IODONE_FUNC(bp); xfs_biodone(bp); } @@ -2225,9 +2220,8 @@ xlog_recover_do_buffer_trans( XFS_BUF_STALE(bp); error = xfs_bwrite(mp, bp); } else { - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || - XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); - XFS_BUF_SET_FSPRIVATE(bp, mp); + ASSERT(bp->b_mount == NULL || bp->b_mount == mp); + bp->b_mount = mp; XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); } @@ -2490,9 +2484,8 @@ xlog_recover_do_inode_trans( write_inode_buffer: if (ITEM_TYPE(item) == XFS_LI_INODE) { - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || - XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); - XFS_BUF_SET_FSPRIVATE(bp, mp); + ASSERT(bp->b_mount == NULL || bp->b_mount == mp); + bp->b_mount = mp; XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); } else { @@ -2623,9 +2616,8 @@ xlog_recover_do_dquot_trans( memcpy(ddq, recddq, item->ri_buf[1].i_len); ASSERT(dq_f->qlf_size == 2); - ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL || - XFS_BUF_FSPRIVATE(bp, xfs_mount_t *) == mp); - XFS_BUF_SET_FSPRIVATE(bp, mp); + ASSERT(bp->b_mount == NULL || bp->b_mount == mp); + bp->b_mount = mp; XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index 3a82576..36f3a21 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -406,7 +406,7 @@ xfs_bwrite( * XXXsup how does this work for quotas. */ XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb); - XFS_BUF_SET_FSPRIVATE3(bp, mp); + bp->b_mount = mp; XFS_BUF_WRITE(bp); if ((error = XFS_bwrite(bp))) { -- cgit v0.10.2 From 2175dd95741bda5f438e4efe388a8c1bb5abf1cc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Dec 2008 04:47:31 -0500 Subject: [XFS] simplify projid check in xfs_rename Check for the project ID after attaching all inodes to the transaction. That way the unlock in the error case is done by the transaction subsystem, which guaratees that is uses the right flags (which was wrong from day one of this check), and avoids having special code unlocking an array of inodes with potential duplicates. Attaching the inode first is the method used by xfs_rename and the other namespace methods all other error that require multiple locked inodes. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 10642fc..86471bb 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -42,31 +42,6 @@ /* - * Given an array of up to 4 inode pointers, unlock the pointed to inodes. - * If there are fewer than 4 entries in the array, the empty entries will - * be at the end and will have NULL pointers in them. - */ -STATIC void -xfs_rename_unlock4( - xfs_inode_t **i_tab, - uint lock_mode) -{ - int i; - - xfs_iunlock(i_tab[0], lock_mode); - for (i = 1; i < 4; i++) { - if (i_tab[i] == NULL) - break; - - /* - * Watch out for duplicate entries in the table. - */ - if (i_tab[i] != i_tab[i-1]) - xfs_iunlock(i_tab[i], lock_mode); - } -} - -/* * Enter all inodes for a rename transaction into a sorted array. 
*/ STATIC void @@ -205,19 +180,6 @@ xfs_rename( xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); /* - * If we are using project inheritance, we only allow renames - * into our tree when the project IDs are the same; else the - * tree quota mechanism would be circumvented. - */ - if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && - (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { - error = XFS_ERROR(EXDEV); - xfs_rename_unlock4(inodes, XFS_ILOCK_EXCL); - xfs_trans_cancel(tp, cancel_flags); - goto std_return; - } - - /* * Join all the inodes to the transaction. From this point on, * we can rely on either trans_commit or trans_cancel to unlock * them. Note that we need to add a vnode reference to the @@ -242,6 +204,17 @@ xfs_rename( } /* + * If we are using project inheritance, we only allow renames + * into our tree when the project IDs are the same; else the + * tree quota mechanism would be circumvented. + */ + if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && + (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { + error = XFS_ERROR(EXDEV); + goto error_return; + } + + /* * Set up the target. */ if (target_ip == NULL) { -- cgit v0.10.2 From 6d73cf133c5477f7038577bfeda603ce9946f8cb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Dec 2008 04:47:32 -0500 Subject: [XFS] resync headers with libxfs - xfs_sb.h add the XFS_SB_VERSION2_PARENTBIT features2 that has been around in userspace for some time - xfs_inode.h: move a few things out of __KERNEL__ that are needed by userspace - xfs_mount.h: only include xfs_sync.h under __KERNEL__ - xfs_inode.c: minor whitespace fixup. I accidentaly changes this when importing this file for use by userspace. Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index f6b4c0f..3adb868 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -870,7 +870,7 @@ xfs_iread( * around for a while. This helps to keep recently accessed * meta-data in-core longer. */ - XFS_BUF_SET_REF(bp, XFS_INO_REF); + XFS_BUF_SET_REF(bp, XFS_INO_REF); /* * Use xfs_trans_brelse() to release the buffer containing the diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 64222e2..f0e4d79 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -482,12 +482,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) ((pip)->i_d.di_mode & S_ISGID)) /* - * Flags for xfs_iget() - */ -#define XFS_IGET_CREATE 0x1 -#define XFS_IGET_BULKSTAT 0x2 - -/* * xfs_iget.c prototypes. */ xfs_inode_t *xfs_inode_incore(struct xfs_mount *, xfs_ino_t, @@ -508,8 +502,6 @@ void xfs_ireclaim(xfs_inode_t *); /* * xfs_inode.c prototypes. 
*/ -int xfs_iread(struct xfs_mount *, struct xfs_trans *, - struct xfs_inode *, xfs_daddr_t, uint); int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, int, struct xfs_buf **, boolean_t *, xfs_inode_t **); @@ -582,12 +574,20 @@ do { \ #endif /* __KERNEL__ */ +/* + * Flags for xfs_iget() + */ +#define XFS_IGET_CREATE 0x1 +#define XFS_IGET_BULKSTAT 0x2 + int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, struct xfs_dinode **, struct xfs_buf **, int *, uint); int xfs_itobp(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, struct xfs_dinode **, struct xfs_buf **, uint); +int xfs_iread(struct xfs_mount *, struct xfs_trans *, + struct xfs_inode *, xfs_daddr_t, uint); void xfs_dinode_from_disk(struct xfs_icdinode *, struct xfs_dinode *); void xfs_dinode_to_disk(struct xfs_dinode *, diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index ae5da88..c1e0284 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -18,8 +18,6 @@ #ifndef __XFS_MOUNT_H__ #define __XFS_MOUNT_H__ -#include "xfs_sync.h" - typedef struct xfs_trans_reservations { uint tr_write; /* extent alloc trans */ uint tr_itruncate; /* truncate trans */ @@ -53,6 +51,8 @@ typedef struct xfs_trans_reservations { #else /* __KERNEL__ */ +#include "xfs_sync.h" + struct cred; struct log; struct xfs_mount_args; diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index e123c14..1ed7191 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -79,6 +79,7 @@ struct xfs_mount; #define XFS_SB_VERSION2_LAZYSBCOUNTBIT 0x00000002 /* Superblk counters */ #define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 #define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ +#define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */ #define XFS_SB_VERSION2_OKREALFBITS \ (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ -- cgit v0.10.2 From 4d4be482a4d78ca906f45e99fd9fdb91e907f5ad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Dec 2008 04:47:33 -0500 Subject: [XFS] add a FMODE flag to make XFS invisible I/O less hacky XFS has a mode called invisble I/O that doesn't update any of the timestamps. It's used for HSM-style applications and exposed through the nasty open by handle ioctl. Instead of doing directly assignment of file operations that set an internal flag for it add a new FMODE_NOCMTIME flag that we can check in the normal file operations. 
(addition of the generic VFS flag has been ACKed by Al as an interims solution) Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index f999d20..a0c45cc 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -45,81 +45,45 @@ static struct vm_operations_struct xfs_file_vm_ops; -STATIC_INLINE ssize_t -__xfs_file_read( +STATIC ssize_t +xfs_file_aio_read( struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, - int ioflags, loff_t pos) { struct file *file = iocb->ki_filp; + int ioflags = IO_ISAIO; BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; + if (file->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov, nr_segs, &iocb->ki_pos, ioflags); } STATIC ssize_t -xfs_file_aio_read( - struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) -{ - return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos); -} - -STATIC ssize_t -xfs_file_aio_read_invis( - struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) -{ - return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos); -} - -STATIC_INLINE ssize_t -__xfs_file_write( +xfs_file_aio_write( struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, - int ioflags, loff_t pos) { - struct file *file = iocb->ki_filp; + struct file *file = iocb->ki_filp; + int ioflags = IO_ISAIO; BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; + if (file->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs, &iocb->ki_pos, ioflags); } STATIC ssize_t -xfs_file_aio_write( - struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) -{ - return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos); -} - -STATIC ssize_t -xfs_file_aio_write_invis( - struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) -{ - return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos); -} - -STATIC ssize_t xfs_file_splice_read( struct file *infilp, loff_t *ppos, @@ -127,20 +91,13 @@ xfs_file_splice_read( size_t len, unsigned int flags) { - return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode), - infilp, ppos, pipe, len, flags, 0); -} + int ioflags = 0; + + if (infilp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; -STATIC ssize_t -xfs_file_splice_read_invis( - struct file *infilp, - loff_t *ppos, - struct pipe_inode_info *pipe, - size_t len, - unsigned int flags) -{ return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode), - infilp, ppos, pipe, len, flags, IO_INVIS); + infilp, ppos, pipe, len, flags, ioflags); } STATIC ssize_t @@ -151,20 +108,13 @@ xfs_file_splice_write( size_t len, unsigned int flags) { - return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode), - pipe, outfilp, ppos, len, flags, 0); -} + int ioflags = 0; + + if (outfilp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; -STATIC ssize_t -xfs_file_splice_write_invis( - struct pipe_inode_info *pipe, - struct file *outfilp, - loff_t *ppos, - size_t len, - unsigned int flags) -{ return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode), - pipe, outfilp, ppos, len, flags, IO_INVIS); + pipe, outfilp, ppos, len, flags, ioflags); } STATIC int @@ -275,42 +225,6 @@ xfs_file_mmap( return 0; } -STATIC long -xfs_file_ioctl( - struct file *filp, - unsigned int 
cmd, - unsigned long p) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - - - /* NOTE: some of the ioctl's return positive #'s as a - * byte count indicating success, such as - * readlink_by_handle. So we don't "sign flip" - * like most other routines. This means true - * errors need to be returned as a negative value. - */ - return xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p); -} - -STATIC long -xfs_file_ioctl_invis( - struct file *filp, - unsigned int cmd, - unsigned long p) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - - - /* NOTE: some of the ioctl's return positive #'s as a - * byte count indicating success, such as - * readlink_by_handle. So we don't "sign flip" - * like most other routines. This means true - * errors need to be returned as a negative value. - */ - return xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p); -} - /* * mmap()d file has taken write protection fault and is being made * writable. We can set the page state up correctly for a writable @@ -346,25 +260,6 @@ const struct file_operations xfs_file_operations = { #endif }; -const struct file_operations xfs_invis_file_operations = { - .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = xfs_file_aio_read_invis, - .aio_write = xfs_file_aio_write_invis, - .splice_read = xfs_file_splice_read_invis, - .splice_write = xfs_file_splice_write_invis, - .unlocked_ioctl = xfs_file_ioctl_invis, -#ifdef CONFIG_COMPAT - .compat_ioctl = xfs_file_compat_invis_ioctl, -#endif - .mmap = xfs_file_mmap, - .open = xfs_file_open, - .release = xfs_file_release, - .fsync = xfs_file_fsync, -}; - - const struct file_operations xfs_dir_file_operations = { .open = xfs_dir_open, .read = generic_read_dir, diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index c8f1e63..0264c87 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -319,10 +319,11 @@ xfs_open_by_handle( put_unused_fd(new_fd); return -XFS_ERROR(-PTR_ERR(filp)); } + if (inode->i_mode & S_IFREG) { /* invisible operation should not change atime */ filp->f_flags |= O_NOATIME; - filp->f_op = &xfs_invis_file_operations; + filp->f_mode |= FMODE_NOCMTIME; } fd_install(new_fd, filp); @@ -1328,21 +1329,31 @@ xfs_ioc_getbmapx( return 0; } -int -xfs_ioctl( - xfs_inode_t *ip, +/* + * Note: some of the ioctl's return positive numbers as a + * byte count indicating success, such as readlink_by_handle. + * So we don't "sign flip" like most other routines. This means + * true errors need to be returned as a negative value. 
+ */ +long +xfs_file_ioctl( struct file *filp, - int ioflags, unsigned int cmd, - void __user *arg) + unsigned long p) { struct inode *inode = filp->f_path.dentry->d_inode; - xfs_mount_t *mp = ip->i_mount; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + void __user *arg = (void __user *)p; + int ioflags = 0; int error; - xfs_itrace_entry(XFS_I(inode)); - switch (cmd) { + if (filp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + xfs_itrace_entry(ip); + + switch (cmd) { case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: case XFS_IOC_RESVSP: diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h index d92131c..8c16bf2 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.h +++ b/fs/xfs/linux-2.6/xfs_ioctl.h @@ -68,13 +68,13 @@ xfs_attrmulti_attr_remove( __uint32_t flags); extern long -xfs_file_compat_ioctl( - struct file *file, +xfs_file_ioctl( + struct file *filp, unsigned int cmd, - unsigned long arg); + unsigned long p); extern long -xfs_file_compat_invis_ioctl( +xfs_file_compat_ioctl( struct file *file, unsigned int cmd, unsigned long arg); diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index b34b3d8..0504cec 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -599,19 +599,24 @@ out: return error; } -STATIC long -xfs_compat_ioctl( - xfs_inode_t *ip, - struct file *filp, - int ioflags, - unsigned cmd, - void __user *arg) +long +xfs_file_compat_ioctl( + struct file *filp, + unsigned cmd, + unsigned long p) { - struct inode *inode = filp->f_path.dentry->d_inode; - xfs_mount_t *mp = ip->i_mount; - int error; + struct inode *inode = filp->f_path.dentry->d_inode; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + void __user *arg = (void __user *)p; + int ioflags = 0; + int error; + + if (filp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + + xfs_itrace_entry(ip); - xfs_itrace_entry(XFS_I(inode)); switch (cmd) { /* No size or alignment issues on any arch */ case XFS_IOC_DIOINFO: @@ -632,7 +637,7 @@ xfs_compat_ioctl( case XFS_IOC_GOINGDOWN: case XFS_IOC_ERROR_INJECTION: case XFS_IOC_ERROR_CLEARALL: - return xfs_ioctl(ip, filp, ioflags, cmd, arg); + return xfs_file_ioctl(filp, cmd, p); #ifndef BROKEN_X86_ALIGNMENT /* These are handled fine if no alignment issues */ case XFS_IOC_ALLOCSP: @@ -646,7 +651,7 @@ xfs_compat_ioctl( case XFS_IOC_FSGEOMETRY_V1: case XFS_IOC_FSGROWFSDATA: case XFS_IOC_FSGROWFSRT: - return xfs_ioctl(ip, filp, ioflags, cmd, arg); + return xfs_file_ioctl(filp, cmd, p); #else case XFS_IOC_ALLOCSP_32: case XFS_IOC_FREESP_32: @@ -687,7 +692,7 @@ xfs_compat_ioctl( case XFS_IOC_SETXFLAGS_32: case XFS_IOC_GETVERSION_32: cmd = _NATIVE_IOC(cmd, long); - return xfs_ioctl(ip, filp, ioflags, cmd, arg); + return xfs_file_ioctl(filp, cmd, p); case XFS_IOC_SWAPEXT: { struct xfs_swapext sxp; struct compat_xfs_swapext __user *sxu = arg; @@ -738,26 +743,3 @@ xfs_compat_ioctl( return -XFS_ERROR(ENOIOCTLCMD); } } - -long -xfs_file_compat_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long p) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - - return xfs_compat_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p); -} - -long -xfs_file_compat_invis_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long p) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - - return xfs_compat_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, - (void __user *)p); -} diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h index 8b1a1e3..ef41c92 100644 --- 

From c4cd747ee6c3ba1e7727878e3fce482d0d8c0136 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 9 Dec 2008 04:47:34 -0500
Subject: [XFS] use inode_change_ok for setattr permission checking

Instead of implementing our own checks, use inode_change_ok to check for
the necessary permissions in setattr. There is a slight change in
behaviour as inode_change_ok doesn't allow i_mode updates to add the suid
or sgid bits without superuser privileges, while the old XFS code just
stripped away those bits from the file mode.

(First sent on September 29th)

Signed-off-by: Christoph Hellwig
Signed-off-by: Lachlan McIlroy

diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 4547608..f07bf87 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -70,7 +70,6 @@ xfs_setattr(
 	gid_t			gid=0, igid=0;
 	int			timeflags = 0;
 	struct xfs_dquot	*udqp, *gdqp, *olddquot1, *olddquot2;
-	int			file_owner;
 	int			need_iolock = 1;
 
 	xfs_itrace_entry(ip);
@@ -81,6 +80,10 @@ xfs_setattr(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
+	code = -inode_change_ok(inode, iattr);
+	if (code)
+		return code;
+
 	olddquot1 = olddquot2 = NULL;
 	udqp = gdqp = NULL;
 
@@ -158,56 +161,6 @@ xfs_setattr(
 
 	xfs_ilock(ip, lock_flags);
 
-	/* boolean: are we the file owner? */
-	file_owner = (current_fsuid() == ip->i_d.di_uid);
-
-	/*
-	 * Change various properties of a file.
-	 * Only the owner or users with CAP_FOWNER
-	 * capability may do these things.
-	 */
-	if (mask & (ATTR_MODE|ATTR_UID|ATTR_GID)) {
-		/*
-		 * CAP_FOWNER overrides the following restrictions:
-		 *
-		 * The user ID of the calling process must be equal
-		 * to the file owner ID, except in cases where the
-		 * CAP_FSETID capability is applicable.
-		 */
-		if (!file_owner && !capable(CAP_FOWNER)) {
-			code = XFS_ERROR(EPERM);
-			goto error_return;
-		}
-
-		/*
-		 * CAP_FSETID overrides the following restrictions:
-		 *
-		 * The effective user ID of the calling process shall match
-		 * the file owner when setting the set-user-ID and
-		 * set-group-ID bits on that file.
-		 *
-		 * The effective group ID or one of the supplementary group
-		 * IDs of the calling process shall match the group owner of
-		 * the file when setting the set-group-ID bit on that file
-		 */
-		if (mask & ATTR_MODE) {
-			mode_t m = 0;
-
-			if ((iattr->ia_mode & S_ISUID) && !file_owner)
-				m |= S_ISUID;
-			if ((iattr->ia_mode & S_ISGID) &&
-			    !in_group_p((gid_t)ip->i_d.di_gid))
-				m |= S_ISGID;
-#if 0
-			/* Linux allows this, Irix doesn't. */
-			if ((iattr->ia_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
-				m |= S_ISVTX;
-#endif
-			if (m && !capable(CAP_FSETID))
-				iattr->ia_mode &= ~m;
-		}
-	}
-
 	/*
 	 * Change file ownership.  Must be the owner or privileged.
 	 */
@@ -224,22 +177,6 @@ xfs_setattr(
 		uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
 
 		/*
-		 * CAP_CHOWN overrides the following restrictions:
-		 *
-		 * If _POSIX_CHOWN_RESTRICTED is defined, this capability
-		 * shall override the restriction that a process cannot
-		 * change the user ID of a file it owns and the restriction
-		 * that the group ID supplied to the chown() function
-		 * shall be equal to either the group ID or one of the
-		 * supplementary group IDs of the calling process.
-		 */
-		if ((iuid != uid ||
-		     (igid != gid && !in_group_p((gid_t)gid))) &&
-		    !capable(CAP_CHOWN)) {
-			code = XFS_ERROR(EPERM);
-			goto error_return;
-		}
-		/*
 		 * Do a quota reservation only if uid/gid is actually
 		 * going to change.
 		 */
@@ -276,36 +213,22 @@ xfs_setattr(
 			code = XFS_ERROR(EINVAL);
 			goto error_return;
 		}
+
 		/*
 		 * Make sure that the dquots are attached to the inode.
 		 */
-		if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED)))
+		code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
+		if (code)
 			goto error_return;
-	}
-	/*
-	 * Change file access or modified times.
-	 */
-	if (mask & (ATTR_ATIME|ATTR_MTIME)) {
-		if (!file_owner) {
-			if ((mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)) &&
-			    !capable(CAP_FOWNER)) {
-				code = XFS_ERROR(EPERM);
-				goto error_return;
-			}
-		}
-	}
-
-	/*
-	 * Now we can make the changes.  Before we join the inode
-	 * to the transaction, if ATTR_SIZE is set then take care of
-	 * the part of the truncation that must be done without the
-	 * inode lock.  This needs to be done before joining the inode
-	 * to the transaction, because the inode cannot be unlocked
-	 * once it is a part of the transaction.
-	 */
-	if (mask & ATTR_SIZE) {
-		code = 0;
+		/*
+		 * Now we can make the changes.  Before we join the inode
+		 * to the transaction, if ATTR_SIZE is set then take care of
+		 * the part of the truncation that must be done without the
+		 * inode lock.  This needs to be done before joining the inode
+		 * to the transaction, because the inode cannot be unlocked
+		 * once it is a part of the transaction.
+		 */
 		if (iattr->ia_size > ip->i_size) {
 			/*
 			 * Do the first part of growing a file: zero any data
@@ -360,17 +283,10 @@ xfs_setattr(
 		}
 		commit_flags = XFS_TRANS_RELEASE_LOG_RES;
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
-	}
-	if (tp) {
 
 		xfs_trans_ijoin(tp, ip, lock_flags);
 		xfs_trans_ihold(tp, ip);
-	}
 
-	/*
-	 * Truncate file.  Must have write permission and not be a directory.
-	 */
-	if (mask & ATTR_SIZE) {
 		/*
 		 * Only change the c/mtime if we are changing the size
 		 * or we are explicitly asked to change it.  This handles
@@ -410,20 +326,9 @@ xfs_setattr(
 		 */
 		xfs_iflags_set(ip, XFS_ITRUNCATED);
 	}
-	}
-
-	/*
-	 * Change file access modes.
-	 */
-	if (mask & ATTR_MODE) {
-		ip->i_d.di_mode &= S_IFMT;
-		ip->i_d.di_mode |= iattr->ia_mode & ~S_IFMT;
-
-		inode->i_mode &= S_IFMT;
-		inode->i_mode |= iattr->ia_mode & ~S_IFMT;
-
-		xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
-		timeflags |= XFS_ICHGTIME_CHG;
+	} else if (tp) {
+		xfs_trans_ijoin(tp, ip, lock_flags);
+		xfs_trans_ihold(tp, ip);
 	}
 
 	/*
@@ -471,6 +376,24 @@ xfs_setattr(
 		timeflags |= XFS_ICHGTIME_CHG;
 	}
 
+	/*
+	 * Change file access modes.
+	 */
+	if (mask & ATTR_MODE) {
+		umode_t mode = iattr->ia_mode;
+
+		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+			mode &= ~S_ISGID;
+
+		ip->i_d.di_mode &= S_IFMT;
+		ip->i_d.di_mode |= mode & ~S_IFMT;
+
+		inode->i_mode &= S_IFMT;
+		inode->i_mode |= mode & ~S_IFMT;
+
+		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+		timeflags |= XFS_ICHGTIME_CHG;
+	}
 
 	/*
 	 * Change file access or modified times.
-- 
cgit v0.10.2
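
A compact, stand-alone illustration of the calling convention the setattr patch above adopts: run the generic permission check first and convert its negative errno into the filesystem's positive error convention before doing any filesystem-specific work. The check_fn below merely plays the role of inode_change_ok() for the demonstration; it is not the kernel function.

#include <errno.h>
#include <stdio.h>

/* stand-in for inode_change_ok(): returns 0 or a negative errno */
static int check_fn(int uid_matches, int has_cap_fowner)
{
    if (!uid_matches && !has_cap_fowner)
        return -EPERM;
    return 0;
}

static int fs_setattr(int uid_matches, int has_cap_fowner)
{
    int code;

    /* generic check up front, before taking locks or reserving quota */
    code = -check_fn(uid_matches, has_cap_fowner);
    if (code)
        return code;            /* positive errno, XFS style */

    /* ... filesystem-specific attribute changes would follow here ... */
    return 0;
}

int main(void)
{
    printf("owner: %d\n", fs_setattr(1, 0));    /* 0 */
    printf("other: %d\n", fs_setattr(0, 0));    /* EPERM */
    return 0;
}
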

From cfbe52672fbc6f333892e8dde82c35e0a76aa5f5 Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy
Date: Fri, 12 Dec 2008 15:27:25 +1100
Subject: [XFS] set b_error from bio error in xfs_buf_bio_end_io

Preserve any error returned by the bio layer.

Reviewed-by: Eric Sandeen
Reviewed-by: Tim Shimmin
Signed-off-by: Lachlan McIlroy

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index cd89c56..aae8d07 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1126,8 +1126,7 @@ xfs_buf_bio_end_io(
 	unsigned int		blocksize = bp->b_target->bt_bsize;
 	struct bio_vec		*bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 
-	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-		bp->b_error = EIO;
+	xfs_buf_ioerror(bp, -error);
 
 	do {
 		struct page	*page = bvec->bv_page;
-- 
cgit v0.10.2
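
The one-line change above swaps a hard-coded EIO for the error handed to the completion callback. A small stand-alone sketch of that idea, with made-up types: the completion path records whatever error the lower layer reported instead of collapsing every failure into one generic code.

#include <errno.h>
#include <stdio.h>

struct buf_sketch {
    int b_error;                /* 0 or a positive errno */
};

static void buf_ioerror(struct buf_sketch *bp, int error)
{
    bp->b_error = error;        /* keep the specific error code */
}

/* completion callback: 'error' is zero or negative, as the bio layer uses */
static void bio_end_io_sketch(struct buf_sketch *bp, int error)
{
    buf_ioerror(bp, -error);    /* preserve ENODEV, ENOMEM, ... not just EIO */
}

int main(void)
{
    struct buf_sketch bp = { 0 };

    bio_end_io_sketch(&bp, -ENODEV);
    /* on Linux this prints 19 (ENODEV) rather than a blanket 5 (EIO) */
    printf("b_error = %d\n", bp.b_error);
    return 0;
}
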

From d415867e0abc35e3b2f0d4196e98c339d6fe29a2 Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy
Date: Mon, 22 Dec 2008 17:50:56 +1100
Subject: [XFS] Use the incore inode size in xfs_file_readdir()

We should be using the incore inode size here, not the Linux inode size.
The incore inode size is always up to date for directories, whereas the
Linux inode size is not updated for directories. We've hit assertions in
xfs_bmap() and traced it back to the Linux inode size being zero while
the incore size was correct.

Reviewed-by: Christoph Hellwig
Signed-off-by: Lachlan McIlroy

diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index a0c45cc..e14c4e3 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -204,7 +204,7 @@ xfs_file_readdir(
 	 *  point we can change the ->readdir prototype to include the
 	 *  buffer size.
 	 */
-	bufsize = (size_t)min_t(loff_t, PAGE_SIZE, inode->i_size);
+	bufsize = (size_t)min_t(loff_t, PAGE_SIZE, ip->i_d.di_size);
 
 	error = xfs_readdir(ip, dirent, bufsize,
 				(xfs_off_t *)&filp->f_pos, filldir);
-- 
cgit v0.10.2
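
To see why the old expression misbehaved for directories, here is a tiny stand-alone sketch of the bufsize clamp with made-up sizes: when the VFS inode size is stale (zero for an XFS directory) the clamp collapses to zero, while the incore size gives the intended result.

#include <stdio.h>

#define PAGE_SIZE_SKETCH 4096L

static long min_l(long a, long b) { return a < b ? a : b; }

int main(void)
{
    long vfs_i_size  = 0;       /* not kept up to date for XFS directories */
    long incore_size = 96;      /* ip->i_d.di_size: the real directory size */

    printf("old bufsize = %ld\n", min_l(PAGE_SIZE_SKETCH, vfs_i_size));   /* 0  */
    printf("new bufsize = %ld\n", min_l(PAGE_SIZE_SKETCH, incore_size));  /* 96 */
    return 0;
}
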

From 4fdc7781799926dca6c3a3bb6e9533a9718c4dea Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy
Date: Mon, 22 Dec 2008 17:52:58 +1100
Subject: [XFS] Remove XFS_BUF_SHUT() and friends

Code does nothing so remove it.

Reviewed-by: Christoph Hellwig
Signed-off-by: Lachlan McIlroy

diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index af8764e..288ae7c 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -312,10 +312,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *);
 #define XFS_BUF_UNORDERED(bp)	((bp)->b_flags &= ~XBF_ORDERED)
 #define XFS_BUF_ISORDERED(bp)	((bp)->b_flags & XBF_ORDERED)
 
-#define XFS_BUF_SHUT(bp)	do { } while (0)
-#define XFS_BUF_UNSHUT(bp)	do { } while (0)
-#define XFS_BUF_ISSHUT(bp)	(0)
-
 #define XFS_BUF_HOLD(bp)	xfs_buf_hold(bp)
 #define XFS_BUF_READ(bp)	((bp)->b_flags |= XBF_READ)
 #define XFS_BUF_UNREAD(bp)	((bp)->b_flags &= ~XBF_READ)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index d74ce11..92af409 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -998,21 +998,7 @@ xfs_buf_iodone_callbacks(
 		xfs_buf_do_callbacks(bp, lip);
 		XFS_BUF_SET_FSPRIVATE(bp, NULL);
 		XFS_BUF_CLR_IODONE_FUNC(bp);
-
-		/*
-		 * XFS_SHUT flag gets set when we go thru the
-		 * entire buffer cache and deliberately start
-		 * throwing away delayed write buffers.
-		 * Since there's no biowait done on those,
-		 * we should just brelse them.
-		 */
-		if (XFS_BUF_ISSHUT(bp)) {
-			XFS_BUF_UNSHUT(bp);
-			xfs_buf_relse(bp);
-		} else {
-			xfs_biodone(bp);
-		}
-
+		xfs_biodone(bp);
 		return;
 	}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3adb868..5a5e035 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2804,7 +2804,6 @@ cluster_corrupt_out:
 		XFS_BUF_CLR_BDSTRAT_FUNC(bp);
 		XFS_BUF_UNDONE(bp);
 		XFS_BUF_STALE(bp);
-		XFS_BUF_SHUT(bp);
 		XFS_BUF_ERROR(bp,EIO);
 		xfs_biodone(bp);
 	} else {
-- 
cgit v0.10.2

From 9f6c92b9cc2fd41d6c7b493be5637cc5b5659880 Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy
Date: Mon, 22 Dec 2008 17:56:49 +1100
Subject: [XFS] Fix speculative allocation beyond eof

Speculative allocation beyond eof doesn't work properly.  It was broken
some time ago after a code cleanup that moved what is now
xfs_iomap_eof_align_last_fsb() and xfs_iomap_eof_want_preallocate() out
of xfs_iomap_write_delay() into separate functions.  The code used to use
the current file size in various checks but got changed to be
max(file_size, i_new_size).  Since i_new_size is the result of
'offset + count' then in xfs_iomap_eof_want_preallocate() the check for
'(offset + count) <= isize' will always be true.  ie if 'offset + count'
is > ip->i_size then isize will be i_new_size and equal to
'offset + count'.  This change fixes all the places that used to use the
current file size.

Reviewed-by: Christoph Hellwig
Signed-off-by: Lachlan McIlroy

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 67f22b2..911062c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -290,7 +290,6 @@ STATIC int
 xfs_iomap_eof_align_last_fsb(
 	xfs_mount_t	*mp,
 	xfs_inode_t	*ip,
-	xfs_fsize_t	isize,
 	xfs_extlen_t	extsize,
 	xfs_fileoff_t	*last_fsb)
 {
@@ -306,14 +305,14 @@ xfs_iomap_eof_align_last_fsb(
 	 * stripe width and we are allocating past the allocation eof.
 	 */
 	else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
-		(isize >= XFS_FSB_TO_B(mp, mp->m_swidth)))
+		(ip->i_size >= XFS_FSB_TO_B(mp, mp->m_swidth)))
 		new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
 	/*
 	 * Roundup the allocation request to a stripe unit (m_dalign) boundary
 	 * if the file size is >= stripe unit size, and we are allocating past
 	 * the allocation eof.
 	 */
-	else if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)))
+	else if (mp->m_dalign && (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_dalign)))
 		new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
 	/*
@@ -403,7 +402,6 @@ xfs_iomap_write_direct(
 	xfs_filblks_t	count_fsb, resaligned;
 	xfs_fsblock_t	firstfsb;
 	xfs_extlen_t	extsz, temp;
-	xfs_fsize_t	isize;
 	int		nimaps;
 	int		bmapi_flag;
 	int		quota_flag;
@@ -426,15 +424,10 @@ xfs_iomap_write_direct(
 	rt = XFS_IS_REALTIME_INODE(ip);
 	extsz = xfs_get_extsz_hint(ip);
 
-	isize = ip->i_size;
-	if (ip->i_new_size > isize)
-		isize = ip->i_new_size;
-
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
-	if ((offset + count) > isize) {
-		error = xfs_iomap_eof_align_last_fsb(mp, ip, isize, extsz,
-							&last_fsb);
+	if ((offset + count) > ip->i_size) {
+		error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
 		if (error)
 			goto error_out;
 	} else {
@@ -559,7 +552,6 @@ STATIC int
 xfs_iomap_eof_want_preallocate(
 	xfs_mount_t	*mp,
 	xfs_inode_t	*ip,
-	xfs_fsize_t	isize,
 	xfs_off_t	offset,
 	size_t		count,
 	int		ioflag,
@@ -573,7 +565,7 @@ xfs_iomap_eof_want_preallocate(
 	int		n, error, imaps;
 
 	*prealloc = 0;
-	if ((ioflag & BMAPI_SYNC) || (offset + count) <= isize)
+	if ((ioflag & BMAPI_SYNC) || (offset + count) <= ip->i_size)
 		return 0;
 
 	/*
@@ -617,7 +609,6 @@ xfs_iomap_write_delay(
 	xfs_fileoff_t	ioalign;
 	xfs_fsblock_t	firstblock;
 	xfs_extlen_t	extsz;
-	xfs_fsize_t	isize;
 	int		nimaps;
 	xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
 	int		prealloc, fsynced = 0;
@@ -637,11 +628,7 @@ xfs_iomap_write_delay(
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
 retry:
-	isize = ip->i_size;
-	if (ip->i_new_size > isize)
-		isize = ip->i_new_size;
-
-	error = xfs_iomap_eof_want_preallocate(mp, ip, isize, offset, count,
+	error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
 				ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
 	if (error)
 		return error;
@@ -655,8 +642,7 @@ retry:
 	}
 
 	if (prealloc || extsz) {
-		error = xfs_iomap_eof_align_last_fsb(mp, ip, isize, extsz,
-							&last_fsb);
+		error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
 		if (error)
 			return error;
 	}
-- 
cgit v0.10.2
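
The bug described above is easiest to see as arithmetic: with isize = max(i_size, i_new_size) and i_new_size = offset + count, the test '(offset + count) <= isize' can never be false, so the want-preallocate path always bailed out. A stand-alone sketch with made-up sizes:

#include <stdio.h>

static long max_l(long a, long b) { return a > b ? a : b; }

static int want_prealloc_old(long i_size, long offset, long count)
{
    long i_new_size = offset + count;
    long isize = max_l(i_size, i_new_size);

    return (offset + count) > isize;    /* never true */
}

static int want_prealloc_new(long i_size, long offset, long count)
{
    return (offset + count) > i_size;   /* true when writing past EOF */
}

int main(void)
{
    long i_size = 4096, offset = 8192, count = 4096;   /* write well past EOF */

    printf("old: %d\n", want_prealloc_old(i_size, offset, count));  /* 0 */
    printf("new: %d\n", want_prealloc_new(i_size, offset, count));  /* 1 */
    return 0;
}

With the stale max() removed, writes that extend the file once again qualify for speculative preallocation.
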

From efc557570dc99b46e46a7be51c3c7402b485e829 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 17 Dec 2008 12:27:36 -0500
Subject: [XFS] avoid memory allocations in xfs_fs_vcmn_err

xfs_fs_vcmn_err can be called under a spinlock, but does a sleeping
memory allocation to create a buffer for its internal sprintf.
Fortunately it's the only caller of icmn_err, so we can merge the two
and have one single static buffer and spinlock protecting it.

While we're at it, make sure we have proper __attribute__ format
annotations so that the compiler can detect mismatched format strings.

Reported-by: Alexander Beregalov
Signed-off-by: Christoph Hellwig
Reviewed-by: Eric Sandeen
Signed-off-by: Lachlan McIlroy

diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 6361042..ae54829 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -18,6 +18,13 @@
 #include
 #include "debug.h"
 
+/* xfs_mount.h drags a lot of crap in, sorry.. */
+#include "xfs_sb.h"
+#include "xfs_inum.h"
+#include "xfs_ag.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+
 static char		message[1024];	/* keep it off the stack */
 static DEFINE_SPINLOCK(xfs_err_lock);
 
@@ -55,22 +62,42 @@ cmn_err(register int level, char *fmt, ...)
 }
 
 void
-icmn_err(register int level, char *fmt, va_list ap)
+xfs_fs_vcmn_err(
+	int			level,
+	struct xfs_mount	*mp,
+	char			*fmt,
+	va_list			ap)
 {
-	ulong flags;
-	int len;
+	unsigned long		flags;
+	int			len = 0;
 
 	level &= XFS_ERR_MASK;
-	if(level > XFS_MAX_ERR_LEVEL)
+	if (level > XFS_MAX_ERR_LEVEL)
 		level = XFS_MAX_ERR_LEVEL;
+
 	spin_lock_irqsave(&xfs_err_lock,flags);
-	len = vsnprintf(message, sizeof(message), fmt, ap);
+
+	if (mp) {
+		len = sprintf(message, "Filesystem \"%s\": ", mp->m_fsname);
+
+		/*
+		 * Skip the printk if we can't print anything useful
+		 * due to an over-long device name.
+		 */
+		if (len >= sizeof(message))
+			goto out;
+	}
+
+	len = vsnprintf(message + len, sizeof(message) - len, fmt, ap);
 	if (len >= sizeof(message))
 		len = sizeof(message) - 1;
 	if (message[len-1] == '\n')
 		message[len-1] = 0;
+
 	printk("%s%s\n", err_level[level], message);
+ out:
 	spin_unlock_irqrestore(&xfs_err_lock,flags);
+
 	BUG_ON(level == CE_PANIC);
 }
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 75845f9..6f4fd37 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -27,8 +27,6 @@
 #define CE_ALERT	1	/* alert	*/
 #define CE_PANIC	0	/* panic	*/
 
-extern void icmn_err(int, char *, va_list)
-	__attribute__ ((format (printf, 2, 0)));
 extern void cmn_err(int, char *, ...)
 	__attribute__ ((format (printf, 2, 3)));
 extern void assfail(char *expr, char *f, int l);
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index f227ecd..92d5cd5 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -153,21 +153,6 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
 }
 #endif /* DEBUG */
 
-static void
-xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap)
-{
-	if (mp != NULL) {
-		char	*newfmt;
-		int	len = 16 + mp->m_fsname_len + strlen(fmt);
-
-		newfmt = kmem_alloc(len, KM_SLEEP);
-		sprintf(newfmt, "Filesystem \"%s\": %s", mp->m_fsname, fmt);
-		icmn_err(level, newfmt, ap);
-		kmem_free(newfmt);
-	} else {
-		icmn_err(level, fmt, ap);
-	}
-}
 
 void
 xfs_fs_cmn_err(int level, xfs_mount_t *mp, char *fmt, ...)
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 11543f1..0c93051 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -159,11 +159,15 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
 #define XFS_PTAG_FSBLOCK_ZERO		0x00000080
 
 struct xfs_mount;
-/* PRINTFLIKE4 */
+
+extern void xfs_fs_vcmn_err(int level, struct xfs_mount *mp,
+			char *fmt, va_list ap)
+	__attribute__ ((format (printf, 3, 0)));
 extern void xfs_cmn_err(int panic_tag, int level, struct xfs_mount *mp,
-			char *fmt, ...);
-/* PRINTFLIKE3 */
-extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
+			char *fmt, ...)
+	__attribute__ ((format (printf, 4, 5)));
+extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...)
+	__attribute__ ((format (printf, 3, 4)));
 
 extern void xfs_hex_dump(void *p, int length);
-- 
cgit v0.10.2
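
A stand-alone sketch of the reporting scheme the patch above switches to: format into one static buffer under a lock instead of allocating memory, and annotate the varargs functions so the compiler can check format strings. The pthread mutex stands in for the kernel's irq-safe spinlock, and the names and filesystem label are illustrative only.

#include <pthread.h>
#include <stdarg.h>
#include <stdio.h>

static char message[1024];                 /* keep it off the stack */
static pthread_mutex_t err_lock = PTHREAD_MUTEX_INITIALIZER;

__attribute__((format(printf, 2, 0)))
static void fs_vcmn_err(const char *fsname, const char *fmt, va_list ap)
{
    int len = 0;

    pthread_mutex_lock(&err_lock);
    if (fsname)
        len = snprintf(message, sizeof(message), "Filesystem \"%s\": ", fsname);
    /* fall back to just the prefix if the name alone filled the buffer */
    if (len >= 0 && (size_t)len < sizeof(message))
        vsnprintf(message + len, sizeof(message) - len, fmt, ap);
    fprintf(stderr, "%s\n", message);
    pthread_mutex_unlock(&err_lock);
}

__attribute__((format(printf, 2, 3)))
static void fs_cmn_err(const char *fsname, const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    fs_vcmn_err(fsname, fmt, ap);
    va_end(ap);
}

int main(void)
{
    fs_cmn_err("dm-3", "metadata I/O error: block 0x%llx", 0x1234ULL);
    return 0;
}

Because nothing in the reporting path sleeps or allocates, it is safe to call with locks held, which is exactly the constraint the commit message describes.
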

From ad1ad968f4e7b06c75741575ea077e25a87da49a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Mon, 22 Dec 2008 09:59:06 -0500
Subject: [XFS] handle unaligned data in xfs_bmbt_disk_get_all

In libxfs xfs_bmbt_disk_get_all needs to handle unaligned data and thus
has been updated to use get_unaligned_be64.  In kernelspace we don't
strictly need it as the routine is only used for tracing and xfsidbg,
but let's keep the two implementations in sync.

Signed-off-by: Christoph Hellwig
Signed-off-by: Lachlan McIlroy

diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index e46e02b..8f1ec73 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -208,7 +208,8 @@ xfs_bmbt_disk_get_all(
 	xfs_bmbt_rec_t	*r,
 	xfs_bmbt_irec_t *s)
 {
-	__xfs_bmbt_get_all(be64_to_cpu(r->l0), be64_to_cpu(r->l1), s);
+	__xfs_bmbt_get_all(get_unaligned_be64(&r->l0),
+				get_unaligned_be64(&r->l1), s);
 }
 
 /*
-- 
cgit v0.10.2

From 25051158bbed127e8672b43396c71c5eb610e5f1 Mon Sep 17 00:00:00 2001
From: Lachlan McIlroy
Date: Wed, 24 Dec 2008 14:07:32 +1100
Subject: [XFS] Fix race in xfs_write() between direct and buffered I/O with DMAPI

The iolock is dropped and re-acquired around the call to XFS_SEND_NAMESP().
While the iolock is released the file can become cached. We then
'goto retry' and - if we are doing direct I/O - mapping->nrpages may now
be non-zero but need_i_mutex will be zero and we will hit the WARN_ON().

Since we have dropped the I/O lock, the file size may also have changed,
so what we need to do here is 'goto start' like we do for the
XFS_SEND_DATA() DMAPI event.

We also need to update the file size before releasing the iolock, so that
needs to be done before the XFS_SEND_NAMESP event. If we drop the iolock
before setting the file size we could race with a truncate.

Reviewed-by: Christoph Hellwig
Signed-off-by: Lachlan McIlroy

diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 92ce34b..7e90daa 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -707,7 +707,6 @@ start:
 		}
 	}
 
-retry:
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = mapping->backing_dev_info;
 
@@ -763,6 +762,17 @@ retry:
 	if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
 		ret = wait_on_sync_kiocb(iocb);
 
+	isize = i_size_read(inode);
+	if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
+		*offset = isize;
+
+	if (*offset > xip->i_size) {
+		xfs_ilock(xip, XFS_ILOCK_EXCL);
+		if (*offset > xip->i_size)
+			xip->i_size = *offset;
+		xfs_iunlock(xip, XFS_ILOCK_EXCL);
+	}
+
 	if (ret == -ENOSPC &&
 	    DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
 		xfs_iunlock(xip, iolock);
@@ -776,20 +786,7 @@ retry:
 		xfs_ilock(xip, iolock);
 		if (error)
 			goto out_unlock_internal;
-		pos = xip->i_size;
-		ret = 0;
-		goto retry;
-	}
-
-	isize = i_size_read(inode);
-	if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
-		*offset = isize;
-
-	if (*offset > xip->i_size) {
-		xfs_ilock(xip, XFS_ILOCK_EXCL);
-		if (*offset > xip->i_size)
-			xip->i_size = *offset;
-		xfs_iunlock(xip, XFS_ILOCK_EXCL);
+		goto start;
 	}
 
 	error = -ret;
-- 
cgit v0.10.2
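
A condensed sketch of the ordering the last patch enforces: publish the new in-core size under the inode lock before the outer I/O lock is released for the DMAPI callout, so a truncate that runs while the I/O lock is dropped cannot race with a stale size update afterwards. The locks and fields below are stand-ins for the XFS ones, not the kernel structures.

#include <pthread.h>
#include <stdio.h>

struct inode_sketch {
    pthread_mutex_t ilock;      /* stands in for XFS_ILOCK_EXCL */
    long            i_size;
};

static void publish_size(struct inode_sketch *ip, long new_offset)
{
    pthread_mutex_lock(&ip->ilock);
    if (new_offset > ip->i_size)        /* re-check under the lock */
        ip->i_size = new_offset;
    pthread_mutex_unlock(&ip->ilock);
}

int main(void)
{
    struct inode_sketch ip = { PTHREAD_MUTEX_INITIALIZER, 4096 };

    /* write completed at offset 8192: publish before dropping the iolock */
    publish_size(&ip, 8192);
    /* ... only after this point would the iolock be released for the callout ... */

    printf("i_size = %ld\n", ip.i_size);
    return 0;
}
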